/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects the
   performance a lot and should be tuned carefully: 300000 (300us) is the
   recommended value to pass the WHQL test; 50000 can gain 2x netperf
   throughput with tso/gso/gro off. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif
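/*
 * With RSC the receive path does not use checksum offload hints, so the
 * spec reuses the csum_start/csum_offset fields of struct virtio_net_hdr
 * to report the number of coalesced packets and of duplicated ACKs; the
 * two accessors above make that field aliasing explicit.
 */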
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof_field(container, field))

typedef struct VirtIOFeature {
    uint64_t flags;
    size_t end;
} VirtIOFeature;

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
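/*
 * Virtqueues are laid out as rx/tx pairs followed by the control queue:
 * vq 0 = rx0, vq 1 = tx0, vq 2 = rx1, vq 3 = tx1, ..., vq 2N = ctrl.
 * vq2q() maps a virtqueue index back to its queue-pair index, e.g.
 * vq2q(0) == vq2q(1) == 0 and vq2q(2) == vq2q(3) == 1.
 */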
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->announce_counter--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
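/*
 * virtio_net_set_vnet_endian() above returns true when the backend could
 * not be switched (on failure, queues that were already switched are
 * rolled back), so its result directly yields n->needs_vnet_hdr_swap:
 * if the backend cannot parse vnet headers in the guest's endianness,
 * virtio-net itself must byte-swap them.
 */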
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}
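/*
 * n->vlans is a bitmap of MAX_VLAN (4096) bits kept in 32-bit words:
 * VLAN id 'vid' lives at bit (vid & 0x1f) of word (vid >> 5).  The helper
 * above walks that bitmap to build the integer list that query-rx-filter
 * reports.
 */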
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* n->nobcast means "no broadcast": report the inverse */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
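/*
 * Header length summary, as derived from the function above: a virtio 1.0
 * (VERSION_1) device always uses the 12-byte struct
 * virtio_net_hdr_mrg_rxbuf, whether or not MRG_RXBUF was negotiated, while
 * a legacy device uses the 10-byte struct virtio_net_hdr unless MRG_RXBUF
 * was negotiated.  guest_hdr_len is what the guest expects, host_hdr_len
 * what the backend produces; when they differ, the receive and transmit
 * paths convert between the two.
 */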
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all of virtio-net's possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
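/*
 * mtu_bypass_backend re-offers VIRTIO_NET_F_MTU even when the vhost
 * backend has filtered it out: the MTU value is served from QEMU's own
 * config space, so merely reading it does not require backend support.
 */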
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}
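/*
 * All control virtqueue commands share the same framing: a
 * struct virtio_net_ctrl_hdr { class, cmd } followed by a class-specific
 * payload, answered by a one-byte virtio_net_ctrl_ack status.  Each
 * handler below receives the payload as the iovec that remains after
 * virtio_net_handle_ctrl() has stripped the header.
 */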
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
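/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
 * back to back: first the unicast list, then the multicast list.  Both
 * share the single macs[] array, with entries [0, first_multi) unicast
 * and [first_multi, in_use) multicast.  A list that does not fit is not
 * truncated; the corresponding overflow flag is set instead.
 */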
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_counter) {
            timer_mod(n->announce_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      self_announce_delay(n->announce_counter));
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
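/*
 * Announce handshake: the device sets VIRTIO_NET_S_ANNOUNCE and raises a
 * config interrupt; the guest sends its gratuitous ARPs and replies with
 * VIRTIO_NET_CTRL_ANNOUNCE_ACK, and the handler above re-arms the timer
 * until announce_counter (typically armed after migration) runs out.
 */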
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
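/*
 * virtio_net_hdr_swap() is only needed on the slow path: when the backend
 * could not be told the guest's endianness (needs_vnet_hdr_swap), every
 * vnet header is byte-swapped here in virtio-net.  Only the multi-byte
 * fields are touched; flags and gso_type are single bytes.
 */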
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
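/*
 * The magic offsets above assume an untagged Ethernet frame: bytes 12-13
 * are the ethertype, byte 23 is the IPv4 protocol field (14-byte Ethernet
 * header + offset 9 into the IP header), and bytes 34-35 are the UDP
 * source port (14 + 20): DHCP replies come from port 67 (bootps).
 */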
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}
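/*
 * Receive-Side Coalescing (RSC): when the guest negotiates
 * VIRTIO_NET_F_RSC_EXT, incoming TCP segments are cached per-protocol in a
 * VirtioNetRscChain and merged into larger segments before delivery, with
 * the packet and duplicated-ACK counts reported through the reused csum
 * fields of the vnet header.  Segments that cannot be coalesced (control
 * flags, IP options, out-of-window data, timer expiry) are flushed
 * ("drained") to preserve ordering.
 */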
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header)
                        + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
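/*
 * ACK handling below follows the RSC rules from the WHQL documentation: a
 * duplicated ACK or a pure ACK finalizes the cached segment, while a bare
 * window update may be folded into it.
 */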
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; bump the dup ack count (per the WHQL test,
               at most 1 is coalesced) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length fields differ
           between v4 and v6, so use the field value to update and record
           the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Carry the 'PUSH' flag forward: the WHQL test guide says 'PUSH'
           can be coalesced for Windows guests, while this may change the
           behavior for Linux guests (only if they use the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
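/*
 * Note the unsigned arithmetic above: differences such as (nseq - oseq)
 * and (nack - oack) remain correct across sequence number wrap-around,
 * and comparing them against VIRTIO_NET_MAX_TCP_PAYLOAD treats anything
 * far away from the cached segment as out-of-window.
 */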
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
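/*
 * The XOR and memcmp tests above implement flow matching: two segments
 * belong to the same flow only if source/destination addresses and both
 * TCP ports all match, so a non-zero XOR simply means "different flow",
 * not an error.
 */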
/* Packets with the 'SYN' flag bypass coalescing; packets with any other
 * control flag are sent only after draining, to prevent reordering */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this avoids delivering out-of-order
 * segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
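/*
 * ip_start/ip_size/tcp_port describe where the address pair and the TCP
 * port pair live inside the cached buffer, so the same drain logic serves
 * IPv4 (8 address bytes at eth + 12) and IPv6 (32 address bytes at
 * eth + 8); the two 16-bit ports are compared as one 32-bit load.
 */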
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both extension headers and the protocol are checked by this: any
       extension header makes the next-header field != IPPROTO_TCP */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
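/*
 * The RSC entry points parse the incoming frame at guest_hdr_len even
 * though the size check in virtio_net_rsc_receive() uses host_hdr_len;
 * this path assumes the two are equal, which is the usual case when RSC
 * offloads are enabled (the backend then supports the guest's header
 * length).
 */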
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
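/*
 * Asynchronous transmit: when qemu_sendv_packet_async() cannot complete
 * immediately it returns 0, and virtio_net_flush_tx() parks the element in
 * q->async_tx; the completion callback above pushes that element and
 * restarts the flush, so at most one packet per queue is in flight
 * asynchronously.
 */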
/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If the host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that the host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* This happens when device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
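/*
 * The two TX handlers above implement the "tx=timer" and "tx=bh"
 * mitigation strategies selected at realize time: the timer variant
 * delays the flush by n->tx_timeout ns to batch guest kicks, while the
 * bottom-half variant (virtio_net_tx_bh, below) flushes as soon as the
 * event loop runs and reschedules itself while the guest keeps the ring
 * busy.
 */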
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
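/*
 * Virtqueue layout reminder (matches the index arithmetic above and in
 * virtio_net_change_num_queues() below): queue pair i uses virtqueue
 * 2 * i for RX and 2 * i + 1 for TX, and the control virtqueue always
 * sits last. E.g. with max_queues == 2 the layout is:
 *
 *   vq 0: rx0, vq 1: tx0, vq 2: rx1, vq 3: tx1, vq 4: ctrl
 */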
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to the link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry. We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
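/*
 * The has_vnet_hdr subsection below follows the same VMSTATE_WITH_TMP
 * pattern as the ufo one above: a struct VirtIONetMigTmp is allocated
 * around save/load with .parent pointing at the VirtIONet, pre_save
 * snapshots the flag into the tmp struct, and post_load only verifies
 * that the destination can honour it; nothing is written back to the
 * device state.
 */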
/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
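/*
 * A worked example for the config size computation below: feature_sizes[]
 * is ordered by increasing offset in struct virtio_net_config, so the
 * resulting config_size is the largest end offset among the negotiated
 * fields. With only VIRTIO_NET_F_MAC (always forced on here) the guest
 * sees just the 6-byte MAC; adding VIRTIO_NET_F_STATUS extends the space
 * through the 'status' word, and so on up to 'duplex' for
 * VIRTIO_NET_F_SPEED_DUPLEX.
 */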
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    int i, config_size = 0;

    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }
    n->config_size = config_size;
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL; in that case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}
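/*
 * Sketch of the realize flow below: validate properties, size the config
 * space, create max_queues RX/TX virtqueue pairs plus the control vq,
 * then create the NIC backend and apply the peer's vnet-header
 * capabilities. The error paths deliberately call virtio_cleanup()
 * before returning so a failed realize leaves no virtqueues behind.
 */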
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}

static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    timer_del(n->announce_timer);
    timer_free(n->announce_timer);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * It can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}
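/*
 * Example (assumes the common virtio-net-pci frontend): the bootindex
 * property registered above and the device properties defined further
 * below surface on the command line, e.g.
 *
 *   -device virtio-net-pci,netdev=net0,mac=52:54:00:12:34:56,bootindex=1
 *
 * where bootindex=1 asks the firmware to try network boot from this NIC
 * first.
 */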
static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, the backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};
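/*
 * Illustrative use of the properties above (not additional device code):
 * feature bits can be toggled per device on the command line, e.g.
 *
 *   -device virtio-net-pci,netdev=net0,mq=on,ctrl_vq=on,rx_queue_size=1024
 *
 * enables multiqueue and grows the RX ring; rx_queue_size must stay a
 * power of 2 within [256, 1024], as checked in realize.
 */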
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)