/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "trace.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8    /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32   /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects performance
   considerably and should be tuned carefully: '300000' (300us) is the
   recommended value for passing the WHQL test, while '50000' can gain 2x
   netperf throughput with tso/gso/gro 'off'.
 */
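/*
 * Default for n->rsc_timeout: it is applied as a nanosecond offset on
 * QEMU_CLOCK_HOST when (re)arming a chain's drain_timer below.
 */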
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = virtio_endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = virtio_endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = virtio_endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = virtio_endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back on fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and have disabled notification. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }
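    /* If the peer backend can be told to use the guest's vnet header
     * length, headers pass through unmodified and host and guest header
     * lengths stay equal; the copies in the TX path below only happen
     * when the two lengths differ.
     */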
    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
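        /* iov_discard_front() below modifies the iovec array in place, so
         * work on a copy and keep the original pointer (iov2) for g_free().
         */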
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
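/*
 * Hand a cached segment over to the guest. If the segment was coalesced,
 * report the packet and duplicate-ACK counts through the RSC extension
 * fields of the virtio-net header.
 */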
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}
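/*
 * Merge a new segment into a cached one when it exactly continues the
 * cached TCP sequence; pure ACKs, out-of-window and out-of-order data
 * finalize the chain instead.
 */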
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length field differs
           between v4 and v6, so use the field value to update and record
           the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Keep the 'PUSH' flag: the whql test guide says 'PUSH' can be
           coalesced for windows guests, while this may change the behavior
           for linux guests (only if they use the RSC feature).
         */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; other control flags should be sent
 * after drain to prevent out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's data; this is to avoid out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol are checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1];
        struct iovec *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If the host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that the host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when the device was stopped but the VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when the device was stopped but the VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* This happens when the device was stopped but the BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

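/*
 * Bottom-half flavour of the TX path (the default; see the "tx"
 * property): where virtio_net_tx_timer() above trades latency for
 * batching behind a fixed tx_timeout, the BH below runs as soon as the
 * main loop gets control, and re-schedules itself while the guest keeps
 * producing full bursts.
 */
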
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when the device was stopped but the BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        /* Notification re-enabling is handled by tx_complete, or the
         * device is broken */
        return;
    }

    /* If we flushed a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

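/*
 * Virtqueue numbering assumed by the helpers around here, shown for a
 * hypothetical max_queues == 2 device:
 *
 *   vq 0: rx0   vq 1: tx0   vq 2: rx1   vq 3: tx1   vq 4: ctrl
 *
 * i.e. queue pair i owns vqs 2*i and 2*i+1, and the control queue is
 * always last (index 2 * max_queues).
 */
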
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to the link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer);
        }
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

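/*
 * Sketch of how the VMSTATE_WITH_TMP sections below operate: migration
 * allocates a scratch struct VirtIONetMigTmp with .parent pointing at
 * the VirtIONet, pre_save copies the interesting state into it, the
 * field list serializes that copy, and post_load validates the incoming
 * values against what this end of the migration actually supports.
 */
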
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

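/*
 * Worked example for the varray above: with curr_queues == 4, pre_save
 * sets vqs_1 = &vqs[1] and curr_queues_1 = 3, so the tx_waiting flags of
 * queues 1..3 travel in this subsection, while queue 0's flag is
 * migrated with the main device state.
 */
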
/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: if it fits we load it, else we throw it away
         * - can happen if the source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: this is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a
         * bitmap of uint32 words.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));

    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));

    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

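/*
 * Sketch of the config-size computation above: feature_sizes maps each
 * config-affecting feature bit to virtio_endof() of its field, and
 * virtio_feature_get_config_size() picks the largest end offset among
 * the features present, so e.g. enabling VIRTIO_NET_F_MTU grows the
 * visible config space up to and including the mtu field.
 */
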
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL; in that case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

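    /*
     * Assuming VIRTQUEUE_MAX_SIZE is 1024 (its current definition; an
     * assumption here), the two checks above accept ring sizes of 256,
     * 512 or 1024 entries; anything smaller, larger or not a power of
     * two has already failed realize at this point.
     */
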
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name() has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}

static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    qemu_announce_timer_del(&n->announce_timer);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * It can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

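/*
 * Migration plumbing note: vmstate_virtio_net below is the outer
 * wrapper installed as dc->vmsd; its VMSTATE_VIRTIO_DEVICE field defers
 * to the common virtio save/load code, which in turn serializes the
 * device body through vdc->vmsd, i.e. vmstate_virtio_net_device above.
 */
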
static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};

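/*
 * The properties above become -device options; a hypothetical command
 * line (for illustration only) could look like:
 *
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=512,tx=timer
 *
 * with every property left unspecified keeping the default given in
 * the table.
 */
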
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)