/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
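/*
 * The TCP data offset lives in the top four bits of th_offset_flags and is
 * counted in 32-bit words, so the header length in bytes is
 * ((flags & VIRTIO_NET_TCP_HDR_LENGTH) >> 12) * 4, i.e.
 * (flags & VIRTIO_NET_TCP_HDR_LENGTH) >> 10, as computed by the RSC code
 * further down in this file.
 */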
/*
 * Purge coalesced packets timer interval. This value affects performance
 * significantly and should be tuned carefully: 300000 (300us) is the
 * recommended value to pass the WHQL test; 50000 can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
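/*
 * Note: with virtio 1.0 a MAC change is expected to go through the
 * VIRTIO_NET_CTRL_MAC_ADDR_SET control command; the direct config-space
 * write below is honoured only for legacy guests that negotiated neither
 * VIRTIO_NET_F_CTRL_MAC_ADDR nor VIRTIO_F_VERSION_1.
 */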
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
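/*
 * Start or stop the vhost backend so that exactly one of vhost and the
 * userspace virtio-net data path is active: vhost runs while the guest
 * driver is OK, the link is up and the VM is running; a failure to start
 * falls back to userspace processing.
 */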
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = n->max_ncs - n->max_queue_pairs;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * its default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
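/*
 * needs_vnet_hdr_swap, set above, is consumed on the userspace data path:
 * see virtio_net_hdr_swap() and receive_header() below. vhost refuses to
 * start when a swap would be needed (see virtio_net_vhost_status()).
 */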
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and disabled notification. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
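/*
 * The VLAN filter is a MAX_VLAN-bit bitmap stored as 32-bit words:
 * VLAN id 'vid' maps to bit (vid & 0x1f) of word (vid >> 5), so e.g.
 * vid 100 is bit 4 of n->vlans[3].
 */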
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
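/*
 * Guest header sizes, for reference: struct virtio_net_hdr is 10 bytes,
 * virtio_net_hdr_mrg_rxbuf adds num_buffers (12 bytes), and
 * virtio_net_hdr_v1_hash appends hash_value/hash_report/padding (20 bytes).
 */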
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
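/*
 * Feature negotiation: start from the full host feature set, then strip
 * everything the peer backend cannot deliver (vnet headers, UFO, eBPF RSS),
 * letting vhost apply its own mask last.
 */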
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device currently visited by the bus walk
 * @opaque: the FailoverDevice to fill in when a match is found
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
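/*
 * Failover pairing: the primary (e.g. a VFIO NIC) declares
 * failover_pair_id=<netclient name of this virtio-net device>; it is only
 * plugged once the guest acknowledges VIRTIO_NET_F_STANDBY (see
 * virtio_net_set_features() below).
 */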
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            warn_report_err(err);
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
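/*
 * The control-queue handlers here all parse the same wire layout (byte order
 * as negotiated for the device):
 *   struct virtio_net_ctrl_hdr { u8 class; u8 cmd; }
 *   ... class-specific payload ...
 *   u8 ack;    <- written back by the device, VIRTIO_NET_OK or VIRTIO_NET_ERR
 * virtio_net_handle_ctrl() strips the header and dispatches on class.
 */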
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
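/*
 * Payload parsed below (struct virtio_net_rss_config from the virtio spec):
 *   le32 hash_types;
 *   le16 indirection_table_mask;    (table length - 1)
 *   le16 unclassified_queue;
 *   le16 indirection_table[indirection_table_mask + 1];
 *   le16 max_tx_vq;
 *   u8   hash_key_length;
 *   u8   hash_key_data[hash_key_length];
 * For VIRTIO_NET_CTRL_MQ_HASH_CONFIG (do_rss == false) only the hash types
 * and key are meaningful.
 */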
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fall back to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if it was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}
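/*
 * With RSS enabled, the receive queue for a packet is
 *   indirections_table[hash & (indirections_len - 1)]
 * as computed in virtio_net_process_rss() below; unclassified packets go to
 * default_queue.
 */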
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    /* Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue. */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}
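/*
 * Receive path overview: virtio_net_can_receive() gates on device/queue
 * state, virtio_net_has_buffers() checks ring space, and
 * virtio_net_receive_rcu() copies the packet (plus the guest header) into
 * one or more descriptor chains.
 */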
static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
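/*
 * Only the 16-bit header fields are byte-swapped above; num_buffers of the
 * mergeable header is stored separately with virtio_stw_p() in
 * virtio_net_receive_rcu().
 */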
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
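/*
 * Map the parsed protocols to a NetPktRss* hash type, preferring the most
 * specific type enabled in 'types': L4 (TCP, then UDP) before plain IP, and
 * the IPv6 extension-header variants before the basic IPv6 ones.
 */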
static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
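/*
 * virtio_net_process_rss() above runs only on the software RSS path
 * (enabled_software_rss); when the eBPF steering program is attached, the
 * backend has already placed the packet on the right queue.
 */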
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it.
         */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        g_free(elems[j]);
    }

    return err;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
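/*
 * Receive segment coalescing (VIRTIO_NET_F_RSC_EXT): consecutive TCP
 * segments of the same flow are accumulated in per-protocol chains
 * (n->rsc_chains) as VirtioNetRscSeg buffers and delivered either when a
 * coalescing rule fails or when the chain's drain timer fires
 * (n->rsc_timeout, see VIRTIO_NET_RSC_DEFAULT_INTERVAL).
 */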
1915 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 1916 VirtioNetRscSeg *seg)
1917 { 1918 int ret; 1919 struct virtio_net_hdr_v1 *h;
1920 1921 h = (struct virtio_net_hdr_v1 *)seg->buf; 1922 h->flags = 0;
1923 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 1924 1925 if (seg->is_coalesced) {
1926 h->rsc.segments = seg->packets; 1927 h->rsc.dup_acks = seg->dup_ack;
1928 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 1929 if (chain->proto == ETH_P_IP) {
1930 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1931 } else { 1932 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1933 } 1934 } 1935 1936 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1937 QTAILQ_REMOVE(&chain->buffers, seg, next); 1938 g_free(seg->buf); 1939 g_free(seg);
1940 1941 return ret; 1942 } 1943 1944 static void virtio_net_rsc_purge(void *opq) 1945 {
1946 VirtioNetRscSeg *seg, *rn; 1947 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1948 1949 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1950 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 1951 chain->stat.purge_failed++;
1952 continue; 1953 } 1954 } 1955 1956 chain->stat.timer++; 1957 if (!QTAILQ_EMPTY(&chain->buffers)) {
1958 timer_mod(chain->drain_timer, 1959 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1960 } 1961 } 1962 1963 static void virtio_net_rsc_cleanup(VirtIONet *n) 1964 {
1965 VirtioNetRscChain *chain, *rn_chain; 1966 VirtioNetRscSeg *seg, *rn_seg;
1967 1968 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1969 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1970 QTAILQ_REMOVE(&chain->buffers, seg, next); 1971 g_free(seg->buf); 1972 g_free(seg); 1973 }
1974 1975 timer_free(chain->drain_timer); 1976 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1977 g_free(chain); 1978 } 1979 } 1980 1981 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1982 NetClientState *nc, 1983 const uint8_t *buf, size_t size) 1984 { 1985 uint16_t hdr_len;
1986 VirtioNetRscSeg *seg; 1987 1988 hdr_len = chain->n->guest_hdr_len;
1989 seg = g_malloc(sizeof(VirtioNetRscSeg)); 1990 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1991 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 1992 memcpy(seg->buf, buf, size);
1993 seg->size = size; 1994 seg->packets = 1; 1995 seg->dup_ack = 0; 1996 seg->is_coalesced = 0;
1997 seg->nc = nc; 1998 1999 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2000 chain->stat.cache++;
2001 2002 switch (chain->proto) { 2003 case ETH_P_IP:
2004 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2005 break; 2006 case ETH_P_IPV6:
2007 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2008 break; 2009 default:
2010 g_assert_not_reached(); 2011 } 2012 }
2013 2014 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2015 VirtioNetRscSeg *seg,
2016 const uint8_t *buf, 2017 struct tcp_header *n_tcp, 2018 struct tcp_header *o_tcp) 2019 {
2020 uint32_t nack, oack; 2021 uint16_t nwin, owin; 2022 2023 nack = htonl(n_tcp->th_ack);
2024 nwin = htons(n_tcp->th_win); 2025 oack = htonl(o_tcp->th_ack); 2026 owin = htons(o_tcp->th_win);
2027 2028 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2029 chain->stat.ack_out_of_win++;
2030 return RSC_FINAL; 2031 } else if (nack == oack) { 2032 /* duplicated ack or window probe */
2033 if (nwin == owin) { 2034 /* duplicate ack: bump the dup-ack count (the WHQL test allows up to 1) */
2035 chain->stat.dup_ack++; 2036 return RSC_FINAL; 2037 } else { 2038 /* Coalesce window update */
2039 o_tcp->th_win = n_tcp->th_win; 2040 chain->stat.win_update++; 2041 return RSC_COALESCE; 2042 }
2043 } else { 2044 /* pure ack, go to 'C', finalize */ 2045 chain->stat.pure_ack++;
2046 return RSC_FINAL; 2047 } 2048 } 2049
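/*
 * Illustrative sketch (hypothetical helper, not used by the device): why
 * the single comparison in virtio_net_rsc_handle_ack() above is enough.
 * nack and oack are uint32_t, so "nack - oack" wraps: a stale ack
 * (nack < oack) yields a huge value and is rejected by the same
 * ">= VIRTIO_NET_MAX_TCP_PAYLOAD" test that rejects a jump too far ahead.
 */
static inline bool example_ack_in_window(uint32_t nack, uint32_t oack)
{
    /* e.g. nack = 5, oack = 10: 5 - 10 wraps to 0xFFFFFFFB, out of window */
    return (nack - oack) < VIRTIO_NET_MAX_TCP_PAYLOAD;
}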
2050 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2051 VirtioNetRscSeg *seg,
2052 const uint8_t *buf, 2053 VirtioNetRscUnit *n_unit) 2054 { 2055 void *data; 2056 uint16_t o_ip_len;
2057 uint32_t nseq, oseq; 2058 VirtioNetRscUnit *o_unit; 2059 2060 o_unit = &seg->unit;
2061 o_ip_len = htons(*o_unit->ip_plen); 2062 nseq = htonl(n_unit->tcp->th_seq);
2063 oseq = htonl(o_unit->tcp->th_seq); 2064 2065 /* out of order or retransmitted. */
2066 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2067 chain->stat.data_out_of_win++;
2068 return RSC_FINAL; 2069 } 2070 2071 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2072 if (nseq == oseq) { 2073 if ((o_unit->payload == 0) && n_unit->payload) {
2074 /* From no payload to payload: the normal case, not a dup ack etc. */
2075 chain->stat.data_after_pure_ack++; 2076 goto coalesce; 2077 } else {
2078 return virtio_net_rsc_handle_ack(chain, seg, buf, 2079 n_unit->tcp, o_unit->tcp); 2080 }
2081 } else if ((nseq - oseq) != o_unit->payload) { 2082 /* Not a consistent packet, out of order */
2083 chain->stat.data_out_of_order++; 2084 return RSC_FINAL; 2085 } else { 2086 coalesce:
2087 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2088 chain->stat.over_size++;
2089 return RSC_FINAL; 2090 }
2091 2092 /* This is the expected data; the payload length field differs between
2093 v4 and v6, so use the field value to update and record the new data length */
2094 o_unit->payload += n_unit->payload; /* update new data len */
2095 2096 /* update field in ip header */ 2097 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2098 2099 /* Carry the 'PUSH' flag forward: the WHQL test guide says 'PUSH' can be
2100 coalesced for a Windows guest, while this may change the behavior for a Linux
2101 guest (only if it uses the RSC feature). */
2102 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2103 2104 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2105 o_unit->tcp->th_win = n_unit->tcp->th_win;
2106 2107 memmove(seg->buf + seg->size, data, n_unit->payload); 2108 seg->size += n_unit->payload;
2109 seg->packets++; 2110 chain->stat.coalesced++; 2111 return RSC_COALESCE; 2112 } 2113 }
2114 2115 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2116 VirtioNetRscSeg *seg,
2117 const uint8_t *buf, size_t size, 2118 VirtioNetRscUnit *unit) 2119 {
2120 struct ip_header *ip1, *ip2; 2121 2122 ip1 = (struct ip_header *)(unit->ip);
2123 ip2 = (struct ip_header *)(seg->unit.ip);
2124 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2125 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2126 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2127 chain->stat.no_match++;
2128 return RSC_NO_MATCH; 2129 } 2130 2131 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2132 }
2133 2134 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2135 VirtioNetRscSeg *seg,
2136 const uint8_t *buf, size_t size, 2137 VirtioNetRscUnit *unit) 2138 {
2139 struct ip6_header *ip1, *ip2; 2140 2141 ip1 = (struct ip6_header *)(unit->ip);
2142 ip2 = (struct ip6_header *)(seg->unit.ip);
2143 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2144 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2145 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2146 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2147 chain->stat.no_match++;
2148 return RSC_NO_MATCH; 2149 } 2150 2151 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2152 }
2153 2154 /* Packets with 'SYN' should bypass; packets with other control flags should be
2155 * sent only after a drain, to prevent reordering */
2156 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2157 struct tcp_header *tcp)
2158 { 2159 uint16_t tcp_hdr; 2160 uint16_t tcp_flag; 2161 2162 tcp_flag = htons(tcp->th_offset_flags);
2163 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2164 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2165 if (tcp_flag & TH_SYN) { 2166 chain->stat.tcp_syn++; 2167 return RSC_BYPASS; 2168 }
2169 2170 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2171 chain->stat.tcp_ctrl_drain++; 2172 return RSC_FINAL; 2173 }
2174 2175 if (tcp_hdr > sizeof(struct tcp_header)) { 2176 chain->stat.tcp_all_opt++;
2177 return RSC_FINAL; 2178 } 2179 2180 return RSC_CANDIDATE; 2181 } 2182
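/*
 * Illustrative sketch (hypothetical helper, not part of the device model):
 * how the masks from the top of this file carve up th_offset_flags in
 * virtio_net_rsc_tcp_ctrl_check() above. VIRTIO_NET_TCP_HDR_LENGTH
 * (0xF000) keeps the 4-bit data offset, and VIRTIO_NET_TCP_FLAG (0x3F)
 * keeps the low six flag bits.
 */
static inline bool example_rsc_candidate_flags(uint16_t th_offset_flags)
{
    uint16_t flags = htons(th_offset_flags) & VIRTIO_NET_TCP_FLAG;

    /* SYN bypasses and FIN/URG/RST force a drain; ACK and PSH may coalesce */
    return !(flags & (TH_SYN | TH_FIN | TH_URG | TH_RST));
}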
2183 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2184 NetClientState *nc,
2185 const uint8_t *buf, size_t size, 2186 VirtioNetRscUnit *unit) 2187 { 2188 int ret;
2189 VirtioNetRscSeg *seg, *nseg; 2190 2191 if (QTAILQ_EMPTY(&chain->buffers)) {
2192 chain->stat.empty_cache++; 2193 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2194 timer_mod(chain->drain_timer, 2195 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2196 return size; 2197 } 2198 2199 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2200 if (chain->proto == ETH_P_IP) { 2201 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2202 } else { 2203 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2204 }
2205 2206 if (ret == RSC_FINAL) { 2207 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2208 /* Send failed */ 2209 chain->stat.final_failed++; 2210 return 0; 2211 }
2212 2213 /* Send current packet */ 2214 return virtio_net_do_receive(nc, buf, size);
2215 } else if (ret == RSC_NO_MATCH) { 2216 continue; 2217 } else {
2218 /* Coalesced; set the flag so the IPv4 checksum gets recalculated */
2219 seg->is_coalesced = 1; 2220 return size; 2221 } 2222 }
2223 2224 chain->stat.no_match_cache++; 2225 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2226 return size; 2227 }
2228 2229 /* Drain a connection's data: this avoids out-of-order segments */
2230 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2231 NetClientState *nc,
2232 const uint8_t *buf, size_t size, 2233 uint16_t ip_start, uint16_t ip_size, 2234 uint16_t tcp_port)
2235 { 2236 VirtioNetRscSeg *seg, *nseg; 2237 uint32_t ppair1, ppair2;
2238 2239 ppair1 = *(uint32_t *)(buf + tcp_port);
2240 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2241 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2242 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2243 || (ppair1 != ppair2)) {
2244 continue; 2245 } 2246 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2247 chain->stat.drain_failed++; 2248 } 2249 2250 break; 2251 }
2252 2253 return virtio_net_do_receive(nc, buf, size); 2254 } 2255
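/*
 * Illustrative sketch (hypothetical helper): virtio_net_rsc_drain_flow()
 * above matches a flow by loading the adjacent 16-bit TCP source and
 * destination ports as one 32-bit value. An alignment-safe form of the
 * same comparison; the device code casts directly instead:
 */
static inline bool example_same_port_pair(const uint8_t *a, const uint8_t *b,
                                          size_t tcp_port_off)
{
    uint32_t pa, pb;

    /* memcpy sidesteps the unaligned loads implied by the direct casts */
    memcpy(&pa, a + tcp_port_off, sizeof(pa));
    memcpy(&pb, b + tcp_port_off, sizeof(pb));
    return pa == pb;
}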
2256 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2257 struct ip_header *ip,
2258 const uint8_t *buf, size_t size) 2259 { 2260 uint16_t ip_len; 2261 2262 /* Not an ipv4 packet */
2263 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2264 chain->stat.ip_option++;
2265 return RSC_BYPASS; 2266 } 2267 2268 /* Don't handle packets with ip option */
2269 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2270 chain->stat.ip_option++;
2271 return RSC_BYPASS; 2272 } 2273 2274 if (ip->ip_p != IPPROTO_TCP) { 2275 chain->stat.bypass_not_tcp++;
2276 return RSC_BYPASS; 2277 } 2278 2279 /* Don't handle packets with ip fragment */
2280 if (!(htons(ip->ip_off) & IP_DF)) { 2281 chain->stat.ip_frag++; 2282 return RSC_BYPASS; 2283 }
2284 2285 /* Don't handle packets with ecn flag */ 2286 if (IPTOS_ECN(ip->ip_tos)) {
2287 chain->stat.ip_ecn++; 2288 return RSC_BYPASS; 2289 } 2290 2291 ip_len = htons(ip->ip_len);
2292 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2293 || ip_len > (size - chain->n->guest_hdr_len - 2294 sizeof(struct eth_header))) {
2295 chain->stat.ip_hacked++; 2296 return RSC_BYPASS; 2297 } 2298 2299 return RSC_CANDIDATE; 2300 }
2301 2302 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2303 NetClientState *nc,
2304 const uint8_t *buf, size_t size) 2305 { 2306 int32_t ret; 2307 uint16_t hdr_len;
2308 VirtioNetRscUnit unit; 2309 2310 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2311 2312 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2313 + sizeof(struct tcp_header))) { 2314 chain->stat.bypass_not_tcp++;
2315 return virtio_net_do_receive(nc, buf, size); 2316 }
2317 2318 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2319 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2320 != RSC_CANDIDATE) {
2321 return virtio_net_do_receive(nc, buf, size); 2322 }
2323 2324 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2325 if (ret == RSC_BYPASS) {
2326 return virtio_net_do_receive(nc, buf, size); 2327 } else if (ret == RSC_FINAL) {
2328 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2329 ((hdr_len + sizeof(struct eth_header)) + 12), /* + 12 = offset of ip_src */
2330 VIRTIO_NET_IP4_ADDR_SIZE,
2331 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2332 }
2333 2334 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2335 }
2336 2337 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2338 struct ip6_header *ip6,
2339 const uint8_t *buf, size_t size) 2340 { 2341 uint16_t ip_len;
2342 2343 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2344 != IP_HEADER_VERSION_6) {
2345 return RSC_BYPASS; 2346 }
2347 2348 /* Both options and protocol are checked by this: any extension header shows up as a non-TCP next header */
2349 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2350 chain->stat.bypass_not_tcp++;
2351 return RSC_BYPASS; 2352 } 2353 2354 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2355 if (ip_len < sizeof(struct tcp_header) ||
2356 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2357 - sizeof(struct ip6_header))) { 2358 chain->stat.ip_hacked++; 2359 return RSC_BYPASS; 2360 }
2361 2362 /* Don't handle packets with ecn flag */
2363 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2364 chain->stat.ip_ecn++;
2365 return RSC_BYPASS; 2366 } 2367 2368 return RSC_CANDIDATE; 2369 }
2370 2371 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2372 const uint8_t *buf, size_t size) 2373 { 2374 int32_t ret; 2375 uint16_t hdr_len;
2376 VirtioNetRscChain *chain; 2377 VirtioNetRscUnit unit; 2378 2379 chain = (VirtioNetRscChain *)opq;
2380 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2381 2382 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2383 + sizeof(struct tcp_header))) { 2384 return virtio_net_do_receive(nc, buf, size); 2385 }
2386 2387 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2388 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2389 unit.ip, buf, size)) {
2390 return virtio_net_do_receive(nc, buf, size); 2391 }
2392 2393 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2394 if (ret == RSC_BYPASS) {
2395 return virtio_net_do_receive(nc, buf, size); 2396 } else if (ret == RSC_FINAL) {
2397 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2398 ((hdr_len + sizeof(struct eth_header)) + 8), /* + 8 = offset of ip6_src */
2399 VIRTIO_NET_IP6_ADDR_SIZE, 2400 hdr_len + sizeof(struct eth_header)
2401 + sizeof(struct ip6_header)); 2402 }
2403 2404 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2405 }
2406 2407 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2408 NetClientState *nc,
2409 uint16_t proto) 2410 { 2411 VirtioNetRscChain
*chain; 2412 2413 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2414 return NULL; 2415 } 2416 2417 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2418 if (chain->proto == proto) { 2419 return chain; 2420 } 2421 } 2422 2423 chain = g_malloc(sizeof(*chain)); 2424 chain->n = n; 2425 chain->proto = proto; 2426 if (proto == (uint16_t)ETH_P_IP) { 2427 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2428 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2429 } else { 2430 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2431 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2432 } 2433 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2434 virtio_net_rsc_purge, chain); 2435 memset(&chain->stat, 0, sizeof(chain->stat)); 2436 2437 QTAILQ_INIT(&chain->buffers); 2438 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2439 2440 return chain; 2441 } 2442 2443 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2444 const uint8_t *buf, 2445 size_t size) 2446 { 2447 uint16_t proto; 2448 VirtioNetRscChain *chain; 2449 struct eth_header *eth; 2450 VirtIONet *n; 2451 2452 n = qemu_get_nic_opaque(nc); 2453 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2454 return virtio_net_do_receive(nc, buf, size); 2455 } 2456 2457 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2458 proto = htons(eth->h_proto); 2459 2460 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2461 if (chain) { 2462 chain->stat.received++; 2463 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2464 return virtio_net_rsc_receive4(chain, nc, buf, size); 2465 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2466 return virtio_net_rsc_receive6(chain, nc, buf, size); 2467 } 2468 } 2469 return virtio_net_do_receive(nc, buf, size); 2470 } 2471 2472 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2473 size_t size) 2474 { 2475 VirtIONet *n = qemu_get_nic_opaque(nc); 2476 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2477 return virtio_net_rsc_receive(nc, buf, size); 2478 } else { 2479 return virtio_net_do_receive(nc, buf, size); 2480 } 2481 } 2482 2483 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2484 2485 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2486 { 2487 VirtIONet *n = qemu_get_nic_opaque(nc); 2488 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2489 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2490 2491 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2492 virtio_notify(vdev, q->tx_vq); 2493 2494 g_free(q->async_tx.elem); 2495 q->async_tx.elem = NULL; 2496 2497 virtio_queue_set_notification(q->tx_vq, 1); 2498 virtio_net_flush_tx(q); 2499 } 2500 2501 /* TX */ 2502 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2503 { 2504 VirtIONet *n = q->n; 2505 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2506 VirtQueueElement *elem; 2507 int32_t num_packets = 0; 2508 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2509 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2510 return num_packets; 2511 } 2512 2513 if (q->async_tx.elem) { 2514 virtio_queue_set_notification(q->tx_vq, 0); 2515 return num_packets; 2516 } 2517 2518 for (;;) { 2519 ssize_t ret; 2520 unsigned int out_num; 2521 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2522 struct virtio_net_hdr_mrg_rxbuf mhdr; 2523 2524 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2525 if (!elem) { 2526 break; 2527 } 2528 2529 out_num = elem->out_num; 2530 out_sg = elem->out_sg; 2531 if (out_num < 1) { 2532 virtio_error(vdev, "virtio-net header not in first 
element"); 2533 virtqueue_detach_element(q->tx_vq, elem, 0); 2534 g_free(elem); 2535 return -EINVAL; 2536 } 2537 2538 if (n->has_vnet_hdr) { 2539 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2540 n->guest_hdr_len) { 2541 virtio_error(vdev, "virtio-net header incorrect"); 2542 virtqueue_detach_element(q->tx_vq, elem, 0); 2543 g_free(elem); 2544 return -EINVAL; 2545 } 2546 if (n->needs_vnet_hdr_swap) { 2547 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2548 sg2[0].iov_base = &mhdr; 2549 sg2[0].iov_len = n->guest_hdr_len; 2550 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2551 out_sg, out_num, 2552 n->guest_hdr_len, -1); 2553 if (out_num == VIRTQUEUE_MAX_SIZE) { 2554 goto drop; 2555 } 2556 out_num += 1; 2557 out_sg = sg2; 2558 } 2559 } 2560 /* 2561 * If host wants to see the guest header as is, we can 2562 * pass it on unchanged. Otherwise, copy just the parts 2563 * that host is interested in. 2564 */ 2565 assert(n->host_hdr_len <= n->guest_hdr_len); 2566 if (n->host_hdr_len != n->guest_hdr_len) { 2567 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2568 out_sg, out_num, 2569 0, n->host_hdr_len); 2570 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2571 out_sg, out_num, 2572 n->guest_hdr_len, -1); 2573 out_num = sg_num; 2574 out_sg = sg; 2575 } 2576 2577 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2578 out_sg, out_num, virtio_net_tx_complete); 2579 if (ret == 0) { 2580 virtio_queue_set_notification(q->tx_vq, 0); 2581 q->async_tx.elem = elem; 2582 return -EBUSY; 2583 } 2584 2585 drop: 2586 virtqueue_push(q->tx_vq, elem, 0); 2587 virtio_notify(vdev, q->tx_vq); 2588 g_free(elem); 2589 2590 if (++num_packets >= n->tx_burst) { 2591 break; 2592 } 2593 } 2594 return num_packets; 2595 } 2596 2597 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2598 { 2599 VirtIONet *n = VIRTIO_NET(vdev); 2600 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2601 2602 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2603 virtio_net_drop_tx_queue_data(vdev, vq); 2604 return; 2605 } 2606 2607 /* This happens when device was stopped but VCPU wasn't. */ 2608 if (!vdev->vm_running) { 2609 q->tx_waiting = 1; 2610 return; 2611 } 2612 2613 if (q->tx_waiting) { 2614 virtio_queue_set_notification(vq, 1); 2615 timer_del(q->tx_timer); 2616 q->tx_waiting = 0; 2617 if (virtio_net_flush_tx(q) == -EINVAL) { 2618 return; 2619 } 2620 } else { 2621 timer_mod(q->tx_timer, 2622 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2623 q->tx_waiting = 1; 2624 virtio_queue_set_notification(vq, 0); 2625 } 2626 } 2627 2628 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2629 { 2630 VirtIONet *n = VIRTIO_NET(vdev); 2631 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2632 2633 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2634 virtio_net_drop_tx_queue_data(vdev, vq); 2635 return; 2636 } 2637 2638 if (unlikely(q->tx_waiting)) { 2639 return; 2640 } 2641 q->tx_waiting = 1; 2642 /* This happens when device was stopped but VCPU wasn't. */ 2643 if (!vdev->vm_running) { 2644 return; 2645 } 2646 virtio_queue_set_notification(vq, 0); 2647 qemu_bh_schedule(q->tx_bh); 2648 } 2649 2650 static void virtio_net_tx_timer(void *opaque) 2651 { 2652 VirtIONetQueue *q = opaque; 2653 VirtIONet *n = q->n; 2654 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2655 /* This happens when device was stopped but BH wasn't. */ 2656 if (!vdev->vm_running) { 2657 /* Make sure tx waiting is set, so we'll run when restarted. 
*/ 2658 assert(q->tx_waiting); 2659 return; 2660 } 2661 2662 q->tx_waiting = 0;
2663 2664 /* Just in case the driver is not ready any more */
2665 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2666 return; 2667 }
2668 2669 virtio_queue_set_notification(q->tx_vq, 1); 2670 virtio_net_flush_tx(q); 2671 }
2672 2673 static void virtio_net_tx_bh(void *opaque) 2674 { 2675 VirtIONetQueue *q = opaque;
2676 VirtIONet *n = q->n; 2677 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2678 int32_t ret;
2679 2680 /* This happens when device was stopped but BH wasn't. */ 2681 if (!vdev->vm_running) {
2682 /* Make sure tx waiting is set, so we'll run when restarted. */ 2683 assert(q->tx_waiting);
2684 return; 2685 } 2686 2687 q->tx_waiting = 0;
2688 2689 /* Just in case the driver is not ready any more */
2690 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2691 return; 2692 }
2693 2694 ret = virtio_net_flush_tx(q); 2695 if (ret == -EBUSY || ret == -EINVAL) {
2696 return; /* Notification re-enable handled by tx_complete or device 2697 * broken */ 2698 }
2699 2700 /* If we flush a full burst of packets, assume there are 2701 * more coming and immediately reschedule */
2702 if (ret >= n->tx_burst) { 2703 qemu_bh_schedule(q->tx_bh); 2704 q->tx_waiting = 1; 2705 return; 2706 }
2707 2708 /* If less than a full burst, re-enable notification and flush
2709 * anything that may have come in while we weren't looking. If
2710 * we find something, assume the guest is still active and reschedule */
2711 virtio_queue_set_notification(q->tx_vq, 1); 2712 ret = virtio_net_flush_tx(q);
2713 if (ret == -EINVAL) { 2714 return; 2715 } else if (ret > 0) {
2716 virtio_queue_set_notification(q->tx_vq, 0); 2717 qemu_bh_schedule(q->tx_bh);
2718 q->tx_waiting = 1; 2719 } 2720 }
2721 2722 static void virtio_net_add_queue(VirtIONet *n, int index) 2723 {
2724 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2725 2726 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2727 virtio_net_handle_rx); 2728 2729 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2730 n->vqs[index].tx_vq = 2731 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2732 virtio_net_handle_tx_timer); 2733 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2734 virtio_net_tx_timer, 2735 &n->vqs[index]); 2736 } else { 2737 n->vqs[index].tx_vq =
2738 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2739 virtio_net_handle_tx_bh);
2740 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2741 }
2742 2743 n->vqs[index].tx_waiting = 0; 2744 n->vqs[index].n = n; 2745 }
2746 2747 static void virtio_net_del_queue(VirtIONet *n, int index) 2748 {
2749 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2750 VirtIONetQueue *q = &n->vqs[index];
2751 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2752 2753 qemu_purge_queued_packets(nc);
2754 2755 virtio_del_queue(vdev, index * 2); 2756 if (q->tx_timer) { 2757 timer_free(q->tx_timer);
2758 q->tx_timer = NULL; 2759 } else { 2760 qemu_bh_delete(q->tx_bh); 2761 q->tx_bh = NULL; 2762 }
2763 q->tx_waiting = 0; 2764 virtio_del_queue(vdev, index * 2 + 1); 2765 }
2766 2767 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2768 {
2769 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2770 int old_num_queues = virtio_get_num_queues(vdev);
2771 int new_num_queues = new_max_queue_pairs * 2 + 1; 2772 int i;
2773 2774 assert(old_num_queues >= 3); 2775 assert(old_num_queues % 2 == 1);
2776 2777 if (old_num_queues == new_num_queues) { 2778 return; 2779 } 2780 2781 /* 2782 * We always
need to remove and add ctrl vq if 2783 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2784 * and then we only enter one of the following two loops. 2785 */ 2786 virtio_del_queue(vdev, old_num_queues - 1); 2787 2788 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2789 /* new_num_queues < old_num_queues */ 2790 virtio_net_del_queue(n, i / 2); 2791 } 2792 2793 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2794 /* new_num_queues > old_num_queues */ 2795 virtio_net_add_queue(n, i / 2); 2796 } 2797 2798 /* add ctrl_vq last */ 2799 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2800 } 2801 2802 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2803 { 2804 int max = multiqueue ? n->max_queue_pairs : 1; 2805 2806 n->multiqueue = multiqueue; 2807 virtio_net_change_num_queue_pairs(n, max); 2808 2809 virtio_net_set_queue_pairs(n); 2810 } 2811 2812 static int virtio_net_post_load_device(void *opaque, int version_id) 2813 { 2814 VirtIONet *n = opaque; 2815 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2816 int i, link_down; 2817 2818 trace_virtio_net_post_load_device(); 2819 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2820 virtio_vdev_has_feature(vdev, 2821 VIRTIO_F_VERSION_1), 2822 virtio_vdev_has_feature(vdev, 2823 VIRTIO_NET_F_HASH_REPORT)); 2824 2825 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2826 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2827 n->mac_table.in_use = 0; 2828 } 2829 2830 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2831 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2832 } 2833 2834 /* 2835 * curr_guest_offloads will be later overwritten by the 2836 * virtio_set_features_nocheck call done from the virtio_load. 2837 * Here we make sure it is preserved and restored accordingly 2838 * in the virtio_net_post_load_virtio callback. 
2839 */ 2840 n->saved_guest_offloads = n->curr_guest_offloads; 2841 2842 virtio_net_set_queue_pairs(n); 2843 2844 /* Find the first multicast entry in the saved MAC filter */ 2845 for (i = 0; i < n->mac_table.in_use; i++) { 2846 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2847 break; 2848 } 2849 } 2850 n->mac_table.first_multi = i; 2851 2852 /* nc.link_down can't be migrated, so infer link_down according 2853 * to link status bit in n->status */ 2854 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2855 for (i = 0; i < n->max_queue_pairs; i++) { 2856 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2857 } 2858 2859 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2860 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 2861 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 2862 QEMU_CLOCK_VIRTUAL, 2863 virtio_net_announce_timer, n); 2864 if (n->announce_timer.round) { 2865 timer_mod(n->announce_timer.tm, 2866 qemu_clock_get_ms(n->announce_timer.type)); 2867 } else { 2868 qemu_announce_timer_del(&n->announce_timer, false); 2869 } 2870 } 2871 2872 if (n->rss_data.enabled) { 2873 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 2874 if (!n->rss_data.populate_hash) { 2875 if (!virtio_net_attach_epbf_rss(n)) { 2876 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 2877 warn_report("Can't post-load eBPF RSS for vhost"); 2878 } else { 2879 warn_report("Can't post-load eBPF RSS - " 2880 "fallback to software RSS"); 2881 n->rss_data.enabled_software_rss = true; 2882 } 2883 } 2884 } 2885 2886 trace_virtio_net_rss_enable(n->rss_data.hash_types, 2887 n->rss_data.indirections_len, 2888 sizeof(n->rss_data.key)); 2889 } else { 2890 trace_virtio_net_rss_disable(); 2891 } 2892 return 0; 2893 } 2894 2895 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 2896 { 2897 VirtIONet *n = VIRTIO_NET(vdev); 2898 /* 2899 * The actual needed state is now in saved_guest_offloads, 2900 * see virtio_net_post_load_device for detail. 2901 * Restore it back and apply the desired offloads. 2902 */ 2903 n->curr_guest_offloads = n->saved_guest_offloads; 2904 if (peer_has_vnet_hdr(n)) { 2905 virtio_net_apply_guest_offloads(n); 2906 } 2907 2908 return 0; 2909 } 2910 2911 /* tx_waiting field of a VirtIONetQueue */ 2912 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 2913 .name = "virtio-net-queue-tx_waiting", 2914 .fields = (VMStateField[]) { 2915 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 2916 VMSTATE_END_OF_LIST() 2917 }, 2918 }; 2919 2920 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 2921 { 2922 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 2923 } 2924 2925 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 2926 { 2927 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 2928 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 2929 } 2930 2931 static bool mac_table_fits(void *opaque, int version_id) 2932 { 2933 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 2934 } 2935 2936 static bool mac_table_doesnt_fit(void *opaque, int version_id) 2937 { 2938 return !mac_table_fits(opaque, version_id); 2939 } 2940 2941 /* This temporary type is shared by all the WITH_TMP methods 2942 * although only some fields are used by each. 
2943 */ 2944 struct VirtIONetMigTmp { 2945 VirtIONet *parent; 2946 VirtIONetQueue *vqs_1;
2947 uint16_t curr_queue_pairs_1; 2948 uint8_t has_ufo; 2949 uint32_t has_vnet_hdr; 2950 };
2951 2952 /* The 2nd and subsequent tx_waiting flags are loaded later than
2953 * the 1st entry in the queue_pairs and only if there's more than one
2954 * entry. We use the tmp mechanism to compute a temporary pointer and
2955 * count, and also to validate the count. 2956 */
2957 2958 static int virtio_net_tx_waiting_pre_save(void *opaque) 2959 {
2960 struct VirtIONetMigTmp *tmp = opaque; 2961 2962 tmp->vqs_1 = tmp->parent->vqs + 1;
2963 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2964 if (tmp->parent->curr_queue_pairs == 0) { 2965 tmp->curr_queue_pairs_1 = 0; 2966 }
2967 2968 return 0; 2969 } 2970 2971 static int virtio_net_tx_waiting_pre_load(void *opaque) 2972 {
2973 struct VirtIONetMigTmp *tmp = opaque; 2974 2975 /* Reuse the pointer setup from save */
2976 virtio_net_tx_waiting_pre_save(opaque);
2977 2978 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
2979 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
2980 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 2981 2982 return -EINVAL; 2983 }
2984 2985 return 0; /* all good */ 2986 }
2987 2988 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2989 .name = "virtio-net-tx_waiting", 2990 .pre_load = virtio_net_tx_waiting_pre_load,
2991 .pre_save = virtio_net_tx_waiting_pre_save, 2992 .fields = (VMStateField[]) {
2993 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 2994 curr_queue_pairs_1,
2995 vmstate_virtio_net_queue_tx_waiting, 2996 struct VirtIONetQueue), 2997 VMSTATE_END_OF_LIST()
2998 }, 2999 };
3000 3001 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3002 * flag set we need to check that we support it 3003 */
3004 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3005 {
3006 struct VirtIONetMigTmp *tmp = opaque; 3007 3008 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3009 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3010 return -EINVAL; 3011 }
3012 3013 return 0; 3014 } 3015 3016 static int virtio_net_ufo_pre_save(void *opaque) 3017 {
3018 struct VirtIONetMigTmp *tmp = opaque; 3019 3020 tmp->has_ufo = tmp->parent->has_ufo;
3021 3022 return 0; 3023 } 3024 3025 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3026 .name = "virtio-net-ufo", 3027 .post_load = virtio_net_ufo_post_load,
3028 .pre_save = virtio_net_ufo_pre_save, 3029 .fields = (VMStateField[]) {
3030 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3031 VMSTATE_END_OF_LIST() 3032 }, 3033 }; 3034
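/*
 * Illustrative sketch (hypothetical helper) of the VMSTATE_WITH_TMP
 * pattern used throughout this section: a scratch struct lives only for
 * the duration of save/load, the machinery fills in ->parent, pre_save
 * derives the on-wire fields from the parent, and post_load validates
 * them against the running configuration.
 */
static inline int example_with_tmp_pre_save(struct VirtIONetMigTmp *tmp)
{
    /* mirror a parent field into the temporary on-wire representation */
    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
    return 0;
}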
3035 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3036 * flag set we need to check that we support it 3037 */
3038 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3039 {
3040 struct VirtIONetMigTmp *tmp = opaque;
3041 3042 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3043 error_report("virtio-net: saved image requires vnet_hdr=on"); 3044 return -EINVAL; 3045 }
3046 3047 return 0; 3048 } 3049 3050 static int virtio_net_vnet_pre_save(void *opaque) 3051 {
3052 struct VirtIONetMigTmp *tmp = opaque; 3053 3054 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3055 3056 return 0; 3057 } 3058 3059 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3060 .name = "virtio-net-vnet", 3061 .post_load = virtio_net_vnet_post_load,
3062 .pre_save = virtio_net_vnet_pre_save, 3063 .fields = (VMStateField[]) {
3064 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3065 VMSTATE_END_OF_LIST() 3066 }, 3067 };
3068 3069 static bool virtio_net_rss_needed(void *opaque) 3070 {
3071 return VIRTIO_NET(opaque)->rss_data.enabled; 3072 }
3073 3074 static const VMStateDescription vmstate_virtio_net_rss = {
3075 .name = "virtio-net-device/rss", 3076 .version_id = 1, 3077 .minimum_version_id = 1,
3078 .needed = virtio_net_rss_needed, 3079 .fields = (VMStateField[]) {
3080 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3081 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3082 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3083 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3084 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3085 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3086 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3087 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3088 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3089 rss_data.indirections_len, 0, 3090 vmstate_info_uint16, uint16_t), 3091 VMSTATE_END_OF_LIST()
3092 }, 3093 };
3094 3095 static const VMStateDescription vmstate_virtio_net_device = {
3096 .name = "virtio-net-device", 3097 .version_id = VIRTIO_NET_VM_VERSION,
3098 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3099 .post_load = virtio_net_post_load_device,
3100 .fields = (VMStateField[]) { 3101 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3102 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3103 vmstate_virtio_net_queue_tx_waiting,
3104 VirtIONetQueue), 3105 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3106 VMSTATE_UINT16(status, VirtIONet), 3107 VMSTATE_UINT8(promisc, VirtIONet),
3108 VMSTATE_UINT8(allmulti, VirtIONet), 3109 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3110 3111 /* Guarded pair: If it fits we load it, else we throw it away
3112 * - this can happen if the source has a larger MAC table; post-load
3113 * sets flags in that case. 3114 */
3115 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3116 0, mac_table_fits, mac_table.in_use,
3117 ETH_ALEN), 3118 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3119 mac_table.in_use, ETH_ALEN),
3120 3121 /* Note: This is an array of uint32_t values that has always been saved
3122 * as a raw buffer; mind the endianness: it is actually used as a bitmap,
3123 * but stored as uints.
3124 */ 3125 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3126 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3127 vmstate_virtio_net_has_vnet), 3128 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3129 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3130 VMSTATE_UINT8(alluni, VirtIONet), 3131 VMSTATE_UINT8(nomulti, VirtIONet), 3132 VMSTATE_UINT8(nouni, VirtIONet), 3133 VMSTATE_UINT8(nobcast, VirtIONet), 3134 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3135 vmstate_virtio_net_has_ufo), 3136 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3137 vmstate_info_uint16_equal, uint16_t), 3138 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3139 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3140 vmstate_virtio_net_tx_waiting), 3141 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3142 has_ctrl_guest_offloads), 3143 VMSTATE_END_OF_LIST() 3144 }, 3145 .subsections = (const VMStateDescription * []) { 3146 &vmstate_virtio_net_rss, 3147 NULL 3148 } 3149 }; 3150 3151 static NetClientInfo net_virtio_info = { 3152 .type = NET_CLIENT_DRIVER_NIC, 3153 .size = sizeof(NICState), 3154 .can_receive = virtio_net_can_receive, 3155 .receive = virtio_net_receive, 3156 .link_status_changed = virtio_net_set_link_status, 3157 .query_rx_filter = virtio_net_query_rxfilter, 3158 .announce = virtio_net_announce, 3159 }; 3160 3161 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3162 { 3163 VirtIONet *n = VIRTIO_NET(vdev); 3164 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3165 assert(n->vhost_started); 3166 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3167 } 3168 3169 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3170 bool mask) 3171 { 3172 VirtIONet *n = VIRTIO_NET(vdev); 3173 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3174 assert(n->vhost_started); 3175 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3176 vdev, idx, mask); 3177 } 3178 3179 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3180 { 3181 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3182 3183 n->config_size = virtio_feature_get_config_size(feature_sizes, 3184 host_features); 3185 } 3186 3187 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3188 const char *type) 3189 { 3190 /* 3191 * The name can be NULL, the netclient name will be type.x. 
3192 */ 3193 assert(type != NULL); 3194 3195 g_free(n->netclient_name); 3196 g_free(n->netclient_type); 3197 n->netclient_name = g_strdup(name); 3198 n->netclient_type = g_strdup(type); 3199 } 3200 3201 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3202 { 3203 HotplugHandler *hotplug_ctrl; 3204 PCIDevice *pci_dev; 3205 Error *err = NULL; 3206 3207 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3208 if (hotplug_ctrl) { 3209 pci_dev = PCI_DEVICE(dev); 3210 pci_dev->partially_hotplugged = true; 3211 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3212 if (err) { 3213 error_report_err(err); 3214 return false; 3215 } 3216 } else { 3217 return false; 3218 } 3219 return true; 3220 } 3221 3222 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3223 Error **errp) 3224 { 3225 Error *err = NULL; 3226 HotplugHandler *hotplug_ctrl; 3227 PCIDevice *pdev = PCI_DEVICE(dev); 3228 BusState *primary_bus; 3229 3230 if (!pdev->partially_hotplugged) { 3231 return true; 3232 } 3233 primary_bus = dev->parent_bus; 3234 if (!primary_bus) { 3235 error_setg(errp, "virtio_net: couldn't find primary bus"); 3236 return false; 3237 } 3238 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3239 qatomic_set(&n->failover_primary_hidden, false); 3240 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3241 if (hotplug_ctrl) { 3242 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3243 if (err) { 3244 goto out; 3245 } 3246 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3247 } 3248 pdev->partially_hotplugged = false; 3249 3250 out: 3251 error_propagate(errp, err); 3252 return !err; 3253 } 3254 3255 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3256 { 3257 bool should_be_hidden; 3258 Error *err = NULL; 3259 DeviceState *dev = failover_find_primary_device(n); 3260 3261 if (!dev) { 3262 return; 3263 } 3264 3265 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3266 3267 if (migration_in_setup(s) && !should_be_hidden) { 3268 if (failover_unplug_primary(n, dev)) { 3269 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3270 qapi_event_send_unplug_primary(dev->id); 3271 qatomic_set(&n->failover_primary_hidden, true); 3272 } else { 3273 warn_report("couldn't unplug primary device"); 3274 } 3275 } else if (migration_has_failed(s)) { 3276 /* We already unplugged the device let's plug it back */ 3277 if (!failover_replug_primary(n, dev, &err)) { 3278 if (err) { 3279 error_report_err(err); 3280 } 3281 } 3282 } 3283 } 3284 3285 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3286 { 3287 MigrationState *s = data; 3288 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3289 virtio_net_handle_migration_primary(n, s); 3290 } 3291 3292 static bool failover_hide_primary_device(DeviceListener *listener, 3293 const QDict *device_opts, 3294 bool from_json, 3295 Error **errp) 3296 { 3297 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3298 const char *standby_id; 3299 3300 if (!device_opts) { 3301 return false; 3302 } 3303 3304 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3305 return false; 3306 } 3307 3308 if (!qdict_haskey(device_opts, "id")) { 3309 error_setg(errp, "Device with failover_pair_id needs to have id"); 3310 return false; 3311 } 3312 3313 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3314 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3315 return false; 3316 } 3317 3318 /* 3319 * The hide helper can be called several times for a given 
3292 static bool failover_hide_primary_device(DeviceListener *listener, 3293 const QDict *device_opts,
3294 bool from_json, 3295 Error **errp) 3296 {
3297 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3298 const char *standby_id;
3299 3300 if (!device_opts) { 3301 return false; 3302 }
3303 3304 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3305 return false; 3306 }
3307 3308 if (!qdict_haskey(device_opts, "id")) {
3309 error_setg(errp, "Device with failover_pair_id needs to have id"); 3310 return false; 3311 }
3312 3313 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3314 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3315 return false; 3316 }
3317 3318 /* 3319 * The hide helper can be called several times for a given device.
3320 * Check that there is only one primary for a virtio-net device, but
3321 * don't duplicate the qdict when the helper is called again for the
3322 * same device. 3323 */ 3324 if (n->primary_opts) { 3325 const char *old, *new;
3326 /* devices with failover_pair_id always have an id */
3327 old = qdict_get_str(n->primary_opts, "id"); 3328 new = qdict_get_str(device_opts, "id");
3329 if (strcmp(old, new) != 0) {
3330 error_setg(errp, "Cannot attach more than one primary device to "
3331 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3332 return false; 3333 }
3334 } else { 3335 n->primary_opts = qdict_clone_shallow(device_opts);
3336 n->primary_opts_from_json = from_json; 3337 }
3338 3339 /* failover_primary_hidden is set during feature negotiation */
3340 return qatomic_read(&n->failover_primary_hidden); 3341 }
3342 3343 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3344 {
3345 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3346 VirtIONet *n = VIRTIO_NET(dev);
3347 NetClientState *nc; 3348 int i; 3349 3350 if (n->net_conf.mtu) {
3351 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3352 } 3353 3354 if (n->net_conf.duplex_str) {
3355 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3356 n->net_conf.duplex = DUPLEX_HALF;
3357 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3358 n->net_conf.duplex = DUPLEX_FULL; 3359 } else {
3360 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3361 return; 3362 }
3363 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3364 } else {
3365 n->net_conf.duplex = DUPLEX_UNKNOWN; 3366 }
3367 3368 if (n->net_conf.speed < SPEED_UNKNOWN) {
3369 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3370 return; 3371 }
3372 if (n->net_conf.speed >= 0) { 3373 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3374 }
3375 3376 if (n->failover) { 3377 n->primary_listener.hide_device = failover_hide_primary_device;
3378 qatomic_set(&n->failover_primary_hidden, true);
3379 device_listener_register(&n->primary_listener);
3380 n->migration_state.notify = virtio_net_migration_state_notifier;
3381 add_migration_state_change_notifier(&n->migration_state);
3382 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3383 }
3384 3385 virtio_net_set_config_size(n, n->host_features);
3386 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3387 3388 /* 3389 * We set a lower limit on RX queue size to what it always was.
3390 * Guests that want a smaller ring can always resize it without
3391 * help from us (using virtio 1 and up).
3392 */ 3393 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3394 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3395 !is_power_of_2(n->net_conf.rx_queue_size)) {
3396 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3397 "must be a power of 2 between %d and %d.",
3398 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3399 VIRTQUEUE_MAX_SIZE);
3400 virtio_cleanup(vdev); 3401 return; 3402 }
3403 3404 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3405 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3406 !is_power_of_2(n->net_conf.tx_queue_size)) {
3407 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3408 "must be a power of 2 between %d and %d",
3409 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3410 VIRTQUEUE_MAX_SIZE);
3411 virtio_cleanup(vdev); 3412 return; 3413 }
3414 3415 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3416 3417 /* 3418 * Figure out the datapath queue pairs since the backend could
3419 * provide a control queue via peers as well. 3420 */ 3421 if (n->nic_conf.peers.queues) {
3422 for (i = 0; i < n->max_ncs; i++) { 3423 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3424 ++n->max_queue_pairs; 3425 } 3426 } 3427 }
3428 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3429 3430 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3431 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3432 "must be a positive integer less than %d.",
3433 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3434 virtio_cleanup(vdev); 3435 return; 3436 }
3437 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs); 3438 n->curr_queue_pairs = 1;
3439 n->tx_timeout = n->net_conf.txtimer;
3440 3441 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3442 && strcmp(n->net_conf.tx, "bh")) { 3443 warn_report("virtio-net: "
3444 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3445 n->net_conf.tx);
3446 error_printf("Defaulting to \"bh\""); 3447 }
3448 3449 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3450 n->net_conf.tx_queue_size); 3451 3452 for (i = 0; i < n->max_queue_pairs; i++) {
3453 virtio_net_add_queue(n, i); 3454 }
3455 3456 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3457 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3458 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3459 n->status = VIRTIO_NET_S_LINK_UP;
3460 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3461 QEMU_CLOCK_VIRTUAL, 3462 virtio_net_announce_timer, n); 3463 n->announce_timer.round = 0;
3464 3465 if (n->netclient_type) { 3466 /*
3467 * Happens when virtio_net_set_netclient_name has been called.
3468 */ 3469 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3470 n->netclient_type, n->netclient_name, n); 3471 } else { 3472 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3473 object_get_typename(OBJECT(dev)), dev->id, n); 3474 } 3475 3476 for (i = 0; i < n->max_queue_pairs; i++) { 3477 n->nic->ncs[i].do_not_pad = true; 3478 } 3479 3480 peer_test_vnet_hdr(n); 3481 if (peer_has_vnet_hdr(n)) { 3482 for (i = 0; i < n->max_queue_pairs; i++) { 3483 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3484 } 3485 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3486 } else { 3487 n->host_hdr_len = 0; 3488 } 3489 3490 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3491 3492 n->vqs[0].tx_waiting = 0; 3493 n->tx_burst = n->net_conf.txburst; 3494 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3495 n->promisc = 1; /* for compatibility */ 3496 3497 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3498 3499 n->vlans = g_malloc0(MAX_VLAN >> 3); 3500 3501 nc = qemu_get_queue(n->nic); 3502 nc->rxfilter_notify_enabled = 1; 3503 3504 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3505 struct virtio_net_config netcfg = {}; 3506 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3507 vhost_net_set_config(get_vhost_net(nc->peer), 3508 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3509 } 3510 QTAILQ_INIT(&n->rsc_chains); 3511 n->qdev = dev; 3512 3513 net_rx_pkt_init(&n->rx_pkt, false); 3514 3515 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3516 virtio_net_load_ebpf(n); 3517 } 3518 } 3519 3520 static void virtio_net_device_unrealize(DeviceState *dev) 3521 { 3522 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3523 VirtIONet *n = VIRTIO_NET(dev); 3524 int i, max_queue_pairs; 3525 3526 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3527 virtio_net_unload_ebpf(n); 3528 } 3529 3530 /* This will stop vhost backend if appropriate. */ 3531 virtio_net_set_status(vdev, 0); 3532 3533 g_free(n->netclient_name); 3534 n->netclient_name = NULL; 3535 g_free(n->netclient_type); 3536 n->netclient_type = NULL; 3537 3538 g_free(n->mac_table.macs); 3539 g_free(n->vlans); 3540 3541 if (n->failover) { 3542 qobject_unref(n->primary_opts); 3543 device_listener_unregister(&n->primary_listener); 3544 remove_migration_state_change_notifier(&n->migration_state); 3545 } else { 3546 assert(n->primary_opts == NULL); 3547 } 3548 3549 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3550 for (i = 0; i < max_queue_pairs; i++) { 3551 virtio_net_del_queue(n, i); 3552 } 3553 /* delete also control vq */ 3554 virtio_del_queue(vdev, max_queue_pairs * 2); 3555 qemu_announce_timer_del(&n->announce_timer, false); 3556 g_free(n->vqs); 3557 qemu_del_nic(n->nic); 3558 virtio_net_rsc_cleanup(n); 3559 g_free(n->rss_data.indirections_table); 3560 net_rx_pkt_uninit(n->rx_pkt); 3561 virtio_cleanup(vdev); 3562 } 3563 3564 static void virtio_net_instance_init(Object *obj) 3565 { 3566 VirtIONet *n = VIRTIO_NET(obj); 3567 3568 /* 3569 * The default config_size is sizeof(struct virtio_net_config). 3570 * Can be overriden with virtio_net_set_config_size. 
3571 */ 3572 n->config_size = sizeof(struct virtio_net_config); 3573 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3574 "bootindex", "/ethernet-phy@0", 3575 DEVICE(n)); 3576 3577 ebpf_rss_init(&n->ebpf_rss); 3578 } 3579 3580 static int virtio_net_pre_save(void *opaque) 3581 { 3582 VirtIONet *n = opaque; 3583 3584 /* At this point, backend must be stopped, otherwise 3585 * it might keep writing to memory. */ 3586 assert(!n->vhost_started); 3587 3588 return 0; 3589 } 3590 3591 static bool primary_unplug_pending(void *opaque) 3592 { 3593 DeviceState *dev = opaque; 3594 DeviceState *primary; 3595 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3596 VirtIONet *n = VIRTIO_NET(vdev); 3597 3598 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3599 return false; 3600 } 3601 primary = failover_find_primary_device(n); 3602 return primary ? primary->pending_deleted_event : false; 3603 } 3604 3605 static bool dev_unplug_pending(void *opaque) 3606 { 3607 DeviceState *dev = opaque; 3608 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3609 3610 return vdc->primary_unplug_pending(dev); 3611 } 3612 3613 static const VMStateDescription vmstate_virtio_net = { 3614 .name = "virtio-net", 3615 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3616 .version_id = VIRTIO_NET_VM_VERSION, 3617 .fields = (VMStateField[]) { 3618 VMSTATE_VIRTIO_DEVICE, 3619 VMSTATE_END_OF_LIST() 3620 }, 3621 .pre_save = virtio_net_pre_save, 3622 .dev_unplug_pending = dev_unplug_pending, 3623 }; 3624 3625 static Property virtio_net_properties[] = { 3626 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3627 VIRTIO_NET_F_CSUM, true), 3628 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3629 VIRTIO_NET_F_GUEST_CSUM, true), 3630 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3631 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3632 VIRTIO_NET_F_GUEST_TSO4, true), 3633 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3634 VIRTIO_NET_F_GUEST_TSO6, true), 3635 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3636 VIRTIO_NET_F_GUEST_ECN, true), 3637 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3638 VIRTIO_NET_F_GUEST_UFO, true), 3639 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3640 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3641 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3642 VIRTIO_NET_F_HOST_TSO4, true), 3643 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3644 VIRTIO_NET_F_HOST_TSO6, true), 3645 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3646 VIRTIO_NET_F_HOST_ECN, true), 3647 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3648 VIRTIO_NET_F_HOST_UFO, true), 3649 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3650 VIRTIO_NET_F_MRG_RXBUF, true), 3651 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3652 VIRTIO_NET_F_STATUS, true), 3653 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3654 VIRTIO_NET_F_CTRL_VQ, true), 3655 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3656 VIRTIO_NET_F_CTRL_RX, true), 3657 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3658 VIRTIO_NET_F_CTRL_VLAN, true), 3659 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3660 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3661 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3662 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3663 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3664 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3665 DEFINE_PROP_BIT64("mq", VirtIONet, 
host_features, VIRTIO_NET_F_MQ, false), 3666 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3667 VIRTIO_NET_F_RSS, false), 3668 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3669 VIRTIO_NET_F_HASH_REPORT, false), 3670 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3671 VIRTIO_NET_F_RSC_EXT, false), 3672 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3673 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3674 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3675 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3676 TX_TIMER_INTERVAL), 3677 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3678 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3679 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3680 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3681 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3682 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3683 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3684 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3685 true), 3686 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3687 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3688 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3689 DEFINE_PROP_END_OF_LIST(), 3690 }; 3691 3692 static void virtio_net_class_init(ObjectClass *klass, void *data) 3693 { 3694 DeviceClass *dc = DEVICE_CLASS(klass); 3695 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3696 3697 device_class_set_props(dc, virtio_net_properties); 3698 dc->vmsd = &vmstate_virtio_net; 3699 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3700 vdc->realize = virtio_net_device_realize; 3701 vdc->unrealize = virtio_net_device_unrealize; 3702 vdc->get_config = virtio_net_get_config; 3703 vdc->set_config = virtio_net_set_config; 3704 vdc->get_features = virtio_net_get_features; 3705 vdc->set_features = virtio_net_set_features; 3706 vdc->bad_features = virtio_net_bad_features; 3707 vdc->reset = virtio_net_reset; 3708 vdc->set_status = virtio_net_set_status; 3709 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3710 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3711 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3712 vdc->post_load = virtio_net_post_load_virtio; 3713 vdc->vmsd = &vmstate_virtio_net_device; 3714 vdc->primary_unplug_pending = primary_unplug_pending; 3715 } 3716 3717 static const TypeInfo virtio_net_info = { 3718 .name = TYPE_VIRTIO_NET, 3719 .parent = TYPE_VIRTIO_DEVICE, 3720 .instance_size = sizeof(VirtIONet), 3721 .instance_init = virtio_net_instance_init, 3722 .class_init = virtio_net_class_init, 3723 }; 3724 3725 static void virtio_register_types(void) 3726 { 3727 type_register_static(&virtio_net_info); 3728 } 3729 3730 type_init(virtio_register_types) 3731
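/*
 * Illustrative invocation (assumed, not taken from this file) exercising
 * some of the properties registered above. "mq", "rss" and "hash" flip
 * DEFINE_PROP_BIT64 feature bits, while rx_queue_size and tx_queue_size
 * must be powers of 2 within the bounds checked in
 * virtio_net_device_realize():
 *
 *   qemu-system-x86_64 ... \
 *     -netdev tap,id=net0,vhost=on,queues=4 \
 *     -device virtio-net-pci,netdev=net0,mq=on,rss=on,hash=on, \
 *             rx_queue_size=1024,tx_queue_size=1024
 */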