1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ 53 54 /* previously fixed value */ 55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 57 58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 61 62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 63 64 #define VIRTIO_NET_TCP_FLAG 0x3F 65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 66 67 /* IPv4 max payload, 16 bits in the header */ 68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 70 71 /* header length value in ip header without option */ 72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 73 74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 76 77 /* Purge coalesced packets timer interval, This value affects the performance 78 a lot, and should be tuned carefully, '300000'(300us) is the recommended 79 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 80 tso/gso/gro 'off'. */ 81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 82 83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 92 93 static const VirtIOFeature feature_sizes[] = { 94 {.flags = 1ULL << VIRTIO_NET_F_MAC, 95 .end = endof(struct virtio_net_config, mac)}, 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 97 .end = endof(struct virtio_net_config, status)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MQ, 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 100 {.flags = 1ULL << VIRTIO_NET_F_MTU, 101 .end = endof(struct virtio_net_config, mtu)}, 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 103 .end = endof(struct virtio_net_config, duplex)}, 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 105 .end = endof(struct virtio_net_config, supported_hash_types)}, 106 {} 107 }; 108 109 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 110 { 111 VirtIONet *n = qemu_get_nic_opaque(nc); 112 113 return &n->vqs[nc->queue_index]; 114 } 115 116 static int vq2q(int queue_index) 117 { 118 return queue_index / 2; 119 } 120 121 /* TODO 122 * - we could suppress RX interrupt if we were so inclined. 123 */ 124 125 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 126 { 127 VirtIONet *n = VIRTIO_NET(vdev); 128 struct virtio_net_config netcfg; 129 NetClientState *nc = qemu_get_queue(n->nic); 130 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 131 132 int ret = 0; 133 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 134 virtio_stw_p(vdev, &netcfg.status, n->status); 135 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 136 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 137 memcpy(netcfg.mac, n->mac, ETH_ALEN); 138 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 139 netcfg.duplex = n->net_conf.duplex; 140 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 141 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 142 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 143 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 144 virtio_stl_p(vdev, &netcfg.supported_hash_types, 145 VIRTIO_NET_RSS_SUPPORTED_HASHES); 146 memcpy(config, &netcfg, n->config_size); 147 148 /* 149 * Is this VDPA? No peer means not VDPA: there's no way to 150 * disconnect/reconnect a VDPA peer. 151 */ 152 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 153 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 154 n->config_size); 155 if (ret != -1) { 156 /* 157 * Some NIC/kernel combinations present 0 as the mac address. As 158 * that is not a legal address, try to proceed with the 159 * address from the QEMU command line in the hope that the 160 * address has been configured correctly elsewhere - just not 161 * reported by the device. 162 */ 163 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 164 info_report("Zero hardware mac address detected. Ignoring."); 165 memcpy(netcfg.mac, n->mac, ETH_ALEN); 166 } 167 memcpy(config, &netcfg, n->config_size); 168 } 169 } 170 } 171 172 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 173 { 174 VirtIONet *n = VIRTIO_NET(vdev); 175 struct virtio_net_config netcfg = {}; 176 NetClientState *nc = qemu_get_queue(n->nic); 177 178 memcpy(&netcfg, config, n->config_size); 179 180 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 181 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 182 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 183 memcpy(n->mac, netcfg.mac, ETH_ALEN); 184 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 185 } 186 187 /* 188 * Is this VDPA? No peer means not VDPA: there's no way to 189 * disconnect/reconnect a VDPA peer. 190 */ 191 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 192 vhost_net_set_config(get_vhost_net(nc->peer), 193 (uint8_t *)&netcfg, 0, n->config_size, 194 VHOST_SET_CONFIG_TYPE_MASTER); 195 } 196 } 197 198 static bool virtio_net_started(VirtIONet *n, uint8_t status) 199 { 200 VirtIODevice *vdev = VIRTIO_DEVICE(n); 201 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 202 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 203 } 204 205 static void virtio_net_announce_notify(VirtIONet *net) 206 { 207 VirtIODevice *vdev = VIRTIO_DEVICE(net); 208 trace_virtio_net_announce_notify(); 209 210 net->status |= VIRTIO_NET_S_ANNOUNCE; 211 virtio_notify_config(vdev); 212 } 213 214 static void virtio_net_announce_timer(void *opaque) 215 { 216 VirtIONet *n = opaque; 217 trace_virtio_net_announce_timer(n->announce_timer.round); 218 219 n->announce_timer.round--; 220 virtio_net_announce_notify(n); 221 } 222 223 static void virtio_net_announce(NetClientState *nc) 224 { 225 VirtIONet *n = qemu_get_nic_opaque(nc); 226 VirtIODevice *vdev = VIRTIO_DEVICE(n); 227 228 /* 229 * Make sure the virtio migration announcement timer isn't running 230 * If it is, let it trigger announcement so that we do not cause 231 * confusion. 232 */ 233 if (n->announce_timer.round) { 234 return; 235 } 236 237 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 238 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 239 virtio_net_announce_notify(n); 240 } 241 } 242 243 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 244 { 245 VirtIODevice *vdev = VIRTIO_DEVICE(n); 246 NetClientState *nc = qemu_get_queue(n->nic); 247 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 248 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 249 n->max_ncs - n->max_queue_pairs : 0; 250 251 if (!get_vhost_net(nc->peer)) { 252 return; 253 } 254 255 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 256 !!n->vhost_started) { 257 return; 258 } 259 if (!n->vhost_started) { 260 int r, i; 261 262 if (n->needs_vnet_hdr_swap) { 263 error_report("backend does not support %s vnet headers; " 264 "falling back on userspace virtio", 265 virtio_is_big_endian(vdev) ? "BE" : "LE"); 266 return; 267 } 268 269 /* Any packets outstanding? Purge them to avoid touching rings 270 * when vhost is running. 271 */ 272 for (i = 0; i < queue_pairs; i++) { 273 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 274 275 /* Purge both directions: TX and RX. */ 276 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 277 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 278 } 279 280 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 281 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 282 if (r < 0) { 283 error_report("%uBytes MTU not supported by the backend", 284 n->net_conf.mtu); 285 286 return; 287 } 288 } 289 290 n->vhost_started = 1; 291 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 292 if (r < 0) { 293 error_report("unable to start vhost net: %d: " 294 "falling back on userspace virtio", -r); 295 n->vhost_started = 0; 296 } 297 } else { 298 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 299 n->vhost_started = 0; 300 } 301 } 302 303 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 304 NetClientState *peer, 305 bool enable) 306 { 307 if (virtio_is_big_endian(vdev)) { 308 return qemu_set_vnet_be(peer, enable); 309 } else { 310 return qemu_set_vnet_le(peer, enable); 311 } 312 } 313 314 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 315 int queue_pairs, bool enable) 316 { 317 int i; 318 319 for (i = 0; i < queue_pairs; i++) { 320 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 321 enable) { 322 while (--i >= 0) { 323 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 324 } 325 326 return true; 327 } 328 } 329 330 return false; 331 } 332 333 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 334 { 335 VirtIODevice *vdev = VIRTIO_DEVICE(n); 336 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 337 338 if (virtio_net_started(n, status)) { 339 /* Before using the device, we tell the network backend about the 340 * endianness to use when parsing vnet headers. If the backend 341 * can't do it, we fallback onto fixing the headers in the core 342 * virtio-net code. 343 */ 344 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 345 queue_pairs, true); 346 } else if (virtio_net_started(n, vdev->status)) { 347 /* After using the device, we need to reset the network backend to 348 * the default (guest native endianness), otherwise the guest may 349 * lose network connectivity if it is rebooted into a different 350 * endianness. 351 */ 352 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 353 } 354 } 355 356 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 357 { 358 unsigned int dropped = virtqueue_drop_all(vq); 359 if (dropped) { 360 virtio_notify(vdev, vq); 361 } 362 } 363 364 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 365 { 366 VirtIONet *n = VIRTIO_NET(vdev); 367 VirtIONetQueue *q; 368 int i; 369 uint8_t queue_status; 370 371 virtio_net_vnet_endian_status(n, status); 372 virtio_net_vhost_status(n, status); 373 374 for (i = 0; i < n->max_queue_pairs; i++) { 375 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 376 bool queue_started; 377 q = &n->vqs[i]; 378 379 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 380 queue_status = 0; 381 } else { 382 queue_status = status; 383 } 384 queue_started = 385 virtio_net_started(n, queue_status) && !n->vhost_started; 386 387 if (queue_started) { 388 qemu_flush_queued_packets(ncs); 389 } 390 391 if (!q->tx_waiting) { 392 continue; 393 } 394 395 if (queue_started) { 396 if (q->tx_timer) { 397 timer_mod(q->tx_timer, 398 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 399 } else { 400 qemu_bh_schedule(q->tx_bh); 401 } 402 } else { 403 if (q->tx_timer) { 404 timer_del(q->tx_timer); 405 } else { 406 qemu_bh_cancel(q->tx_bh); 407 } 408 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 409 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 410 vdev->vm_running) { 411 /* if tx is waiting we are likely have some packets in tx queue 412 * and disabled notification */ 413 q->tx_waiting = 0; 414 virtio_queue_set_notification(q->tx_vq, 1); 415 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 416 } 417 } 418 } 419 } 420 421 static void virtio_net_set_link_status(NetClientState *nc) 422 { 423 VirtIONet *n = qemu_get_nic_opaque(nc); 424 VirtIODevice *vdev = VIRTIO_DEVICE(n); 425 uint16_t old_status = n->status; 426 427 if (nc->link_down) 428 n->status &= ~VIRTIO_NET_S_LINK_UP; 429 else 430 n->status |= VIRTIO_NET_S_LINK_UP; 431 432 if (n->status != old_status) 433 virtio_notify_config(vdev); 434 435 virtio_net_set_status(vdev, vdev->status); 436 } 437 438 static void rxfilter_notify(NetClientState *nc) 439 { 440 VirtIONet *n = qemu_get_nic_opaque(nc); 441 442 if (nc->rxfilter_notify_enabled) { 443 char *path = object_get_canonical_path(OBJECT(n->qdev)); 444 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name, 445 n->netclient_name, path); 446 g_free(path); 447 448 /* disable event notification to avoid events flooding */ 449 nc->rxfilter_notify_enabled = 0; 450 } 451 } 452 453 static intList *get_vlan_table(VirtIONet *n) 454 { 455 intList *list; 456 int i, j; 457 458 list = NULL; 459 for (i = 0; i < MAX_VLAN >> 5; i++) { 460 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 461 if (n->vlans[i] & (1U << j)) { 462 QAPI_LIST_PREPEND(list, (i << 5) + j); 463 } 464 } 465 } 466 467 return list; 468 } 469 470 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 471 { 472 VirtIONet *n = qemu_get_nic_opaque(nc); 473 VirtIODevice *vdev = VIRTIO_DEVICE(n); 474 RxFilterInfo *info; 475 strList *str_list; 476 int i; 477 478 info = g_malloc0(sizeof(*info)); 479 info->name = g_strdup(nc->name); 480 info->promiscuous = n->promisc; 481 482 if (n->nouni) { 483 info->unicast = RX_STATE_NONE; 484 } else if (n->alluni) { 485 info->unicast = RX_STATE_ALL; 486 } else { 487 info->unicast = RX_STATE_NORMAL; 488 } 489 490 if (n->nomulti) { 491 info->multicast = RX_STATE_NONE; 492 } else if (n->allmulti) { 493 info->multicast = RX_STATE_ALL; 494 } else { 495 info->multicast = RX_STATE_NORMAL; 496 } 497 498 info->broadcast_allowed = n->nobcast; 499 info->multicast_overflow = n->mac_table.multi_overflow; 500 info->unicast_overflow = n->mac_table.uni_overflow; 501 502 info->main_mac = qemu_mac_strdup_printf(n->mac); 503 504 str_list = NULL; 505 for (i = 0; i < n->mac_table.first_multi; i++) { 506 QAPI_LIST_PREPEND(str_list, 507 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 508 } 509 info->unicast_table = str_list; 510 511 str_list = NULL; 512 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 513 QAPI_LIST_PREPEND(str_list, 514 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 515 } 516 info->multicast_table = str_list; 517 info->vlan_table = get_vlan_table(n); 518 519 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 520 info->vlan = RX_STATE_ALL; 521 } else if (!info->vlan_table) { 522 info->vlan = RX_STATE_NONE; 523 } else { 524 info->vlan = RX_STATE_NORMAL; 525 } 526 527 /* enable event notification after query */ 528 nc->rxfilter_notify_enabled = 1; 529 530 return info; 531 } 532 533 static void virtio_net_reset(VirtIODevice *vdev) 534 { 535 VirtIONet *n = VIRTIO_NET(vdev); 536 int i; 537 538 /* Reset back to compatibility mode */ 539 n->promisc = 1; 540 n->allmulti = 0; 541 n->alluni = 0; 542 n->nomulti = 0; 543 n->nouni = 0; 544 n->nobcast = 0; 545 /* multiqueue is disabled by default */ 546 n->curr_queue_pairs = 1; 547 timer_del(n->announce_timer.tm); 548 n->announce_timer.round = 0; 549 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 550 551 /* Flush any MAC and VLAN filter table state */ 552 n->mac_table.in_use = 0; 553 n->mac_table.first_multi = 0; 554 n->mac_table.multi_overflow = 0; 555 n->mac_table.uni_overflow = 0; 556 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 557 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 558 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 559 memset(n->vlans, 0, MAX_VLAN >> 3); 560 561 /* Flush any async TX */ 562 for (i = 0; i < n->max_queue_pairs; i++) { 563 NetClientState *nc = qemu_get_subqueue(n->nic, i); 564 565 if (nc->peer) { 566 qemu_flush_or_purge_queued_packets(nc->peer, true); 567 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 568 } 569 } 570 } 571 572 static void peer_test_vnet_hdr(VirtIONet *n) 573 { 574 NetClientState *nc = qemu_get_queue(n->nic); 575 if (!nc->peer) { 576 return; 577 } 578 579 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 580 } 581 582 static int peer_has_vnet_hdr(VirtIONet *n) 583 { 584 return n->has_vnet_hdr; 585 } 586 587 static int peer_has_ufo(VirtIONet *n) 588 { 589 if (!peer_has_vnet_hdr(n)) 590 return 0; 591 592 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 593 594 return n->has_ufo; 595 } 596 597 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 598 int version_1, int hash_report) 599 { 600 int i; 601 NetClientState *nc; 602 603 n->mergeable_rx_bufs = mergeable_rx_bufs; 604 605 if (version_1) { 606 n->guest_hdr_len = hash_report ? 607 sizeof(struct virtio_net_hdr_v1_hash) : 608 sizeof(struct virtio_net_hdr_mrg_rxbuf); 609 n->rss_data.populate_hash = !!hash_report; 610 } else { 611 n->guest_hdr_len = n->mergeable_rx_bufs ? 612 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 613 sizeof(struct virtio_net_hdr); 614 } 615 616 for (i = 0; i < n->max_queue_pairs; i++) { 617 nc = qemu_get_subqueue(n->nic, i); 618 619 if (peer_has_vnet_hdr(n) && 620 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 621 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 622 n->host_hdr_len = n->guest_hdr_len; 623 } 624 } 625 } 626 627 static int virtio_net_max_tx_queue_size(VirtIONet *n) 628 { 629 NetClientState *peer = n->nic_conf.peers.ncs[0]; 630 631 /* 632 * Backends other than vhost-user or vhost-vdpa don't support max queue 633 * size. 634 */ 635 if (!peer) { 636 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 637 } 638 639 switch(peer->info->type) { 640 case NET_CLIENT_DRIVER_VHOST_USER: 641 case NET_CLIENT_DRIVER_VHOST_VDPA: 642 return VIRTQUEUE_MAX_SIZE; 643 default: 644 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 645 }; 646 } 647 648 static int peer_attach(VirtIONet *n, int index) 649 { 650 NetClientState *nc = qemu_get_subqueue(n->nic, index); 651 652 if (!nc->peer) { 653 return 0; 654 } 655 656 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 657 vhost_set_vring_enable(nc->peer, 1); 658 } 659 660 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 661 return 0; 662 } 663 664 if (n->max_queue_pairs == 1) { 665 return 0; 666 } 667 668 return tap_enable(nc->peer); 669 } 670 671 static int peer_detach(VirtIONet *n, int index) 672 { 673 NetClientState *nc = qemu_get_subqueue(n->nic, index); 674 675 if (!nc->peer) { 676 return 0; 677 } 678 679 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 680 vhost_set_vring_enable(nc->peer, 0); 681 } 682 683 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 684 return 0; 685 } 686 687 return tap_disable(nc->peer); 688 } 689 690 static void virtio_net_set_queue_pairs(VirtIONet *n) 691 { 692 int i; 693 int r; 694 695 if (n->nic->peer_deleted) { 696 return; 697 } 698 699 for (i = 0; i < n->max_queue_pairs; i++) { 700 if (i < n->curr_queue_pairs) { 701 r = peer_attach(n, i); 702 assert(!r); 703 } else { 704 r = peer_detach(n, i); 705 assert(!r); 706 } 707 } 708 } 709 710 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 711 712 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 713 Error **errp) 714 { 715 VirtIONet *n = VIRTIO_NET(vdev); 716 NetClientState *nc = qemu_get_queue(n->nic); 717 718 /* Firstly sync all virtio-net possible supported features */ 719 features |= n->host_features; 720 721 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 722 723 if (!peer_has_vnet_hdr(n)) { 724 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 725 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 726 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 727 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 728 729 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 730 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 731 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 732 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 733 734 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 735 } 736 737 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 738 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 739 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 740 } 741 742 if (!get_vhost_net(nc->peer)) { 743 return features; 744 } 745 746 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 747 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 748 } 749 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 750 vdev->backend_features = features; 751 752 if (n->mtu_bypass_backend && 753 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 754 features |= (1ULL << VIRTIO_NET_F_MTU); 755 } 756 757 return features; 758 } 759 760 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 761 { 762 uint64_t features = 0; 763 764 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 765 * but also these: */ 766 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 767 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 768 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 769 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 770 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 771 772 return features; 773 } 774 775 static void virtio_net_apply_guest_offloads(VirtIONet *n) 776 { 777 qemu_set_offload(qemu_get_queue(n->nic)->peer, 778 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 779 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 780 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 781 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 782 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); 783 } 784 785 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) 786 { 787 static const uint64_t guest_offloads_mask = 788 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 789 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 790 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 791 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 792 (1ULL << VIRTIO_NET_F_GUEST_UFO); 793 794 return guest_offloads_mask & features; 795 } 796 797 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) 798 { 799 VirtIODevice *vdev = VIRTIO_DEVICE(n); 800 return virtio_net_guest_offloads_by_features(vdev->guest_features); 801 } 802 803 typedef struct { 804 VirtIONet *n; 805 DeviceState *dev; 806 } FailoverDevice; 807 808 /** 809 * Set the failover primary device 810 * 811 * @opaque: FailoverId to setup 812 * @opts: opts for device we are handling 813 * @errp: returns an error if this function fails 814 */ 815 static int failover_set_primary(DeviceState *dev, void *opaque) 816 { 817 FailoverDevice *fdev = opaque; 818 PCIDevice *pci_dev = (PCIDevice *) 819 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 820 821 if (!pci_dev) { 822 return 0; 823 } 824 825 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 826 fdev->dev = dev; 827 return 1; 828 } 829 830 return 0; 831 } 832 833 /** 834 * Find the primary device for this failover virtio-net 835 * 836 * @n: VirtIONet device 837 * @errp: returns an error if this function fails 838 */ 839 static DeviceState *failover_find_primary_device(VirtIONet *n) 840 { 841 FailoverDevice fdev = { 842 .n = n, 843 }; 844 845 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 846 NULL, NULL, &fdev); 847 return fdev.dev; 848 } 849 850 static void failover_add_primary(VirtIONet *n, Error **errp) 851 { 852 Error *err = NULL; 853 DeviceState *dev = failover_find_primary_device(n); 854 855 if (dev) { 856 return; 857 } 858 859 if (!n->primary_opts) { 860 error_setg(errp, "Primary device not found"); 861 error_append_hint(errp, "Virtio-net failover will not work. Make " 862 "sure primary device has parameter" 863 " failover_pair_id=%s\n", n->netclient_name); 864 return; 865 } 866 867 dev = qdev_device_add_from_qdict(n->primary_opts, 868 n->primary_opts_from_json, 869 &err); 870 if (err) { 871 qobject_unref(n->primary_opts); 872 n->primary_opts = NULL; 873 } else { 874 object_unref(OBJECT(dev)); 875 } 876 error_propagate(errp, err); 877 } 878 879 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 880 { 881 VirtIONet *n = VIRTIO_NET(vdev); 882 Error *err = NULL; 883 int i; 884 885 if (n->mtu_bypass_backend && 886 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 887 features &= ~(1ULL << VIRTIO_NET_F_MTU); 888 } 889 890 virtio_net_set_multiqueue(n, 891 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 892 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 893 894 virtio_net_set_mrg_rx_bufs(n, 895 virtio_has_feature(features, 896 VIRTIO_NET_F_MRG_RXBUF), 897 virtio_has_feature(features, 898 VIRTIO_F_VERSION_1), 899 virtio_has_feature(features, 900 VIRTIO_NET_F_HASH_REPORT)); 901 902 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 903 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 904 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 905 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 906 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 907 908 if (n->has_vnet_hdr) { 909 n->curr_guest_offloads = 910 virtio_net_guest_offloads_by_features(features); 911 virtio_net_apply_guest_offloads(n); 912 } 913 914 for (i = 0; i < n->max_queue_pairs; i++) { 915 NetClientState *nc = qemu_get_subqueue(n->nic, i); 916 917 if (!get_vhost_net(nc->peer)) { 918 continue; 919 } 920 vhost_net_ack_features(get_vhost_net(nc->peer), features); 921 } 922 923 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 924 memset(n->vlans, 0, MAX_VLAN >> 3); 925 } else { 926 memset(n->vlans, 0xff, MAX_VLAN >> 3); 927 } 928 929 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 930 qapi_event_send_failover_negotiated(n->netclient_name); 931 qatomic_set(&n->failover_primary_hidden, false); 932 failover_add_primary(n, &err); 933 if (err) { 934 if (!qtest_enabled()) { 935 warn_report_err(err); 936 } else { 937 error_free(err); 938 } 939 } 940 } 941 } 942 943 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 944 struct iovec *iov, unsigned int iov_cnt) 945 { 946 uint8_t on; 947 size_t s; 948 NetClientState *nc = qemu_get_queue(n->nic); 949 950 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 951 if (s != sizeof(on)) { 952 return VIRTIO_NET_ERR; 953 } 954 955 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 956 n->promisc = on; 957 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 958 n->allmulti = on; 959 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 960 n->alluni = on; 961 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 962 n->nomulti = on; 963 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 964 n->nouni = on; 965 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 966 n->nobcast = on; 967 } else { 968 return VIRTIO_NET_ERR; 969 } 970 971 rxfilter_notify(nc); 972 973 return VIRTIO_NET_OK; 974 } 975 976 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 977 struct iovec *iov, unsigned int iov_cnt) 978 { 979 VirtIODevice *vdev = VIRTIO_DEVICE(n); 980 uint64_t offloads; 981 size_t s; 982 983 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 984 return VIRTIO_NET_ERR; 985 } 986 987 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 988 if (s != sizeof(offloads)) { 989 return VIRTIO_NET_ERR; 990 } 991 992 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 993 uint64_t supported_offloads; 994 995 offloads = virtio_ldq_p(vdev, &offloads); 996 997 if (!n->has_vnet_hdr) { 998 return VIRTIO_NET_ERR; 999 } 1000 1001 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1002 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1003 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1004 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1005 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1006 1007 supported_offloads = virtio_net_supported_guest_offloads(n); 1008 if (offloads & ~supported_offloads) { 1009 return VIRTIO_NET_ERR; 1010 } 1011 1012 n->curr_guest_offloads = offloads; 1013 virtio_net_apply_guest_offloads(n); 1014 1015 return VIRTIO_NET_OK; 1016 } else { 1017 return VIRTIO_NET_ERR; 1018 } 1019 } 1020 1021 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1022 struct iovec *iov, unsigned int iov_cnt) 1023 { 1024 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1025 struct virtio_net_ctrl_mac mac_data; 1026 size_t s; 1027 NetClientState *nc = qemu_get_queue(n->nic); 1028 1029 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1030 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1031 return VIRTIO_NET_ERR; 1032 } 1033 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1034 assert(s == sizeof(n->mac)); 1035 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1036 rxfilter_notify(nc); 1037 1038 return VIRTIO_NET_OK; 1039 } 1040 1041 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1042 return VIRTIO_NET_ERR; 1043 } 1044 1045 int in_use = 0; 1046 int first_multi = 0; 1047 uint8_t uni_overflow = 0; 1048 uint8_t multi_overflow = 0; 1049 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1050 1051 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1052 sizeof(mac_data.entries)); 1053 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1054 if (s != sizeof(mac_data.entries)) { 1055 goto error; 1056 } 1057 iov_discard_front(&iov, &iov_cnt, s); 1058 1059 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1060 goto error; 1061 } 1062 1063 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1064 s = iov_to_buf(iov, iov_cnt, 0, macs, 1065 mac_data.entries * ETH_ALEN); 1066 if (s != mac_data.entries * ETH_ALEN) { 1067 goto error; 1068 } 1069 in_use += mac_data.entries; 1070 } else { 1071 uni_overflow = 1; 1072 } 1073 1074 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1075 1076 first_multi = in_use; 1077 1078 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1079 sizeof(mac_data.entries)); 1080 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1081 if (s != sizeof(mac_data.entries)) { 1082 goto error; 1083 } 1084 1085 iov_discard_front(&iov, &iov_cnt, s); 1086 1087 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1088 goto error; 1089 } 1090 1091 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1092 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1093 mac_data.entries * ETH_ALEN); 1094 if (s != mac_data.entries * ETH_ALEN) { 1095 goto error; 1096 } 1097 in_use += mac_data.entries; 1098 } else { 1099 multi_overflow = 1; 1100 } 1101 1102 n->mac_table.in_use = in_use; 1103 n->mac_table.first_multi = first_multi; 1104 n->mac_table.uni_overflow = uni_overflow; 1105 n->mac_table.multi_overflow = multi_overflow; 1106 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1107 g_free(macs); 1108 rxfilter_notify(nc); 1109 1110 return VIRTIO_NET_OK; 1111 1112 error: 1113 g_free(macs); 1114 return VIRTIO_NET_ERR; 1115 } 1116 1117 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1118 struct iovec *iov, unsigned int iov_cnt) 1119 { 1120 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1121 uint16_t vid; 1122 size_t s; 1123 NetClientState *nc = qemu_get_queue(n->nic); 1124 1125 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1126 vid = virtio_lduw_p(vdev, &vid); 1127 if (s != sizeof(vid)) { 1128 return VIRTIO_NET_ERR; 1129 } 1130 1131 if (vid >= MAX_VLAN) 1132 return VIRTIO_NET_ERR; 1133 1134 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1135 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1136 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1137 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1138 else 1139 return VIRTIO_NET_ERR; 1140 1141 rxfilter_notify(nc); 1142 1143 return VIRTIO_NET_OK; 1144 } 1145 1146 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1147 struct iovec *iov, unsigned int iov_cnt) 1148 { 1149 trace_virtio_net_handle_announce(n->announce_timer.round); 1150 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1151 n->status & VIRTIO_NET_S_ANNOUNCE) { 1152 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1153 if (n->announce_timer.round) { 1154 qemu_announce_timer_step(&n->announce_timer); 1155 } 1156 return VIRTIO_NET_OK; 1157 } else { 1158 return VIRTIO_NET_ERR; 1159 } 1160 } 1161 1162 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1163 1164 static void virtio_net_disable_rss(VirtIONet *n) 1165 { 1166 if (n->rss_data.enabled) { 1167 trace_virtio_net_rss_disable(); 1168 } 1169 n->rss_data.enabled = false; 1170 1171 virtio_net_detach_epbf_rss(n); 1172 } 1173 1174 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1175 { 1176 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1177 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1178 return false; 1179 } 1180 1181 return nc->info->set_steering_ebpf(nc, prog_fd); 1182 } 1183 1184 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1185 struct EBPFRSSConfig *config) 1186 { 1187 config->redirect = data->redirect; 1188 config->populate_hash = data->populate_hash; 1189 config->hash_types = data->hash_types; 1190 config->indirections_len = data->indirections_len; 1191 config->default_queue = data->default_queue; 1192 } 1193 1194 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1195 { 1196 struct EBPFRSSConfig config = {}; 1197 1198 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1199 return false; 1200 } 1201 1202 rss_data_to_rss_config(&n->rss_data, &config); 1203 1204 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1205 n->rss_data.indirections_table, n->rss_data.key)) { 1206 return false; 1207 } 1208 1209 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1210 return false; 1211 } 1212 1213 return true; 1214 } 1215 1216 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1217 { 1218 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1219 } 1220 1221 static bool virtio_net_load_ebpf(VirtIONet *n) 1222 { 1223 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1224 /* backend does't support steering ebpf */ 1225 return false; 1226 } 1227 1228 return ebpf_rss_load(&n->ebpf_rss); 1229 } 1230 1231 static void virtio_net_unload_ebpf(VirtIONet *n) 1232 { 1233 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1234 ebpf_rss_unload(&n->ebpf_rss); 1235 } 1236 1237 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1238 struct iovec *iov, 1239 unsigned int iov_cnt, 1240 bool do_rss) 1241 { 1242 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1243 struct virtio_net_rss_config cfg; 1244 size_t s, offset = 0, size_get; 1245 uint16_t queue_pairs, i; 1246 struct { 1247 uint16_t us; 1248 uint8_t b; 1249 } QEMU_PACKED temp; 1250 const char *err_msg = ""; 1251 uint32_t err_value = 0; 1252 1253 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1254 err_msg = "RSS is not negotiated"; 1255 goto error; 1256 } 1257 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1258 err_msg = "Hash report is not negotiated"; 1259 goto error; 1260 } 1261 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1262 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1263 if (s != size_get) { 1264 err_msg = "Short command buffer"; 1265 err_value = (uint32_t)s; 1266 goto error; 1267 } 1268 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1269 n->rss_data.indirections_len = 1270 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1271 n->rss_data.indirections_len++; 1272 if (!do_rss) { 1273 n->rss_data.indirections_len = 1; 1274 } 1275 if (!is_power_of_2(n->rss_data.indirections_len)) { 1276 err_msg = "Invalid size of indirection table"; 1277 err_value = n->rss_data.indirections_len; 1278 goto error; 1279 } 1280 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1281 err_msg = "Too large indirection table"; 1282 err_value = n->rss_data.indirections_len; 1283 goto error; 1284 } 1285 n->rss_data.default_queue = do_rss ? 1286 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1287 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1288 err_msg = "Invalid default queue"; 1289 err_value = n->rss_data.default_queue; 1290 goto error; 1291 } 1292 offset += size_get; 1293 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1294 g_free(n->rss_data.indirections_table); 1295 n->rss_data.indirections_table = g_malloc(size_get); 1296 if (!n->rss_data.indirections_table) { 1297 err_msg = "Can't allocate indirections table"; 1298 err_value = n->rss_data.indirections_len; 1299 goto error; 1300 } 1301 s = iov_to_buf(iov, iov_cnt, offset, 1302 n->rss_data.indirections_table, size_get); 1303 if (s != size_get) { 1304 err_msg = "Short indirection table buffer"; 1305 err_value = (uint32_t)s; 1306 goto error; 1307 } 1308 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1309 uint16_t val = n->rss_data.indirections_table[i]; 1310 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1311 } 1312 offset += size_get; 1313 size_get = sizeof(temp); 1314 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1315 if (s != size_get) { 1316 err_msg = "Can't get queue_pairs"; 1317 err_value = (uint32_t)s; 1318 goto error; 1319 } 1320 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1321 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1322 err_msg = "Invalid number of queue_pairs"; 1323 err_value = queue_pairs; 1324 goto error; 1325 } 1326 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1327 err_msg = "Invalid key size"; 1328 err_value = temp.b; 1329 goto error; 1330 } 1331 if (!temp.b && n->rss_data.hash_types) { 1332 err_msg = "No key provided"; 1333 err_value = 0; 1334 goto error; 1335 } 1336 if (!temp.b && !n->rss_data.hash_types) { 1337 virtio_net_disable_rss(n); 1338 return queue_pairs; 1339 } 1340 offset += size_get; 1341 size_get = temp.b; 1342 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1343 if (s != size_get) { 1344 err_msg = "Can get key buffer"; 1345 err_value = (uint32_t)s; 1346 goto error; 1347 } 1348 n->rss_data.enabled = true; 1349 1350 if (!n->rss_data.populate_hash) { 1351 if (!virtio_net_attach_epbf_rss(n)) { 1352 /* EBPF must be loaded for vhost */ 1353 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1354 warn_report("Can't load eBPF RSS for vhost"); 1355 goto error; 1356 } 1357 /* fallback to software RSS */ 1358 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1359 n->rss_data.enabled_software_rss = true; 1360 } 1361 } else { 1362 /* use software RSS for hash populating */ 1363 /* and detach eBPF if was loaded before */ 1364 virtio_net_detach_epbf_rss(n); 1365 n->rss_data.enabled_software_rss = true; 1366 } 1367 1368 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1369 n->rss_data.indirections_len, 1370 temp.b); 1371 return queue_pairs; 1372 error: 1373 trace_virtio_net_rss_error(err_msg, err_value); 1374 virtio_net_disable_rss(n); 1375 return 0; 1376 } 1377 1378 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1379 struct iovec *iov, unsigned int iov_cnt) 1380 { 1381 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1382 uint16_t queue_pairs; 1383 NetClientState *nc = qemu_get_queue(n->nic); 1384 1385 virtio_net_disable_rss(n); 1386 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1387 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1388 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1389 } 1390 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1391 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1392 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1393 struct virtio_net_ctrl_mq mq; 1394 size_t s; 1395 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1396 return VIRTIO_NET_ERR; 1397 } 1398 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1399 if (s != sizeof(mq)) { 1400 return VIRTIO_NET_ERR; 1401 } 1402 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1403 1404 } else { 1405 return VIRTIO_NET_ERR; 1406 } 1407 1408 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1409 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1410 queue_pairs > n->max_queue_pairs || 1411 !n->multiqueue) { 1412 return VIRTIO_NET_ERR; 1413 } 1414 1415 /* Avoid changing the number of queue_pairs for vdpa device in 1416 * userspace handler. A future fix is needed to handle the mq 1417 * change in userspace handler with vhost-vdpa. Let's disable 1418 * the mq handling from userspace for now and only allow get 1419 * done through the kernel. Ripples may be seen when falling 1420 * back to userspace, but without doing it qemu process would 1421 * crash on a recursive entry to virtio_net_set_status(). 1422 */ 1423 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1424 return VIRTIO_NET_ERR; 1425 } 1426 1427 n->curr_queue_pairs = queue_pairs; 1428 /* stop the backend before changing the number of queue_pairs to avoid handling a 1429 * disabled queue */ 1430 virtio_net_set_status(vdev, vdev->status); 1431 virtio_net_set_queue_pairs(n); 1432 1433 return VIRTIO_NET_OK; 1434 } 1435 1436 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1437 const struct iovec *in_sg, unsigned in_num, 1438 const struct iovec *out_sg, 1439 unsigned out_num) 1440 { 1441 VirtIONet *n = VIRTIO_NET(vdev); 1442 struct virtio_net_ctrl_hdr ctrl; 1443 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1444 size_t s; 1445 struct iovec *iov, *iov2; 1446 1447 if (iov_size(in_sg, in_num) < sizeof(status) || 1448 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1449 virtio_error(vdev, "virtio-net ctrl missing headers"); 1450 return 0; 1451 } 1452 1453 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1454 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1455 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1456 if (s != sizeof(ctrl)) { 1457 status = VIRTIO_NET_ERR; 1458 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1459 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1460 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1461 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1462 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1463 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1464 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1465 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1466 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1467 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1468 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1469 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1470 } 1471 1472 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1473 assert(s == sizeof(status)); 1474 1475 g_free(iov2); 1476 return sizeof(status); 1477 } 1478 1479 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1480 { 1481 VirtQueueElement *elem; 1482 1483 for (;;) { 1484 size_t written; 1485 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1486 if (!elem) { 1487 break; 1488 } 1489 1490 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1491 elem->out_sg, elem->out_num); 1492 if (written > 0) { 1493 virtqueue_push(vq, elem, written); 1494 virtio_notify(vdev, vq); 1495 g_free(elem); 1496 } else { 1497 virtqueue_detach_element(vq, elem, 0); 1498 g_free(elem); 1499 break; 1500 } 1501 } 1502 } 1503 1504 /* RX */ 1505 1506 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1507 { 1508 VirtIONet *n = VIRTIO_NET(vdev); 1509 int queue_index = vq2q(virtio_get_queue_index(vq)); 1510 1511 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1512 } 1513 1514 static bool virtio_net_can_receive(NetClientState *nc) 1515 { 1516 VirtIONet *n = qemu_get_nic_opaque(nc); 1517 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1518 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1519 1520 if (!vdev->vm_running) { 1521 return false; 1522 } 1523 1524 if (nc->queue_index >= n->curr_queue_pairs) { 1525 return false; 1526 } 1527 1528 if (!virtio_queue_ready(q->rx_vq) || 1529 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1530 return false; 1531 } 1532 1533 return true; 1534 } 1535 1536 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1537 { 1538 VirtIONet *n = q->n; 1539 if (virtio_queue_empty(q->rx_vq) || 1540 (n->mergeable_rx_bufs && 1541 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1542 virtio_queue_set_notification(q->rx_vq, 1); 1543 1544 /* To avoid a race condition where the guest has made some buffers 1545 * available after the above check but before notification was 1546 * enabled, check for available buffers again. 1547 */ 1548 if (virtio_queue_empty(q->rx_vq) || 1549 (n->mergeable_rx_bufs && 1550 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1551 return 0; 1552 } 1553 } 1554 1555 virtio_queue_set_notification(q->rx_vq, 0); 1556 return 1; 1557 } 1558 1559 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1560 { 1561 virtio_tswap16s(vdev, &hdr->hdr_len); 1562 virtio_tswap16s(vdev, &hdr->gso_size); 1563 virtio_tswap16s(vdev, &hdr->csum_start); 1564 virtio_tswap16s(vdev, &hdr->csum_offset); 1565 } 1566 1567 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1568 * it never finds out that the packets don't have valid checksums. This 1569 * causes dhclient to get upset. Fedora's carried a patch for ages to 1570 * fix this with Xen but it hasn't appeared in an upstream release of 1571 * dhclient yet. 1572 * 1573 * To avoid breaking existing guests, we catch udp packets and add 1574 * checksums. This is terrible but it's better than hacking the guest 1575 * kernels. 1576 * 1577 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1578 * we should provide a mechanism to disable it to avoid polluting the host 1579 * cache. 1580 */ 1581 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1582 uint8_t *buf, size_t size) 1583 { 1584 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1585 (size > 27 && size < 1500) && /* normal sized MTU */ 1586 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1587 (buf[23] == 17) && /* ip.protocol == UDP */ 1588 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1589 net_checksum_calculate(buf, size, CSUM_UDP); 1590 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1591 } 1592 } 1593 1594 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1595 const void *buf, size_t size) 1596 { 1597 if (n->has_vnet_hdr) { 1598 /* FIXME this cast is evil */ 1599 void *wbuf = (void *)buf; 1600 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1601 size - n->host_hdr_len); 1602 1603 if (n->needs_vnet_hdr_swap) { 1604 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1605 } 1606 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1607 } else { 1608 struct virtio_net_hdr hdr = { 1609 .flags = 0, 1610 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1611 }; 1612 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1613 } 1614 } 1615 1616 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1617 { 1618 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1619 static const uint8_t vlan[] = {0x81, 0x00}; 1620 uint8_t *ptr = (uint8_t *)buf; 1621 int i; 1622 1623 if (n->promisc) 1624 return 1; 1625 1626 ptr += n->host_hdr_len; 1627 1628 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1629 int vid = lduw_be_p(ptr + 14) & 0xfff; 1630 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1631 return 0; 1632 } 1633 1634 if (ptr[0] & 1) { // multicast 1635 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1636 return !n->nobcast; 1637 } else if (n->nomulti) { 1638 return 0; 1639 } else if (n->allmulti || n->mac_table.multi_overflow) { 1640 return 1; 1641 } 1642 1643 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1644 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1645 return 1; 1646 } 1647 } 1648 } else { // unicast 1649 if (n->nouni) { 1650 return 0; 1651 } else if (n->alluni || n->mac_table.uni_overflow) { 1652 return 1; 1653 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1654 return 1; 1655 } 1656 1657 for (i = 0; i < n->mac_table.first_multi; i++) { 1658 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1659 return 1; 1660 } 1661 } 1662 } 1663 1664 return 0; 1665 } 1666 1667 static uint8_t virtio_net_get_hash_type(bool isip4, 1668 bool isip6, 1669 bool isudp, 1670 bool istcp, 1671 uint32_t types) 1672 { 1673 if (isip4) { 1674 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { 1675 return NetPktRssIpV4Tcp; 1676 } 1677 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { 1678 return NetPktRssIpV4Udp; 1679 } 1680 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1681 return NetPktRssIpV4; 1682 } 1683 } else if (isip6) { 1684 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 1685 VIRTIO_NET_RSS_HASH_TYPE_TCPv6; 1686 1687 if (istcp && (types & mask)) { 1688 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? 1689 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; 1690 } 1691 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; 1692 if (isudp && (types & mask)) { 1693 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? 1694 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; 1695 } 1696 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; 1697 if (types & mask) { 1698 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? 1699 NetPktRssIpV6Ex : NetPktRssIpV6; 1700 } 1701 } 1702 return 0xff; 1703 } 1704 1705 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1706 uint32_t hash) 1707 { 1708 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1709 hdr->hash_value = hash; 1710 hdr->hash_report = report; 1711 } 1712 1713 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1714 size_t size) 1715 { 1716 VirtIONet *n = qemu_get_nic_opaque(nc); 1717 unsigned int index = nc->queue_index, new_index = index; 1718 struct NetRxPkt *pkt = n->rx_pkt; 1719 uint8_t net_hash_type; 1720 uint32_t hash; 1721 bool isip4, isip6, isudp, istcp; 1722 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1723 VIRTIO_NET_HASH_REPORT_IPv4, 1724 VIRTIO_NET_HASH_REPORT_TCPv4, 1725 VIRTIO_NET_HASH_REPORT_TCPv6, 1726 VIRTIO_NET_HASH_REPORT_IPv6, 1727 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1728 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1729 VIRTIO_NET_HASH_REPORT_UDPv4, 1730 VIRTIO_NET_HASH_REPORT_UDPv6, 1731 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1732 }; 1733 1734 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, 1735 size - n->host_hdr_len); 1736 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1737 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { 1738 istcp = isudp = false; 1739 } 1740 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { 1741 istcp = isudp = false; 1742 } 1743 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, 1744 n->rss_data.hash_types); 1745 if (net_hash_type > NetPktRssIpV6UdpEx) { 1746 if (n->rss_data.populate_hash) { 1747 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1748 } 1749 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1750 } 1751 1752 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1753 1754 if (n->rss_data.populate_hash) { 1755 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1756 } 1757 1758 if (n->rss_data.redirect) { 1759 new_index = hash & (n->rss_data.indirections_len - 1); 1760 new_index = n->rss_data.indirections_table[new_index]; 1761 } 1762 1763 return (index == new_index) ? -1 : new_index; 1764 } 1765 1766 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1767 size_t size, bool no_rss) 1768 { 1769 VirtIONet *n = qemu_get_nic_opaque(nc); 1770 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1771 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1772 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1773 size_t lens[VIRTQUEUE_MAX_SIZE]; 1774 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1775 struct virtio_net_hdr_mrg_rxbuf mhdr; 1776 unsigned mhdr_cnt = 0; 1777 size_t offset, i, guest_offset, j; 1778 ssize_t err; 1779 1780 if (!virtio_net_can_receive(nc)) { 1781 return -1; 1782 } 1783 1784 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1785 int index = virtio_net_process_rss(nc, buf, size); 1786 if (index >= 0) { 1787 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1788 return virtio_net_receive_rcu(nc2, buf, size, true); 1789 } 1790 } 1791 1792 /* hdr_len refers to the header we supply to the guest */ 1793 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1794 return 0; 1795 } 1796 1797 if (!receive_filter(n, buf, size)) 1798 return size; 1799 1800 offset = i = 0; 1801 1802 while (offset < size) { 1803 VirtQueueElement *elem; 1804 int len, total; 1805 const struct iovec *sg; 1806 1807 total = 0; 1808 1809 if (i == VIRTQUEUE_MAX_SIZE) { 1810 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1811 err = size; 1812 goto err; 1813 } 1814 1815 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1816 if (!elem) { 1817 if (i) { 1818 virtio_error(vdev, "virtio-net unexpected empty queue: " 1819 "i %zd mergeable %d offset %zd, size %zd, " 1820 "guest hdr len %zd, host hdr len %zd " 1821 "guest features 0x%" PRIx64, 1822 i, n->mergeable_rx_bufs, offset, size, 1823 n->guest_hdr_len, n->host_hdr_len, 1824 vdev->guest_features); 1825 } 1826 err = -1; 1827 goto err; 1828 } 1829 1830 if (elem->in_num < 1) { 1831 virtio_error(vdev, 1832 "virtio-net receive queue contains no in buffers"); 1833 virtqueue_detach_element(q->rx_vq, elem, 0); 1834 g_free(elem); 1835 err = -1; 1836 goto err; 1837 } 1838 1839 sg = elem->in_sg; 1840 if (i == 0) { 1841 assert(offset == 0); 1842 if (n->mergeable_rx_bufs) { 1843 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1844 sg, elem->in_num, 1845 offsetof(typeof(mhdr), num_buffers), 1846 sizeof(mhdr.num_buffers)); 1847 } 1848 1849 receive_header(n, sg, elem->in_num, buf, size); 1850 if (n->rss_data.populate_hash) { 1851 offset = sizeof(mhdr); 1852 iov_from_buf(sg, elem->in_num, offset, 1853 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1854 } 1855 offset = n->host_hdr_len; 1856 total += n->guest_hdr_len; 1857 guest_offset = n->guest_hdr_len; 1858 } else { 1859 guest_offset = 0; 1860 } 1861 1862 /* copy in packet. ugh */ 1863 len = iov_from_buf(sg, elem->in_num, guest_offset, 1864 buf + offset, size - offset); 1865 total += len; 1866 offset += len; 1867 /* If buffers can't be merged, at this point we 1868 * must have consumed the complete packet. 1869 * Otherwise, drop it. */ 1870 if (!n->mergeable_rx_bufs && offset < size) { 1871 virtqueue_unpop(q->rx_vq, elem, total); 1872 g_free(elem); 1873 err = size; 1874 goto err; 1875 } 1876 1877 elems[i] = elem; 1878 lens[i] = total; 1879 i++; 1880 } 1881 1882 if (mhdr_cnt) { 1883 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1884 iov_from_buf(mhdr_sg, mhdr_cnt, 1885 0, 1886 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1887 } 1888 1889 for (j = 0; j < i; j++) { 1890 /* signal other side */ 1891 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1892 g_free(elems[j]); 1893 } 1894 1895 virtqueue_flush(q->rx_vq, i); 1896 virtio_notify(vdev, q->rx_vq); 1897 1898 return size; 1899 1900 err: 1901 for (j = 0; j < i; j++) { 1902 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1903 g_free(elems[j]); 1904 } 1905 1906 return err; 1907 } 1908 1909 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1910 size_t size) 1911 { 1912 RCU_READ_LOCK_GUARD(); 1913 1914 return virtio_net_receive_rcu(nc, buf, size, false); 1915 } 1916 1917 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1918 const uint8_t *buf, 1919 VirtioNetRscUnit *unit) 1920 { 1921 uint16_t ip_hdrlen; 1922 struct ip_header *ip; 1923 1924 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1925 + sizeof(struct eth_header)); 1926 unit->ip = (void *)ip; 1927 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1928 unit->ip_plen = &ip->ip_len; 1929 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1930 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1931 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1932 } 1933 1934 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1935 const uint8_t *buf, 1936 VirtioNetRscUnit *unit) 1937 { 1938 struct ip6_header *ip6; 1939 1940 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 1941 + sizeof(struct eth_header)); 1942 unit->ip = ip6; 1943 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 1944 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 1945 + sizeof(struct ip6_header)); 1946 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1947 1948 /* There is a difference between payload lenght in ipv4 and v6, 1949 ip header is excluded in ipv6 */ 1950 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 1951 } 1952 1953 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 1954 VirtioNetRscSeg *seg) 1955 { 1956 int ret; 1957 struct virtio_net_hdr_v1 *h; 1958 1959 h = (struct virtio_net_hdr_v1 *)seg->buf; 1960 h->flags = 0; 1961 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 1962 1963 if (seg->is_coalesced) { 1964 h->rsc.segments = seg->packets; 1965 h->rsc.dup_acks = seg->dup_ack; 1966 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 1967 if (chain->proto == ETH_P_IP) { 1968 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1969 } else { 1970 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1971 } 1972 } 1973 1974 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 1975 QTAILQ_REMOVE(&chain->buffers, seg, next); 1976 g_free(seg->buf); 1977 g_free(seg); 1978 1979 return ret; 1980 } 1981 1982 static void virtio_net_rsc_purge(void *opq) 1983 { 1984 VirtioNetRscSeg *seg, *rn; 1985 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 1986 1987 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 1988 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 1989 chain->stat.purge_failed++; 1990 continue; 1991 } 1992 } 1993 1994 chain->stat.timer++; 1995 if (!QTAILQ_EMPTY(&chain->buffers)) { 1996 timer_mod(chain->drain_timer, 1997 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 1998 } 1999 } 2000 2001 static void virtio_net_rsc_cleanup(VirtIONet *n) 2002 { 2003 VirtioNetRscChain *chain, *rn_chain; 2004 VirtioNetRscSeg *seg, *rn_seg; 2005 2006 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2007 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2008 QTAILQ_REMOVE(&chain->buffers, seg, next); 2009 g_free(seg->buf); 2010 g_free(seg); 2011 } 2012 2013 timer_free(chain->drain_timer); 2014 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2015 g_free(chain); 2016 } 2017 } 2018 2019 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2020 NetClientState *nc, 2021 const uint8_t *buf, size_t size) 2022 { 2023 uint16_t hdr_len; 2024 VirtioNetRscSeg *seg; 2025 2026 hdr_len = chain->n->guest_hdr_len; 2027 seg = g_new(VirtioNetRscSeg, 1); 2028 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2029 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2030 memcpy(seg->buf, buf, size); 2031 seg->size = size; 2032 seg->packets = 1; 2033 seg->dup_ack = 0; 2034 seg->is_coalesced = 0; 2035 seg->nc = nc; 2036 2037 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2038 chain->stat.cache++; 2039 2040 switch (chain->proto) { 2041 case ETH_P_IP: 2042 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2043 break; 2044 case ETH_P_IPV6: 2045 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2046 break; 2047 default: 2048 g_assert_not_reached(); 2049 } 2050 } 2051 2052 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2053 VirtioNetRscSeg *seg, 2054 const uint8_t *buf, 2055 struct tcp_header *n_tcp, 2056 struct tcp_header *o_tcp) 2057 { 2058 uint32_t nack, oack; 2059 uint16_t nwin, owin; 2060 2061 nack = htonl(n_tcp->th_ack); 2062 nwin = htons(n_tcp->th_win); 2063 oack = htonl(o_tcp->th_ack); 2064 owin = htons(o_tcp->th_win); 2065 2066 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2067 chain->stat.ack_out_of_win++; 2068 return RSC_FINAL; 2069 } else if (nack == oack) { 2070 /* duplicated ack or window probe */ 2071 if (nwin == owin) { 2072 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2073 chain->stat.dup_ack++; 2074 return RSC_FINAL; 2075 } else { 2076 /* Coalesce window update */ 2077 o_tcp->th_win = n_tcp->th_win; 2078 chain->stat.win_update++; 2079 return RSC_COALESCE; 2080 } 2081 } else { 2082 /* pure ack, go to 'C', finalize*/ 2083 chain->stat.pure_ack++; 2084 return RSC_FINAL; 2085 } 2086 } 2087 2088 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2089 VirtioNetRscSeg *seg, 2090 const uint8_t *buf, 2091 VirtioNetRscUnit *n_unit) 2092 { 2093 void *data; 2094 uint16_t o_ip_len; 2095 uint32_t nseq, oseq; 2096 VirtioNetRscUnit *o_unit; 2097 2098 o_unit = &seg->unit; 2099 o_ip_len = htons(*o_unit->ip_plen); 2100 nseq = htonl(n_unit->tcp->th_seq); 2101 oseq = htonl(o_unit->tcp->th_seq); 2102 2103 /* out of order or retransmitted. */ 2104 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2105 chain->stat.data_out_of_win++; 2106 return RSC_FINAL; 2107 } 2108 2109 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2110 if (nseq == oseq) { 2111 if ((o_unit->payload == 0) && n_unit->payload) { 2112 /* From no payload to payload, normal case, not a dup ack or etc */ 2113 chain->stat.data_after_pure_ack++; 2114 goto coalesce; 2115 } else { 2116 return virtio_net_rsc_handle_ack(chain, seg, buf, 2117 n_unit->tcp, o_unit->tcp); 2118 } 2119 } else if ((nseq - oseq) != o_unit->payload) { 2120 /* Not a consistent packet, out of order */ 2121 chain->stat.data_out_of_order++; 2122 return RSC_FINAL; 2123 } else { 2124 coalesce: 2125 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2126 chain->stat.over_size++; 2127 return RSC_FINAL; 2128 } 2129 2130 /* Here comes the right data, the payload length in v4/v6 is different, 2131 so use the field value to update and record the new data len */ 2132 o_unit->payload += n_unit->payload; /* update new data len */ 2133 2134 /* update field in ip header */ 2135 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2136 2137 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2138 for windows guest, while this may change the behavior for linux 2139 guest (only if it uses RSC feature). */ 2140 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2141 2142 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2143 o_unit->tcp->th_win = n_unit->tcp->th_win; 2144 2145 memmove(seg->buf + seg->size, data, n_unit->payload); 2146 seg->size += n_unit->payload; 2147 seg->packets++; 2148 chain->stat.coalesced++; 2149 return RSC_COALESCE; 2150 } 2151 } 2152 2153 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2154 VirtioNetRscSeg *seg, 2155 const uint8_t *buf, size_t size, 2156 VirtioNetRscUnit *unit) 2157 { 2158 struct ip_header *ip1, *ip2; 2159 2160 ip1 = (struct ip_header *)(unit->ip); 2161 ip2 = (struct ip_header *)(seg->unit.ip); 2162 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2163 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2164 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2165 chain->stat.no_match++; 2166 return RSC_NO_MATCH; 2167 } 2168 2169 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2170 } 2171 2172 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2173 VirtioNetRscSeg *seg, 2174 const uint8_t *buf, size_t size, 2175 VirtioNetRscUnit *unit) 2176 { 2177 struct ip6_header *ip1, *ip2; 2178 2179 ip1 = (struct ip6_header *)(unit->ip); 2180 ip2 = (struct ip6_header *)(seg->unit.ip); 2181 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2182 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2183 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2184 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2185 chain->stat.no_match++; 2186 return RSC_NO_MATCH; 2187 } 2188 2189 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2190 } 2191 2192 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2193 * to prevent out of order */ 2194 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2195 struct tcp_header *tcp) 2196 { 2197 uint16_t tcp_hdr; 2198 uint16_t tcp_flag; 2199 2200 tcp_flag = htons(tcp->th_offset_flags); 2201 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2202 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2203 if (tcp_flag & TH_SYN) { 2204 chain->stat.tcp_syn++; 2205 return RSC_BYPASS; 2206 } 2207 2208 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2209 chain->stat.tcp_ctrl_drain++; 2210 return RSC_FINAL; 2211 } 2212 2213 if (tcp_hdr > sizeof(struct tcp_header)) { 2214 chain->stat.tcp_all_opt++; 2215 return RSC_FINAL; 2216 } 2217 2218 return RSC_CANDIDATE; 2219 } 2220 2221 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2222 NetClientState *nc, 2223 const uint8_t *buf, size_t size, 2224 VirtioNetRscUnit *unit) 2225 { 2226 int ret; 2227 VirtioNetRscSeg *seg, *nseg; 2228 2229 if (QTAILQ_EMPTY(&chain->buffers)) { 2230 chain->stat.empty_cache++; 2231 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2232 timer_mod(chain->drain_timer, 2233 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2234 return size; 2235 } 2236 2237 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2238 if (chain->proto == ETH_P_IP) { 2239 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2240 } else { 2241 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2242 } 2243 2244 if (ret == RSC_FINAL) { 2245 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2246 /* Send failed */ 2247 chain->stat.final_failed++; 2248 return 0; 2249 } 2250 2251 /* Send current packet */ 2252 return virtio_net_do_receive(nc, buf, size); 2253 } else if (ret == RSC_NO_MATCH) { 2254 continue; 2255 } else { 2256 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2257 seg->is_coalesced = 1; 2258 return size; 2259 } 2260 } 2261 2262 chain->stat.no_match_cache++; 2263 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2264 return size; 2265 } 2266 2267 /* Drain a connection data, this is to avoid out of order segments */ 2268 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2269 NetClientState *nc, 2270 const uint8_t *buf, size_t size, 2271 uint16_t ip_start, uint16_t ip_size, 2272 uint16_t tcp_port) 2273 { 2274 VirtioNetRscSeg *seg, *nseg; 2275 uint32_t ppair1, ppair2; 2276 2277 ppair1 = *(uint32_t *)(buf + tcp_port); 2278 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2279 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2280 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2281 || (ppair1 != ppair2)) { 2282 continue; 2283 } 2284 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2285 chain->stat.drain_failed++; 2286 } 2287 2288 break; 2289 } 2290 2291 return virtio_net_do_receive(nc, buf, size); 2292 } 2293 2294 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2295 struct ip_header *ip, 2296 const uint8_t *buf, size_t size) 2297 { 2298 uint16_t ip_len; 2299 2300 /* Not an ipv4 packet */ 2301 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2302 chain->stat.ip_option++; 2303 return RSC_BYPASS; 2304 } 2305 2306 /* Don't handle packets with ip option */ 2307 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2308 chain->stat.ip_option++; 2309 return RSC_BYPASS; 2310 } 2311 2312 if (ip->ip_p != IPPROTO_TCP) { 2313 chain->stat.bypass_not_tcp++; 2314 return RSC_BYPASS; 2315 } 2316 2317 /* Don't handle packets with ip fragment */ 2318 if (!(htons(ip->ip_off) & IP_DF)) { 2319 chain->stat.ip_frag++; 2320 return RSC_BYPASS; 2321 } 2322 2323 /* Don't handle packets with ecn flag */ 2324 if (IPTOS_ECN(ip->ip_tos)) { 2325 chain->stat.ip_ecn++; 2326 return RSC_BYPASS; 2327 } 2328 2329 ip_len = htons(ip->ip_len); 2330 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2331 || ip_len > (size - chain->n->guest_hdr_len - 2332 sizeof(struct eth_header))) { 2333 chain->stat.ip_hacked++; 2334 return RSC_BYPASS; 2335 } 2336 2337 return RSC_CANDIDATE; 2338 } 2339 2340 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2341 NetClientState *nc, 2342 const uint8_t *buf, size_t size) 2343 { 2344 int32_t ret; 2345 uint16_t hdr_len; 2346 VirtioNetRscUnit unit; 2347 2348 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2349 2350 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2351 + sizeof(struct tcp_header))) { 2352 chain->stat.bypass_not_tcp++; 2353 return virtio_net_do_receive(nc, buf, size); 2354 } 2355 2356 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2357 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2358 != RSC_CANDIDATE) { 2359 return virtio_net_do_receive(nc, buf, size); 2360 } 2361 2362 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2363 if (ret == RSC_BYPASS) { 2364 return virtio_net_do_receive(nc, buf, size); 2365 } else if (ret == RSC_FINAL) { 2366 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2367 ((hdr_len + sizeof(struct eth_header)) + 12), 2368 VIRTIO_NET_IP4_ADDR_SIZE, 2369 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2370 } 2371 2372 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2373 } 2374 2375 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2376 struct ip6_header *ip6, 2377 const uint8_t *buf, size_t size) 2378 { 2379 uint16_t ip_len; 2380 2381 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2382 != IP_HEADER_VERSION_6) { 2383 return RSC_BYPASS; 2384 } 2385 2386 /* Both option and protocol is checked in this */ 2387 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2388 chain->stat.bypass_not_tcp++; 2389 return RSC_BYPASS; 2390 } 2391 2392 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2393 if (ip_len < sizeof(struct tcp_header) || 2394 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2395 - sizeof(struct ip6_header))) { 2396 chain->stat.ip_hacked++; 2397 return RSC_BYPASS; 2398 } 2399 2400 /* Don't handle packets with ecn flag */ 2401 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2402 chain->stat.ip_ecn++; 2403 return RSC_BYPASS; 2404 } 2405 2406 return RSC_CANDIDATE; 2407 } 2408 2409 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2410 const uint8_t *buf, size_t size) 2411 { 2412 int32_t ret; 2413 uint16_t hdr_len; 2414 VirtioNetRscChain *chain; 2415 VirtioNetRscUnit unit; 2416 2417 chain = (VirtioNetRscChain *)opq; 2418 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2419 2420 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2421 + sizeof(tcp_header))) { 2422 return virtio_net_do_receive(nc, buf, size); 2423 } 2424 2425 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2426 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2427 unit.ip, buf, size)) { 2428 return virtio_net_do_receive(nc, buf, size); 2429 } 2430 2431 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2432 if (ret == RSC_BYPASS) { 2433 return virtio_net_do_receive(nc, buf, size); 2434 } else if (ret == RSC_FINAL) { 2435 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2436 ((hdr_len + sizeof(struct eth_header)) + 8), 2437 VIRTIO_NET_IP6_ADDR_SIZE, 2438 hdr_len + sizeof(struct eth_header) 2439 + sizeof(struct ip6_header)); 2440 } 2441 2442 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2443 } 2444 2445 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2446 NetClientState *nc, 2447 uint16_t proto) 2448 { 2449 VirtioNetRscChain *chain; 2450 2451 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2452 return NULL; 2453 } 2454 2455 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2456 if (chain->proto == proto) { 2457 return chain; 2458 } 2459 } 2460 2461 chain = g_malloc(sizeof(*chain)); 2462 chain->n = n; 2463 chain->proto = proto; 2464 if (proto == (uint16_t)ETH_P_IP) { 2465 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2466 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2467 } else { 2468 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2469 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2470 } 2471 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2472 virtio_net_rsc_purge, chain); 2473 memset(&chain->stat, 0, sizeof(chain->stat)); 2474 2475 QTAILQ_INIT(&chain->buffers); 2476 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2477 2478 return chain; 2479 } 2480 2481 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2482 const uint8_t *buf, 2483 size_t size) 2484 { 2485 uint16_t proto; 2486 VirtioNetRscChain *chain; 2487 struct eth_header *eth; 2488 VirtIONet *n; 2489 2490 n = qemu_get_nic_opaque(nc); 2491 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2492 return virtio_net_do_receive(nc, buf, size); 2493 } 2494 2495 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2496 proto = htons(eth->h_proto); 2497 2498 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2499 if (chain) { 2500 chain->stat.received++; 2501 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2502 return virtio_net_rsc_receive4(chain, nc, buf, size); 2503 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2504 return virtio_net_rsc_receive6(chain, nc, buf, size); 2505 } 2506 } 2507 return virtio_net_do_receive(nc, buf, size); 2508 } 2509 2510 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2511 size_t size) 2512 { 2513 VirtIONet *n = qemu_get_nic_opaque(nc); 2514 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2515 return virtio_net_rsc_receive(nc, buf, size); 2516 } else { 2517 return virtio_net_do_receive(nc, buf, size); 2518 } 2519 } 2520 2521 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2522 2523 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2524 { 2525 VirtIONet *n = qemu_get_nic_opaque(nc); 2526 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2527 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2528 2529 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2530 virtio_notify(vdev, q->tx_vq); 2531 2532 g_free(q->async_tx.elem); 2533 q->async_tx.elem = NULL; 2534 2535 virtio_queue_set_notification(q->tx_vq, 1); 2536 virtio_net_flush_tx(q); 2537 } 2538 2539 /* TX */ 2540 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2541 { 2542 VirtIONet *n = q->n; 2543 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2544 VirtQueueElement *elem; 2545 int32_t num_packets = 0; 2546 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2547 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2548 return num_packets; 2549 } 2550 2551 if (q->async_tx.elem) { 2552 virtio_queue_set_notification(q->tx_vq, 0); 2553 return num_packets; 2554 } 2555 2556 for (;;) { 2557 ssize_t ret; 2558 unsigned int out_num; 2559 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2560 struct virtio_net_hdr_mrg_rxbuf mhdr; 2561 2562 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2563 if (!elem) { 2564 break; 2565 } 2566 2567 out_num = elem->out_num; 2568 out_sg = elem->out_sg; 2569 if (out_num < 1) { 2570 virtio_error(vdev, "virtio-net header not in first element"); 2571 virtqueue_detach_element(q->tx_vq, elem, 0); 2572 g_free(elem); 2573 return -EINVAL; 2574 } 2575 2576 if (n->has_vnet_hdr) { 2577 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2578 n->guest_hdr_len) { 2579 virtio_error(vdev, "virtio-net header incorrect"); 2580 virtqueue_detach_element(q->tx_vq, elem, 0); 2581 g_free(elem); 2582 return -EINVAL; 2583 } 2584 if (n->needs_vnet_hdr_swap) { 2585 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2586 sg2[0].iov_base = &mhdr; 2587 sg2[0].iov_len = n->guest_hdr_len; 2588 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2589 out_sg, out_num, 2590 n->guest_hdr_len, -1); 2591 if (out_num == VIRTQUEUE_MAX_SIZE) { 2592 goto drop; 2593 } 2594 out_num += 1; 2595 out_sg = sg2; 2596 } 2597 } 2598 /* 2599 * If host wants to see the guest header as is, we can 2600 * pass it on unchanged. Otherwise, copy just the parts 2601 * that host is interested in. 2602 */ 2603 assert(n->host_hdr_len <= n->guest_hdr_len); 2604 if (n->host_hdr_len != n->guest_hdr_len) { 2605 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2606 out_sg, out_num, 2607 0, n->host_hdr_len); 2608 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2609 out_sg, out_num, 2610 n->guest_hdr_len, -1); 2611 out_num = sg_num; 2612 out_sg = sg; 2613 } 2614 2615 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2616 out_sg, out_num, virtio_net_tx_complete); 2617 if (ret == 0) { 2618 virtio_queue_set_notification(q->tx_vq, 0); 2619 q->async_tx.elem = elem; 2620 return -EBUSY; 2621 } 2622 2623 drop: 2624 virtqueue_push(q->tx_vq, elem, 0); 2625 virtio_notify(vdev, q->tx_vq); 2626 g_free(elem); 2627 2628 if (++num_packets >= n->tx_burst) { 2629 break; 2630 } 2631 } 2632 return num_packets; 2633 } 2634 2635 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2636 { 2637 VirtIONet *n = VIRTIO_NET(vdev); 2638 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2639 2640 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2641 virtio_net_drop_tx_queue_data(vdev, vq); 2642 return; 2643 } 2644 2645 /* This happens when device was stopped but VCPU wasn't. */ 2646 if (!vdev->vm_running) { 2647 q->tx_waiting = 1; 2648 return; 2649 } 2650 2651 if (q->tx_waiting) { 2652 virtio_queue_set_notification(vq, 1); 2653 timer_del(q->tx_timer); 2654 q->tx_waiting = 0; 2655 if (virtio_net_flush_tx(q) == -EINVAL) { 2656 return; 2657 } 2658 } else { 2659 timer_mod(q->tx_timer, 2660 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2661 q->tx_waiting = 1; 2662 virtio_queue_set_notification(vq, 0); 2663 } 2664 } 2665 2666 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2667 { 2668 VirtIONet *n = VIRTIO_NET(vdev); 2669 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2670 2671 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2672 virtio_net_drop_tx_queue_data(vdev, vq); 2673 return; 2674 } 2675 2676 if (unlikely(q->tx_waiting)) { 2677 return; 2678 } 2679 q->tx_waiting = 1; 2680 /* This happens when device was stopped but VCPU wasn't. */ 2681 if (!vdev->vm_running) { 2682 return; 2683 } 2684 virtio_queue_set_notification(vq, 0); 2685 qemu_bh_schedule(q->tx_bh); 2686 } 2687 2688 static void virtio_net_tx_timer(void *opaque) 2689 { 2690 VirtIONetQueue *q = opaque; 2691 VirtIONet *n = q->n; 2692 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2693 /* This happens when device was stopped but BH wasn't. */ 2694 if (!vdev->vm_running) { 2695 /* Make sure tx waiting is set, so we'll run when restarted. */ 2696 assert(q->tx_waiting); 2697 return; 2698 } 2699 2700 q->tx_waiting = 0; 2701 2702 /* Just in case the driver is not ready on more */ 2703 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2704 return; 2705 } 2706 2707 virtio_queue_set_notification(q->tx_vq, 1); 2708 virtio_net_flush_tx(q); 2709 } 2710 2711 static void virtio_net_tx_bh(void *opaque) 2712 { 2713 VirtIONetQueue *q = opaque; 2714 VirtIONet *n = q->n; 2715 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2716 int32_t ret; 2717 2718 /* This happens when device was stopped but BH wasn't. */ 2719 if (!vdev->vm_running) { 2720 /* Make sure tx waiting is set, so we'll run when restarted. */ 2721 assert(q->tx_waiting); 2722 return; 2723 } 2724 2725 q->tx_waiting = 0; 2726 2727 /* Just in case the driver is not ready on more */ 2728 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2729 return; 2730 } 2731 2732 ret = virtio_net_flush_tx(q); 2733 if (ret == -EBUSY || ret == -EINVAL) { 2734 return; /* Notification re-enable handled by tx_complete or device 2735 * broken */ 2736 } 2737 2738 /* If we flush a full burst of packets, assume there are 2739 * more coming and immediately reschedule */ 2740 if (ret >= n->tx_burst) { 2741 qemu_bh_schedule(q->tx_bh); 2742 q->tx_waiting = 1; 2743 return; 2744 } 2745 2746 /* If less than a full burst, re-enable notification and flush 2747 * anything that may have come in while we weren't looking. If 2748 * we find something, assume the guest is still active and reschedule */ 2749 virtio_queue_set_notification(q->tx_vq, 1); 2750 ret = virtio_net_flush_tx(q); 2751 if (ret == -EINVAL) { 2752 return; 2753 } else if (ret > 0) { 2754 virtio_queue_set_notification(q->tx_vq, 0); 2755 qemu_bh_schedule(q->tx_bh); 2756 q->tx_waiting = 1; 2757 } 2758 } 2759 2760 static void virtio_net_add_queue(VirtIONet *n, int index) 2761 { 2762 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2763 2764 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2765 virtio_net_handle_rx); 2766 2767 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2768 n->vqs[index].tx_vq = 2769 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2770 virtio_net_handle_tx_timer); 2771 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2772 virtio_net_tx_timer, 2773 &n->vqs[index]); 2774 } else { 2775 n->vqs[index].tx_vq = 2776 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2777 virtio_net_handle_tx_bh); 2778 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2779 } 2780 2781 n->vqs[index].tx_waiting = 0; 2782 n->vqs[index].n = n; 2783 } 2784 2785 static void virtio_net_del_queue(VirtIONet *n, int index) 2786 { 2787 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2788 VirtIONetQueue *q = &n->vqs[index]; 2789 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2790 2791 qemu_purge_queued_packets(nc); 2792 2793 virtio_del_queue(vdev, index * 2); 2794 if (q->tx_timer) { 2795 timer_free(q->tx_timer); 2796 q->tx_timer = NULL; 2797 } else { 2798 qemu_bh_delete(q->tx_bh); 2799 q->tx_bh = NULL; 2800 } 2801 q->tx_waiting = 0; 2802 virtio_del_queue(vdev, index * 2 + 1); 2803 } 2804 2805 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2806 { 2807 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2808 int old_num_queues = virtio_get_num_queues(vdev); 2809 int new_num_queues = new_max_queue_pairs * 2 + 1; 2810 int i; 2811 2812 assert(old_num_queues >= 3); 2813 assert(old_num_queues % 2 == 1); 2814 2815 if (old_num_queues == new_num_queues) { 2816 return; 2817 } 2818 2819 /* 2820 * We always need to remove and add ctrl vq if 2821 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2822 * and then we only enter one of the following two loops. 2823 */ 2824 virtio_del_queue(vdev, old_num_queues - 1); 2825 2826 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2827 /* new_num_queues < old_num_queues */ 2828 virtio_net_del_queue(n, i / 2); 2829 } 2830 2831 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2832 /* new_num_queues > old_num_queues */ 2833 virtio_net_add_queue(n, i / 2); 2834 } 2835 2836 /* add ctrl_vq last */ 2837 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2838 } 2839 2840 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2841 { 2842 int max = multiqueue ? n->max_queue_pairs : 1; 2843 2844 n->multiqueue = multiqueue; 2845 virtio_net_change_num_queue_pairs(n, max); 2846 2847 virtio_net_set_queue_pairs(n); 2848 } 2849 2850 static int virtio_net_post_load_device(void *opaque, int version_id) 2851 { 2852 VirtIONet *n = opaque; 2853 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2854 int i, link_down; 2855 2856 trace_virtio_net_post_load_device(); 2857 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2858 virtio_vdev_has_feature(vdev, 2859 VIRTIO_F_VERSION_1), 2860 virtio_vdev_has_feature(vdev, 2861 VIRTIO_NET_F_HASH_REPORT)); 2862 2863 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2864 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2865 n->mac_table.in_use = 0; 2866 } 2867 2868 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2869 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2870 } 2871 2872 /* 2873 * curr_guest_offloads will be later overwritten by the 2874 * virtio_set_features_nocheck call done from the virtio_load. 2875 * Here we make sure it is preserved and restored accordingly 2876 * in the virtio_net_post_load_virtio callback. 2877 */ 2878 n->saved_guest_offloads = n->curr_guest_offloads; 2879 2880 virtio_net_set_queue_pairs(n); 2881 2882 /* Find the first multicast entry in the saved MAC filter */ 2883 for (i = 0; i < n->mac_table.in_use; i++) { 2884 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2885 break; 2886 } 2887 } 2888 n->mac_table.first_multi = i; 2889 2890 /* nc.link_down can't be migrated, so infer link_down according 2891 * to link status bit in n->status */ 2892 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2893 for (i = 0; i < n->max_queue_pairs; i++) { 2894 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2895 } 2896 2897 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2898 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 2899 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 2900 QEMU_CLOCK_VIRTUAL, 2901 virtio_net_announce_timer, n); 2902 if (n->announce_timer.round) { 2903 timer_mod(n->announce_timer.tm, 2904 qemu_clock_get_ms(n->announce_timer.type)); 2905 } else { 2906 qemu_announce_timer_del(&n->announce_timer, false); 2907 } 2908 } 2909 2910 if (n->rss_data.enabled) { 2911 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 2912 if (!n->rss_data.populate_hash) { 2913 if (!virtio_net_attach_epbf_rss(n)) { 2914 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 2915 warn_report("Can't post-load eBPF RSS for vhost"); 2916 } else { 2917 warn_report("Can't post-load eBPF RSS - " 2918 "fallback to software RSS"); 2919 n->rss_data.enabled_software_rss = true; 2920 } 2921 } 2922 } 2923 2924 trace_virtio_net_rss_enable(n->rss_data.hash_types, 2925 n->rss_data.indirections_len, 2926 sizeof(n->rss_data.key)); 2927 } else { 2928 trace_virtio_net_rss_disable(); 2929 } 2930 return 0; 2931 } 2932 2933 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 2934 { 2935 VirtIONet *n = VIRTIO_NET(vdev); 2936 /* 2937 * The actual needed state is now in saved_guest_offloads, 2938 * see virtio_net_post_load_device for detail. 2939 * Restore it back and apply the desired offloads. 2940 */ 2941 n->curr_guest_offloads = n->saved_guest_offloads; 2942 if (peer_has_vnet_hdr(n)) { 2943 virtio_net_apply_guest_offloads(n); 2944 } 2945 2946 return 0; 2947 } 2948 2949 /* tx_waiting field of a VirtIONetQueue */ 2950 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 2951 .name = "virtio-net-queue-tx_waiting", 2952 .fields = (VMStateField[]) { 2953 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 2954 VMSTATE_END_OF_LIST() 2955 }, 2956 }; 2957 2958 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 2959 { 2960 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 2961 } 2962 2963 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 2964 { 2965 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 2966 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 2967 } 2968 2969 static bool mac_table_fits(void *opaque, int version_id) 2970 { 2971 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 2972 } 2973 2974 static bool mac_table_doesnt_fit(void *opaque, int version_id) 2975 { 2976 return !mac_table_fits(opaque, version_id); 2977 } 2978 2979 /* This temporary type is shared by all the WITH_TMP methods 2980 * although only some fields are used by each. 2981 */ 2982 struct VirtIONetMigTmp { 2983 VirtIONet *parent; 2984 VirtIONetQueue *vqs_1; 2985 uint16_t curr_queue_pairs_1; 2986 uint8_t has_ufo; 2987 uint32_t has_vnet_hdr; 2988 }; 2989 2990 /* The 2nd and subsequent tx_waiting flags are loaded later than 2991 * the 1st entry in the queue_pairs and only if there's more than one 2992 * entry. We use the tmp mechanism to calculate a temporary 2993 * pointer and count and also validate the count. 2994 */ 2995 2996 static int virtio_net_tx_waiting_pre_save(void *opaque) 2997 { 2998 struct VirtIONetMigTmp *tmp = opaque; 2999 3000 tmp->vqs_1 = tmp->parent->vqs + 1; 3001 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3002 if (tmp->parent->curr_queue_pairs == 0) { 3003 tmp->curr_queue_pairs_1 = 0; 3004 } 3005 3006 return 0; 3007 } 3008 3009 static int virtio_net_tx_waiting_pre_load(void *opaque) 3010 { 3011 struct VirtIONetMigTmp *tmp = opaque; 3012 3013 /* Reuse the pointer setup from save */ 3014 virtio_net_tx_waiting_pre_save(opaque); 3015 3016 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3017 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3018 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3019 3020 return -EINVAL; 3021 } 3022 3023 return 0; /* all good */ 3024 } 3025 3026 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3027 .name = "virtio-net-tx_waiting", 3028 .pre_load = virtio_net_tx_waiting_pre_load, 3029 .pre_save = virtio_net_tx_waiting_pre_save, 3030 .fields = (VMStateField[]) { 3031 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3032 curr_queue_pairs_1, 3033 vmstate_virtio_net_queue_tx_waiting, 3034 struct VirtIONetQueue), 3035 VMSTATE_END_OF_LIST() 3036 }, 3037 }; 3038 3039 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3040 * flag set we need to check that we have it 3041 */ 3042 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3043 { 3044 struct VirtIONetMigTmp *tmp = opaque; 3045 3046 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3047 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3048 return -EINVAL; 3049 } 3050 3051 return 0; 3052 } 3053 3054 static int virtio_net_ufo_pre_save(void *opaque) 3055 { 3056 struct VirtIONetMigTmp *tmp = opaque; 3057 3058 tmp->has_ufo = tmp->parent->has_ufo; 3059 3060 return 0; 3061 } 3062 3063 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3064 .name = "virtio-net-ufo", 3065 .post_load = virtio_net_ufo_post_load, 3066 .pre_save = virtio_net_ufo_pre_save, 3067 .fields = (VMStateField[]) { 3068 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3069 VMSTATE_END_OF_LIST() 3070 }, 3071 }; 3072 3073 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3074 * flag set we need to check that we have it 3075 */ 3076 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3077 { 3078 struct VirtIONetMigTmp *tmp = opaque; 3079 3080 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3081 error_report("virtio-net: saved image requires vnet_hdr=on"); 3082 return -EINVAL; 3083 } 3084 3085 return 0; 3086 } 3087 3088 static int virtio_net_vnet_pre_save(void *opaque) 3089 { 3090 struct VirtIONetMigTmp *tmp = opaque; 3091 3092 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3093 3094 return 0; 3095 } 3096 3097 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3098 .name = "virtio-net-vnet", 3099 .post_load = virtio_net_vnet_post_load, 3100 .pre_save = virtio_net_vnet_pre_save, 3101 .fields = (VMStateField[]) { 3102 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3103 VMSTATE_END_OF_LIST() 3104 }, 3105 }; 3106 3107 static bool virtio_net_rss_needed(void *opaque) 3108 { 3109 return VIRTIO_NET(opaque)->rss_data.enabled; 3110 } 3111 3112 static const VMStateDescription vmstate_virtio_net_rss = { 3113 .name = "virtio-net-device/rss", 3114 .version_id = 1, 3115 .minimum_version_id = 1, 3116 .needed = virtio_net_rss_needed, 3117 .fields = (VMStateField[]) { 3118 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3119 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3120 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3121 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3122 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3123 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3124 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3125 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3126 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3127 rss_data.indirections_len, 0, 3128 vmstate_info_uint16, uint16_t), 3129 VMSTATE_END_OF_LIST() 3130 }, 3131 }; 3132 3133 static const VMStateDescription vmstate_virtio_net_device = { 3134 .name = "virtio-net-device", 3135 .version_id = VIRTIO_NET_VM_VERSION, 3136 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3137 .post_load = virtio_net_post_load_device, 3138 .fields = (VMStateField[]) { 3139 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3140 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3141 vmstate_virtio_net_queue_tx_waiting, 3142 VirtIONetQueue), 3143 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3144 VMSTATE_UINT16(status, VirtIONet), 3145 VMSTATE_UINT8(promisc, VirtIONet), 3146 VMSTATE_UINT8(allmulti, VirtIONet), 3147 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3148 3149 /* Guarded pair: If it fits we load it, else we throw it away 3150 * - can happen if source has a larger MAC table.; post-load 3151 * sets flags in this case. 3152 */ 3153 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3154 0, mac_table_fits, mac_table.in_use, 3155 ETH_ALEN), 3156 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3157 mac_table.in_use, ETH_ALEN), 3158 3159 /* Note: This is an array of uint32's that's always been saved as a 3160 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3161 * but based on the uint. 3162 */ 3163 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3164 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3165 vmstate_virtio_net_has_vnet), 3166 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3167 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3168 VMSTATE_UINT8(alluni, VirtIONet), 3169 VMSTATE_UINT8(nomulti, VirtIONet), 3170 VMSTATE_UINT8(nouni, VirtIONet), 3171 VMSTATE_UINT8(nobcast, VirtIONet), 3172 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3173 vmstate_virtio_net_has_ufo), 3174 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3175 vmstate_info_uint16_equal, uint16_t), 3176 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3177 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3178 vmstate_virtio_net_tx_waiting), 3179 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3180 has_ctrl_guest_offloads), 3181 VMSTATE_END_OF_LIST() 3182 }, 3183 .subsections = (const VMStateDescription * []) { 3184 &vmstate_virtio_net_rss, 3185 NULL 3186 } 3187 }; 3188 3189 static NetClientInfo net_virtio_info = { 3190 .type = NET_CLIENT_DRIVER_NIC, 3191 .size = sizeof(NICState), 3192 .can_receive = virtio_net_can_receive, 3193 .receive = virtio_net_receive, 3194 .link_status_changed = virtio_net_set_link_status, 3195 .query_rx_filter = virtio_net_query_rxfilter, 3196 .announce = virtio_net_announce, 3197 }; 3198 3199 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3200 { 3201 VirtIONet *n = VIRTIO_NET(vdev); 3202 NetClientState *nc; 3203 assert(n->vhost_started); 3204 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3205 /* Must guard against invalid features and bogus queue index 3206 * from being set by malicious guest, or penetrated through 3207 * buggy migration stream. 3208 */ 3209 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3210 qemu_log_mask(LOG_GUEST_ERROR, 3211 "%s: bogus vq index ignored\n", __func__); 3212 return false; 3213 } 3214 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3215 } else { 3216 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3217 } 3218 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3219 } 3220 3221 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3222 bool mask) 3223 { 3224 VirtIONet *n = VIRTIO_NET(vdev); 3225 NetClientState *nc; 3226 assert(n->vhost_started); 3227 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3228 /* Must guard against invalid features and bogus queue index 3229 * from being set by malicious guest, or penetrated through 3230 * buggy migration stream. 3231 */ 3232 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3233 qemu_log_mask(LOG_GUEST_ERROR, 3234 "%s: bogus vq index ignored\n", __func__); 3235 return; 3236 } 3237 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3238 } else { 3239 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3240 } 3241 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3242 vdev, idx, mask); 3243 } 3244 3245 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3246 { 3247 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3248 3249 n->config_size = virtio_feature_get_config_size(feature_sizes, 3250 host_features); 3251 } 3252 3253 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3254 const char *type) 3255 { 3256 /* 3257 * The name can be NULL, the netclient name will be type.x. 3258 */ 3259 assert(type != NULL); 3260 3261 g_free(n->netclient_name); 3262 g_free(n->netclient_type); 3263 n->netclient_name = g_strdup(name); 3264 n->netclient_type = g_strdup(type); 3265 } 3266 3267 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3268 { 3269 HotplugHandler *hotplug_ctrl; 3270 PCIDevice *pci_dev; 3271 Error *err = NULL; 3272 3273 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3274 if (hotplug_ctrl) { 3275 pci_dev = PCI_DEVICE(dev); 3276 pci_dev->partially_hotplugged = true; 3277 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3278 if (err) { 3279 error_report_err(err); 3280 return false; 3281 } 3282 } else { 3283 return false; 3284 } 3285 return true; 3286 } 3287 3288 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3289 Error **errp) 3290 { 3291 Error *err = NULL; 3292 HotplugHandler *hotplug_ctrl; 3293 PCIDevice *pdev = PCI_DEVICE(dev); 3294 BusState *primary_bus; 3295 3296 if (!pdev->partially_hotplugged) { 3297 return true; 3298 } 3299 primary_bus = dev->parent_bus; 3300 if (!primary_bus) { 3301 error_setg(errp, "virtio_net: couldn't find primary bus"); 3302 return false; 3303 } 3304 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3305 qatomic_set(&n->failover_primary_hidden, false); 3306 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3307 if (hotplug_ctrl) { 3308 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3309 if (err) { 3310 goto out; 3311 } 3312 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3313 } 3314 pdev->partially_hotplugged = false; 3315 3316 out: 3317 error_propagate(errp, err); 3318 return !err; 3319 } 3320 3321 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3322 { 3323 bool should_be_hidden; 3324 Error *err = NULL; 3325 DeviceState *dev = failover_find_primary_device(n); 3326 3327 if (!dev) { 3328 return; 3329 } 3330 3331 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3332 3333 if (migration_in_setup(s) && !should_be_hidden) { 3334 if (failover_unplug_primary(n, dev)) { 3335 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3336 qapi_event_send_unplug_primary(dev->id); 3337 qatomic_set(&n->failover_primary_hidden, true); 3338 } else { 3339 warn_report("couldn't unplug primary device"); 3340 } 3341 } else if (migration_has_failed(s)) { 3342 /* We already unplugged the device let's plug it back */ 3343 if (!failover_replug_primary(n, dev, &err)) { 3344 if (err) { 3345 error_report_err(err); 3346 } 3347 } 3348 } 3349 } 3350 3351 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3352 { 3353 MigrationState *s = data; 3354 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3355 virtio_net_handle_migration_primary(n, s); 3356 } 3357 3358 static bool failover_hide_primary_device(DeviceListener *listener, 3359 const QDict *device_opts, 3360 bool from_json, 3361 Error **errp) 3362 { 3363 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3364 const char *standby_id; 3365 3366 if (!device_opts) { 3367 return false; 3368 } 3369 3370 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3371 return false; 3372 } 3373 3374 if (!qdict_haskey(device_opts, "id")) { 3375 error_setg(errp, "Device with failover_pair_id needs to have id"); 3376 return false; 3377 } 3378 3379 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3380 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3381 return false; 3382 } 3383 3384 /* 3385 * The hide helper can be called several times for a given device. 3386 * Check there is only one primary for a virtio-net device but 3387 * don't duplicate the qdict several times if it's called for the same 3388 * device. 3389 */ 3390 if (n->primary_opts) { 3391 const char *old, *new; 3392 /* devices with failover_pair_id always have an id */ 3393 old = qdict_get_str(n->primary_opts, "id"); 3394 new = qdict_get_str(device_opts, "id"); 3395 if (strcmp(old, new) != 0) { 3396 error_setg(errp, "Cannot attach more than one primary device to " 3397 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3398 return false; 3399 } 3400 } else { 3401 n->primary_opts = qdict_clone_shallow(device_opts); 3402 n->primary_opts_from_json = from_json; 3403 } 3404 3405 /* failover_primary_hidden is set during feature negotiation */ 3406 return qatomic_read(&n->failover_primary_hidden); 3407 } 3408 3409 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3410 { 3411 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3412 VirtIONet *n = VIRTIO_NET(dev); 3413 NetClientState *nc; 3414 int i; 3415 3416 if (n->net_conf.mtu) { 3417 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3418 } 3419 3420 if (n->net_conf.duplex_str) { 3421 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3422 n->net_conf.duplex = DUPLEX_HALF; 3423 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3424 n->net_conf.duplex = DUPLEX_FULL; 3425 } else { 3426 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3427 return; 3428 } 3429 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3430 } else { 3431 n->net_conf.duplex = DUPLEX_UNKNOWN; 3432 } 3433 3434 if (n->net_conf.speed < SPEED_UNKNOWN) { 3435 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3436 return; 3437 } 3438 if (n->net_conf.speed >= 0) { 3439 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3440 } 3441 3442 if (n->failover) { 3443 n->primary_listener.hide_device = failover_hide_primary_device; 3444 qatomic_set(&n->failover_primary_hidden, true); 3445 device_listener_register(&n->primary_listener); 3446 n->migration_state.notify = virtio_net_migration_state_notifier; 3447 add_migration_state_change_notifier(&n->migration_state); 3448 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3449 } 3450 3451 virtio_net_set_config_size(n, n->host_features); 3452 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3453 3454 /* 3455 * We set a lower limit on RX queue size to what it always was. 3456 * Guests that want a smaller ring can always resize it without 3457 * help from us (using virtio 1 and up). 3458 */ 3459 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3460 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3461 !is_power_of_2(n->net_conf.rx_queue_size)) { 3462 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3463 "must be a power of 2 between %d and %d.", 3464 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3465 VIRTQUEUE_MAX_SIZE); 3466 virtio_cleanup(vdev); 3467 return; 3468 } 3469 3470 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3471 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3472 !is_power_of_2(n->net_conf.tx_queue_size)) { 3473 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3474 "must be a power of 2 between %d and %d", 3475 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3476 VIRTQUEUE_MAX_SIZE); 3477 virtio_cleanup(vdev); 3478 return; 3479 } 3480 3481 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3482 3483 /* 3484 * Figure out the datapath queue pairs since the backend could 3485 * provide control queue via peers as well. 3486 */ 3487 if (n->nic_conf.peers.queues) { 3488 for (i = 0; i < n->max_ncs; i++) { 3489 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3490 ++n->max_queue_pairs; 3491 } 3492 } 3493 } 3494 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3495 3496 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3497 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3498 "must be a positive integer less than %d.", 3499 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3500 virtio_cleanup(vdev); 3501 return; 3502 } 3503 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3504 n->curr_queue_pairs = 1; 3505 n->tx_timeout = n->net_conf.txtimer; 3506 3507 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3508 && strcmp(n->net_conf.tx, "bh")) { 3509 warn_report("virtio-net: " 3510 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3511 n->net_conf.tx); 3512 error_printf("Defaulting to \"bh\""); 3513 } 3514 3515 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3516 n->net_conf.tx_queue_size); 3517 3518 for (i = 0; i < n->max_queue_pairs; i++) { 3519 virtio_net_add_queue(n, i); 3520 } 3521 3522 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3523 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3524 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3525 n->status = VIRTIO_NET_S_LINK_UP; 3526 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3527 QEMU_CLOCK_VIRTUAL, 3528 virtio_net_announce_timer, n); 3529 n->announce_timer.round = 0; 3530 3531 if (n->netclient_type) { 3532 /* 3533 * Happen when virtio_net_set_netclient_name has been called. 3534 */ 3535 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3536 n->netclient_type, n->netclient_name, n); 3537 } else { 3538 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3539 object_get_typename(OBJECT(dev)), dev->id, n); 3540 } 3541 3542 for (i = 0; i < n->max_queue_pairs; i++) { 3543 n->nic->ncs[i].do_not_pad = true; 3544 } 3545 3546 peer_test_vnet_hdr(n); 3547 if (peer_has_vnet_hdr(n)) { 3548 for (i = 0; i < n->max_queue_pairs; i++) { 3549 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3550 } 3551 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3552 } else { 3553 n->host_hdr_len = 0; 3554 } 3555 3556 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3557 3558 n->vqs[0].tx_waiting = 0; 3559 n->tx_burst = n->net_conf.txburst; 3560 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3561 n->promisc = 1; /* for compatibility */ 3562 3563 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3564 3565 n->vlans = g_malloc0(MAX_VLAN >> 3); 3566 3567 nc = qemu_get_queue(n->nic); 3568 nc->rxfilter_notify_enabled = 1; 3569 3570 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3571 struct virtio_net_config netcfg = {}; 3572 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3573 vhost_net_set_config(get_vhost_net(nc->peer), 3574 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3575 } 3576 QTAILQ_INIT(&n->rsc_chains); 3577 n->qdev = dev; 3578 3579 net_rx_pkt_init(&n->rx_pkt, false); 3580 3581 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3582 virtio_net_load_ebpf(n); 3583 } 3584 } 3585 3586 static void virtio_net_device_unrealize(DeviceState *dev) 3587 { 3588 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3589 VirtIONet *n = VIRTIO_NET(dev); 3590 int i, max_queue_pairs; 3591 3592 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3593 virtio_net_unload_ebpf(n); 3594 } 3595 3596 /* This will stop vhost backend if appropriate. */ 3597 virtio_net_set_status(vdev, 0); 3598 3599 g_free(n->netclient_name); 3600 n->netclient_name = NULL; 3601 g_free(n->netclient_type); 3602 n->netclient_type = NULL; 3603 3604 g_free(n->mac_table.macs); 3605 g_free(n->vlans); 3606 3607 if (n->failover) { 3608 qobject_unref(n->primary_opts); 3609 device_listener_unregister(&n->primary_listener); 3610 remove_migration_state_change_notifier(&n->migration_state); 3611 } else { 3612 assert(n->primary_opts == NULL); 3613 } 3614 3615 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3616 for (i = 0; i < max_queue_pairs; i++) { 3617 virtio_net_del_queue(n, i); 3618 } 3619 /* delete also control vq */ 3620 virtio_del_queue(vdev, max_queue_pairs * 2); 3621 qemu_announce_timer_del(&n->announce_timer, false); 3622 g_free(n->vqs); 3623 qemu_del_nic(n->nic); 3624 virtio_net_rsc_cleanup(n); 3625 g_free(n->rss_data.indirections_table); 3626 net_rx_pkt_uninit(n->rx_pkt); 3627 virtio_cleanup(vdev); 3628 } 3629 3630 static void virtio_net_instance_init(Object *obj) 3631 { 3632 VirtIONet *n = VIRTIO_NET(obj); 3633 3634 /* 3635 * The default config_size is sizeof(struct virtio_net_config). 3636 * Can be overriden with virtio_net_set_config_size. 3637 */ 3638 n->config_size = sizeof(struct virtio_net_config); 3639 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3640 "bootindex", "/ethernet-phy@0", 3641 DEVICE(n)); 3642 3643 ebpf_rss_init(&n->ebpf_rss); 3644 } 3645 3646 static int virtio_net_pre_save(void *opaque) 3647 { 3648 VirtIONet *n = opaque; 3649 3650 /* At this point, backend must be stopped, otherwise 3651 * it might keep writing to memory. */ 3652 assert(!n->vhost_started); 3653 3654 return 0; 3655 } 3656 3657 static bool primary_unplug_pending(void *opaque) 3658 { 3659 DeviceState *dev = opaque; 3660 DeviceState *primary; 3661 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3662 VirtIONet *n = VIRTIO_NET(vdev); 3663 3664 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3665 return false; 3666 } 3667 primary = failover_find_primary_device(n); 3668 return primary ? primary->pending_deleted_event : false; 3669 } 3670 3671 static bool dev_unplug_pending(void *opaque) 3672 { 3673 DeviceState *dev = opaque; 3674 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3675 3676 return vdc->primary_unplug_pending(dev); 3677 } 3678 3679 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3680 { 3681 VirtIONet *n = VIRTIO_NET(vdev); 3682 NetClientState *nc = qemu_get_queue(n->nic); 3683 struct vhost_net *net = get_vhost_net(nc->peer); 3684 return &net->dev; 3685 } 3686 3687 static const VMStateDescription vmstate_virtio_net = { 3688 .name = "virtio-net", 3689 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3690 .version_id = VIRTIO_NET_VM_VERSION, 3691 .fields = (VMStateField[]) { 3692 VMSTATE_VIRTIO_DEVICE, 3693 VMSTATE_END_OF_LIST() 3694 }, 3695 .pre_save = virtio_net_pre_save, 3696 .dev_unplug_pending = dev_unplug_pending, 3697 }; 3698 3699 static Property virtio_net_properties[] = { 3700 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3701 VIRTIO_NET_F_CSUM, true), 3702 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3703 VIRTIO_NET_F_GUEST_CSUM, true), 3704 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3705 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3706 VIRTIO_NET_F_GUEST_TSO4, true), 3707 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3708 VIRTIO_NET_F_GUEST_TSO6, true), 3709 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3710 VIRTIO_NET_F_GUEST_ECN, true), 3711 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3712 VIRTIO_NET_F_GUEST_UFO, true), 3713 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3714 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3715 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3716 VIRTIO_NET_F_HOST_TSO4, true), 3717 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3718 VIRTIO_NET_F_HOST_TSO6, true), 3719 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3720 VIRTIO_NET_F_HOST_ECN, true), 3721 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3722 VIRTIO_NET_F_HOST_UFO, true), 3723 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3724 VIRTIO_NET_F_MRG_RXBUF, true), 3725 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3726 VIRTIO_NET_F_STATUS, true), 3727 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3728 VIRTIO_NET_F_CTRL_VQ, true), 3729 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3730 VIRTIO_NET_F_CTRL_RX, true), 3731 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3732 VIRTIO_NET_F_CTRL_VLAN, true), 3733 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3734 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3735 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3736 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3737 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3738 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3739 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3740 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3741 VIRTIO_NET_F_RSS, false), 3742 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3743 VIRTIO_NET_F_HASH_REPORT, false), 3744 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3745 VIRTIO_NET_F_RSC_EXT, false), 3746 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3747 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3748 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3749 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3750 TX_TIMER_INTERVAL), 3751 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3752 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3753 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3754 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3755 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3756 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3757 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3758 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3759 true), 3760 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3761 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3762 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3763 DEFINE_PROP_END_OF_LIST(), 3764 }; 3765 3766 static void virtio_net_class_init(ObjectClass *klass, void *data) 3767 { 3768 DeviceClass *dc = DEVICE_CLASS(klass); 3769 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3770 3771 device_class_set_props(dc, virtio_net_properties); 3772 dc->vmsd = &vmstate_virtio_net; 3773 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3774 vdc->realize = virtio_net_device_realize; 3775 vdc->unrealize = virtio_net_device_unrealize; 3776 vdc->get_config = virtio_net_get_config; 3777 vdc->set_config = virtio_net_set_config; 3778 vdc->get_features = virtio_net_get_features; 3779 vdc->set_features = virtio_net_set_features; 3780 vdc->bad_features = virtio_net_bad_features; 3781 vdc->reset = virtio_net_reset; 3782 vdc->set_status = virtio_net_set_status; 3783 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3784 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3785 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3786 vdc->post_load = virtio_net_post_load_virtio; 3787 vdc->vmsd = &vmstate_virtio_net_device; 3788 vdc->primary_unplug_pending = primary_unplug_pending; 3789 vdc->get_vhost = virtio_net_get_vhost; 3790 } 3791 3792 static const TypeInfo virtio_net_info = { 3793 .name = TYPE_VIRTIO_NET, 3794 .parent = TYPE_VIRTIO_DEVICE, 3795 .instance_size = sizeof(VirtIONet), 3796 .instance_init = virtio_net_instance_init, 3797 .class_init = virtio_net_class_init, 3798 }; 3799 3800 static void virtio_register_types(void) 3801 { 3802 type_register_static(&virtio_net_info); 3803 } 3804 3805 type_init(virtio_register_types) 3806