1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ 53 54 /* previously fixed value */ 55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 57 58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 61 62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 63 64 #define VIRTIO_NET_TCP_FLAG 0x3F 65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 66 67 /* IPv4 max payload, 16 bits in the header */ 68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 70 71 /* header length value in ip header without option */ 72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 73 74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 76 77 /* Purge coalesced packets timer interval, This value affects the performance 78 a lot, and should be tuned carefully, '300000'(300us) is the recommended 79 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 80 tso/gso/gro 'off'. */ 81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 82 83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 92 93 static const VirtIOFeature feature_sizes[] = { 94 {.flags = 1ULL << VIRTIO_NET_F_MAC, 95 .end = endof(struct virtio_net_config, mac)}, 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 97 .end = endof(struct virtio_net_config, status)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MQ, 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 100 {.flags = 1ULL << VIRTIO_NET_F_MTU, 101 .end = endof(struct virtio_net_config, mtu)}, 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 103 .end = endof(struct virtio_net_config, duplex)}, 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 105 .end = endof(struct virtio_net_config, supported_hash_types)}, 106 {} 107 }; 108 109 static const VirtIOConfigSizeParams cfg_size_params = { 110 .min_size = endof(struct virtio_net_config, mac), 111 .max_size = sizeof(struct virtio_net_config), 112 .feature_sizes = feature_sizes 113 }; 114 115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 116 { 117 VirtIONet *n = qemu_get_nic_opaque(nc); 118 119 return &n->vqs[nc->queue_index]; 120 } 121 122 static int vq2q(int queue_index) 123 { 124 return queue_index / 2; 125 } 126 127 static void flush_or_purge_queued_packets(NetClientState *nc) 128 { 129 if (!nc->peer) { 130 return; 131 } 132 133 qemu_flush_or_purge_queued_packets(nc->peer, true); 134 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 135 } 136 137 /* TODO 138 * - we could suppress RX interrupt if we were so inclined. 139 */ 140 141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 142 { 143 VirtIONet *n = VIRTIO_NET(vdev); 144 struct virtio_net_config netcfg; 145 NetClientState *nc = qemu_get_queue(n->nic); 146 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 147 148 int ret = 0; 149 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 150 virtio_stw_p(vdev, &netcfg.status, n->status); 151 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 152 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 153 memcpy(netcfg.mac, n->mac, ETH_ALEN); 154 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 155 netcfg.duplex = n->net_conf.duplex; 156 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 157 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 158 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 159 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 160 virtio_stl_p(vdev, &netcfg.supported_hash_types, 161 VIRTIO_NET_RSS_SUPPORTED_HASHES); 162 memcpy(config, &netcfg, n->config_size); 163 164 /* 165 * Is this VDPA? No peer means not VDPA: there's no way to 166 * disconnect/reconnect a VDPA peer. 167 */ 168 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 169 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 170 n->config_size); 171 if (ret != -1) { 172 /* 173 * Some NIC/kernel combinations present 0 as the mac address. As 174 * that is not a legal address, try to proceed with the 175 * address from the QEMU command line in the hope that the 176 * address has been configured correctly elsewhere - just not 177 * reported by the device. 178 */ 179 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 180 info_report("Zero hardware mac address detected. Ignoring."); 181 memcpy(netcfg.mac, n->mac, ETH_ALEN); 182 } 183 memcpy(config, &netcfg, n->config_size); 184 } 185 } 186 } 187 188 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 189 { 190 VirtIONet *n = VIRTIO_NET(vdev); 191 struct virtio_net_config netcfg = {}; 192 NetClientState *nc = qemu_get_queue(n->nic); 193 194 memcpy(&netcfg, config, n->config_size); 195 196 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 197 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 198 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 199 memcpy(n->mac, netcfg.mac, ETH_ALEN); 200 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 201 } 202 203 /* 204 * Is this VDPA? No peer means not VDPA: there's no way to 205 * disconnect/reconnect a VDPA peer. 206 */ 207 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 208 vhost_net_set_config(get_vhost_net(nc->peer), 209 (uint8_t *)&netcfg, 0, n->config_size, 210 VHOST_SET_CONFIG_TYPE_MASTER); 211 } 212 } 213 214 static bool virtio_net_started(VirtIONet *n, uint8_t status) 215 { 216 VirtIODevice *vdev = VIRTIO_DEVICE(n); 217 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 218 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 219 } 220 221 static void virtio_net_announce_notify(VirtIONet *net) 222 { 223 VirtIODevice *vdev = VIRTIO_DEVICE(net); 224 trace_virtio_net_announce_notify(); 225 226 net->status |= VIRTIO_NET_S_ANNOUNCE; 227 virtio_notify_config(vdev); 228 } 229 230 static void virtio_net_announce_timer(void *opaque) 231 { 232 VirtIONet *n = opaque; 233 trace_virtio_net_announce_timer(n->announce_timer.round); 234 235 n->announce_timer.round--; 236 virtio_net_announce_notify(n); 237 } 238 239 static void virtio_net_announce(NetClientState *nc) 240 { 241 VirtIONet *n = qemu_get_nic_opaque(nc); 242 VirtIODevice *vdev = VIRTIO_DEVICE(n); 243 244 /* 245 * Make sure the virtio migration announcement timer isn't running 246 * If it is, let it trigger announcement so that we do not cause 247 * confusion. 248 */ 249 if (n->announce_timer.round) { 250 return; 251 } 252 253 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 254 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 255 virtio_net_announce_notify(n); 256 } 257 } 258 259 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 260 { 261 VirtIODevice *vdev = VIRTIO_DEVICE(n); 262 NetClientState *nc = qemu_get_queue(n->nic); 263 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 264 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 265 n->max_ncs - n->max_queue_pairs : 0; 266 267 if (!get_vhost_net(nc->peer)) { 268 return; 269 } 270 271 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 272 !!n->vhost_started) { 273 return; 274 } 275 if (!n->vhost_started) { 276 int r, i; 277 278 if (n->needs_vnet_hdr_swap) { 279 error_report("backend does not support %s vnet headers; " 280 "falling back on userspace virtio", 281 virtio_is_big_endian(vdev) ? "BE" : "LE"); 282 return; 283 } 284 285 /* Any packets outstanding? Purge them to avoid touching rings 286 * when vhost is running. 287 */ 288 for (i = 0; i < queue_pairs; i++) { 289 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 290 291 /* Purge both directions: TX and RX. */ 292 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 293 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 294 } 295 296 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 297 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 298 if (r < 0) { 299 error_report("%uBytes MTU not supported by the backend", 300 n->net_conf.mtu); 301 302 return; 303 } 304 } 305 306 n->vhost_started = 1; 307 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 308 if (r < 0) { 309 error_report("unable to start vhost net: %d: " 310 "falling back on userspace virtio", -r); 311 n->vhost_started = 0; 312 } 313 } else { 314 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 315 n->vhost_started = 0; 316 } 317 } 318 319 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 320 NetClientState *peer, 321 bool enable) 322 { 323 if (virtio_is_big_endian(vdev)) { 324 return qemu_set_vnet_be(peer, enable); 325 } else { 326 return qemu_set_vnet_le(peer, enable); 327 } 328 } 329 330 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 331 int queue_pairs, bool enable) 332 { 333 int i; 334 335 for (i = 0; i < queue_pairs; i++) { 336 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 337 enable) { 338 while (--i >= 0) { 339 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 340 } 341 342 return true; 343 } 344 } 345 346 return false; 347 } 348 349 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 350 { 351 VirtIODevice *vdev = VIRTIO_DEVICE(n); 352 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 353 354 if (virtio_net_started(n, status)) { 355 /* Before using the device, we tell the network backend about the 356 * endianness to use when parsing vnet headers. If the backend 357 * can't do it, we fallback onto fixing the headers in the core 358 * virtio-net code. 359 */ 360 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 361 queue_pairs, true); 362 } else if (virtio_net_started(n, vdev->status)) { 363 /* After using the device, we need to reset the network backend to 364 * the default (guest native endianness), otherwise the guest may 365 * lose network connectivity if it is rebooted into a different 366 * endianness. 367 */ 368 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 369 } 370 } 371 372 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 373 { 374 unsigned int dropped = virtqueue_drop_all(vq); 375 if (dropped) { 376 virtio_notify(vdev, vq); 377 } 378 } 379 380 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 381 { 382 VirtIONet *n = VIRTIO_NET(vdev); 383 VirtIONetQueue *q; 384 int i; 385 uint8_t queue_status; 386 387 virtio_net_vnet_endian_status(n, status); 388 virtio_net_vhost_status(n, status); 389 390 for (i = 0; i < n->max_queue_pairs; i++) { 391 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 392 bool queue_started; 393 q = &n->vqs[i]; 394 395 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 396 queue_status = 0; 397 } else { 398 queue_status = status; 399 } 400 queue_started = 401 virtio_net_started(n, queue_status) && !n->vhost_started; 402 403 if (queue_started) { 404 qemu_flush_queued_packets(ncs); 405 } 406 407 if (!q->tx_waiting) { 408 continue; 409 } 410 411 if (queue_started) { 412 if (q->tx_timer) { 413 timer_mod(q->tx_timer, 414 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 415 } else { 416 qemu_bh_schedule(q->tx_bh); 417 } 418 } else { 419 if (q->tx_timer) { 420 timer_del(q->tx_timer); 421 } else { 422 qemu_bh_cancel(q->tx_bh); 423 } 424 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 425 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 426 vdev->vm_running) { 427 /* if tx is waiting we are likely have some packets in tx queue 428 * and disabled notification */ 429 q->tx_waiting = 0; 430 virtio_queue_set_notification(q->tx_vq, 1); 431 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 432 } 433 } 434 } 435 } 436 437 static void virtio_net_set_link_status(NetClientState *nc) 438 { 439 VirtIONet *n = qemu_get_nic_opaque(nc); 440 VirtIODevice *vdev = VIRTIO_DEVICE(n); 441 uint16_t old_status = n->status; 442 443 if (nc->link_down) 444 n->status &= ~VIRTIO_NET_S_LINK_UP; 445 else 446 n->status |= VIRTIO_NET_S_LINK_UP; 447 448 if (n->status != old_status) 449 virtio_notify_config(vdev); 450 451 virtio_net_set_status(vdev, vdev->status); 452 } 453 454 static void rxfilter_notify(NetClientState *nc) 455 { 456 VirtIONet *n = qemu_get_nic_opaque(nc); 457 458 if (nc->rxfilter_notify_enabled) { 459 char *path = object_get_canonical_path(OBJECT(n->qdev)); 460 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name, 461 n->netclient_name, path); 462 g_free(path); 463 464 /* disable event notification to avoid events flooding */ 465 nc->rxfilter_notify_enabled = 0; 466 } 467 } 468 469 static intList *get_vlan_table(VirtIONet *n) 470 { 471 intList *list; 472 int i, j; 473 474 list = NULL; 475 for (i = 0; i < MAX_VLAN >> 5; i++) { 476 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 477 if (n->vlans[i] & (1U << j)) { 478 QAPI_LIST_PREPEND(list, (i << 5) + j); 479 } 480 } 481 } 482 483 return list; 484 } 485 486 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 487 { 488 VirtIONet *n = qemu_get_nic_opaque(nc); 489 VirtIODevice *vdev = VIRTIO_DEVICE(n); 490 RxFilterInfo *info; 491 strList *str_list; 492 int i; 493 494 info = g_malloc0(sizeof(*info)); 495 info->name = g_strdup(nc->name); 496 info->promiscuous = n->promisc; 497 498 if (n->nouni) { 499 info->unicast = RX_STATE_NONE; 500 } else if (n->alluni) { 501 info->unicast = RX_STATE_ALL; 502 } else { 503 info->unicast = RX_STATE_NORMAL; 504 } 505 506 if (n->nomulti) { 507 info->multicast = RX_STATE_NONE; 508 } else if (n->allmulti) { 509 info->multicast = RX_STATE_ALL; 510 } else { 511 info->multicast = RX_STATE_NORMAL; 512 } 513 514 info->broadcast_allowed = n->nobcast; 515 info->multicast_overflow = n->mac_table.multi_overflow; 516 info->unicast_overflow = n->mac_table.uni_overflow; 517 518 info->main_mac = qemu_mac_strdup_printf(n->mac); 519 520 str_list = NULL; 521 for (i = 0; i < n->mac_table.first_multi; i++) { 522 QAPI_LIST_PREPEND(str_list, 523 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 524 } 525 info->unicast_table = str_list; 526 527 str_list = NULL; 528 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 529 QAPI_LIST_PREPEND(str_list, 530 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 531 } 532 info->multicast_table = str_list; 533 info->vlan_table = get_vlan_table(n); 534 535 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 536 info->vlan = RX_STATE_ALL; 537 } else if (!info->vlan_table) { 538 info->vlan = RX_STATE_NONE; 539 } else { 540 info->vlan = RX_STATE_NORMAL; 541 } 542 543 /* enable event notification after query */ 544 nc->rxfilter_notify_enabled = 1; 545 546 return info; 547 } 548 549 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index) 550 { 551 VirtIONet *n = VIRTIO_NET(vdev); 552 NetClientState *nc; 553 554 /* validate queue_index and skip for cvq */ 555 if (queue_index >= n->max_queue_pairs * 2) { 556 return; 557 } 558 559 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 560 561 if (!nc->peer) { 562 return; 563 } 564 565 if (get_vhost_net(nc->peer) && 566 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 567 vhost_net_virtqueue_reset(vdev, nc, queue_index); 568 } 569 570 flush_or_purge_queued_packets(nc); 571 } 572 573 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index) 574 { 575 VirtIONet *n = VIRTIO_NET(vdev); 576 NetClientState *nc; 577 int r; 578 579 /* validate queue_index and skip for cvq */ 580 if (queue_index >= n->max_queue_pairs * 2) { 581 return; 582 } 583 584 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 585 586 if (!nc->peer || !vdev->vhost_started) { 587 return; 588 } 589 590 if (get_vhost_net(nc->peer) && 591 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 592 r = vhost_net_virtqueue_restart(vdev, nc, queue_index); 593 if (r < 0) { 594 error_report("unable to restart vhost net virtqueue: %d, " 595 "when resetting the queue", queue_index); 596 } 597 } 598 } 599 600 static void virtio_net_reset(VirtIODevice *vdev) 601 { 602 VirtIONet *n = VIRTIO_NET(vdev); 603 int i; 604 605 /* Reset back to compatibility mode */ 606 n->promisc = 1; 607 n->allmulti = 0; 608 n->alluni = 0; 609 n->nomulti = 0; 610 n->nouni = 0; 611 n->nobcast = 0; 612 /* multiqueue is disabled by default */ 613 n->curr_queue_pairs = 1; 614 timer_del(n->announce_timer.tm); 615 n->announce_timer.round = 0; 616 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 617 618 /* Flush any MAC and VLAN filter table state */ 619 n->mac_table.in_use = 0; 620 n->mac_table.first_multi = 0; 621 n->mac_table.multi_overflow = 0; 622 n->mac_table.uni_overflow = 0; 623 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 624 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 625 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 626 memset(n->vlans, 0, MAX_VLAN >> 3); 627 628 /* Flush any async TX */ 629 for (i = 0; i < n->max_queue_pairs; i++) { 630 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 631 } 632 } 633 634 static void peer_test_vnet_hdr(VirtIONet *n) 635 { 636 NetClientState *nc = qemu_get_queue(n->nic); 637 if (!nc->peer) { 638 return; 639 } 640 641 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 642 } 643 644 static int peer_has_vnet_hdr(VirtIONet *n) 645 { 646 return n->has_vnet_hdr; 647 } 648 649 static int peer_has_ufo(VirtIONet *n) 650 { 651 if (!peer_has_vnet_hdr(n)) 652 return 0; 653 654 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 655 656 return n->has_ufo; 657 } 658 659 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 660 int version_1, int hash_report) 661 { 662 int i; 663 NetClientState *nc; 664 665 n->mergeable_rx_bufs = mergeable_rx_bufs; 666 667 if (version_1) { 668 n->guest_hdr_len = hash_report ? 669 sizeof(struct virtio_net_hdr_v1_hash) : 670 sizeof(struct virtio_net_hdr_mrg_rxbuf); 671 n->rss_data.populate_hash = !!hash_report; 672 } else { 673 n->guest_hdr_len = n->mergeable_rx_bufs ? 674 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 675 sizeof(struct virtio_net_hdr); 676 } 677 678 for (i = 0; i < n->max_queue_pairs; i++) { 679 nc = qemu_get_subqueue(n->nic, i); 680 681 if (peer_has_vnet_hdr(n) && 682 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 683 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 684 n->host_hdr_len = n->guest_hdr_len; 685 } 686 } 687 } 688 689 static int virtio_net_max_tx_queue_size(VirtIONet *n) 690 { 691 NetClientState *peer = n->nic_conf.peers.ncs[0]; 692 693 /* 694 * Backends other than vhost-user or vhost-vdpa don't support max queue 695 * size. 696 */ 697 if (!peer) { 698 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 699 } 700 701 switch(peer->info->type) { 702 case NET_CLIENT_DRIVER_VHOST_USER: 703 case NET_CLIENT_DRIVER_VHOST_VDPA: 704 return VIRTQUEUE_MAX_SIZE; 705 default: 706 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 707 }; 708 } 709 710 static int peer_attach(VirtIONet *n, int index) 711 { 712 NetClientState *nc = qemu_get_subqueue(n->nic, index); 713 714 if (!nc->peer) { 715 return 0; 716 } 717 718 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 719 vhost_set_vring_enable(nc->peer, 1); 720 } 721 722 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 723 return 0; 724 } 725 726 if (n->max_queue_pairs == 1) { 727 return 0; 728 } 729 730 return tap_enable(nc->peer); 731 } 732 733 static int peer_detach(VirtIONet *n, int index) 734 { 735 NetClientState *nc = qemu_get_subqueue(n->nic, index); 736 737 if (!nc->peer) { 738 return 0; 739 } 740 741 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 742 vhost_set_vring_enable(nc->peer, 0); 743 } 744 745 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 746 return 0; 747 } 748 749 return tap_disable(nc->peer); 750 } 751 752 static void virtio_net_set_queue_pairs(VirtIONet *n) 753 { 754 int i; 755 int r; 756 757 if (n->nic->peer_deleted) { 758 return; 759 } 760 761 for (i = 0; i < n->max_queue_pairs; i++) { 762 if (i < n->curr_queue_pairs) { 763 r = peer_attach(n, i); 764 assert(!r); 765 } else { 766 r = peer_detach(n, i); 767 assert(!r); 768 } 769 } 770 } 771 772 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 773 774 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 775 Error **errp) 776 { 777 VirtIONet *n = VIRTIO_NET(vdev); 778 NetClientState *nc = qemu_get_queue(n->nic); 779 780 /* Firstly sync all virtio-net possible supported features */ 781 features |= n->host_features; 782 783 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 784 785 if (!peer_has_vnet_hdr(n)) { 786 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 787 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 788 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 789 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 790 791 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 792 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 793 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 794 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 795 796 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 797 } 798 799 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 800 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 801 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 802 } 803 804 if (!get_vhost_net(nc->peer)) { 805 virtio_add_feature(&features, VIRTIO_F_RING_RESET); 806 return features; 807 } 808 809 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 810 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 811 } 812 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 813 vdev->backend_features = features; 814 815 if (n->mtu_bypass_backend && 816 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 817 features |= (1ULL << VIRTIO_NET_F_MTU); 818 } 819 820 return features; 821 } 822 823 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 824 { 825 uint64_t features = 0; 826 827 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 828 * but also these: */ 829 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 830 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 831 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 832 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 833 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 834 835 return features; 836 } 837 838 static void virtio_net_apply_guest_offloads(VirtIONet *n) 839 { 840 qemu_set_offload(qemu_get_queue(n->nic)->peer, 841 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 842 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 843 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 844 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 845 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); 846 } 847 848 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) 849 { 850 static const uint64_t guest_offloads_mask = 851 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 852 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 853 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 854 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 855 (1ULL << VIRTIO_NET_F_GUEST_UFO); 856 857 return guest_offloads_mask & features; 858 } 859 860 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) 861 { 862 VirtIODevice *vdev = VIRTIO_DEVICE(n); 863 return virtio_net_guest_offloads_by_features(vdev->guest_features); 864 } 865 866 typedef struct { 867 VirtIONet *n; 868 DeviceState *dev; 869 } FailoverDevice; 870 871 /** 872 * Set the failover primary device 873 * 874 * @opaque: FailoverId to setup 875 * @opts: opts for device we are handling 876 * @errp: returns an error if this function fails 877 */ 878 static int failover_set_primary(DeviceState *dev, void *opaque) 879 { 880 FailoverDevice *fdev = opaque; 881 PCIDevice *pci_dev = (PCIDevice *) 882 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 883 884 if (!pci_dev) { 885 return 0; 886 } 887 888 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 889 fdev->dev = dev; 890 return 1; 891 } 892 893 return 0; 894 } 895 896 /** 897 * Find the primary device for this failover virtio-net 898 * 899 * @n: VirtIONet device 900 * @errp: returns an error if this function fails 901 */ 902 static DeviceState *failover_find_primary_device(VirtIONet *n) 903 { 904 FailoverDevice fdev = { 905 .n = n, 906 }; 907 908 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 909 NULL, NULL, &fdev); 910 return fdev.dev; 911 } 912 913 static void failover_add_primary(VirtIONet *n, Error **errp) 914 { 915 Error *err = NULL; 916 DeviceState *dev = failover_find_primary_device(n); 917 918 if (dev) { 919 return; 920 } 921 922 if (!n->primary_opts) { 923 error_setg(errp, "Primary device not found"); 924 error_append_hint(errp, "Virtio-net failover will not work. Make " 925 "sure primary device has parameter" 926 " failover_pair_id=%s\n", n->netclient_name); 927 return; 928 } 929 930 dev = qdev_device_add_from_qdict(n->primary_opts, 931 n->primary_opts_from_json, 932 &err); 933 if (err) { 934 qobject_unref(n->primary_opts); 935 n->primary_opts = NULL; 936 } else { 937 object_unref(OBJECT(dev)); 938 } 939 error_propagate(errp, err); 940 } 941 942 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 943 { 944 VirtIONet *n = VIRTIO_NET(vdev); 945 Error *err = NULL; 946 int i; 947 948 if (n->mtu_bypass_backend && 949 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 950 features &= ~(1ULL << VIRTIO_NET_F_MTU); 951 } 952 953 virtio_net_set_multiqueue(n, 954 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 955 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 956 957 virtio_net_set_mrg_rx_bufs(n, 958 virtio_has_feature(features, 959 VIRTIO_NET_F_MRG_RXBUF), 960 virtio_has_feature(features, 961 VIRTIO_F_VERSION_1), 962 virtio_has_feature(features, 963 VIRTIO_NET_F_HASH_REPORT)); 964 965 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 966 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 967 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 968 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 969 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 970 971 if (n->has_vnet_hdr) { 972 n->curr_guest_offloads = 973 virtio_net_guest_offloads_by_features(features); 974 virtio_net_apply_guest_offloads(n); 975 } 976 977 for (i = 0; i < n->max_queue_pairs; i++) { 978 NetClientState *nc = qemu_get_subqueue(n->nic, i); 979 980 if (!get_vhost_net(nc->peer)) { 981 continue; 982 } 983 vhost_net_ack_features(get_vhost_net(nc->peer), features); 984 } 985 986 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 987 memset(n->vlans, 0, MAX_VLAN >> 3); 988 } else { 989 memset(n->vlans, 0xff, MAX_VLAN >> 3); 990 } 991 992 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 993 qapi_event_send_failover_negotiated(n->netclient_name); 994 qatomic_set(&n->failover_primary_hidden, false); 995 failover_add_primary(n, &err); 996 if (err) { 997 if (!qtest_enabled()) { 998 warn_report_err(err); 999 } else { 1000 error_free(err); 1001 } 1002 } 1003 } 1004 } 1005 1006 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 1007 struct iovec *iov, unsigned int iov_cnt) 1008 { 1009 uint8_t on; 1010 size_t s; 1011 NetClientState *nc = qemu_get_queue(n->nic); 1012 1013 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 1014 if (s != sizeof(on)) { 1015 return VIRTIO_NET_ERR; 1016 } 1017 1018 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 1019 n->promisc = on; 1020 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 1021 n->allmulti = on; 1022 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 1023 n->alluni = on; 1024 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 1025 n->nomulti = on; 1026 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 1027 n->nouni = on; 1028 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 1029 n->nobcast = on; 1030 } else { 1031 return VIRTIO_NET_ERR; 1032 } 1033 1034 rxfilter_notify(nc); 1035 1036 return VIRTIO_NET_OK; 1037 } 1038 1039 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 1040 struct iovec *iov, unsigned int iov_cnt) 1041 { 1042 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1043 uint64_t offloads; 1044 size_t s; 1045 1046 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 1047 return VIRTIO_NET_ERR; 1048 } 1049 1050 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 1051 if (s != sizeof(offloads)) { 1052 return VIRTIO_NET_ERR; 1053 } 1054 1055 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 1056 uint64_t supported_offloads; 1057 1058 offloads = virtio_ldq_p(vdev, &offloads); 1059 1060 if (!n->has_vnet_hdr) { 1061 return VIRTIO_NET_ERR; 1062 } 1063 1064 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1065 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1066 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1067 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1068 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1069 1070 supported_offloads = virtio_net_supported_guest_offloads(n); 1071 if (offloads & ~supported_offloads) { 1072 return VIRTIO_NET_ERR; 1073 } 1074 1075 n->curr_guest_offloads = offloads; 1076 virtio_net_apply_guest_offloads(n); 1077 1078 return VIRTIO_NET_OK; 1079 } else { 1080 return VIRTIO_NET_ERR; 1081 } 1082 } 1083 1084 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1085 struct iovec *iov, unsigned int iov_cnt) 1086 { 1087 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1088 struct virtio_net_ctrl_mac mac_data; 1089 size_t s; 1090 NetClientState *nc = qemu_get_queue(n->nic); 1091 1092 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1093 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1094 return VIRTIO_NET_ERR; 1095 } 1096 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1097 assert(s == sizeof(n->mac)); 1098 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1099 rxfilter_notify(nc); 1100 1101 return VIRTIO_NET_OK; 1102 } 1103 1104 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1105 return VIRTIO_NET_ERR; 1106 } 1107 1108 int in_use = 0; 1109 int first_multi = 0; 1110 uint8_t uni_overflow = 0; 1111 uint8_t multi_overflow = 0; 1112 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1113 1114 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1115 sizeof(mac_data.entries)); 1116 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1117 if (s != sizeof(mac_data.entries)) { 1118 goto error; 1119 } 1120 iov_discard_front(&iov, &iov_cnt, s); 1121 1122 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1123 goto error; 1124 } 1125 1126 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1127 s = iov_to_buf(iov, iov_cnt, 0, macs, 1128 mac_data.entries * ETH_ALEN); 1129 if (s != mac_data.entries * ETH_ALEN) { 1130 goto error; 1131 } 1132 in_use += mac_data.entries; 1133 } else { 1134 uni_overflow = 1; 1135 } 1136 1137 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1138 1139 first_multi = in_use; 1140 1141 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1142 sizeof(mac_data.entries)); 1143 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1144 if (s != sizeof(mac_data.entries)) { 1145 goto error; 1146 } 1147 1148 iov_discard_front(&iov, &iov_cnt, s); 1149 1150 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1151 goto error; 1152 } 1153 1154 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1155 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1156 mac_data.entries * ETH_ALEN); 1157 if (s != mac_data.entries * ETH_ALEN) { 1158 goto error; 1159 } 1160 in_use += mac_data.entries; 1161 } else { 1162 multi_overflow = 1; 1163 } 1164 1165 n->mac_table.in_use = in_use; 1166 n->mac_table.first_multi = first_multi; 1167 n->mac_table.uni_overflow = uni_overflow; 1168 n->mac_table.multi_overflow = multi_overflow; 1169 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1170 g_free(macs); 1171 rxfilter_notify(nc); 1172 1173 return VIRTIO_NET_OK; 1174 1175 error: 1176 g_free(macs); 1177 return VIRTIO_NET_ERR; 1178 } 1179 1180 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1181 struct iovec *iov, unsigned int iov_cnt) 1182 { 1183 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1184 uint16_t vid; 1185 size_t s; 1186 NetClientState *nc = qemu_get_queue(n->nic); 1187 1188 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1189 vid = virtio_lduw_p(vdev, &vid); 1190 if (s != sizeof(vid)) { 1191 return VIRTIO_NET_ERR; 1192 } 1193 1194 if (vid >= MAX_VLAN) 1195 return VIRTIO_NET_ERR; 1196 1197 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1198 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1199 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1200 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1201 else 1202 return VIRTIO_NET_ERR; 1203 1204 rxfilter_notify(nc); 1205 1206 return VIRTIO_NET_OK; 1207 } 1208 1209 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1210 struct iovec *iov, unsigned int iov_cnt) 1211 { 1212 trace_virtio_net_handle_announce(n->announce_timer.round); 1213 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1214 n->status & VIRTIO_NET_S_ANNOUNCE) { 1215 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1216 if (n->announce_timer.round) { 1217 qemu_announce_timer_step(&n->announce_timer); 1218 } 1219 return VIRTIO_NET_OK; 1220 } else { 1221 return VIRTIO_NET_ERR; 1222 } 1223 } 1224 1225 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1226 1227 static void virtio_net_disable_rss(VirtIONet *n) 1228 { 1229 if (n->rss_data.enabled) { 1230 trace_virtio_net_rss_disable(); 1231 } 1232 n->rss_data.enabled = false; 1233 1234 virtio_net_detach_epbf_rss(n); 1235 } 1236 1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1238 { 1239 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1240 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1241 return false; 1242 } 1243 1244 return nc->info->set_steering_ebpf(nc, prog_fd); 1245 } 1246 1247 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1248 struct EBPFRSSConfig *config) 1249 { 1250 config->redirect = data->redirect; 1251 config->populate_hash = data->populate_hash; 1252 config->hash_types = data->hash_types; 1253 config->indirections_len = data->indirections_len; 1254 config->default_queue = data->default_queue; 1255 } 1256 1257 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1258 { 1259 struct EBPFRSSConfig config = {}; 1260 1261 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1262 return false; 1263 } 1264 1265 rss_data_to_rss_config(&n->rss_data, &config); 1266 1267 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1268 n->rss_data.indirections_table, n->rss_data.key)) { 1269 return false; 1270 } 1271 1272 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1273 return false; 1274 } 1275 1276 return true; 1277 } 1278 1279 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1280 { 1281 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1282 } 1283 1284 static bool virtio_net_load_ebpf(VirtIONet *n) 1285 { 1286 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1287 /* backend does't support steering ebpf */ 1288 return false; 1289 } 1290 1291 return ebpf_rss_load(&n->ebpf_rss); 1292 } 1293 1294 static void virtio_net_unload_ebpf(VirtIONet *n) 1295 { 1296 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1297 ebpf_rss_unload(&n->ebpf_rss); 1298 } 1299 1300 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1301 struct iovec *iov, 1302 unsigned int iov_cnt, 1303 bool do_rss) 1304 { 1305 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1306 struct virtio_net_rss_config cfg; 1307 size_t s, offset = 0, size_get; 1308 uint16_t queue_pairs, i; 1309 struct { 1310 uint16_t us; 1311 uint8_t b; 1312 } QEMU_PACKED temp; 1313 const char *err_msg = ""; 1314 uint32_t err_value = 0; 1315 1316 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1317 err_msg = "RSS is not negotiated"; 1318 goto error; 1319 } 1320 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1321 err_msg = "Hash report is not negotiated"; 1322 goto error; 1323 } 1324 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1325 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1326 if (s != size_get) { 1327 err_msg = "Short command buffer"; 1328 err_value = (uint32_t)s; 1329 goto error; 1330 } 1331 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1332 n->rss_data.indirections_len = 1333 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1334 n->rss_data.indirections_len++; 1335 if (!do_rss) { 1336 n->rss_data.indirections_len = 1; 1337 } 1338 if (!is_power_of_2(n->rss_data.indirections_len)) { 1339 err_msg = "Invalid size of indirection table"; 1340 err_value = n->rss_data.indirections_len; 1341 goto error; 1342 } 1343 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1344 err_msg = "Too large indirection table"; 1345 err_value = n->rss_data.indirections_len; 1346 goto error; 1347 } 1348 n->rss_data.default_queue = do_rss ? 1349 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1350 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1351 err_msg = "Invalid default queue"; 1352 err_value = n->rss_data.default_queue; 1353 goto error; 1354 } 1355 offset += size_get; 1356 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1357 g_free(n->rss_data.indirections_table); 1358 n->rss_data.indirections_table = g_malloc(size_get); 1359 if (!n->rss_data.indirections_table) { 1360 err_msg = "Can't allocate indirections table"; 1361 err_value = n->rss_data.indirections_len; 1362 goto error; 1363 } 1364 s = iov_to_buf(iov, iov_cnt, offset, 1365 n->rss_data.indirections_table, size_get); 1366 if (s != size_get) { 1367 err_msg = "Short indirection table buffer"; 1368 err_value = (uint32_t)s; 1369 goto error; 1370 } 1371 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1372 uint16_t val = n->rss_data.indirections_table[i]; 1373 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1374 } 1375 offset += size_get; 1376 size_get = sizeof(temp); 1377 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1378 if (s != size_get) { 1379 err_msg = "Can't get queue_pairs"; 1380 err_value = (uint32_t)s; 1381 goto error; 1382 } 1383 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1384 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1385 err_msg = "Invalid number of queue_pairs"; 1386 err_value = queue_pairs; 1387 goto error; 1388 } 1389 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1390 err_msg = "Invalid key size"; 1391 err_value = temp.b; 1392 goto error; 1393 } 1394 if (!temp.b && n->rss_data.hash_types) { 1395 err_msg = "No key provided"; 1396 err_value = 0; 1397 goto error; 1398 } 1399 if (!temp.b && !n->rss_data.hash_types) { 1400 virtio_net_disable_rss(n); 1401 return queue_pairs; 1402 } 1403 offset += size_get; 1404 size_get = temp.b; 1405 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1406 if (s != size_get) { 1407 err_msg = "Can get key buffer"; 1408 err_value = (uint32_t)s; 1409 goto error; 1410 } 1411 n->rss_data.enabled = true; 1412 1413 if (!n->rss_data.populate_hash) { 1414 if (!virtio_net_attach_epbf_rss(n)) { 1415 /* EBPF must be loaded for vhost */ 1416 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1417 warn_report("Can't load eBPF RSS for vhost"); 1418 goto error; 1419 } 1420 /* fallback to software RSS */ 1421 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1422 n->rss_data.enabled_software_rss = true; 1423 } 1424 } else { 1425 /* use software RSS for hash populating */ 1426 /* and detach eBPF if was loaded before */ 1427 virtio_net_detach_epbf_rss(n); 1428 n->rss_data.enabled_software_rss = true; 1429 } 1430 1431 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1432 n->rss_data.indirections_len, 1433 temp.b); 1434 return queue_pairs; 1435 error: 1436 trace_virtio_net_rss_error(err_msg, err_value); 1437 virtio_net_disable_rss(n); 1438 return 0; 1439 } 1440 1441 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1442 struct iovec *iov, unsigned int iov_cnt) 1443 { 1444 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1445 uint16_t queue_pairs; 1446 NetClientState *nc = qemu_get_queue(n->nic); 1447 1448 virtio_net_disable_rss(n); 1449 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1450 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1451 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1452 } 1453 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1454 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1455 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1456 struct virtio_net_ctrl_mq mq; 1457 size_t s; 1458 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1459 return VIRTIO_NET_ERR; 1460 } 1461 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1462 if (s != sizeof(mq)) { 1463 return VIRTIO_NET_ERR; 1464 } 1465 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1466 1467 } else { 1468 return VIRTIO_NET_ERR; 1469 } 1470 1471 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1472 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1473 queue_pairs > n->max_queue_pairs || 1474 !n->multiqueue) { 1475 return VIRTIO_NET_ERR; 1476 } 1477 1478 n->curr_queue_pairs = queue_pairs; 1479 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1480 /* 1481 * Avoid updating the backend for a vdpa device: We're only interested 1482 * in updating the device model queues. 1483 */ 1484 return VIRTIO_NET_OK; 1485 } 1486 /* stop the backend before changing the number of queue_pairs to avoid handling a 1487 * disabled queue */ 1488 virtio_net_set_status(vdev, vdev->status); 1489 virtio_net_set_queue_pairs(n); 1490 1491 return VIRTIO_NET_OK; 1492 } 1493 1494 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1495 const struct iovec *in_sg, unsigned in_num, 1496 const struct iovec *out_sg, 1497 unsigned out_num) 1498 { 1499 VirtIONet *n = VIRTIO_NET(vdev); 1500 struct virtio_net_ctrl_hdr ctrl; 1501 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1502 size_t s; 1503 struct iovec *iov, *iov2; 1504 1505 if (iov_size(in_sg, in_num) < sizeof(status) || 1506 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1507 virtio_error(vdev, "virtio-net ctrl missing headers"); 1508 return 0; 1509 } 1510 1511 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1512 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1513 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1514 if (s != sizeof(ctrl)) { 1515 status = VIRTIO_NET_ERR; 1516 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1517 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1518 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1519 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1520 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1521 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1522 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1523 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1524 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1525 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1526 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1527 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1528 } 1529 1530 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1531 assert(s == sizeof(status)); 1532 1533 g_free(iov2); 1534 return sizeof(status); 1535 } 1536 1537 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1538 { 1539 VirtQueueElement *elem; 1540 1541 for (;;) { 1542 size_t written; 1543 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1544 if (!elem) { 1545 break; 1546 } 1547 1548 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1549 elem->out_sg, elem->out_num); 1550 if (written > 0) { 1551 virtqueue_push(vq, elem, written); 1552 virtio_notify(vdev, vq); 1553 g_free(elem); 1554 } else { 1555 virtqueue_detach_element(vq, elem, 0); 1556 g_free(elem); 1557 break; 1558 } 1559 } 1560 } 1561 1562 /* RX */ 1563 1564 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1565 { 1566 VirtIONet *n = VIRTIO_NET(vdev); 1567 int queue_index = vq2q(virtio_get_queue_index(vq)); 1568 1569 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1570 } 1571 1572 static bool virtio_net_can_receive(NetClientState *nc) 1573 { 1574 VirtIONet *n = qemu_get_nic_opaque(nc); 1575 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1576 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1577 1578 if (!vdev->vm_running) { 1579 return false; 1580 } 1581 1582 if (nc->queue_index >= n->curr_queue_pairs) { 1583 return false; 1584 } 1585 1586 if (!virtio_queue_ready(q->rx_vq) || 1587 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1588 return false; 1589 } 1590 1591 return true; 1592 } 1593 1594 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1595 { 1596 VirtIONet *n = q->n; 1597 if (virtio_queue_empty(q->rx_vq) || 1598 (n->mergeable_rx_bufs && 1599 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1600 virtio_queue_set_notification(q->rx_vq, 1); 1601 1602 /* To avoid a race condition where the guest has made some buffers 1603 * available after the above check but before notification was 1604 * enabled, check for available buffers again. 1605 */ 1606 if (virtio_queue_empty(q->rx_vq) || 1607 (n->mergeable_rx_bufs && 1608 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1609 return 0; 1610 } 1611 } 1612 1613 virtio_queue_set_notification(q->rx_vq, 0); 1614 return 1; 1615 } 1616 1617 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1618 { 1619 virtio_tswap16s(vdev, &hdr->hdr_len); 1620 virtio_tswap16s(vdev, &hdr->gso_size); 1621 virtio_tswap16s(vdev, &hdr->csum_start); 1622 virtio_tswap16s(vdev, &hdr->csum_offset); 1623 } 1624 1625 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1626 * it never finds out that the packets don't have valid checksums. This 1627 * causes dhclient to get upset. Fedora's carried a patch for ages to 1628 * fix this with Xen but it hasn't appeared in an upstream release of 1629 * dhclient yet. 1630 * 1631 * To avoid breaking existing guests, we catch udp packets and add 1632 * checksums. This is terrible but it's better than hacking the guest 1633 * kernels. 1634 * 1635 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1636 * we should provide a mechanism to disable it to avoid polluting the host 1637 * cache. 1638 */ 1639 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1640 uint8_t *buf, size_t size) 1641 { 1642 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1643 (size > 27 && size < 1500) && /* normal sized MTU */ 1644 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1645 (buf[23] == 17) && /* ip.protocol == UDP */ 1646 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1647 net_checksum_calculate(buf, size, CSUM_UDP); 1648 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1649 } 1650 } 1651 1652 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1653 const void *buf, size_t size) 1654 { 1655 if (n->has_vnet_hdr) { 1656 /* FIXME this cast is evil */ 1657 void *wbuf = (void *)buf; 1658 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1659 size - n->host_hdr_len); 1660 1661 if (n->needs_vnet_hdr_swap) { 1662 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1663 } 1664 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1665 } else { 1666 struct virtio_net_hdr hdr = { 1667 .flags = 0, 1668 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1669 }; 1670 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1671 } 1672 } 1673 1674 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1675 { 1676 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1677 static const uint8_t vlan[] = {0x81, 0x00}; 1678 uint8_t *ptr = (uint8_t *)buf; 1679 int i; 1680 1681 if (n->promisc) 1682 return 1; 1683 1684 ptr += n->host_hdr_len; 1685 1686 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1687 int vid = lduw_be_p(ptr + 14) & 0xfff; 1688 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1689 return 0; 1690 } 1691 1692 if (ptr[0] & 1) { // multicast 1693 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1694 return !n->nobcast; 1695 } else if (n->nomulti) { 1696 return 0; 1697 } else if (n->allmulti || n->mac_table.multi_overflow) { 1698 return 1; 1699 } 1700 1701 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1702 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1703 return 1; 1704 } 1705 } 1706 } else { // unicast 1707 if (n->nouni) { 1708 return 0; 1709 } else if (n->alluni || n->mac_table.uni_overflow) { 1710 return 1; 1711 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1712 return 1; 1713 } 1714 1715 for (i = 0; i < n->mac_table.first_multi; i++) { 1716 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1717 return 1; 1718 } 1719 } 1720 } 1721 1722 return 0; 1723 } 1724 1725 static uint8_t virtio_net_get_hash_type(bool isip4, 1726 bool isip6, 1727 bool isudp, 1728 bool istcp, 1729 uint32_t types) 1730 { 1731 if (isip4) { 1732 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { 1733 return NetPktRssIpV4Tcp; 1734 } 1735 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { 1736 return NetPktRssIpV4Udp; 1737 } 1738 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1739 return NetPktRssIpV4; 1740 } 1741 } else if (isip6) { 1742 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 1743 VIRTIO_NET_RSS_HASH_TYPE_TCPv6; 1744 1745 if (istcp && (types & mask)) { 1746 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? 1747 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; 1748 } 1749 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; 1750 if (isudp && (types & mask)) { 1751 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? 1752 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; 1753 } 1754 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; 1755 if (types & mask) { 1756 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? 1757 NetPktRssIpV6Ex : NetPktRssIpV6; 1758 } 1759 } 1760 return 0xff; 1761 } 1762 1763 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1764 uint32_t hash) 1765 { 1766 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1767 hdr->hash_value = hash; 1768 hdr->hash_report = report; 1769 } 1770 1771 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1772 size_t size) 1773 { 1774 VirtIONet *n = qemu_get_nic_opaque(nc); 1775 unsigned int index = nc->queue_index, new_index = index; 1776 struct NetRxPkt *pkt = n->rx_pkt; 1777 uint8_t net_hash_type; 1778 uint32_t hash; 1779 bool isip4, isip6, isudp, istcp; 1780 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1781 VIRTIO_NET_HASH_REPORT_IPv4, 1782 VIRTIO_NET_HASH_REPORT_TCPv4, 1783 VIRTIO_NET_HASH_REPORT_TCPv6, 1784 VIRTIO_NET_HASH_REPORT_IPv6, 1785 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1786 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1787 VIRTIO_NET_HASH_REPORT_UDPv4, 1788 VIRTIO_NET_HASH_REPORT_UDPv6, 1789 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1790 }; 1791 1792 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, 1793 size - n->host_hdr_len); 1794 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1795 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { 1796 istcp = isudp = false; 1797 } 1798 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { 1799 istcp = isudp = false; 1800 } 1801 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, 1802 n->rss_data.hash_types); 1803 if (net_hash_type > NetPktRssIpV6UdpEx) { 1804 if (n->rss_data.populate_hash) { 1805 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1806 } 1807 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1808 } 1809 1810 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1811 1812 if (n->rss_data.populate_hash) { 1813 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1814 } 1815 1816 if (n->rss_data.redirect) { 1817 new_index = hash & (n->rss_data.indirections_len - 1); 1818 new_index = n->rss_data.indirections_table[new_index]; 1819 } 1820 1821 return (index == new_index) ? -1 : new_index; 1822 } 1823 1824 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1825 size_t size, bool no_rss) 1826 { 1827 VirtIONet *n = qemu_get_nic_opaque(nc); 1828 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1829 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1830 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1831 size_t lens[VIRTQUEUE_MAX_SIZE]; 1832 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1833 struct virtio_net_hdr_mrg_rxbuf mhdr; 1834 unsigned mhdr_cnt = 0; 1835 size_t offset, i, guest_offset, j; 1836 ssize_t err; 1837 1838 if (!virtio_net_can_receive(nc)) { 1839 return -1; 1840 } 1841 1842 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1843 int index = virtio_net_process_rss(nc, buf, size); 1844 if (index >= 0) { 1845 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1846 return virtio_net_receive_rcu(nc2, buf, size, true); 1847 } 1848 } 1849 1850 /* hdr_len refers to the header we supply to the guest */ 1851 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1852 return 0; 1853 } 1854 1855 if (!receive_filter(n, buf, size)) 1856 return size; 1857 1858 offset = i = 0; 1859 1860 while (offset < size) { 1861 VirtQueueElement *elem; 1862 int len, total; 1863 const struct iovec *sg; 1864 1865 total = 0; 1866 1867 if (i == VIRTQUEUE_MAX_SIZE) { 1868 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1869 err = size; 1870 goto err; 1871 } 1872 1873 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1874 if (!elem) { 1875 if (i) { 1876 virtio_error(vdev, "virtio-net unexpected empty queue: " 1877 "i %zd mergeable %d offset %zd, size %zd, " 1878 "guest hdr len %zd, host hdr len %zd " 1879 "guest features 0x%" PRIx64, 1880 i, n->mergeable_rx_bufs, offset, size, 1881 n->guest_hdr_len, n->host_hdr_len, 1882 vdev->guest_features); 1883 } 1884 err = -1; 1885 goto err; 1886 } 1887 1888 if (elem->in_num < 1) { 1889 virtio_error(vdev, 1890 "virtio-net receive queue contains no in buffers"); 1891 virtqueue_detach_element(q->rx_vq, elem, 0); 1892 g_free(elem); 1893 err = -1; 1894 goto err; 1895 } 1896 1897 sg = elem->in_sg; 1898 if (i == 0) { 1899 assert(offset == 0); 1900 if (n->mergeable_rx_bufs) { 1901 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1902 sg, elem->in_num, 1903 offsetof(typeof(mhdr), num_buffers), 1904 sizeof(mhdr.num_buffers)); 1905 } 1906 1907 receive_header(n, sg, elem->in_num, buf, size); 1908 if (n->rss_data.populate_hash) { 1909 offset = sizeof(mhdr); 1910 iov_from_buf(sg, elem->in_num, offset, 1911 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1912 } 1913 offset = n->host_hdr_len; 1914 total += n->guest_hdr_len; 1915 guest_offset = n->guest_hdr_len; 1916 } else { 1917 guest_offset = 0; 1918 } 1919 1920 /* copy in packet. ugh */ 1921 len = iov_from_buf(sg, elem->in_num, guest_offset, 1922 buf + offset, size - offset); 1923 total += len; 1924 offset += len; 1925 /* If buffers can't be merged, at this point we 1926 * must have consumed the complete packet. 1927 * Otherwise, drop it. */ 1928 if (!n->mergeable_rx_bufs && offset < size) { 1929 virtqueue_unpop(q->rx_vq, elem, total); 1930 g_free(elem); 1931 err = size; 1932 goto err; 1933 } 1934 1935 elems[i] = elem; 1936 lens[i] = total; 1937 i++; 1938 } 1939 1940 if (mhdr_cnt) { 1941 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1942 iov_from_buf(mhdr_sg, mhdr_cnt, 1943 0, 1944 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1945 } 1946 1947 for (j = 0; j < i; j++) { 1948 /* signal other side */ 1949 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1950 g_free(elems[j]); 1951 } 1952 1953 virtqueue_flush(q->rx_vq, i); 1954 virtio_notify(vdev, q->rx_vq); 1955 1956 return size; 1957 1958 err: 1959 for (j = 0; j < i; j++) { 1960 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1961 g_free(elems[j]); 1962 } 1963 1964 return err; 1965 } 1966 1967 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1968 size_t size) 1969 { 1970 RCU_READ_LOCK_GUARD(); 1971 1972 return virtio_net_receive_rcu(nc, buf, size, false); 1973 } 1974 1975 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1976 const uint8_t *buf, 1977 VirtioNetRscUnit *unit) 1978 { 1979 uint16_t ip_hdrlen; 1980 struct ip_header *ip; 1981 1982 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1983 + sizeof(struct eth_header)); 1984 unit->ip = (void *)ip; 1985 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1986 unit->ip_plen = &ip->ip_len; 1987 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1988 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1989 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1990 } 1991 1992 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1993 const uint8_t *buf, 1994 VirtioNetRscUnit *unit) 1995 { 1996 struct ip6_header *ip6; 1997 1998 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 1999 + sizeof(struct eth_header)); 2000 unit->ip = ip6; 2001 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2002 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2003 + sizeof(struct ip6_header)); 2004 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2005 2006 /* There is a difference between payload lenght in ipv4 and v6, 2007 ip header is excluded in ipv6 */ 2008 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2009 } 2010 2011 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2012 VirtioNetRscSeg *seg) 2013 { 2014 int ret; 2015 struct virtio_net_hdr_v1 *h; 2016 2017 h = (struct virtio_net_hdr_v1 *)seg->buf; 2018 h->flags = 0; 2019 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2020 2021 if (seg->is_coalesced) { 2022 h->rsc.segments = seg->packets; 2023 h->rsc.dup_acks = seg->dup_ack; 2024 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2025 if (chain->proto == ETH_P_IP) { 2026 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2027 } else { 2028 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2029 } 2030 } 2031 2032 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2033 QTAILQ_REMOVE(&chain->buffers, seg, next); 2034 g_free(seg->buf); 2035 g_free(seg); 2036 2037 return ret; 2038 } 2039 2040 static void virtio_net_rsc_purge(void *opq) 2041 { 2042 VirtioNetRscSeg *seg, *rn; 2043 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2044 2045 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2046 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2047 chain->stat.purge_failed++; 2048 continue; 2049 } 2050 } 2051 2052 chain->stat.timer++; 2053 if (!QTAILQ_EMPTY(&chain->buffers)) { 2054 timer_mod(chain->drain_timer, 2055 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2056 } 2057 } 2058 2059 static void virtio_net_rsc_cleanup(VirtIONet *n) 2060 { 2061 VirtioNetRscChain *chain, *rn_chain; 2062 VirtioNetRscSeg *seg, *rn_seg; 2063 2064 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2065 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2066 QTAILQ_REMOVE(&chain->buffers, seg, next); 2067 g_free(seg->buf); 2068 g_free(seg); 2069 } 2070 2071 timer_free(chain->drain_timer); 2072 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2073 g_free(chain); 2074 } 2075 } 2076 2077 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2078 NetClientState *nc, 2079 const uint8_t *buf, size_t size) 2080 { 2081 uint16_t hdr_len; 2082 VirtioNetRscSeg *seg; 2083 2084 hdr_len = chain->n->guest_hdr_len; 2085 seg = g_new(VirtioNetRscSeg, 1); 2086 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2087 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2088 memcpy(seg->buf, buf, size); 2089 seg->size = size; 2090 seg->packets = 1; 2091 seg->dup_ack = 0; 2092 seg->is_coalesced = 0; 2093 seg->nc = nc; 2094 2095 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2096 chain->stat.cache++; 2097 2098 switch (chain->proto) { 2099 case ETH_P_IP: 2100 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2101 break; 2102 case ETH_P_IPV6: 2103 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2104 break; 2105 default: 2106 g_assert_not_reached(); 2107 } 2108 } 2109 2110 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2111 VirtioNetRscSeg *seg, 2112 const uint8_t *buf, 2113 struct tcp_header *n_tcp, 2114 struct tcp_header *o_tcp) 2115 { 2116 uint32_t nack, oack; 2117 uint16_t nwin, owin; 2118 2119 nack = htonl(n_tcp->th_ack); 2120 nwin = htons(n_tcp->th_win); 2121 oack = htonl(o_tcp->th_ack); 2122 owin = htons(o_tcp->th_win); 2123 2124 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2125 chain->stat.ack_out_of_win++; 2126 return RSC_FINAL; 2127 } else if (nack == oack) { 2128 /* duplicated ack or window probe */ 2129 if (nwin == owin) { 2130 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2131 chain->stat.dup_ack++; 2132 return RSC_FINAL; 2133 } else { 2134 /* Coalesce window update */ 2135 o_tcp->th_win = n_tcp->th_win; 2136 chain->stat.win_update++; 2137 return RSC_COALESCE; 2138 } 2139 } else { 2140 /* pure ack, go to 'C', finalize*/ 2141 chain->stat.pure_ack++; 2142 return RSC_FINAL; 2143 } 2144 } 2145 2146 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2147 VirtioNetRscSeg *seg, 2148 const uint8_t *buf, 2149 VirtioNetRscUnit *n_unit) 2150 { 2151 void *data; 2152 uint16_t o_ip_len; 2153 uint32_t nseq, oseq; 2154 VirtioNetRscUnit *o_unit; 2155 2156 o_unit = &seg->unit; 2157 o_ip_len = htons(*o_unit->ip_plen); 2158 nseq = htonl(n_unit->tcp->th_seq); 2159 oseq = htonl(o_unit->tcp->th_seq); 2160 2161 /* out of order or retransmitted. */ 2162 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2163 chain->stat.data_out_of_win++; 2164 return RSC_FINAL; 2165 } 2166 2167 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2168 if (nseq == oseq) { 2169 if ((o_unit->payload == 0) && n_unit->payload) { 2170 /* From no payload to payload, normal case, not a dup ack or etc */ 2171 chain->stat.data_after_pure_ack++; 2172 goto coalesce; 2173 } else { 2174 return virtio_net_rsc_handle_ack(chain, seg, buf, 2175 n_unit->tcp, o_unit->tcp); 2176 } 2177 } else if ((nseq - oseq) != o_unit->payload) { 2178 /* Not a consistent packet, out of order */ 2179 chain->stat.data_out_of_order++; 2180 return RSC_FINAL; 2181 } else { 2182 coalesce: 2183 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2184 chain->stat.over_size++; 2185 return RSC_FINAL; 2186 } 2187 2188 /* Here comes the right data, the payload length in v4/v6 is different, 2189 so use the field value to update and record the new data len */ 2190 o_unit->payload += n_unit->payload; /* update new data len */ 2191 2192 /* update field in ip header */ 2193 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2194 2195 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2196 for windows guest, while this may change the behavior for linux 2197 guest (only if it uses RSC feature). */ 2198 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2199 2200 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2201 o_unit->tcp->th_win = n_unit->tcp->th_win; 2202 2203 memmove(seg->buf + seg->size, data, n_unit->payload); 2204 seg->size += n_unit->payload; 2205 seg->packets++; 2206 chain->stat.coalesced++; 2207 return RSC_COALESCE; 2208 } 2209 } 2210 2211 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2212 VirtioNetRscSeg *seg, 2213 const uint8_t *buf, size_t size, 2214 VirtioNetRscUnit *unit) 2215 { 2216 struct ip_header *ip1, *ip2; 2217 2218 ip1 = (struct ip_header *)(unit->ip); 2219 ip2 = (struct ip_header *)(seg->unit.ip); 2220 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2221 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2222 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2223 chain->stat.no_match++; 2224 return RSC_NO_MATCH; 2225 } 2226 2227 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2228 } 2229 2230 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2231 VirtioNetRscSeg *seg, 2232 const uint8_t *buf, size_t size, 2233 VirtioNetRscUnit *unit) 2234 { 2235 struct ip6_header *ip1, *ip2; 2236 2237 ip1 = (struct ip6_header *)(unit->ip); 2238 ip2 = (struct ip6_header *)(seg->unit.ip); 2239 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2240 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2241 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2242 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2243 chain->stat.no_match++; 2244 return RSC_NO_MATCH; 2245 } 2246 2247 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2248 } 2249 2250 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2251 * to prevent out of order */ 2252 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2253 struct tcp_header *tcp) 2254 { 2255 uint16_t tcp_hdr; 2256 uint16_t tcp_flag; 2257 2258 tcp_flag = htons(tcp->th_offset_flags); 2259 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2260 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2261 if (tcp_flag & TH_SYN) { 2262 chain->stat.tcp_syn++; 2263 return RSC_BYPASS; 2264 } 2265 2266 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2267 chain->stat.tcp_ctrl_drain++; 2268 return RSC_FINAL; 2269 } 2270 2271 if (tcp_hdr > sizeof(struct tcp_header)) { 2272 chain->stat.tcp_all_opt++; 2273 return RSC_FINAL; 2274 } 2275 2276 return RSC_CANDIDATE; 2277 } 2278 2279 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2280 NetClientState *nc, 2281 const uint8_t *buf, size_t size, 2282 VirtioNetRscUnit *unit) 2283 { 2284 int ret; 2285 VirtioNetRscSeg *seg, *nseg; 2286 2287 if (QTAILQ_EMPTY(&chain->buffers)) { 2288 chain->stat.empty_cache++; 2289 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2290 timer_mod(chain->drain_timer, 2291 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2292 return size; 2293 } 2294 2295 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2296 if (chain->proto == ETH_P_IP) { 2297 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2298 } else { 2299 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2300 } 2301 2302 if (ret == RSC_FINAL) { 2303 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2304 /* Send failed */ 2305 chain->stat.final_failed++; 2306 return 0; 2307 } 2308 2309 /* Send current packet */ 2310 return virtio_net_do_receive(nc, buf, size); 2311 } else if (ret == RSC_NO_MATCH) { 2312 continue; 2313 } else { 2314 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2315 seg->is_coalesced = 1; 2316 return size; 2317 } 2318 } 2319 2320 chain->stat.no_match_cache++; 2321 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2322 return size; 2323 } 2324 2325 /* Drain a connection data, this is to avoid out of order segments */ 2326 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2327 NetClientState *nc, 2328 const uint8_t *buf, size_t size, 2329 uint16_t ip_start, uint16_t ip_size, 2330 uint16_t tcp_port) 2331 { 2332 VirtioNetRscSeg *seg, *nseg; 2333 uint32_t ppair1, ppair2; 2334 2335 ppair1 = *(uint32_t *)(buf + tcp_port); 2336 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2337 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2338 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2339 || (ppair1 != ppair2)) { 2340 continue; 2341 } 2342 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2343 chain->stat.drain_failed++; 2344 } 2345 2346 break; 2347 } 2348 2349 return virtio_net_do_receive(nc, buf, size); 2350 } 2351 2352 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2353 struct ip_header *ip, 2354 const uint8_t *buf, size_t size) 2355 { 2356 uint16_t ip_len; 2357 2358 /* Not an ipv4 packet */ 2359 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2360 chain->stat.ip_option++; 2361 return RSC_BYPASS; 2362 } 2363 2364 /* Don't handle packets with ip option */ 2365 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2366 chain->stat.ip_option++; 2367 return RSC_BYPASS; 2368 } 2369 2370 if (ip->ip_p != IPPROTO_TCP) { 2371 chain->stat.bypass_not_tcp++; 2372 return RSC_BYPASS; 2373 } 2374 2375 /* Don't handle packets with ip fragment */ 2376 if (!(htons(ip->ip_off) & IP_DF)) { 2377 chain->stat.ip_frag++; 2378 return RSC_BYPASS; 2379 } 2380 2381 /* Don't handle packets with ecn flag */ 2382 if (IPTOS_ECN(ip->ip_tos)) { 2383 chain->stat.ip_ecn++; 2384 return RSC_BYPASS; 2385 } 2386 2387 ip_len = htons(ip->ip_len); 2388 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2389 || ip_len > (size - chain->n->guest_hdr_len - 2390 sizeof(struct eth_header))) { 2391 chain->stat.ip_hacked++; 2392 return RSC_BYPASS; 2393 } 2394 2395 return RSC_CANDIDATE; 2396 } 2397 2398 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2399 NetClientState *nc, 2400 const uint8_t *buf, size_t size) 2401 { 2402 int32_t ret; 2403 uint16_t hdr_len; 2404 VirtioNetRscUnit unit; 2405 2406 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2407 2408 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2409 + sizeof(struct tcp_header))) { 2410 chain->stat.bypass_not_tcp++; 2411 return virtio_net_do_receive(nc, buf, size); 2412 } 2413 2414 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2415 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2416 != RSC_CANDIDATE) { 2417 return virtio_net_do_receive(nc, buf, size); 2418 } 2419 2420 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2421 if (ret == RSC_BYPASS) { 2422 return virtio_net_do_receive(nc, buf, size); 2423 } else if (ret == RSC_FINAL) { 2424 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2425 ((hdr_len + sizeof(struct eth_header)) + 12), 2426 VIRTIO_NET_IP4_ADDR_SIZE, 2427 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2428 } 2429 2430 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2431 } 2432 2433 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2434 struct ip6_header *ip6, 2435 const uint8_t *buf, size_t size) 2436 { 2437 uint16_t ip_len; 2438 2439 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2440 != IP_HEADER_VERSION_6) { 2441 return RSC_BYPASS; 2442 } 2443 2444 /* Both option and protocol is checked in this */ 2445 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2446 chain->stat.bypass_not_tcp++; 2447 return RSC_BYPASS; 2448 } 2449 2450 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2451 if (ip_len < sizeof(struct tcp_header) || 2452 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2453 - sizeof(struct ip6_header))) { 2454 chain->stat.ip_hacked++; 2455 return RSC_BYPASS; 2456 } 2457 2458 /* Don't handle packets with ecn flag */ 2459 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2460 chain->stat.ip_ecn++; 2461 return RSC_BYPASS; 2462 } 2463 2464 return RSC_CANDIDATE; 2465 } 2466 2467 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2468 const uint8_t *buf, size_t size) 2469 { 2470 int32_t ret; 2471 uint16_t hdr_len; 2472 VirtioNetRscChain *chain; 2473 VirtioNetRscUnit unit; 2474 2475 chain = (VirtioNetRscChain *)opq; 2476 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2477 2478 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2479 + sizeof(tcp_header))) { 2480 return virtio_net_do_receive(nc, buf, size); 2481 } 2482 2483 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2484 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2485 unit.ip, buf, size)) { 2486 return virtio_net_do_receive(nc, buf, size); 2487 } 2488 2489 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2490 if (ret == RSC_BYPASS) { 2491 return virtio_net_do_receive(nc, buf, size); 2492 } else if (ret == RSC_FINAL) { 2493 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2494 ((hdr_len + sizeof(struct eth_header)) + 8), 2495 VIRTIO_NET_IP6_ADDR_SIZE, 2496 hdr_len + sizeof(struct eth_header) 2497 + sizeof(struct ip6_header)); 2498 } 2499 2500 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2501 } 2502 2503 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2504 NetClientState *nc, 2505 uint16_t proto) 2506 { 2507 VirtioNetRscChain *chain; 2508 2509 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2510 return NULL; 2511 } 2512 2513 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2514 if (chain->proto == proto) { 2515 return chain; 2516 } 2517 } 2518 2519 chain = g_malloc(sizeof(*chain)); 2520 chain->n = n; 2521 chain->proto = proto; 2522 if (proto == (uint16_t)ETH_P_IP) { 2523 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2524 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2525 } else { 2526 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2527 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2528 } 2529 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2530 virtio_net_rsc_purge, chain); 2531 memset(&chain->stat, 0, sizeof(chain->stat)); 2532 2533 QTAILQ_INIT(&chain->buffers); 2534 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2535 2536 return chain; 2537 } 2538 2539 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2540 const uint8_t *buf, 2541 size_t size) 2542 { 2543 uint16_t proto; 2544 VirtioNetRscChain *chain; 2545 struct eth_header *eth; 2546 VirtIONet *n; 2547 2548 n = qemu_get_nic_opaque(nc); 2549 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2550 return virtio_net_do_receive(nc, buf, size); 2551 } 2552 2553 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2554 proto = htons(eth->h_proto); 2555 2556 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2557 if (chain) { 2558 chain->stat.received++; 2559 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2560 return virtio_net_rsc_receive4(chain, nc, buf, size); 2561 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2562 return virtio_net_rsc_receive6(chain, nc, buf, size); 2563 } 2564 } 2565 return virtio_net_do_receive(nc, buf, size); 2566 } 2567 2568 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2569 size_t size) 2570 { 2571 VirtIONet *n = qemu_get_nic_opaque(nc); 2572 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2573 return virtio_net_rsc_receive(nc, buf, size); 2574 } else { 2575 return virtio_net_do_receive(nc, buf, size); 2576 } 2577 } 2578 2579 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2580 2581 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2582 { 2583 VirtIONet *n = qemu_get_nic_opaque(nc); 2584 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2585 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2586 int ret; 2587 2588 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2589 virtio_notify(vdev, q->tx_vq); 2590 2591 g_free(q->async_tx.elem); 2592 q->async_tx.elem = NULL; 2593 2594 virtio_queue_set_notification(q->tx_vq, 1); 2595 ret = virtio_net_flush_tx(q); 2596 if (ret >= n->tx_burst) { 2597 /* 2598 * the flush has been stopped by tx_burst 2599 * we will not receive notification for the 2600 * remainining part, so re-schedule 2601 */ 2602 virtio_queue_set_notification(q->tx_vq, 0); 2603 if (q->tx_bh) { 2604 qemu_bh_schedule(q->tx_bh); 2605 } else { 2606 timer_mod(q->tx_timer, 2607 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2608 } 2609 q->tx_waiting = 1; 2610 } 2611 } 2612 2613 /* TX */ 2614 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2615 { 2616 VirtIONet *n = q->n; 2617 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2618 VirtQueueElement *elem; 2619 int32_t num_packets = 0; 2620 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2621 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2622 return num_packets; 2623 } 2624 2625 if (q->async_tx.elem) { 2626 virtio_queue_set_notification(q->tx_vq, 0); 2627 return num_packets; 2628 } 2629 2630 for (;;) { 2631 ssize_t ret; 2632 unsigned int out_num; 2633 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2634 struct virtio_net_hdr_mrg_rxbuf mhdr; 2635 2636 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2637 if (!elem) { 2638 break; 2639 } 2640 2641 out_num = elem->out_num; 2642 out_sg = elem->out_sg; 2643 if (out_num < 1) { 2644 virtio_error(vdev, "virtio-net header not in first element"); 2645 virtqueue_detach_element(q->tx_vq, elem, 0); 2646 g_free(elem); 2647 return -EINVAL; 2648 } 2649 2650 if (n->has_vnet_hdr) { 2651 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2652 n->guest_hdr_len) { 2653 virtio_error(vdev, "virtio-net header incorrect"); 2654 virtqueue_detach_element(q->tx_vq, elem, 0); 2655 g_free(elem); 2656 return -EINVAL; 2657 } 2658 if (n->needs_vnet_hdr_swap) { 2659 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2660 sg2[0].iov_base = &mhdr; 2661 sg2[0].iov_len = n->guest_hdr_len; 2662 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2663 out_sg, out_num, 2664 n->guest_hdr_len, -1); 2665 if (out_num == VIRTQUEUE_MAX_SIZE) { 2666 goto drop; 2667 } 2668 out_num += 1; 2669 out_sg = sg2; 2670 } 2671 } 2672 /* 2673 * If host wants to see the guest header as is, we can 2674 * pass it on unchanged. Otherwise, copy just the parts 2675 * that host is interested in. 2676 */ 2677 assert(n->host_hdr_len <= n->guest_hdr_len); 2678 if (n->host_hdr_len != n->guest_hdr_len) { 2679 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2680 out_sg, out_num, 2681 0, n->host_hdr_len); 2682 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2683 out_sg, out_num, 2684 n->guest_hdr_len, -1); 2685 out_num = sg_num; 2686 out_sg = sg; 2687 } 2688 2689 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2690 out_sg, out_num, virtio_net_tx_complete); 2691 if (ret == 0) { 2692 virtio_queue_set_notification(q->tx_vq, 0); 2693 q->async_tx.elem = elem; 2694 return -EBUSY; 2695 } 2696 2697 drop: 2698 virtqueue_push(q->tx_vq, elem, 0); 2699 virtio_notify(vdev, q->tx_vq); 2700 g_free(elem); 2701 2702 if (++num_packets >= n->tx_burst) { 2703 break; 2704 } 2705 } 2706 return num_packets; 2707 } 2708 2709 static void virtio_net_tx_timer(void *opaque); 2710 2711 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2712 { 2713 VirtIONet *n = VIRTIO_NET(vdev); 2714 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2715 2716 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2717 virtio_net_drop_tx_queue_data(vdev, vq); 2718 return; 2719 } 2720 2721 /* This happens when device was stopped but VCPU wasn't. */ 2722 if (!vdev->vm_running) { 2723 q->tx_waiting = 1; 2724 return; 2725 } 2726 2727 if (q->tx_waiting) { 2728 /* We already have queued packets, immediately flush */ 2729 timer_del(q->tx_timer); 2730 virtio_net_tx_timer(q); 2731 } else { 2732 /* re-arm timer to flush it (and more) on next tick */ 2733 timer_mod(q->tx_timer, 2734 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2735 q->tx_waiting = 1; 2736 virtio_queue_set_notification(vq, 0); 2737 } 2738 } 2739 2740 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2741 { 2742 VirtIONet *n = VIRTIO_NET(vdev); 2743 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2744 2745 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2746 virtio_net_drop_tx_queue_data(vdev, vq); 2747 return; 2748 } 2749 2750 if (unlikely(q->tx_waiting)) { 2751 return; 2752 } 2753 q->tx_waiting = 1; 2754 /* This happens when device was stopped but VCPU wasn't. */ 2755 if (!vdev->vm_running) { 2756 return; 2757 } 2758 virtio_queue_set_notification(vq, 0); 2759 qemu_bh_schedule(q->tx_bh); 2760 } 2761 2762 static void virtio_net_tx_timer(void *opaque) 2763 { 2764 VirtIONetQueue *q = opaque; 2765 VirtIONet *n = q->n; 2766 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2767 int ret; 2768 2769 /* This happens when device was stopped but BH wasn't. */ 2770 if (!vdev->vm_running) { 2771 /* Make sure tx waiting is set, so we'll run when restarted. */ 2772 assert(q->tx_waiting); 2773 return; 2774 } 2775 2776 q->tx_waiting = 0; 2777 2778 /* Just in case the driver is not ready on more */ 2779 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2780 return; 2781 } 2782 2783 ret = virtio_net_flush_tx(q); 2784 if (ret == -EBUSY || ret == -EINVAL) { 2785 return; 2786 } 2787 /* 2788 * If we flush a full burst of packets, assume there are 2789 * more coming and immediately rearm 2790 */ 2791 if (ret >= n->tx_burst) { 2792 q->tx_waiting = 1; 2793 timer_mod(q->tx_timer, 2794 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2795 return; 2796 } 2797 /* 2798 * If less than a full burst, re-enable notification and flush 2799 * anything that may have come in while we weren't looking. If 2800 * we find something, assume the guest is still active and rearm 2801 */ 2802 virtio_queue_set_notification(q->tx_vq, 1); 2803 ret = virtio_net_flush_tx(q); 2804 if (ret > 0) { 2805 virtio_queue_set_notification(q->tx_vq, 0); 2806 q->tx_waiting = 1; 2807 timer_mod(q->tx_timer, 2808 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2809 } 2810 } 2811 2812 static void virtio_net_tx_bh(void *opaque) 2813 { 2814 VirtIONetQueue *q = opaque; 2815 VirtIONet *n = q->n; 2816 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2817 int32_t ret; 2818 2819 /* This happens when device was stopped but BH wasn't. */ 2820 if (!vdev->vm_running) { 2821 /* Make sure tx waiting is set, so we'll run when restarted. */ 2822 assert(q->tx_waiting); 2823 return; 2824 } 2825 2826 q->tx_waiting = 0; 2827 2828 /* Just in case the driver is not ready on more */ 2829 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2830 return; 2831 } 2832 2833 ret = virtio_net_flush_tx(q); 2834 if (ret == -EBUSY || ret == -EINVAL) { 2835 return; /* Notification re-enable handled by tx_complete or device 2836 * broken */ 2837 } 2838 2839 /* If we flush a full burst of packets, assume there are 2840 * more coming and immediately reschedule */ 2841 if (ret >= n->tx_burst) { 2842 qemu_bh_schedule(q->tx_bh); 2843 q->tx_waiting = 1; 2844 return; 2845 } 2846 2847 /* If less than a full burst, re-enable notification and flush 2848 * anything that may have come in while we weren't looking. If 2849 * we find something, assume the guest is still active and reschedule */ 2850 virtio_queue_set_notification(q->tx_vq, 1); 2851 ret = virtio_net_flush_tx(q); 2852 if (ret == -EINVAL) { 2853 return; 2854 } else if (ret > 0) { 2855 virtio_queue_set_notification(q->tx_vq, 0); 2856 qemu_bh_schedule(q->tx_bh); 2857 q->tx_waiting = 1; 2858 } 2859 } 2860 2861 static void virtio_net_add_queue(VirtIONet *n, int index) 2862 { 2863 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2864 2865 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2866 virtio_net_handle_rx); 2867 2868 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2869 n->vqs[index].tx_vq = 2870 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2871 virtio_net_handle_tx_timer); 2872 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2873 virtio_net_tx_timer, 2874 &n->vqs[index]); 2875 } else { 2876 n->vqs[index].tx_vq = 2877 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2878 virtio_net_handle_tx_bh); 2879 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2880 } 2881 2882 n->vqs[index].tx_waiting = 0; 2883 n->vqs[index].n = n; 2884 } 2885 2886 static void virtio_net_del_queue(VirtIONet *n, int index) 2887 { 2888 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2889 VirtIONetQueue *q = &n->vqs[index]; 2890 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2891 2892 qemu_purge_queued_packets(nc); 2893 2894 virtio_del_queue(vdev, index * 2); 2895 if (q->tx_timer) { 2896 timer_free(q->tx_timer); 2897 q->tx_timer = NULL; 2898 } else { 2899 qemu_bh_delete(q->tx_bh); 2900 q->tx_bh = NULL; 2901 } 2902 q->tx_waiting = 0; 2903 virtio_del_queue(vdev, index * 2 + 1); 2904 } 2905 2906 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2907 { 2908 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2909 int old_num_queues = virtio_get_num_queues(vdev); 2910 int new_num_queues = new_max_queue_pairs * 2 + 1; 2911 int i; 2912 2913 assert(old_num_queues >= 3); 2914 assert(old_num_queues % 2 == 1); 2915 2916 if (old_num_queues == new_num_queues) { 2917 return; 2918 } 2919 2920 /* 2921 * We always need to remove and add ctrl vq if 2922 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2923 * and then we only enter one of the following two loops. 2924 */ 2925 virtio_del_queue(vdev, old_num_queues - 1); 2926 2927 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2928 /* new_num_queues < old_num_queues */ 2929 virtio_net_del_queue(n, i / 2); 2930 } 2931 2932 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2933 /* new_num_queues > old_num_queues */ 2934 virtio_net_add_queue(n, i / 2); 2935 } 2936 2937 /* add ctrl_vq last */ 2938 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2939 } 2940 2941 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2942 { 2943 int max = multiqueue ? n->max_queue_pairs : 1; 2944 2945 n->multiqueue = multiqueue; 2946 virtio_net_change_num_queue_pairs(n, max); 2947 2948 virtio_net_set_queue_pairs(n); 2949 } 2950 2951 static int virtio_net_post_load_device(void *opaque, int version_id) 2952 { 2953 VirtIONet *n = opaque; 2954 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2955 int i, link_down; 2956 2957 trace_virtio_net_post_load_device(); 2958 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2959 virtio_vdev_has_feature(vdev, 2960 VIRTIO_F_VERSION_1), 2961 virtio_vdev_has_feature(vdev, 2962 VIRTIO_NET_F_HASH_REPORT)); 2963 2964 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2965 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2966 n->mac_table.in_use = 0; 2967 } 2968 2969 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2970 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2971 } 2972 2973 /* 2974 * curr_guest_offloads will be later overwritten by the 2975 * virtio_set_features_nocheck call done from the virtio_load. 2976 * Here we make sure it is preserved and restored accordingly 2977 * in the virtio_net_post_load_virtio callback. 2978 */ 2979 n->saved_guest_offloads = n->curr_guest_offloads; 2980 2981 virtio_net_set_queue_pairs(n); 2982 2983 /* Find the first multicast entry in the saved MAC filter */ 2984 for (i = 0; i < n->mac_table.in_use; i++) { 2985 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2986 break; 2987 } 2988 } 2989 n->mac_table.first_multi = i; 2990 2991 /* nc.link_down can't be migrated, so infer link_down according 2992 * to link status bit in n->status */ 2993 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2994 for (i = 0; i < n->max_queue_pairs; i++) { 2995 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2996 } 2997 2998 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2999 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3000 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3001 QEMU_CLOCK_VIRTUAL, 3002 virtio_net_announce_timer, n); 3003 if (n->announce_timer.round) { 3004 timer_mod(n->announce_timer.tm, 3005 qemu_clock_get_ms(n->announce_timer.type)); 3006 } else { 3007 qemu_announce_timer_del(&n->announce_timer, false); 3008 } 3009 } 3010 3011 if (n->rss_data.enabled) { 3012 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 3013 if (!n->rss_data.populate_hash) { 3014 if (!virtio_net_attach_epbf_rss(n)) { 3015 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 3016 warn_report("Can't post-load eBPF RSS for vhost"); 3017 } else { 3018 warn_report("Can't post-load eBPF RSS - " 3019 "fallback to software RSS"); 3020 n->rss_data.enabled_software_rss = true; 3021 } 3022 } 3023 } 3024 3025 trace_virtio_net_rss_enable(n->rss_data.hash_types, 3026 n->rss_data.indirections_len, 3027 sizeof(n->rss_data.key)); 3028 } else { 3029 trace_virtio_net_rss_disable(); 3030 } 3031 return 0; 3032 } 3033 3034 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3035 { 3036 VirtIONet *n = VIRTIO_NET(vdev); 3037 /* 3038 * The actual needed state is now in saved_guest_offloads, 3039 * see virtio_net_post_load_device for detail. 3040 * Restore it back and apply the desired offloads. 3041 */ 3042 n->curr_guest_offloads = n->saved_guest_offloads; 3043 if (peer_has_vnet_hdr(n)) { 3044 virtio_net_apply_guest_offloads(n); 3045 } 3046 3047 return 0; 3048 } 3049 3050 /* tx_waiting field of a VirtIONetQueue */ 3051 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3052 .name = "virtio-net-queue-tx_waiting", 3053 .fields = (VMStateField[]) { 3054 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3055 VMSTATE_END_OF_LIST() 3056 }, 3057 }; 3058 3059 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3060 { 3061 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3062 } 3063 3064 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3065 { 3066 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3067 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3068 } 3069 3070 static bool mac_table_fits(void *opaque, int version_id) 3071 { 3072 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3073 } 3074 3075 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3076 { 3077 return !mac_table_fits(opaque, version_id); 3078 } 3079 3080 /* This temporary type is shared by all the WITH_TMP methods 3081 * although only some fields are used by each. 3082 */ 3083 struct VirtIONetMigTmp { 3084 VirtIONet *parent; 3085 VirtIONetQueue *vqs_1; 3086 uint16_t curr_queue_pairs_1; 3087 uint8_t has_ufo; 3088 uint32_t has_vnet_hdr; 3089 }; 3090 3091 /* The 2nd and subsequent tx_waiting flags are loaded later than 3092 * the 1st entry in the queue_pairs and only if there's more than one 3093 * entry. We use the tmp mechanism to calculate a temporary 3094 * pointer and count and also validate the count. 3095 */ 3096 3097 static int virtio_net_tx_waiting_pre_save(void *opaque) 3098 { 3099 struct VirtIONetMigTmp *tmp = opaque; 3100 3101 tmp->vqs_1 = tmp->parent->vqs + 1; 3102 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3103 if (tmp->parent->curr_queue_pairs == 0) { 3104 tmp->curr_queue_pairs_1 = 0; 3105 } 3106 3107 return 0; 3108 } 3109 3110 static int virtio_net_tx_waiting_pre_load(void *opaque) 3111 { 3112 struct VirtIONetMigTmp *tmp = opaque; 3113 3114 /* Reuse the pointer setup from save */ 3115 virtio_net_tx_waiting_pre_save(opaque); 3116 3117 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3118 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3119 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3120 3121 return -EINVAL; 3122 } 3123 3124 return 0; /* all good */ 3125 } 3126 3127 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3128 .name = "virtio-net-tx_waiting", 3129 .pre_load = virtio_net_tx_waiting_pre_load, 3130 .pre_save = virtio_net_tx_waiting_pre_save, 3131 .fields = (VMStateField[]) { 3132 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3133 curr_queue_pairs_1, 3134 vmstate_virtio_net_queue_tx_waiting, 3135 struct VirtIONetQueue), 3136 VMSTATE_END_OF_LIST() 3137 }, 3138 }; 3139 3140 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3141 * flag set we need to check that we have it 3142 */ 3143 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3144 { 3145 struct VirtIONetMigTmp *tmp = opaque; 3146 3147 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3148 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3149 return -EINVAL; 3150 } 3151 3152 return 0; 3153 } 3154 3155 static int virtio_net_ufo_pre_save(void *opaque) 3156 { 3157 struct VirtIONetMigTmp *tmp = opaque; 3158 3159 tmp->has_ufo = tmp->parent->has_ufo; 3160 3161 return 0; 3162 } 3163 3164 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3165 .name = "virtio-net-ufo", 3166 .post_load = virtio_net_ufo_post_load, 3167 .pre_save = virtio_net_ufo_pre_save, 3168 .fields = (VMStateField[]) { 3169 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3170 VMSTATE_END_OF_LIST() 3171 }, 3172 }; 3173 3174 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3175 * flag set we need to check that we have it 3176 */ 3177 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3178 { 3179 struct VirtIONetMigTmp *tmp = opaque; 3180 3181 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3182 error_report("virtio-net: saved image requires vnet_hdr=on"); 3183 return -EINVAL; 3184 } 3185 3186 return 0; 3187 } 3188 3189 static int virtio_net_vnet_pre_save(void *opaque) 3190 { 3191 struct VirtIONetMigTmp *tmp = opaque; 3192 3193 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3194 3195 return 0; 3196 } 3197 3198 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3199 .name = "virtio-net-vnet", 3200 .post_load = virtio_net_vnet_post_load, 3201 .pre_save = virtio_net_vnet_pre_save, 3202 .fields = (VMStateField[]) { 3203 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3204 VMSTATE_END_OF_LIST() 3205 }, 3206 }; 3207 3208 static bool virtio_net_rss_needed(void *opaque) 3209 { 3210 return VIRTIO_NET(opaque)->rss_data.enabled; 3211 } 3212 3213 static const VMStateDescription vmstate_virtio_net_rss = { 3214 .name = "virtio-net-device/rss", 3215 .version_id = 1, 3216 .minimum_version_id = 1, 3217 .needed = virtio_net_rss_needed, 3218 .fields = (VMStateField[]) { 3219 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3220 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3221 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3222 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3223 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3224 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3225 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3226 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3227 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3228 rss_data.indirections_len, 0, 3229 vmstate_info_uint16, uint16_t), 3230 VMSTATE_END_OF_LIST() 3231 }, 3232 }; 3233 3234 static const VMStateDescription vmstate_virtio_net_device = { 3235 .name = "virtio-net-device", 3236 .version_id = VIRTIO_NET_VM_VERSION, 3237 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3238 .post_load = virtio_net_post_load_device, 3239 .fields = (VMStateField[]) { 3240 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3241 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3242 vmstate_virtio_net_queue_tx_waiting, 3243 VirtIONetQueue), 3244 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3245 VMSTATE_UINT16(status, VirtIONet), 3246 VMSTATE_UINT8(promisc, VirtIONet), 3247 VMSTATE_UINT8(allmulti, VirtIONet), 3248 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3249 3250 /* Guarded pair: If it fits we load it, else we throw it away 3251 * - can happen if source has a larger MAC table.; post-load 3252 * sets flags in this case. 3253 */ 3254 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3255 0, mac_table_fits, mac_table.in_use, 3256 ETH_ALEN), 3257 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3258 mac_table.in_use, ETH_ALEN), 3259 3260 /* Note: This is an array of uint32's that's always been saved as a 3261 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3262 * but based on the uint. 3263 */ 3264 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3265 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3266 vmstate_virtio_net_has_vnet), 3267 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3268 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3269 VMSTATE_UINT8(alluni, VirtIONet), 3270 VMSTATE_UINT8(nomulti, VirtIONet), 3271 VMSTATE_UINT8(nouni, VirtIONet), 3272 VMSTATE_UINT8(nobcast, VirtIONet), 3273 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3274 vmstate_virtio_net_has_ufo), 3275 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3276 vmstate_info_uint16_equal, uint16_t), 3277 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3278 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3279 vmstate_virtio_net_tx_waiting), 3280 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3281 has_ctrl_guest_offloads), 3282 VMSTATE_END_OF_LIST() 3283 }, 3284 .subsections = (const VMStateDescription * []) { 3285 &vmstate_virtio_net_rss, 3286 NULL 3287 } 3288 }; 3289 3290 static NetClientInfo net_virtio_info = { 3291 .type = NET_CLIENT_DRIVER_NIC, 3292 .size = sizeof(NICState), 3293 .can_receive = virtio_net_can_receive, 3294 .receive = virtio_net_receive, 3295 .link_status_changed = virtio_net_set_link_status, 3296 .query_rx_filter = virtio_net_query_rxfilter, 3297 .announce = virtio_net_announce, 3298 }; 3299 3300 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3301 { 3302 VirtIONet *n = VIRTIO_NET(vdev); 3303 NetClientState *nc; 3304 assert(n->vhost_started); 3305 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3306 /* Must guard against invalid features and bogus queue index 3307 * from being set by malicious guest, or penetrated through 3308 * buggy migration stream. 3309 */ 3310 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3311 qemu_log_mask(LOG_GUEST_ERROR, 3312 "%s: bogus vq index ignored\n", __func__); 3313 return false; 3314 } 3315 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3316 } else { 3317 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3318 } 3319 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3320 } 3321 3322 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3323 bool mask) 3324 { 3325 VirtIONet *n = VIRTIO_NET(vdev); 3326 NetClientState *nc; 3327 assert(n->vhost_started); 3328 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3329 /* Must guard against invalid features and bogus queue index 3330 * from being set by malicious guest, or penetrated through 3331 * buggy migration stream. 3332 */ 3333 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3334 qemu_log_mask(LOG_GUEST_ERROR, 3335 "%s: bogus vq index ignored\n", __func__); 3336 return; 3337 } 3338 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3339 } else { 3340 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3341 } 3342 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3343 vdev, idx, mask); 3344 } 3345 3346 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3347 { 3348 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3349 3350 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3351 } 3352 3353 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3354 const char *type) 3355 { 3356 /* 3357 * The name can be NULL, the netclient name will be type.x. 3358 */ 3359 assert(type != NULL); 3360 3361 g_free(n->netclient_name); 3362 g_free(n->netclient_type); 3363 n->netclient_name = g_strdup(name); 3364 n->netclient_type = g_strdup(type); 3365 } 3366 3367 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3368 { 3369 HotplugHandler *hotplug_ctrl; 3370 PCIDevice *pci_dev; 3371 Error *err = NULL; 3372 3373 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3374 if (hotplug_ctrl) { 3375 pci_dev = PCI_DEVICE(dev); 3376 pci_dev->partially_hotplugged = true; 3377 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3378 if (err) { 3379 error_report_err(err); 3380 return false; 3381 } 3382 } else { 3383 return false; 3384 } 3385 return true; 3386 } 3387 3388 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3389 Error **errp) 3390 { 3391 Error *err = NULL; 3392 HotplugHandler *hotplug_ctrl; 3393 PCIDevice *pdev = PCI_DEVICE(dev); 3394 BusState *primary_bus; 3395 3396 if (!pdev->partially_hotplugged) { 3397 return true; 3398 } 3399 primary_bus = dev->parent_bus; 3400 if (!primary_bus) { 3401 error_setg(errp, "virtio_net: couldn't find primary bus"); 3402 return false; 3403 } 3404 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3405 qatomic_set(&n->failover_primary_hidden, false); 3406 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3407 if (hotplug_ctrl) { 3408 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3409 if (err) { 3410 goto out; 3411 } 3412 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3413 } 3414 pdev->partially_hotplugged = false; 3415 3416 out: 3417 error_propagate(errp, err); 3418 return !err; 3419 } 3420 3421 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3422 { 3423 bool should_be_hidden; 3424 Error *err = NULL; 3425 DeviceState *dev = failover_find_primary_device(n); 3426 3427 if (!dev) { 3428 return; 3429 } 3430 3431 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3432 3433 if (migration_in_setup(s) && !should_be_hidden) { 3434 if (failover_unplug_primary(n, dev)) { 3435 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3436 qapi_event_send_unplug_primary(dev->id); 3437 qatomic_set(&n->failover_primary_hidden, true); 3438 } else { 3439 warn_report("couldn't unplug primary device"); 3440 } 3441 } else if (migration_has_failed(s)) { 3442 /* We already unplugged the device let's plug it back */ 3443 if (!failover_replug_primary(n, dev, &err)) { 3444 if (err) { 3445 error_report_err(err); 3446 } 3447 } 3448 } 3449 } 3450 3451 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3452 { 3453 MigrationState *s = data; 3454 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3455 virtio_net_handle_migration_primary(n, s); 3456 } 3457 3458 static bool failover_hide_primary_device(DeviceListener *listener, 3459 const QDict *device_opts, 3460 bool from_json, 3461 Error **errp) 3462 { 3463 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3464 const char *standby_id; 3465 3466 if (!device_opts) { 3467 return false; 3468 } 3469 3470 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3471 return false; 3472 } 3473 3474 if (!qdict_haskey(device_opts, "id")) { 3475 error_setg(errp, "Device with failover_pair_id needs to have id"); 3476 return false; 3477 } 3478 3479 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3480 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3481 return false; 3482 } 3483 3484 /* 3485 * The hide helper can be called several times for a given device. 3486 * Check there is only one primary for a virtio-net device but 3487 * don't duplicate the qdict several times if it's called for the same 3488 * device. 3489 */ 3490 if (n->primary_opts) { 3491 const char *old, *new; 3492 /* devices with failover_pair_id always have an id */ 3493 old = qdict_get_str(n->primary_opts, "id"); 3494 new = qdict_get_str(device_opts, "id"); 3495 if (strcmp(old, new) != 0) { 3496 error_setg(errp, "Cannot attach more than one primary device to " 3497 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3498 return false; 3499 } 3500 } else { 3501 n->primary_opts = qdict_clone_shallow(device_opts); 3502 n->primary_opts_from_json = from_json; 3503 } 3504 3505 /* failover_primary_hidden is set during feature negotiation */ 3506 return qatomic_read(&n->failover_primary_hidden); 3507 } 3508 3509 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3510 { 3511 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3512 VirtIONet *n = VIRTIO_NET(dev); 3513 NetClientState *nc; 3514 int i; 3515 3516 if (n->net_conf.mtu) { 3517 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3518 } 3519 3520 if (n->net_conf.duplex_str) { 3521 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3522 n->net_conf.duplex = DUPLEX_HALF; 3523 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3524 n->net_conf.duplex = DUPLEX_FULL; 3525 } else { 3526 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3527 return; 3528 } 3529 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3530 } else { 3531 n->net_conf.duplex = DUPLEX_UNKNOWN; 3532 } 3533 3534 if (n->net_conf.speed < SPEED_UNKNOWN) { 3535 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3536 return; 3537 } 3538 if (n->net_conf.speed >= 0) { 3539 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3540 } 3541 3542 if (n->failover) { 3543 n->primary_listener.hide_device = failover_hide_primary_device; 3544 qatomic_set(&n->failover_primary_hidden, true); 3545 device_listener_register(&n->primary_listener); 3546 n->migration_state.notify = virtio_net_migration_state_notifier; 3547 add_migration_state_change_notifier(&n->migration_state); 3548 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3549 } 3550 3551 virtio_net_set_config_size(n, n->host_features); 3552 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3553 3554 /* 3555 * We set a lower limit on RX queue size to what it always was. 3556 * Guests that want a smaller ring can always resize it without 3557 * help from us (using virtio 1 and up). 3558 */ 3559 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3560 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3561 !is_power_of_2(n->net_conf.rx_queue_size)) { 3562 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3563 "must be a power of 2 between %d and %d.", 3564 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3565 VIRTQUEUE_MAX_SIZE); 3566 virtio_cleanup(vdev); 3567 return; 3568 } 3569 3570 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3571 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3572 !is_power_of_2(n->net_conf.tx_queue_size)) { 3573 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3574 "must be a power of 2 between %d and %d", 3575 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3576 VIRTQUEUE_MAX_SIZE); 3577 virtio_cleanup(vdev); 3578 return; 3579 } 3580 3581 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3582 3583 /* 3584 * Figure out the datapath queue pairs since the backend could 3585 * provide control queue via peers as well. 3586 */ 3587 if (n->nic_conf.peers.queues) { 3588 for (i = 0; i < n->max_ncs; i++) { 3589 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3590 ++n->max_queue_pairs; 3591 } 3592 } 3593 } 3594 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3595 3596 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3597 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3598 "must be a positive integer less than %d.", 3599 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3600 virtio_cleanup(vdev); 3601 return; 3602 } 3603 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3604 n->curr_queue_pairs = 1; 3605 n->tx_timeout = n->net_conf.txtimer; 3606 3607 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3608 && strcmp(n->net_conf.tx, "bh")) { 3609 warn_report("virtio-net: " 3610 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3611 n->net_conf.tx); 3612 error_printf("Defaulting to \"bh\""); 3613 } 3614 3615 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3616 n->net_conf.tx_queue_size); 3617 3618 for (i = 0; i < n->max_queue_pairs; i++) { 3619 virtio_net_add_queue(n, i); 3620 } 3621 3622 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3623 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3624 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3625 n->status = VIRTIO_NET_S_LINK_UP; 3626 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3627 QEMU_CLOCK_VIRTUAL, 3628 virtio_net_announce_timer, n); 3629 n->announce_timer.round = 0; 3630 3631 if (n->netclient_type) { 3632 /* 3633 * Happen when virtio_net_set_netclient_name has been called. 3634 */ 3635 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3636 n->netclient_type, n->netclient_name, n); 3637 } else { 3638 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3639 object_get_typename(OBJECT(dev)), dev->id, n); 3640 } 3641 3642 for (i = 0; i < n->max_queue_pairs; i++) { 3643 n->nic->ncs[i].do_not_pad = true; 3644 } 3645 3646 peer_test_vnet_hdr(n); 3647 if (peer_has_vnet_hdr(n)) { 3648 for (i = 0; i < n->max_queue_pairs; i++) { 3649 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3650 } 3651 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3652 } else { 3653 n->host_hdr_len = 0; 3654 } 3655 3656 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3657 3658 n->vqs[0].tx_waiting = 0; 3659 n->tx_burst = n->net_conf.txburst; 3660 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3661 n->promisc = 1; /* for compatibility */ 3662 3663 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3664 3665 n->vlans = g_malloc0(MAX_VLAN >> 3); 3666 3667 nc = qemu_get_queue(n->nic); 3668 nc->rxfilter_notify_enabled = 1; 3669 3670 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3671 struct virtio_net_config netcfg = {}; 3672 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3673 vhost_net_set_config(get_vhost_net(nc->peer), 3674 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3675 } 3676 QTAILQ_INIT(&n->rsc_chains); 3677 n->qdev = dev; 3678 3679 net_rx_pkt_init(&n->rx_pkt, false); 3680 3681 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3682 virtio_net_load_ebpf(n); 3683 } 3684 } 3685 3686 static void virtio_net_device_unrealize(DeviceState *dev) 3687 { 3688 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3689 VirtIONet *n = VIRTIO_NET(dev); 3690 int i, max_queue_pairs; 3691 3692 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3693 virtio_net_unload_ebpf(n); 3694 } 3695 3696 /* This will stop vhost backend if appropriate. */ 3697 virtio_net_set_status(vdev, 0); 3698 3699 g_free(n->netclient_name); 3700 n->netclient_name = NULL; 3701 g_free(n->netclient_type); 3702 n->netclient_type = NULL; 3703 3704 g_free(n->mac_table.macs); 3705 g_free(n->vlans); 3706 3707 if (n->failover) { 3708 qobject_unref(n->primary_opts); 3709 device_listener_unregister(&n->primary_listener); 3710 remove_migration_state_change_notifier(&n->migration_state); 3711 } else { 3712 assert(n->primary_opts == NULL); 3713 } 3714 3715 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3716 for (i = 0; i < max_queue_pairs; i++) { 3717 virtio_net_del_queue(n, i); 3718 } 3719 /* delete also control vq */ 3720 virtio_del_queue(vdev, max_queue_pairs * 2); 3721 qemu_announce_timer_del(&n->announce_timer, false); 3722 g_free(n->vqs); 3723 qemu_del_nic(n->nic); 3724 virtio_net_rsc_cleanup(n); 3725 g_free(n->rss_data.indirections_table); 3726 net_rx_pkt_uninit(n->rx_pkt); 3727 virtio_cleanup(vdev); 3728 } 3729 3730 static void virtio_net_instance_init(Object *obj) 3731 { 3732 VirtIONet *n = VIRTIO_NET(obj); 3733 3734 /* 3735 * The default config_size is sizeof(struct virtio_net_config). 3736 * Can be overriden with virtio_net_set_config_size. 3737 */ 3738 n->config_size = sizeof(struct virtio_net_config); 3739 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3740 "bootindex", "/ethernet-phy@0", 3741 DEVICE(n)); 3742 3743 ebpf_rss_init(&n->ebpf_rss); 3744 } 3745 3746 static int virtio_net_pre_save(void *opaque) 3747 { 3748 VirtIONet *n = opaque; 3749 3750 /* At this point, backend must be stopped, otherwise 3751 * it might keep writing to memory. */ 3752 assert(!n->vhost_started); 3753 3754 return 0; 3755 } 3756 3757 static bool primary_unplug_pending(void *opaque) 3758 { 3759 DeviceState *dev = opaque; 3760 DeviceState *primary; 3761 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3762 VirtIONet *n = VIRTIO_NET(vdev); 3763 3764 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3765 return false; 3766 } 3767 primary = failover_find_primary_device(n); 3768 return primary ? primary->pending_deleted_event : false; 3769 } 3770 3771 static bool dev_unplug_pending(void *opaque) 3772 { 3773 DeviceState *dev = opaque; 3774 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3775 3776 return vdc->primary_unplug_pending(dev); 3777 } 3778 3779 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3780 { 3781 VirtIONet *n = VIRTIO_NET(vdev); 3782 NetClientState *nc = qemu_get_queue(n->nic); 3783 struct vhost_net *net = get_vhost_net(nc->peer); 3784 return &net->dev; 3785 } 3786 3787 static const VMStateDescription vmstate_virtio_net = { 3788 .name = "virtio-net", 3789 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3790 .version_id = VIRTIO_NET_VM_VERSION, 3791 .fields = (VMStateField[]) { 3792 VMSTATE_VIRTIO_DEVICE, 3793 VMSTATE_END_OF_LIST() 3794 }, 3795 .pre_save = virtio_net_pre_save, 3796 .dev_unplug_pending = dev_unplug_pending, 3797 }; 3798 3799 static Property virtio_net_properties[] = { 3800 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3801 VIRTIO_NET_F_CSUM, true), 3802 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3803 VIRTIO_NET_F_GUEST_CSUM, true), 3804 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3805 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3806 VIRTIO_NET_F_GUEST_TSO4, true), 3807 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3808 VIRTIO_NET_F_GUEST_TSO6, true), 3809 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3810 VIRTIO_NET_F_GUEST_ECN, true), 3811 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3812 VIRTIO_NET_F_GUEST_UFO, true), 3813 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3814 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3815 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3816 VIRTIO_NET_F_HOST_TSO4, true), 3817 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3818 VIRTIO_NET_F_HOST_TSO6, true), 3819 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3820 VIRTIO_NET_F_HOST_ECN, true), 3821 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3822 VIRTIO_NET_F_HOST_UFO, true), 3823 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3824 VIRTIO_NET_F_MRG_RXBUF, true), 3825 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3826 VIRTIO_NET_F_STATUS, true), 3827 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3828 VIRTIO_NET_F_CTRL_VQ, true), 3829 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3830 VIRTIO_NET_F_CTRL_RX, true), 3831 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3832 VIRTIO_NET_F_CTRL_VLAN, true), 3833 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3834 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3835 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3836 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3837 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3838 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3839 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3840 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3841 VIRTIO_NET_F_RSS, false), 3842 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3843 VIRTIO_NET_F_HASH_REPORT, false), 3844 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3845 VIRTIO_NET_F_RSC_EXT, false), 3846 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3847 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3848 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3849 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3850 TX_TIMER_INTERVAL), 3851 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3852 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3853 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3854 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3855 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3856 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3857 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3858 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3859 true), 3860 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3861 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3862 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3863 DEFINE_PROP_END_OF_LIST(), 3864 }; 3865 3866 static void virtio_net_class_init(ObjectClass *klass, void *data) 3867 { 3868 DeviceClass *dc = DEVICE_CLASS(klass); 3869 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3870 3871 device_class_set_props(dc, virtio_net_properties); 3872 dc->vmsd = &vmstate_virtio_net; 3873 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3874 vdc->realize = virtio_net_device_realize; 3875 vdc->unrealize = virtio_net_device_unrealize; 3876 vdc->get_config = virtio_net_get_config; 3877 vdc->set_config = virtio_net_set_config; 3878 vdc->get_features = virtio_net_get_features; 3879 vdc->set_features = virtio_net_set_features; 3880 vdc->bad_features = virtio_net_bad_features; 3881 vdc->reset = virtio_net_reset; 3882 vdc->queue_reset = virtio_net_queue_reset; 3883 vdc->queue_enable = virtio_net_queue_enable; 3884 vdc->set_status = virtio_net_set_status; 3885 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3886 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3887 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3888 vdc->post_load = virtio_net_post_load_virtio; 3889 vdc->vmsd = &vmstate_virtio_net_device; 3890 vdc->primary_unplug_pending = primary_unplug_pending; 3891 vdc->get_vhost = virtio_net_get_vhost; 3892 } 3893 3894 static const TypeInfo virtio_net_info = { 3895 .name = TYPE_VIRTIO_NET, 3896 .parent = TYPE_VIRTIO_DEVICE, 3897 .instance_size = sizeof(VirtIONet), 3898 .instance_init = virtio_net_instance_init, 3899 .class_init = virtio_net_class_init, 3900 }; 3901 3902 static void virtio_register_types(void) 3903 { 3904 type_register_static(&virtio_net_info); 3905 } 3906 3907 type_init(virtio_register_types) 3908