1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ 53 54 /* previously fixed value */ 55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 57 58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 61 62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 63 64 #define VIRTIO_NET_TCP_FLAG 0x3F 65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 66 67 /* IPv4 max payload, 16 bits in the header */ 68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 70 71 /* header length value in ip header without option */ 72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 73 74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 76 77 /* Purge coalesced packets timer interval, This value affects the performance 78 a lot, and should be tuned carefully, '300000'(300us) is the recommended 79 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 80 tso/gso/gro 'off'. */ 81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 82 83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 92 93 static const VirtIOFeature feature_sizes[] = { 94 {.flags = 1ULL << VIRTIO_NET_F_MAC, 95 .end = endof(struct virtio_net_config, mac)}, 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 97 .end = endof(struct virtio_net_config, status)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MQ, 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 100 {.flags = 1ULL << VIRTIO_NET_F_MTU, 101 .end = endof(struct virtio_net_config, mtu)}, 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 103 .end = endof(struct virtio_net_config, duplex)}, 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 105 .end = endof(struct virtio_net_config, supported_hash_types)}, 106 {} 107 }; 108 109 static const VirtIOConfigSizeParams cfg_size_params = { 110 .min_size = endof(struct virtio_net_config, mac), 111 .max_size = sizeof(struct virtio_net_config), 112 .feature_sizes = feature_sizes 113 }; 114 115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 116 { 117 VirtIONet *n = qemu_get_nic_opaque(nc); 118 119 return &n->vqs[nc->queue_index]; 120 } 121 122 static int vq2q(int queue_index) 123 { 124 return queue_index / 2; 125 } 126 127 /* TODO 128 * - we could suppress RX interrupt if we were so inclined. 129 */ 130 131 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 132 { 133 VirtIONet *n = VIRTIO_NET(vdev); 134 struct virtio_net_config netcfg; 135 NetClientState *nc = qemu_get_queue(n->nic); 136 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 137 138 int ret = 0; 139 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 140 virtio_stw_p(vdev, &netcfg.status, n->status); 141 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 142 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 143 memcpy(netcfg.mac, n->mac, ETH_ALEN); 144 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 145 netcfg.duplex = n->net_conf.duplex; 146 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 147 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 148 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 149 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 150 virtio_stl_p(vdev, &netcfg.supported_hash_types, 151 VIRTIO_NET_RSS_SUPPORTED_HASHES); 152 memcpy(config, &netcfg, n->config_size); 153 154 /* 155 * Is this VDPA? No peer means not VDPA: there's no way to 156 * disconnect/reconnect a VDPA peer. 157 */ 158 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 159 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 160 n->config_size); 161 if (ret != -1) { 162 /* 163 * Some NIC/kernel combinations present 0 as the mac address. As 164 * that is not a legal address, try to proceed with the 165 * address from the QEMU command line in the hope that the 166 * address has been configured correctly elsewhere - just not 167 * reported by the device. 168 */ 169 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 170 info_report("Zero hardware mac address detected. Ignoring."); 171 memcpy(netcfg.mac, n->mac, ETH_ALEN); 172 } 173 memcpy(config, &netcfg, n->config_size); 174 } 175 } 176 } 177 178 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 179 { 180 VirtIONet *n = VIRTIO_NET(vdev); 181 struct virtio_net_config netcfg = {}; 182 NetClientState *nc = qemu_get_queue(n->nic); 183 184 memcpy(&netcfg, config, n->config_size); 185 186 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 187 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 188 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 189 memcpy(n->mac, netcfg.mac, ETH_ALEN); 190 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 191 } 192 193 /* 194 * Is this VDPA? No peer means not VDPA: there's no way to 195 * disconnect/reconnect a VDPA peer. 196 */ 197 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 198 vhost_net_set_config(get_vhost_net(nc->peer), 199 (uint8_t *)&netcfg, 0, n->config_size, 200 VHOST_SET_CONFIG_TYPE_MASTER); 201 } 202 } 203 204 static bool virtio_net_started(VirtIONet *n, uint8_t status) 205 { 206 VirtIODevice *vdev = VIRTIO_DEVICE(n); 207 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 208 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 209 } 210 211 static void virtio_net_announce_notify(VirtIONet *net) 212 { 213 VirtIODevice *vdev = VIRTIO_DEVICE(net); 214 trace_virtio_net_announce_notify(); 215 216 net->status |= VIRTIO_NET_S_ANNOUNCE; 217 virtio_notify_config(vdev); 218 } 219 220 static void virtio_net_announce_timer(void *opaque) 221 { 222 VirtIONet *n = opaque; 223 trace_virtio_net_announce_timer(n->announce_timer.round); 224 225 n->announce_timer.round--; 226 virtio_net_announce_notify(n); 227 } 228 229 static void virtio_net_announce(NetClientState *nc) 230 { 231 VirtIONet *n = qemu_get_nic_opaque(nc); 232 VirtIODevice *vdev = VIRTIO_DEVICE(n); 233 234 /* 235 * Make sure the virtio migration announcement timer isn't running 236 * If it is, let it trigger announcement so that we do not cause 237 * confusion. 238 */ 239 if (n->announce_timer.round) { 240 return; 241 } 242 243 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 244 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 245 virtio_net_announce_notify(n); 246 } 247 } 248 249 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 250 { 251 VirtIODevice *vdev = VIRTIO_DEVICE(n); 252 NetClientState *nc = qemu_get_queue(n->nic); 253 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 254 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 255 n->max_ncs - n->max_queue_pairs : 0; 256 257 if (!get_vhost_net(nc->peer)) { 258 return; 259 } 260 261 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 262 !!n->vhost_started) { 263 return; 264 } 265 if (!n->vhost_started) { 266 int r, i; 267 268 if (n->needs_vnet_hdr_swap) { 269 error_report("backend does not support %s vnet headers; " 270 "falling back on userspace virtio", 271 virtio_is_big_endian(vdev) ? "BE" : "LE"); 272 return; 273 } 274 275 /* Any packets outstanding? Purge them to avoid touching rings 276 * when vhost is running. 277 */ 278 for (i = 0; i < queue_pairs; i++) { 279 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 280 281 /* Purge both directions: TX and RX. */ 282 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 283 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 284 } 285 286 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 287 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 288 if (r < 0) { 289 error_report("%uBytes MTU not supported by the backend", 290 n->net_conf.mtu); 291 292 return; 293 } 294 } 295 296 n->vhost_started = 1; 297 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 298 if (r < 0) { 299 error_report("unable to start vhost net: %d: " 300 "falling back on userspace virtio", -r); 301 n->vhost_started = 0; 302 } 303 } else { 304 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 305 n->vhost_started = 0; 306 } 307 } 308 309 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 310 NetClientState *peer, 311 bool enable) 312 { 313 if (virtio_is_big_endian(vdev)) { 314 return qemu_set_vnet_be(peer, enable); 315 } else { 316 return qemu_set_vnet_le(peer, enable); 317 } 318 } 319 320 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 321 int queue_pairs, bool enable) 322 { 323 int i; 324 325 for (i = 0; i < queue_pairs; i++) { 326 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 327 enable) { 328 while (--i >= 0) { 329 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 330 } 331 332 return true; 333 } 334 } 335 336 return false; 337 } 338 339 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 340 { 341 VirtIODevice *vdev = VIRTIO_DEVICE(n); 342 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 343 344 if (virtio_net_started(n, status)) { 345 /* Before using the device, we tell the network backend about the 346 * endianness to use when parsing vnet headers. If the backend 347 * can't do it, we fallback onto fixing the headers in the core 348 * virtio-net code. 349 */ 350 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 351 queue_pairs, true); 352 } else if (virtio_net_started(n, vdev->status)) { 353 /* After using the device, we need to reset the network backend to 354 * the default (guest native endianness), otherwise the guest may 355 * lose network connectivity if it is rebooted into a different 356 * endianness. 357 */ 358 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 359 } 360 } 361 362 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 363 { 364 unsigned int dropped = virtqueue_drop_all(vq); 365 if (dropped) { 366 virtio_notify(vdev, vq); 367 } 368 } 369 370 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 371 { 372 VirtIONet *n = VIRTIO_NET(vdev); 373 VirtIONetQueue *q; 374 int i; 375 uint8_t queue_status; 376 377 virtio_net_vnet_endian_status(n, status); 378 virtio_net_vhost_status(n, status); 379 380 for (i = 0; i < n->max_queue_pairs; i++) { 381 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 382 bool queue_started; 383 q = &n->vqs[i]; 384 385 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 386 queue_status = 0; 387 } else { 388 queue_status = status; 389 } 390 queue_started = 391 virtio_net_started(n, queue_status) && !n->vhost_started; 392 393 if (queue_started) { 394 qemu_flush_queued_packets(ncs); 395 } 396 397 if (!q->tx_waiting) { 398 continue; 399 } 400 401 if (queue_started) { 402 if (q->tx_timer) { 403 timer_mod(q->tx_timer, 404 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 405 } else { 406 qemu_bh_schedule(q->tx_bh); 407 } 408 } else { 409 if (q->tx_timer) { 410 timer_del(q->tx_timer); 411 } else { 412 qemu_bh_cancel(q->tx_bh); 413 } 414 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 415 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 416 vdev->vm_running) { 417 /* if tx is waiting we are likely have some packets in tx queue 418 * and disabled notification */ 419 q->tx_waiting = 0; 420 virtio_queue_set_notification(q->tx_vq, 1); 421 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 422 } 423 } 424 } 425 } 426 427 static void virtio_net_set_link_status(NetClientState *nc) 428 { 429 VirtIONet *n = qemu_get_nic_opaque(nc); 430 VirtIODevice *vdev = VIRTIO_DEVICE(n); 431 uint16_t old_status = n->status; 432 433 if (nc->link_down) 434 n->status &= ~VIRTIO_NET_S_LINK_UP; 435 else 436 n->status |= VIRTIO_NET_S_LINK_UP; 437 438 if (n->status != old_status) 439 virtio_notify_config(vdev); 440 441 virtio_net_set_status(vdev, vdev->status); 442 } 443 444 static void rxfilter_notify(NetClientState *nc) 445 { 446 VirtIONet *n = qemu_get_nic_opaque(nc); 447 448 if (nc->rxfilter_notify_enabled) { 449 char *path = object_get_canonical_path(OBJECT(n->qdev)); 450 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name, 451 n->netclient_name, path); 452 g_free(path); 453 454 /* disable event notification to avoid events flooding */ 455 nc->rxfilter_notify_enabled = 0; 456 } 457 } 458 459 static intList *get_vlan_table(VirtIONet *n) 460 { 461 intList *list; 462 int i, j; 463 464 list = NULL; 465 for (i = 0; i < MAX_VLAN >> 5; i++) { 466 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 467 if (n->vlans[i] & (1U << j)) { 468 QAPI_LIST_PREPEND(list, (i << 5) + j); 469 } 470 } 471 } 472 473 return list; 474 } 475 476 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 477 { 478 VirtIONet *n = qemu_get_nic_opaque(nc); 479 VirtIODevice *vdev = VIRTIO_DEVICE(n); 480 RxFilterInfo *info; 481 strList *str_list; 482 int i; 483 484 info = g_malloc0(sizeof(*info)); 485 info->name = g_strdup(nc->name); 486 info->promiscuous = n->promisc; 487 488 if (n->nouni) { 489 info->unicast = RX_STATE_NONE; 490 } else if (n->alluni) { 491 info->unicast = RX_STATE_ALL; 492 } else { 493 info->unicast = RX_STATE_NORMAL; 494 } 495 496 if (n->nomulti) { 497 info->multicast = RX_STATE_NONE; 498 } else if (n->allmulti) { 499 info->multicast = RX_STATE_ALL; 500 } else { 501 info->multicast = RX_STATE_NORMAL; 502 } 503 504 info->broadcast_allowed = n->nobcast; 505 info->multicast_overflow = n->mac_table.multi_overflow; 506 info->unicast_overflow = n->mac_table.uni_overflow; 507 508 info->main_mac = qemu_mac_strdup_printf(n->mac); 509 510 str_list = NULL; 511 for (i = 0; i < n->mac_table.first_multi; i++) { 512 QAPI_LIST_PREPEND(str_list, 513 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 514 } 515 info->unicast_table = str_list; 516 517 str_list = NULL; 518 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 519 QAPI_LIST_PREPEND(str_list, 520 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 521 } 522 info->multicast_table = str_list; 523 info->vlan_table = get_vlan_table(n); 524 525 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 526 info->vlan = RX_STATE_ALL; 527 } else if (!info->vlan_table) { 528 info->vlan = RX_STATE_NONE; 529 } else { 530 info->vlan = RX_STATE_NORMAL; 531 } 532 533 /* enable event notification after query */ 534 nc->rxfilter_notify_enabled = 1; 535 536 return info; 537 } 538 539 static void virtio_net_reset(VirtIODevice *vdev) 540 { 541 VirtIONet *n = VIRTIO_NET(vdev); 542 int i; 543 544 /* Reset back to compatibility mode */ 545 n->promisc = 1; 546 n->allmulti = 0; 547 n->alluni = 0; 548 n->nomulti = 0; 549 n->nouni = 0; 550 n->nobcast = 0; 551 /* multiqueue is disabled by default */ 552 n->curr_queue_pairs = 1; 553 timer_del(n->announce_timer.tm); 554 n->announce_timer.round = 0; 555 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 556 557 /* Flush any MAC and VLAN filter table state */ 558 n->mac_table.in_use = 0; 559 n->mac_table.first_multi = 0; 560 n->mac_table.multi_overflow = 0; 561 n->mac_table.uni_overflow = 0; 562 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 563 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 564 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 565 memset(n->vlans, 0, MAX_VLAN >> 3); 566 567 /* Flush any async TX */ 568 for (i = 0; i < n->max_queue_pairs; i++) { 569 NetClientState *nc = qemu_get_subqueue(n->nic, i); 570 571 if (nc->peer) { 572 qemu_flush_or_purge_queued_packets(nc->peer, true); 573 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 574 } 575 } 576 } 577 578 static void peer_test_vnet_hdr(VirtIONet *n) 579 { 580 NetClientState *nc = qemu_get_queue(n->nic); 581 if (!nc->peer) { 582 return; 583 } 584 585 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 586 } 587 588 static int peer_has_vnet_hdr(VirtIONet *n) 589 { 590 return n->has_vnet_hdr; 591 } 592 593 static int peer_has_ufo(VirtIONet *n) 594 { 595 if (!peer_has_vnet_hdr(n)) 596 return 0; 597 598 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 599 600 return n->has_ufo; 601 } 602 603 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 604 int version_1, int hash_report) 605 { 606 int i; 607 NetClientState *nc; 608 609 n->mergeable_rx_bufs = mergeable_rx_bufs; 610 611 if (version_1) { 612 n->guest_hdr_len = hash_report ? 613 sizeof(struct virtio_net_hdr_v1_hash) : 614 sizeof(struct virtio_net_hdr_mrg_rxbuf); 615 n->rss_data.populate_hash = !!hash_report; 616 } else { 617 n->guest_hdr_len = n->mergeable_rx_bufs ? 618 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 619 sizeof(struct virtio_net_hdr); 620 } 621 622 for (i = 0; i < n->max_queue_pairs; i++) { 623 nc = qemu_get_subqueue(n->nic, i); 624 625 if (peer_has_vnet_hdr(n) && 626 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 627 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 628 n->host_hdr_len = n->guest_hdr_len; 629 } 630 } 631 } 632 633 static int virtio_net_max_tx_queue_size(VirtIONet *n) 634 { 635 NetClientState *peer = n->nic_conf.peers.ncs[0]; 636 637 /* 638 * Backends other than vhost-user or vhost-vdpa don't support max queue 639 * size. 640 */ 641 if (!peer) { 642 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 643 } 644 645 switch(peer->info->type) { 646 case NET_CLIENT_DRIVER_VHOST_USER: 647 case NET_CLIENT_DRIVER_VHOST_VDPA: 648 return VIRTQUEUE_MAX_SIZE; 649 default: 650 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 651 }; 652 } 653 654 static int peer_attach(VirtIONet *n, int index) 655 { 656 NetClientState *nc = qemu_get_subqueue(n->nic, index); 657 658 if (!nc->peer) { 659 return 0; 660 } 661 662 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 663 vhost_set_vring_enable(nc->peer, 1); 664 } 665 666 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 667 return 0; 668 } 669 670 if (n->max_queue_pairs == 1) { 671 return 0; 672 } 673 674 return tap_enable(nc->peer); 675 } 676 677 static int peer_detach(VirtIONet *n, int index) 678 { 679 NetClientState *nc = qemu_get_subqueue(n->nic, index); 680 681 if (!nc->peer) { 682 return 0; 683 } 684 685 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 686 vhost_set_vring_enable(nc->peer, 0); 687 } 688 689 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 690 return 0; 691 } 692 693 return tap_disable(nc->peer); 694 } 695 696 static void virtio_net_set_queue_pairs(VirtIONet *n) 697 { 698 int i; 699 int r; 700 701 if (n->nic->peer_deleted) { 702 return; 703 } 704 705 for (i = 0; i < n->max_queue_pairs; i++) { 706 if (i < n->curr_queue_pairs) { 707 r = peer_attach(n, i); 708 assert(!r); 709 } else { 710 r = peer_detach(n, i); 711 assert(!r); 712 } 713 } 714 } 715 716 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 717 718 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 719 Error **errp) 720 { 721 VirtIONet *n = VIRTIO_NET(vdev); 722 NetClientState *nc = qemu_get_queue(n->nic); 723 724 /* Firstly sync all virtio-net possible supported features */ 725 features |= n->host_features; 726 727 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 728 729 if (!peer_has_vnet_hdr(n)) { 730 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 731 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 732 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 733 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 734 735 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 736 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 737 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 738 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 739 740 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 741 } 742 743 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 744 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 745 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 746 } 747 748 if (!get_vhost_net(nc->peer)) { 749 return features; 750 } 751 752 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 753 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 754 } 755 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 756 vdev->backend_features = features; 757 758 if (n->mtu_bypass_backend && 759 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 760 features |= (1ULL << VIRTIO_NET_F_MTU); 761 } 762 763 return features; 764 } 765 766 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 767 { 768 uint64_t features = 0; 769 770 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 771 * but also these: */ 772 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 773 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 774 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 775 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 776 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 777 778 return features; 779 } 780 781 static void virtio_net_apply_guest_offloads(VirtIONet *n) 782 { 783 qemu_set_offload(qemu_get_queue(n->nic)->peer, 784 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 785 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 786 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 787 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 788 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); 789 } 790 791 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) 792 { 793 static const uint64_t guest_offloads_mask = 794 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 795 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 796 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 797 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 798 (1ULL << VIRTIO_NET_F_GUEST_UFO); 799 800 return guest_offloads_mask & features; 801 } 802 803 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) 804 { 805 VirtIODevice *vdev = VIRTIO_DEVICE(n); 806 return virtio_net_guest_offloads_by_features(vdev->guest_features); 807 } 808 809 typedef struct { 810 VirtIONet *n; 811 DeviceState *dev; 812 } FailoverDevice; 813 814 /** 815 * Set the failover primary device 816 * 817 * @opaque: FailoverId to setup 818 * @opts: opts for device we are handling 819 * @errp: returns an error if this function fails 820 */ 821 static int failover_set_primary(DeviceState *dev, void *opaque) 822 { 823 FailoverDevice *fdev = opaque; 824 PCIDevice *pci_dev = (PCIDevice *) 825 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 826 827 if (!pci_dev) { 828 return 0; 829 } 830 831 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 832 fdev->dev = dev; 833 return 1; 834 } 835 836 return 0; 837 } 838 839 /** 840 * Find the primary device for this failover virtio-net 841 * 842 * @n: VirtIONet device 843 * @errp: returns an error if this function fails 844 */ 845 static DeviceState *failover_find_primary_device(VirtIONet *n) 846 { 847 FailoverDevice fdev = { 848 .n = n, 849 }; 850 851 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 852 NULL, NULL, &fdev); 853 return fdev.dev; 854 } 855 856 static void failover_add_primary(VirtIONet *n, Error **errp) 857 { 858 Error *err = NULL; 859 DeviceState *dev = failover_find_primary_device(n); 860 861 if (dev) { 862 return; 863 } 864 865 if (!n->primary_opts) { 866 error_setg(errp, "Primary device not found"); 867 error_append_hint(errp, "Virtio-net failover will not work. Make " 868 "sure primary device has parameter" 869 " failover_pair_id=%s\n", n->netclient_name); 870 return; 871 } 872 873 dev = qdev_device_add_from_qdict(n->primary_opts, 874 n->primary_opts_from_json, 875 &err); 876 if (err) { 877 qobject_unref(n->primary_opts); 878 n->primary_opts = NULL; 879 } else { 880 object_unref(OBJECT(dev)); 881 } 882 error_propagate(errp, err); 883 } 884 885 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 886 { 887 VirtIONet *n = VIRTIO_NET(vdev); 888 Error *err = NULL; 889 int i; 890 891 if (n->mtu_bypass_backend && 892 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 893 features &= ~(1ULL << VIRTIO_NET_F_MTU); 894 } 895 896 virtio_net_set_multiqueue(n, 897 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 898 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 899 900 virtio_net_set_mrg_rx_bufs(n, 901 virtio_has_feature(features, 902 VIRTIO_NET_F_MRG_RXBUF), 903 virtio_has_feature(features, 904 VIRTIO_F_VERSION_1), 905 virtio_has_feature(features, 906 VIRTIO_NET_F_HASH_REPORT)); 907 908 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 909 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 910 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 911 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 912 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 913 914 if (n->has_vnet_hdr) { 915 n->curr_guest_offloads = 916 virtio_net_guest_offloads_by_features(features); 917 virtio_net_apply_guest_offloads(n); 918 } 919 920 for (i = 0; i < n->max_queue_pairs; i++) { 921 NetClientState *nc = qemu_get_subqueue(n->nic, i); 922 923 if (!get_vhost_net(nc->peer)) { 924 continue; 925 } 926 vhost_net_ack_features(get_vhost_net(nc->peer), features); 927 } 928 929 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 930 memset(n->vlans, 0, MAX_VLAN >> 3); 931 } else { 932 memset(n->vlans, 0xff, MAX_VLAN >> 3); 933 } 934 935 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 936 qapi_event_send_failover_negotiated(n->netclient_name); 937 qatomic_set(&n->failover_primary_hidden, false); 938 failover_add_primary(n, &err); 939 if (err) { 940 if (!qtest_enabled()) { 941 warn_report_err(err); 942 } else { 943 error_free(err); 944 } 945 } 946 } 947 } 948 949 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 950 struct iovec *iov, unsigned int iov_cnt) 951 { 952 uint8_t on; 953 size_t s; 954 NetClientState *nc = qemu_get_queue(n->nic); 955 956 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 957 if (s != sizeof(on)) { 958 return VIRTIO_NET_ERR; 959 } 960 961 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 962 n->promisc = on; 963 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 964 n->allmulti = on; 965 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 966 n->alluni = on; 967 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 968 n->nomulti = on; 969 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 970 n->nouni = on; 971 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 972 n->nobcast = on; 973 } else { 974 return VIRTIO_NET_ERR; 975 } 976 977 rxfilter_notify(nc); 978 979 return VIRTIO_NET_OK; 980 } 981 982 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 983 struct iovec *iov, unsigned int iov_cnt) 984 { 985 VirtIODevice *vdev = VIRTIO_DEVICE(n); 986 uint64_t offloads; 987 size_t s; 988 989 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 990 return VIRTIO_NET_ERR; 991 } 992 993 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 994 if (s != sizeof(offloads)) { 995 return VIRTIO_NET_ERR; 996 } 997 998 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 999 uint64_t supported_offloads; 1000 1001 offloads = virtio_ldq_p(vdev, &offloads); 1002 1003 if (!n->has_vnet_hdr) { 1004 return VIRTIO_NET_ERR; 1005 } 1006 1007 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1008 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1009 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1010 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1011 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1012 1013 supported_offloads = virtio_net_supported_guest_offloads(n); 1014 if (offloads & ~supported_offloads) { 1015 return VIRTIO_NET_ERR; 1016 } 1017 1018 n->curr_guest_offloads = offloads; 1019 virtio_net_apply_guest_offloads(n); 1020 1021 return VIRTIO_NET_OK; 1022 } else { 1023 return VIRTIO_NET_ERR; 1024 } 1025 } 1026 1027 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1028 struct iovec *iov, unsigned int iov_cnt) 1029 { 1030 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1031 struct virtio_net_ctrl_mac mac_data; 1032 size_t s; 1033 NetClientState *nc = qemu_get_queue(n->nic); 1034 1035 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1036 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1037 return VIRTIO_NET_ERR; 1038 } 1039 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1040 assert(s == sizeof(n->mac)); 1041 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1042 rxfilter_notify(nc); 1043 1044 return VIRTIO_NET_OK; 1045 } 1046 1047 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1048 return VIRTIO_NET_ERR; 1049 } 1050 1051 int in_use = 0; 1052 int first_multi = 0; 1053 uint8_t uni_overflow = 0; 1054 uint8_t multi_overflow = 0; 1055 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1056 1057 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1058 sizeof(mac_data.entries)); 1059 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1060 if (s != sizeof(mac_data.entries)) { 1061 goto error; 1062 } 1063 iov_discard_front(&iov, &iov_cnt, s); 1064 1065 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1066 goto error; 1067 } 1068 1069 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1070 s = iov_to_buf(iov, iov_cnt, 0, macs, 1071 mac_data.entries * ETH_ALEN); 1072 if (s != mac_data.entries * ETH_ALEN) { 1073 goto error; 1074 } 1075 in_use += mac_data.entries; 1076 } else { 1077 uni_overflow = 1; 1078 } 1079 1080 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1081 1082 first_multi = in_use; 1083 1084 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1085 sizeof(mac_data.entries)); 1086 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1087 if (s != sizeof(mac_data.entries)) { 1088 goto error; 1089 } 1090 1091 iov_discard_front(&iov, &iov_cnt, s); 1092 1093 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1094 goto error; 1095 } 1096 1097 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1098 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1099 mac_data.entries * ETH_ALEN); 1100 if (s != mac_data.entries * ETH_ALEN) { 1101 goto error; 1102 } 1103 in_use += mac_data.entries; 1104 } else { 1105 multi_overflow = 1; 1106 } 1107 1108 n->mac_table.in_use = in_use; 1109 n->mac_table.first_multi = first_multi; 1110 n->mac_table.uni_overflow = uni_overflow; 1111 n->mac_table.multi_overflow = multi_overflow; 1112 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1113 g_free(macs); 1114 rxfilter_notify(nc); 1115 1116 return VIRTIO_NET_OK; 1117 1118 error: 1119 g_free(macs); 1120 return VIRTIO_NET_ERR; 1121 } 1122 1123 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1124 struct iovec *iov, unsigned int iov_cnt) 1125 { 1126 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1127 uint16_t vid; 1128 size_t s; 1129 NetClientState *nc = qemu_get_queue(n->nic); 1130 1131 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1132 vid = virtio_lduw_p(vdev, &vid); 1133 if (s != sizeof(vid)) { 1134 return VIRTIO_NET_ERR; 1135 } 1136 1137 if (vid >= MAX_VLAN) 1138 return VIRTIO_NET_ERR; 1139 1140 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1141 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1142 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1143 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1144 else 1145 return VIRTIO_NET_ERR; 1146 1147 rxfilter_notify(nc); 1148 1149 return VIRTIO_NET_OK; 1150 } 1151 1152 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1153 struct iovec *iov, unsigned int iov_cnt) 1154 { 1155 trace_virtio_net_handle_announce(n->announce_timer.round); 1156 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1157 n->status & VIRTIO_NET_S_ANNOUNCE) { 1158 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1159 if (n->announce_timer.round) { 1160 qemu_announce_timer_step(&n->announce_timer); 1161 } 1162 return VIRTIO_NET_OK; 1163 } else { 1164 return VIRTIO_NET_ERR; 1165 } 1166 } 1167 1168 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1169 1170 static void virtio_net_disable_rss(VirtIONet *n) 1171 { 1172 if (n->rss_data.enabled) { 1173 trace_virtio_net_rss_disable(); 1174 } 1175 n->rss_data.enabled = false; 1176 1177 virtio_net_detach_epbf_rss(n); 1178 } 1179 1180 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1181 { 1182 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1183 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1184 return false; 1185 } 1186 1187 return nc->info->set_steering_ebpf(nc, prog_fd); 1188 } 1189 1190 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1191 struct EBPFRSSConfig *config) 1192 { 1193 config->redirect = data->redirect; 1194 config->populate_hash = data->populate_hash; 1195 config->hash_types = data->hash_types; 1196 config->indirections_len = data->indirections_len; 1197 config->default_queue = data->default_queue; 1198 } 1199 1200 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1201 { 1202 struct EBPFRSSConfig config = {}; 1203 1204 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1205 return false; 1206 } 1207 1208 rss_data_to_rss_config(&n->rss_data, &config); 1209 1210 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1211 n->rss_data.indirections_table, n->rss_data.key)) { 1212 return false; 1213 } 1214 1215 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1216 return false; 1217 } 1218 1219 return true; 1220 } 1221 1222 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1223 { 1224 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1225 } 1226 1227 static bool virtio_net_load_ebpf(VirtIONet *n) 1228 { 1229 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1230 /* backend does't support steering ebpf */ 1231 return false; 1232 } 1233 1234 return ebpf_rss_load(&n->ebpf_rss); 1235 } 1236 1237 static void virtio_net_unload_ebpf(VirtIONet *n) 1238 { 1239 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1240 ebpf_rss_unload(&n->ebpf_rss); 1241 } 1242 1243 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1244 struct iovec *iov, 1245 unsigned int iov_cnt, 1246 bool do_rss) 1247 { 1248 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1249 struct virtio_net_rss_config cfg; 1250 size_t s, offset = 0, size_get; 1251 uint16_t queue_pairs, i; 1252 struct { 1253 uint16_t us; 1254 uint8_t b; 1255 } QEMU_PACKED temp; 1256 const char *err_msg = ""; 1257 uint32_t err_value = 0; 1258 1259 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1260 err_msg = "RSS is not negotiated"; 1261 goto error; 1262 } 1263 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1264 err_msg = "Hash report is not negotiated"; 1265 goto error; 1266 } 1267 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1268 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1269 if (s != size_get) { 1270 err_msg = "Short command buffer"; 1271 err_value = (uint32_t)s; 1272 goto error; 1273 } 1274 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1275 n->rss_data.indirections_len = 1276 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1277 n->rss_data.indirections_len++; 1278 if (!do_rss) { 1279 n->rss_data.indirections_len = 1; 1280 } 1281 if (!is_power_of_2(n->rss_data.indirections_len)) { 1282 err_msg = "Invalid size of indirection table"; 1283 err_value = n->rss_data.indirections_len; 1284 goto error; 1285 } 1286 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1287 err_msg = "Too large indirection table"; 1288 err_value = n->rss_data.indirections_len; 1289 goto error; 1290 } 1291 n->rss_data.default_queue = do_rss ? 1292 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1293 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1294 err_msg = "Invalid default queue"; 1295 err_value = n->rss_data.default_queue; 1296 goto error; 1297 } 1298 offset += size_get; 1299 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1300 g_free(n->rss_data.indirections_table); 1301 n->rss_data.indirections_table = g_malloc(size_get); 1302 if (!n->rss_data.indirections_table) { 1303 err_msg = "Can't allocate indirections table"; 1304 err_value = n->rss_data.indirections_len; 1305 goto error; 1306 } 1307 s = iov_to_buf(iov, iov_cnt, offset, 1308 n->rss_data.indirections_table, size_get); 1309 if (s != size_get) { 1310 err_msg = "Short indirection table buffer"; 1311 err_value = (uint32_t)s; 1312 goto error; 1313 } 1314 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1315 uint16_t val = n->rss_data.indirections_table[i]; 1316 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1317 } 1318 offset += size_get; 1319 size_get = sizeof(temp); 1320 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1321 if (s != size_get) { 1322 err_msg = "Can't get queue_pairs"; 1323 err_value = (uint32_t)s; 1324 goto error; 1325 } 1326 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1327 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1328 err_msg = "Invalid number of queue_pairs"; 1329 err_value = queue_pairs; 1330 goto error; 1331 } 1332 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1333 err_msg = "Invalid key size"; 1334 err_value = temp.b; 1335 goto error; 1336 } 1337 if (!temp.b && n->rss_data.hash_types) { 1338 err_msg = "No key provided"; 1339 err_value = 0; 1340 goto error; 1341 } 1342 if (!temp.b && !n->rss_data.hash_types) { 1343 virtio_net_disable_rss(n); 1344 return queue_pairs; 1345 } 1346 offset += size_get; 1347 size_get = temp.b; 1348 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1349 if (s != size_get) { 1350 err_msg = "Can get key buffer"; 1351 err_value = (uint32_t)s; 1352 goto error; 1353 } 1354 n->rss_data.enabled = true; 1355 1356 if (!n->rss_data.populate_hash) { 1357 if (!virtio_net_attach_epbf_rss(n)) { 1358 /* EBPF must be loaded for vhost */ 1359 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1360 warn_report("Can't load eBPF RSS for vhost"); 1361 goto error; 1362 } 1363 /* fallback to software RSS */ 1364 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1365 n->rss_data.enabled_software_rss = true; 1366 } 1367 } else { 1368 /* use software RSS for hash populating */ 1369 /* and detach eBPF if was loaded before */ 1370 virtio_net_detach_epbf_rss(n); 1371 n->rss_data.enabled_software_rss = true; 1372 } 1373 1374 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1375 n->rss_data.indirections_len, 1376 temp.b); 1377 return queue_pairs; 1378 error: 1379 trace_virtio_net_rss_error(err_msg, err_value); 1380 virtio_net_disable_rss(n); 1381 return 0; 1382 } 1383 1384 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1385 struct iovec *iov, unsigned int iov_cnt) 1386 { 1387 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1388 uint16_t queue_pairs; 1389 NetClientState *nc = qemu_get_queue(n->nic); 1390 1391 virtio_net_disable_rss(n); 1392 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1393 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1394 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1395 } 1396 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1397 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1398 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1399 struct virtio_net_ctrl_mq mq; 1400 size_t s; 1401 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1402 return VIRTIO_NET_ERR; 1403 } 1404 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1405 if (s != sizeof(mq)) { 1406 return VIRTIO_NET_ERR; 1407 } 1408 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1409 1410 } else { 1411 return VIRTIO_NET_ERR; 1412 } 1413 1414 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1415 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1416 queue_pairs > n->max_queue_pairs || 1417 !n->multiqueue) { 1418 return VIRTIO_NET_ERR; 1419 } 1420 1421 n->curr_queue_pairs = queue_pairs; 1422 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1423 /* 1424 * Avoid updating the backend for a vdpa device: We're only interested 1425 * in updating the device model queues. 1426 */ 1427 return VIRTIO_NET_OK; 1428 } 1429 /* stop the backend before changing the number of queue_pairs to avoid handling a 1430 * disabled queue */ 1431 virtio_net_set_status(vdev, vdev->status); 1432 virtio_net_set_queue_pairs(n); 1433 1434 return VIRTIO_NET_OK; 1435 } 1436 1437 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1438 const struct iovec *in_sg, unsigned in_num, 1439 const struct iovec *out_sg, 1440 unsigned out_num) 1441 { 1442 VirtIONet *n = VIRTIO_NET(vdev); 1443 struct virtio_net_ctrl_hdr ctrl; 1444 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1445 size_t s; 1446 struct iovec *iov, *iov2; 1447 1448 if (iov_size(in_sg, in_num) < sizeof(status) || 1449 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1450 virtio_error(vdev, "virtio-net ctrl missing headers"); 1451 return 0; 1452 } 1453 1454 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1455 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1456 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1457 if (s != sizeof(ctrl)) { 1458 status = VIRTIO_NET_ERR; 1459 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1460 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1461 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1462 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1463 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1464 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1465 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1466 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1467 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1468 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1469 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1470 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1471 } 1472 1473 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1474 assert(s == sizeof(status)); 1475 1476 g_free(iov2); 1477 return sizeof(status); 1478 } 1479 1480 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1481 { 1482 VirtQueueElement *elem; 1483 1484 for (;;) { 1485 size_t written; 1486 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1487 if (!elem) { 1488 break; 1489 } 1490 1491 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1492 elem->out_sg, elem->out_num); 1493 if (written > 0) { 1494 virtqueue_push(vq, elem, written); 1495 virtio_notify(vdev, vq); 1496 g_free(elem); 1497 } else { 1498 virtqueue_detach_element(vq, elem, 0); 1499 g_free(elem); 1500 break; 1501 } 1502 } 1503 } 1504 1505 /* RX */ 1506 1507 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1508 { 1509 VirtIONet *n = VIRTIO_NET(vdev); 1510 int queue_index = vq2q(virtio_get_queue_index(vq)); 1511 1512 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1513 } 1514 1515 static bool virtio_net_can_receive(NetClientState *nc) 1516 { 1517 VirtIONet *n = qemu_get_nic_opaque(nc); 1518 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1519 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1520 1521 if (!vdev->vm_running) { 1522 return false; 1523 } 1524 1525 if (nc->queue_index >= n->curr_queue_pairs) { 1526 return false; 1527 } 1528 1529 if (!virtio_queue_ready(q->rx_vq) || 1530 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1531 return false; 1532 } 1533 1534 return true; 1535 } 1536 1537 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1538 { 1539 VirtIONet *n = q->n; 1540 if (virtio_queue_empty(q->rx_vq) || 1541 (n->mergeable_rx_bufs && 1542 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1543 virtio_queue_set_notification(q->rx_vq, 1); 1544 1545 /* To avoid a race condition where the guest has made some buffers 1546 * available after the above check but before notification was 1547 * enabled, check for available buffers again. 1548 */ 1549 if (virtio_queue_empty(q->rx_vq) || 1550 (n->mergeable_rx_bufs && 1551 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1552 return 0; 1553 } 1554 } 1555 1556 virtio_queue_set_notification(q->rx_vq, 0); 1557 return 1; 1558 } 1559 1560 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1561 { 1562 virtio_tswap16s(vdev, &hdr->hdr_len); 1563 virtio_tswap16s(vdev, &hdr->gso_size); 1564 virtio_tswap16s(vdev, &hdr->csum_start); 1565 virtio_tswap16s(vdev, &hdr->csum_offset); 1566 } 1567 1568 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1569 * it never finds out that the packets don't have valid checksums. This 1570 * causes dhclient to get upset. Fedora's carried a patch for ages to 1571 * fix this with Xen but it hasn't appeared in an upstream release of 1572 * dhclient yet. 1573 * 1574 * To avoid breaking existing guests, we catch udp packets and add 1575 * checksums. This is terrible but it's better than hacking the guest 1576 * kernels. 1577 * 1578 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1579 * we should provide a mechanism to disable it to avoid polluting the host 1580 * cache. 1581 */ 1582 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1583 uint8_t *buf, size_t size) 1584 { 1585 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1586 (size > 27 && size < 1500) && /* normal sized MTU */ 1587 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1588 (buf[23] == 17) && /* ip.protocol == UDP */ 1589 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1590 net_checksum_calculate(buf, size, CSUM_UDP); 1591 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1592 } 1593 } 1594 1595 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1596 const void *buf, size_t size) 1597 { 1598 if (n->has_vnet_hdr) { 1599 /* FIXME this cast is evil */ 1600 void *wbuf = (void *)buf; 1601 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1602 size - n->host_hdr_len); 1603 1604 if (n->needs_vnet_hdr_swap) { 1605 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1606 } 1607 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1608 } else { 1609 struct virtio_net_hdr hdr = { 1610 .flags = 0, 1611 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1612 }; 1613 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1614 } 1615 } 1616 1617 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1618 { 1619 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1620 static const uint8_t vlan[] = {0x81, 0x00}; 1621 uint8_t *ptr = (uint8_t *)buf; 1622 int i; 1623 1624 if (n->promisc) 1625 return 1; 1626 1627 ptr += n->host_hdr_len; 1628 1629 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1630 int vid = lduw_be_p(ptr + 14) & 0xfff; 1631 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1632 return 0; 1633 } 1634 1635 if (ptr[0] & 1) { // multicast 1636 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1637 return !n->nobcast; 1638 } else if (n->nomulti) { 1639 return 0; 1640 } else if (n->allmulti || n->mac_table.multi_overflow) { 1641 return 1; 1642 } 1643 1644 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1645 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1646 return 1; 1647 } 1648 } 1649 } else { // unicast 1650 if (n->nouni) { 1651 return 0; 1652 } else if (n->alluni || n->mac_table.uni_overflow) { 1653 return 1; 1654 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1655 return 1; 1656 } 1657 1658 for (i = 0; i < n->mac_table.first_multi; i++) { 1659 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1660 return 1; 1661 } 1662 } 1663 } 1664 1665 return 0; 1666 } 1667 1668 static uint8_t virtio_net_get_hash_type(bool isip4, 1669 bool isip6, 1670 bool isudp, 1671 bool istcp, 1672 uint32_t types) 1673 { 1674 if (isip4) { 1675 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { 1676 return NetPktRssIpV4Tcp; 1677 } 1678 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { 1679 return NetPktRssIpV4Udp; 1680 } 1681 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1682 return NetPktRssIpV4; 1683 } 1684 } else if (isip6) { 1685 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 1686 VIRTIO_NET_RSS_HASH_TYPE_TCPv6; 1687 1688 if (istcp && (types & mask)) { 1689 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? 1690 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; 1691 } 1692 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; 1693 if (isudp && (types & mask)) { 1694 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? 1695 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; 1696 } 1697 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; 1698 if (types & mask) { 1699 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? 1700 NetPktRssIpV6Ex : NetPktRssIpV6; 1701 } 1702 } 1703 return 0xff; 1704 } 1705 1706 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1707 uint32_t hash) 1708 { 1709 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1710 hdr->hash_value = hash; 1711 hdr->hash_report = report; 1712 } 1713 1714 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1715 size_t size) 1716 { 1717 VirtIONet *n = qemu_get_nic_opaque(nc); 1718 unsigned int index = nc->queue_index, new_index = index; 1719 struct NetRxPkt *pkt = n->rx_pkt; 1720 uint8_t net_hash_type; 1721 uint32_t hash; 1722 bool isip4, isip6, isudp, istcp; 1723 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1724 VIRTIO_NET_HASH_REPORT_IPv4, 1725 VIRTIO_NET_HASH_REPORT_TCPv4, 1726 VIRTIO_NET_HASH_REPORT_TCPv6, 1727 VIRTIO_NET_HASH_REPORT_IPv6, 1728 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1729 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1730 VIRTIO_NET_HASH_REPORT_UDPv4, 1731 VIRTIO_NET_HASH_REPORT_UDPv6, 1732 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1733 }; 1734 1735 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, 1736 size - n->host_hdr_len); 1737 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1738 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { 1739 istcp = isudp = false; 1740 } 1741 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { 1742 istcp = isudp = false; 1743 } 1744 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, 1745 n->rss_data.hash_types); 1746 if (net_hash_type > NetPktRssIpV6UdpEx) { 1747 if (n->rss_data.populate_hash) { 1748 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1749 } 1750 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1751 } 1752 1753 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1754 1755 if (n->rss_data.populate_hash) { 1756 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1757 } 1758 1759 if (n->rss_data.redirect) { 1760 new_index = hash & (n->rss_data.indirections_len - 1); 1761 new_index = n->rss_data.indirections_table[new_index]; 1762 } 1763 1764 return (index == new_index) ? -1 : new_index; 1765 } 1766 1767 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1768 size_t size, bool no_rss) 1769 { 1770 VirtIONet *n = qemu_get_nic_opaque(nc); 1771 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1772 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1773 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1774 size_t lens[VIRTQUEUE_MAX_SIZE]; 1775 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1776 struct virtio_net_hdr_mrg_rxbuf mhdr; 1777 unsigned mhdr_cnt = 0; 1778 size_t offset, i, guest_offset, j; 1779 ssize_t err; 1780 1781 if (!virtio_net_can_receive(nc)) { 1782 return -1; 1783 } 1784 1785 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1786 int index = virtio_net_process_rss(nc, buf, size); 1787 if (index >= 0) { 1788 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1789 return virtio_net_receive_rcu(nc2, buf, size, true); 1790 } 1791 } 1792 1793 /* hdr_len refers to the header we supply to the guest */ 1794 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1795 return 0; 1796 } 1797 1798 if (!receive_filter(n, buf, size)) 1799 return size; 1800 1801 offset = i = 0; 1802 1803 while (offset < size) { 1804 VirtQueueElement *elem; 1805 int len, total; 1806 const struct iovec *sg; 1807 1808 total = 0; 1809 1810 if (i == VIRTQUEUE_MAX_SIZE) { 1811 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1812 err = size; 1813 goto err; 1814 } 1815 1816 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1817 if (!elem) { 1818 if (i) { 1819 virtio_error(vdev, "virtio-net unexpected empty queue: " 1820 "i %zd mergeable %d offset %zd, size %zd, " 1821 "guest hdr len %zd, host hdr len %zd " 1822 "guest features 0x%" PRIx64, 1823 i, n->mergeable_rx_bufs, offset, size, 1824 n->guest_hdr_len, n->host_hdr_len, 1825 vdev->guest_features); 1826 } 1827 err = -1; 1828 goto err; 1829 } 1830 1831 if (elem->in_num < 1) { 1832 virtio_error(vdev, 1833 "virtio-net receive queue contains no in buffers"); 1834 virtqueue_detach_element(q->rx_vq, elem, 0); 1835 g_free(elem); 1836 err = -1; 1837 goto err; 1838 } 1839 1840 sg = elem->in_sg; 1841 if (i == 0) { 1842 assert(offset == 0); 1843 if (n->mergeable_rx_bufs) { 1844 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1845 sg, elem->in_num, 1846 offsetof(typeof(mhdr), num_buffers), 1847 sizeof(mhdr.num_buffers)); 1848 } 1849 1850 receive_header(n, sg, elem->in_num, buf, size); 1851 if (n->rss_data.populate_hash) { 1852 offset = sizeof(mhdr); 1853 iov_from_buf(sg, elem->in_num, offset, 1854 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1855 } 1856 offset = n->host_hdr_len; 1857 total += n->guest_hdr_len; 1858 guest_offset = n->guest_hdr_len; 1859 } else { 1860 guest_offset = 0; 1861 } 1862 1863 /* copy in packet. ugh */ 1864 len = iov_from_buf(sg, elem->in_num, guest_offset, 1865 buf + offset, size - offset); 1866 total += len; 1867 offset += len; 1868 /* If buffers can't be merged, at this point we 1869 * must have consumed the complete packet. 1870 * Otherwise, drop it. */ 1871 if (!n->mergeable_rx_bufs && offset < size) { 1872 virtqueue_unpop(q->rx_vq, elem, total); 1873 g_free(elem); 1874 err = size; 1875 goto err; 1876 } 1877 1878 elems[i] = elem; 1879 lens[i] = total; 1880 i++; 1881 } 1882 1883 if (mhdr_cnt) { 1884 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1885 iov_from_buf(mhdr_sg, mhdr_cnt, 1886 0, 1887 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1888 } 1889 1890 for (j = 0; j < i; j++) { 1891 /* signal other side */ 1892 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1893 g_free(elems[j]); 1894 } 1895 1896 virtqueue_flush(q->rx_vq, i); 1897 virtio_notify(vdev, q->rx_vq); 1898 1899 return size; 1900 1901 err: 1902 for (j = 0; j < i; j++) { 1903 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1904 g_free(elems[j]); 1905 } 1906 1907 return err; 1908 } 1909 1910 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1911 size_t size) 1912 { 1913 RCU_READ_LOCK_GUARD(); 1914 1915 return virtio_net_receive_rcu(nc, buf, size, false); 1916 } 1917 1918 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1919 const uint8_t *buf, 1920 VirtioNetRscUnit *unit) 1921 { 1922 uint16_t ip_hdrlen; 1923 struct ip_header *ip; 1924 1925 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1926 + sizeof(struct eth_header)); 1927 unit->ip = (void *)ip; 1928 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1929 unit->ip_plen = &ip->ip_len; 1930 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1931 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1932 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1933 } 1934 1935 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1936 const uint8_t *buf, 1937 VirtioNetRscUnit *unit) 1938 { 1939 struct ip6_header *ip6; 1940 1941 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 1942 + sizeof(struct eth_header)); 1943 unit->ip = ip6; 1944 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 1945 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 1946 + sizeof(struct ip6_header)); 1947 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1948 1949 /* There is a difference between payload lenght in ipv4 and v6, 1950 ip header is excluded in ipv6 */ 1951 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 1952 } 1953 1954 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 1955 VirtioNetRscSeg *seg) 1956 { 1957 int ret; 1958 struct virtio_net_hdr_v1 *h; 1959 1960 h = (struct virtio_net_hdr_v1 *)seg->buf; 1961 h->flags = 0; 1962 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 1963 1964 if (seg->is_coalesced) { 1965 h->rsc.segments = seg->packets; 1966 h->rsc.dup_acks = seg->dup_ack; 1967 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 1968 if (chain->proto == ETH_P_IP) { 1969 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1970 } else { 1971 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1972 } 1973 } 1974 1975 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 1976 QTAILQ_REMOVE(&chain->buffers, seg, next); 1977 g_free(seg->buf); 1978 g_free(seg); 1979 1980 return ret; 1981 } 1982 1983 static void virtio_net_rsc_purge(void *opq) 1984 { 1985 VirtioNetRscSeg *seg, *rn; 1986 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 1987 1988 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 1989 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 1990 chain->stat.purge_failed++; 1991 continue; 1992 } 1993 } 1994 1995 chain->stat.timer++; 1996 if (!QTAILQ_EMPTY(&chain->buffers)) { 1997 timer_mod(chain->drain_timer, 1998 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 1999 } 2000 } 2001 2002 static void virtio_net_rsc_cleanup(VirtIONet *n) 2003 { 2004 VirtioNetRscChain *chain, *rn_chain; 2005 VirtioNetRscSeg *seg, *rn_seg; 2006 2007 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2008 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2009 QTAILQ_REMOVE(&chain->buffers, seg, next); 2010 g_free(seg->buf); 2011 g_free(seg); 2012 } 2013 2014 timer_free(chain->drain_timer); 2015 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2016 g_free(chain); 2017 } 2018 } 2019 2020 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2021 NetClientState *nc, 2022 const uint8_t *buf, size_t size) 2023 { 2024 uint16_t hdr_len; 2025 VirtioNetRscSeg *seg; 2026 2027 hdr_len = chain->n->guest_hdr_len; 2028 seg = g_new(VirtioNetRscSeg, 1); 2029 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2030 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2031 memcpy(seg->buf, buf, size); 2032 seg->size = size; 2033 seg->packets = 1; 2034 seg->dup_ack = 0; 2035 seg->is_coalesced = 0; 2036 seg->nc = nc; 2037 2038 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2039 chain->stat.cache++; 2040 2041 switch (chain->proto) { 2042 case ETH_P_IP: 2043 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2044 break; 2045 case ETH_P_IPV6: 2046 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2047 break; 2048 default: 2049 g_assert_not_reached(); 2050 } 2051 } 2052 2053 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2054 VirtioNetRscSeg *seg, 2055 const uint8_t *buf, 2056 struct tcp_header *n_tcp, 2057 struct tcp_header *o_tcp) 2058 { 2059 uint32_t nack, oack; 2060 uint16_t nwin, owin; 2061 2062 nack = htonl(n_tcp->th_ack); 2063 nwin = htons(n_tcp->th_win); 2064 oack = htonl(o_tcp->th_ack); 2065 owin = htons(o_tcp->th_win); 2066 2067 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2068 chain->stat.ack_out_of_win++; 2069 return RSC_FINAL; 2070 } else if (nack == oack) { 2071 /* duplicated ack or window probe */ 2072 if (nwin == owin) { 2073 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2074 chain->stat.dup_ack++; 2075 return RSC_FINAL; 2076 } else { 2077 /* Coalesce window update */ 2078 o_tcp->th_win = n_tcp->th_win; 2079 chain->stat.win_update++; 2080 return RSC_COALESCE; 2081 } 2082 } else { 2083 /* pure ack, go to 'C', finalize*/ 2084 chain->stat.pure_ack++; 2085 return RSC_FINAL; 2086 } 2087 } 2088 2089 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2090 VirtioNetRscSeg *seg, 2091 const uint8_t *buf, 2092 VirtioNetRscUnit *n_unit) 2093 { 2094 void *data; 2095 uint16_t o_ip_len; 2096 uint32_t nseq, oseq; 2097 VirtioNetRscUnit *o_unit; 2098 2099 o_unit = &seg->unit; 2100 o_ip_len = htons(*o_unit->ip_plen); 2101 nseq = htonl(n_unit->tcp->th_seq); 2102 oseq = htonl(o_unit->tcp->th_seq); 2103 2104 /* out of order or retransmitted. */ 2105 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2106 chain->stat.data_out_of_win++; 2107 return RSC_FINAL; 2108 } 2109 2110 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2111 if (nseq == oseq) { 2112 if ((o_unit->payload == 0) && n_unit->payload) { 2113 /* From no payload to payload, normal case, not a dup ack or etc */ 2114 chain->stat.data_after_pure_ack++; 2115 goto coalesce; 2116 } else { 2117 return virtio_net_rsc_handle_ack(chain, seg, buf, 2118 n_unit->tcp, o_unit->tcp); 2119 } 2120 } else if ((nseq - oseq) != o_unit->payload) { 2121 /* Not a consistent packet, out of order */ 2122 chain->stat.data_out_of_order++; 2123 return RSC_FINAL; 2124 } else { 2125 coalesce: 2126 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2127 chain->stat.over_size++; 2128 return RSC_FINAL; 2129 } 2130 2131 /* Here comes the right data, the payload length in v4/v6 is different, 2132 so use the field value to update and record the new data len */ 2133 o_unit->payload += n_unit->payload; /* update new data len */ 2134 2135 /* update field in ip header */ 2136 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2137 2138 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2139 for windows guest, while this may change the behavior for linux 2140 guest (only if it uses RSC feature). */ 2141 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2142 2143 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2144 o_unit->tcp->th_win = n_unit->tcp->th_win; 2145 2146 memmove(seg->buf + seg->size, data, n_unit->payload); 2147 seg->size += n_unit->payload; 2148 seg->packets++; 2149 chain->stat.coalesced++; 2150 return RSC_COALESCE; 2151 } 2152 } 2153 2154 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2155 VirtioNetRscSeg *seg, 2156 const uint8_t *buf, size_t size, 2157 VirtioNetRscUnit *unit) 2158 { 2159 struct ip_header *ip1, *ip2; 2160 2161 ip1 = (struct ip_header *)(unit->ip); 2162 ip2 = (struct ip_header *)(seg->unit.ip); 2163 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2164 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2165 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2166 chain->stat.no_match++; 2167 return RSC_NO_MATCH; 2168 } 2169 2170 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2171 } 2172 2173 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2174 VirtioNetRscSeg *seg, 2175 const uint8_t *buf, size_t size, 2176 VirtioNetRscUnit *unit) 2177 { 2178 struct ip6_header *ip1, *ip2; 2179 2180 ip1 = (struct ip6_header *)(unit->ip); 2181 ip2 = (struct ip6_header *)(seg->unit.ip); 2182 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2183 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2184 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2185 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2186 chain->stat.no_match++; 2187 return RSC_NO_MATCH; 2188 } 2189 2190 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2191 } 2192 2193 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2194 * to prevent out of order */ 2195 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2196 struct tcp_header *tcp) 2197 { 2198 uint16_t tcp_hdr; 2199 uint16_t tcp_flag; 2200 2201 tcp_flag = htons(tcp->th_offset_flags); 2202 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2203 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2204 if (tcp_flag & TH_SYN) { 2205 chain->stat.tcp_syn++; 2206 return RSC_BYPASS; 2207 } 2208 2209 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2210 chain->stat.tcp_ctrl_drain++; 2211 return RSC_FINAL; 2212 } 2213 2214 if (tcp_hdr > sizeof(struct tcp_header)) { 2215 chain->stat.tcp_all_opt++; 2216 return RSC_FINAL; 2217 } 2218 2219 return RSC_CANDIDATE; 2220 } 2221 2222 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2223 NetClientState *nc, 2224 const uint8_t *buf, size_t size, 2225 VirtioNetRscUnit *unit) 2226 { 2227 int ret; 2228 VirtioNetRscSeg *seg, *nseg; 2229 2230 if (QTAILQ_EMPTY(&chain->buffers)) { 2231 chain->stat.empty_cache++; 2232 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2233 timer_mod(chain->drain_timer, 2234 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2235 return size; 2236 } 2237 2238 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2239 if (chain->proto == ETH_P_IP) { 2240 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2241 } else { 2242 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2243 } 2244 2245 if (ret == RSC_FINAL) { 2246 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2247 /* Send failed */ 2248 chain->stat.final_failed++; 2249 return 0; 2250 } 2251 2252 /* Send current packet */ 2253 return virtio_net_do_receive(nc, buf, size); 2254 } else if (ret == RSC_NO_MATCH) { 2255 continue; 2256 } else { 2257 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2258 seg->is_coalesced = 1; 2259 return size; 2260 } 2261 } 2262 2263 chain->stat.no_match_cache++; 2264 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2265 return size; 2266 } 2267 2268 /* Drain a connection data, this is to avoid out of order segments */ 2269 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2270 NetClientState *nc, 2271 const uint8_t *buf, size_t size, 2272 uint16_t ip_start, uint16_t ip_size, 2273 uint16_t tcp_port) 2274 { 2275 VirtioNetRscSeg *seg, *nseg; 2276 uint32_t ppair1, ppair2; 2277 2278 ppair1 = *(uint32_t *)(buf + tcp_port); 2279 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2280 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2281 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2282 || (ppair1 != ppair2)) { 2283 continue; 2284 } 2285 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2286 chain->stat.drain_failed++; 2287 } 2288 2289 break; 2290 } 2291 2292 return virtio_net_do_receive(nc, buf, size); 2293 } 2294 2295 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2296 struct ip_header *ip, 2297 const uint8_t *buf, size_t size) 2298 { 2299 uint16_t ip_len; 2300 2301 /* Not an ipv4 packet */ 2302 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2303 chain->stat.ip_option++; 2304 return RSC_BYPASS; 2305 } 2306 2307 /* Don't handle packets with ip option */ 2308 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2309 chain->stat.ip_option++; 2310 return RSC_BYPASS; 2311 } 2312 2313 if (ip->ip_p != IPPROTO_TCP) { 2314 chain->stat.bypass_not_tcp++; 2315 return RSC_BYPASS; 2316 } 2317 2318 /* Don't handle packets with ip fragment */ 2319 if (!(htons(ip->ip_off) & IP_DF)) { 2320 chain->stat.ip_frag++; 2321 return RSC_BYPASS; 2322 } 2323 2324 /* Don't handle packets with ecn flag */ 2325 if (IPTOS_ECN(ip->ip_tos)) { 2326 chain->stat.ip_ecn++; 2327 return RSC_BYPASS; 2328 } 2329 2330 ip_len = htons(ip->ip_len); 2331 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2332 || ip_len > (size - chain->n->guest_hdr_len - 2333 sizeof(struct eth_header))) { 2334 chain->stat.ip_hacked++; 2335 return RSC_BYPASS; 2336 } 2337 2338 return RSC_CANDIDATE; 2339 } 2340 2341 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2342 NetClientState *nc, 2343 const uint8_t *buf, size_t size) 2344 { 2345 int32_t ret; 2346 uint16_t hdr_len; 2347 VirtioNetRscUnit unit; 2348 2349 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2350 2351 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2352 + sizeof(struct tcp_header))) { 2353 chain->stat.bypass_not_tcp++; 2354 return virtio_net_do_receive(nc, buf, size); 2355 } 2356 2357 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2358 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2359 != RSC_CANDIDATE) { 2360 return virtio_net_do_receive(nc, buf, size); 2361 } 2362 2363 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2364 if (ret == RSC_BYPASS) { 2365 return virtio_net_do_receive(nc, buf, size); 2366 } else if (ret == RSC_FINAL) { 2367 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2368 ((hdr_len + sizeof(struct eth_header)) + 12), 2369 VIRTIO_NET_IP4_ADDR_SIZE, 2370 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2371 } 2372 2373 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2374 } 2375 2376 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2377 struct ip6_header *ip6, 2378 const uint8_t *buf, size_t size) 2379 { 2380 uint16_t ip_len; 2381 2382 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2383 != IP_HEADER_VERSION_6) { 2384 return RSC_BYPASS; 2385 } 2386 2387 /* Both option and protocol is checked in this */ 2388 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2389 chain->stat.bypass_not_tcp++; 2390 return RSC_BYPASS; 2391 } 2392 2393 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2394 if (ip_len < sizeof(struct tcp_header) || 2395 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2396 - sizeof(struct ip6_header))) { 2397 chain->stat.ip_hacked++; 2398 return RSC_BYPASS; 2399 } 2400 2401 /* Don't handle packets with ecn flag */ 2402 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2403 chain->stat.ip_ecn++; 2404 return RSC_BYPASS; 2405 } 2406 2407 return RSC_CANDIDATE; 2408 } 2409 2410 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2411 const uint8_t *buf, size_t size) 2412 { 2413 int32_t ret; 2414 uint16_t hdr_len; 2415 VirtioNetRscChain *chain; 2416 VirtioNetRscUnit unit; 2417 2418 chain = (VirtioNetRscChain *)opq; 2419 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2420 2421 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2422 + sizeof(tcp_header))) { 2423 return virtio_net_do_receive(nc, buf, size); 2424 } 2425 2426 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2427 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2428 unit.ip, buf, size)) { 2429 return virtio_net_do_receive(nc, buf, size); 2430 } 2431 2432 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2433 if (ret == RSC_BYPASS) { 2434 return virtio_net_do_receive(nc, buf, size); 2435 } else if (ret == RSC_FINAL) { 2436 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2437 ((hdr_len + sizeof(struct eth_header)) + 8), 2438 VIRTIO_NET_IP6_ADDR_SIZE, 2439 hdr_len + sizeof(struct eth_header) 2440 + sizeof(struct ip6_header)); 2441 } 2442 2443 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2444 } 2445 2446 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2447 NetClientState *nc, 2448 uint16_t proto) 2449 { 2450 VirtioNetRscChain *chain; 2451 2452 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2453 return NULL; 2454 } 2455 2456 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2457 if (chain->proto == proto) { 2458 return chain; 2459 } 2460 } 2461 2462 chain = g_malloc(sizeof(*chain)); 2463 chain->n = n; 2464 chain->proto = proto; 2465 if (proto == (uint16_t)ETH_P_IP) { 2466 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2467 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2468 } else { 2469 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2470 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2471 } 2472 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2473 virtio_net_rsc_purge, chain); 2474 memset(&chain->stat, 0, sizeof(chain->stat)); 2475 2476 QTAILQ_INIT(&chain->buffers); 2477 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2478 2479 return chain; 2480 } 2481 2482 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2483 const uint8_t *buf, 2484 size_t size) 2485 { 2486 uint16_t proto; 2487 VirtioNetRscChain *chain; 2488 struct eth_header *eth; 2489 VirtIONet *n; 2490 2491 n = qemu_get_nic_opaque(nc); 2492 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2493 return virtio_net_do_receive(nc, buf, size); 2494 } 2495 2496 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2497 proto = htons(eth->h_proto); 2498 2499 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2500 if (chain) { 2501 chain->stat.received++; 2502 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2503 return virtio_net_rsc_receive4(chain, nc, buf, size); 2504 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2505 return virtio_net_rsc_receive6(chain, nc, buf, size); 2506 } 2507 } 2508 return virtio_net_do_receive(nc, buf, size); 2509 } 2510 2511 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2512 size_t size) 2513 { 2514 VirtIONet *n = qemu_get_nic_opaque(nc); 2515 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2516 return virtio_net_rsc_receive(nc, buf, size); 2517 } else { 2518 return virtio_net_do_receive(nc, buf, size); 2519 } 2520 } 2521 2522 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2523 2524 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2525 { 2526 VirtIONet *n = qemu_get_nic_opaque(nc); 2527 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2528 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2529 int ret; 2530 2531 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2532 virtio_notify(vdev, q->tx_vq); 2533 2534 g_free(q->async_tx.elem); 2535 q->async_tx.elem = NULL; 2536 2537 virtio_queue_set_notification(q->tx_vq, 1); 2538 ret = virtio_net_flush_tx(q); 2539 if (ret >= n->tx_burst) { 2540 /* 2541 * the flush has been stopped by tx_burst 2542 * we will not receive notification for the 2543 * remainining part, so re-schedule 2544 */ 2545 virtio_queue_set_notification(q->tx_vq, 0); 2546 if (q->tx_bh) { 2547 qemu_bh_schedule(q->tx_bh); 2548 } else { 2549 timer_mod(q->tx_timer, 2550 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2551 } 2552 q->tx_waiting = 1; 2553 } 2554 } 2555 2556 /* TX */ 2557 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2558 { 2559 VirtIONet *n = q->n; 2560 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2561 VirtQueueElement *elem; 2562 int32_t num_packets = 0; 2563 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2564 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2565 return num_packets; 2566 } 2567 2568 if (q->async_tx.elem) { 2569 virtio_queue_set_notification(q->tx_vq, 0); 2570 return num_packets; 2571 } 2572 2573 for (;;) { 2574 ssize_t ret; 2575 unsigned int out_num; 2576 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2577 struct virtio_net_hdr_mrg_rxbuf mhdr; 2578 2579 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2580 if (!elem) { 2581 break; 2582 } 2583 2584 out_num = elem->out_num; 2585 out_sg = elem->out_sg; 2586 if (out_num < 1) { 2587 virtio_error(vdev, "virtio-net header not in first element"); 2588 virtqueue_detach_element(q->tx_vq, elem, 0); 2589 g_free(elem); 2590 return -EINVAL; 2591 } 2592 2593 if (n->has_vnet_hdr) { 2594 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2595 n->guest_hdr_len) { 2596 virtio_error(vdev, "virtio-net header incorrect"); 2597 virtqueue_detach_element(q->tx_vq, elem, 0); 2598 g_free(elem); 2599 return -EINVAL; 2600 } 2601 if (n->needs_vnet_hdr_swap) { 2602 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2603 sg2[0].iov_base = &mhdr; 2604 sg2[0].iov_len = n->guest_hdr_len; 2605 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2606 out_sg, out_num, 2607 n->guest_hdr_len, -1); 2608 if (out_num == VIRTQUEUE_MAX_SIZE) { 2609 goto drop; 2610 } 2611 out_num += 1; 2612 out_sg = sg2; 2613 } 2614 } 2615 /* 2616 * If host wants to see the guest header as is, we can 2617 * pass it on unchanged. Otherwise, copy just the parts 2618 * that host is interested in. 2619 */ 2620 assert(n->host_hdr_len <= n->guest_hdr_len); 2621 if (n->host_hdr_len != n->guest_hdr_len) { 2622 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2623 out_sg, out_num, 2624 0, n->host_hdr_len); 2625 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2626 out_sg, out_num, 2627 n->guest_hdr_len, -1); 2628 out_num = sg_num; 2629 out_sg = sg; 2630 } 2631 2632 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2633 out_sg, out_num, virtio_net_tx_complete); 2634 if (ret == 0) { 2635 virtio_queue_set_notification(q->tx_vq, 0); 2636 q->async_tx.elem = elem; 2637 return -EBUSY; 2638 } 2639 2640 drop: 2641 virtqueue_push(q->tx_vq, elem, 0); 2642 virtio_notify(vdev, q->tx_vq); 2643 g_free(elem); 2644 2645 if (++num_packets >= n->tx_burst) { 2646 break; 2647 } 2648 } 2649 return num_packets; 2650 } 2651 2652 static void virtio_net_tx_timer(void *opaque); 2653 2654 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2655 { 2656 VirtIONet *n = VIRTIO_NET(vdev); 2657 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2658 2659 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2660 virtio_net_drop_tx_queue_data(vdev, vq); 2661 return; 2662 } 2663 2664 /* This happens when device was stopped but VCPU wasn't. */ 2665 if (!vdev->vm_running) { 2666 q->tx_waiting = 1; 2667 return; 2668 } 2669 2670 if (q->tx_waiting) { 2671 /* We already have queued packets, immediately flush */ 2672 timer_del(q->tx_timer); 2673 virtio_net_tx_timer(q); 2674 } else { 2675 /* re-arm timer to flush it (and more) on next tick */ 2676 timer_mod(q->tx_timer, 2677 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2678 q->tx_waiting = 1; 2679 virtio_queue_set_notification(vq, 0); 2680 } 2681 } 2682 2683 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2684 { 2685 VirtIONet *n = VIRTIO_NET(vdev); 2686 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2687 2688 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2689 virtio_net_drop_tx_queue_data(vdev, vq); 2690 return; 2691 } 2692 2693 if (unlikely(q->tx_waiting)) { 2694 return; 2695 } 2696 q->tx_waiting = 1; 2697 /* This happens when device was stopped but VCPU wasn't. */ 2698 if (!vdev->vm_running) { 2699 return; 2700 } 2701 virtio_queue_set_notification(vq, 0); 2702 qemu_bh_schedule(q->tx_bh); 2703 } 2704 2705 static void virtio_net_tx_timer(void *opaque) 2706 { 2707 VirtIONetQueue *q = opaque; 2708 VirtIONet *n = q->n; 2709 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2710 int ret; 2711 2712 /* This happens when device was stopped but BH wasn't. */ 2713 if (!vdev->vm_running) { 2714 /* Make sure tx waiting is set, so we'll run when restarted. */ 2715 assert(q->tx_waiting); 2716 return; 2717 } 2718 2719 q->tx_waiting = 0; 2720 2721 /* Just in case the driver is not ready on more */ 2722 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2723 return; 2724 } 2725 2726 ret = virtio_net_flush_tx(q); 2727 if (ret == -EBUSY || ret == -EINVAL) { 2728 return; 2729 } 2730 /* 2731 * If we flush a full burst of packets, assume there are 2732 * more coming and immediately rearm 2733 */ 2734 if (ret >= n->tx_burst) { 2735 q->tx_waiting = 1; 2736 timer_mod(q->tx_timer, 2737 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2738 return; 2739 } 2740 /* 2741 * If less than a full burst, re-enable notification and flush 2742 * anything that may have come in while we weren't looking. If 2743 * we find something, assume the guest is still active and rearm 2744 */ 2745 virtio_queue_set_notification(q->tx_vq, 1); 2746 ret = virtio_net_flush_tx(q); 2747 if (ret > 0) { 2748 virtio_queue_set_notification(q->tx_vq, 0); 2749 q->tx_waiting = 1; 2750 timer_mod(q->tx_timer, 2751 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2752 } 2753 } 2754 2755 static void virtio_net_tx_bh(void *opaque) 2756 { 2757 VirtIONetQueue *q = opaque; 2758 VirtIONet *n = q->n; 2759 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2760 int32_t ret; 2761 2762 /* This happens when device was stopped but BH wasn't. */ 2763 if (!vdev->vm_running) { 2764 /* Make sure tx waiting is set, so we'll run when restarted. */ 2765 assert(q->tx_waiting); 2766 return; 2767 } 2768 2769 q->tx_waiting = 0; 2770 2771 /* Just in case the driver is not ready on more */ 2772 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2773 return; 2774 } 2775 2776 ret = virtio_net_flush_tx(q); 2777 if (ret == -EBUSY || ret == -EINVAL) { 2778 return; /* Notification re-enable handled by tx_complete or device 2779 * broken */ 2780 } 2781 2782 /* If we flush a full burst of packets, assume there are 2783 * more coming and immediately reschedule */ 2784 if (ret >= n->tx_burst) { 2785 qemu_bh_schedule(q->tx_bh); 2786 q->tx_waiting = 1; 2787 return; 2788 } 2789 2790 /* If less than a full burst, re-enable notification and flush 2791 * anything that may have come in while we weren't looking. If 2792 * we find something, assume the guest is still active and reschedule */ 2793 virtio_queue_set_notification(q->tx_vq, 1); 2794 ret = virtio_net_flush_tx(q); 2795 if (ret == -EINVAL) { 2796 return; 2797 } else if (ret > 0) { 2798 virtio_queue_set_notification(q->tx_vq, 0); 2799 qemu_bh_schedule(q->tx_bh); 2800 q->tx_waiting = 1; 2801 } 2802 } 2803 2804 static void virtio_net_add_queue(VirtIONet *n, int index) 2805 { 2806 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2807 2808 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2809 virtio_net_handle_rx); 2810 2811 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2812 n->vqs[index].tx_vq = 2813 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2814 virtio_net_handle_tx_timer); 2815 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2816 virtio_net_tx_timer, 2817 &n->vqs[index]); 2818 } else { 2819 n->vqs[index].tx_vq = 2820 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2821 virtio_net_handle_tx_bh); 2822 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2823 } 2824 2825 n->vqs[index].tx_waiting = 0; 2826 n->vqs[index].n = n; 2827 } 2828 2829 static void virtio_net_del_queue(VirtIONet *n, int index) 2830 { 2831 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2832 VirtIONetQueue *q = &n->vqs[index]; 2833 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2834 2835 qemu_purge_queued_packets(nc); 2836 2837 virtio_del_queue(vdev, index * 2); 2838 if (q->tx_timer) { 2839 timer_free(q->tx_timer); 2840 q->tx_timer = NULL; 2841 } else { 2842 qemu_bh_delete(q->tx_bh); 2843 q->tx_bh = NULL; 2844 } 2845 q->tx_waiting = 0; 2846 virtio_del_queue(vdev, index * 2 + 1); 2847 } 2848 2849 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2850 { 2851 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2852 int old_num_queues = virtio_get_num_queues(vdev); 2853 int new_num_queues = new_max_queue_pairs * 2 + 1; 2854 int i; 2855 2856 assert(old_num_queues >= 3); 2857 assert(old_num_queues % 2 == 1); 2858 2859 if (old_num_queues == new_num_queues) { 2860 return; 2861 } 2862 2863 /* 2864 * We always need to remove and add ctrl vq if 2865 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2866 * and then we only enter one of the following two loops. 2867 */ 2868 virtio_del_queue(vdev, old_num_queues - 1); 2869 2870 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2871 /* new_num_queues < old_num_queues */ 2872 virtio_net_del_queue(n, i / 2); 2873 } 2874 2875 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2876 /* new_num_queues > old_num_queues */ 2877 virtio_net_add_queue(n, i / 2); 2878 } 2879 2880 /* add ctrl_vq last */ 2881 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2882 } 2883 2884 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2885 { 2886 int max = multiqueue ? n->max_queue_pairs : 1; 2887 2888 n->multiqueue = multiqueue; 2889 virtio_net_change_num_queue_pairs(n, max); 2890 2891 virtio_net_set_queue_pairs(n); 2892 } 2893 2894 static int virtio_net_post_load_device(void *opaque, int version_id) 2895 { 2896 VirtIONet *n = opaque; 2897 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2898 int i, link_down; 2899 2900 trace_virtio_net_post_load_device(); 2901 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2902 virtio_vdev_has_feature(vdev, 2903 VIRTIO_F_VERSION_1), 2904 virtio_vdev_has_feature(vdev, 2905 VIRTIO_NET_F_HASH_REPORT)); 2906 2907 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2908 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2909 n->mac_table.in_use = 0; 2910 } 2911 2912 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2913 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2914 } 2915 2916 /* 2917 * curr_guest_offloads will be later overwritten by the 2918 * virtio_set_features_nocheck call done from the virtio_load. 2919 * Here we make sure it is preserved and restored accordingly 2920 * in the virtio_net_post_load_virtio callback. 2921 */ 2922 n->saved_guest_offloads = n->curr_guest_offloads; 2923 2924 virtio_net_set_queue_pairs(n); 2925 2926 /* Find the first multicast entry in the saved MAC filter */ 2927 for (i = 0; i < n->mac_table.in_use; i++) { 2928 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2929 break; 2930 } 2931 } 2932 n->mac_table.first_multi = i; 2933 2934 /* nc.link_down can't be migrated, so infer link_down according 2935 * to link status bit in n->status */ 2936 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2937 for (i = 0; i < n->max_queue_pairs; i++) { 2938 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2939 } 2940 2941 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2942 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 2943 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 2944 QEMU_CLOCK_VIRTUAL, 2945 virtio_net_announce_timer, n); 2946 if (n->announce_timer.round) { 2947 timer_mod(n->announce_timer.tm, 2948 qemu_clock_get_ms(n->announce_timer.type)); 2949 } else { 2950 qemu_announce_timer_del(&n->announce_timer, false); 2951 } 2952 } 2953 2954 if (n->rss_data.enabled) { 2955 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 2956 if (!n->rss_data.populate_hash) { 2957 if (!virtio_net_attach_epbf_rss(n)) { 2958 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 2959 warn_report("Can't post-load eBPF RSS for vhost"); 2960 } else { 2961 warn_report("Can't post-load eBPF RSS - " 2962 "fallback to software RSS"); 2963 n->rss_data.enabled_software_rss = true; 2964 } 2965 } 2966 } 2967 2968 trace_virtio_net_rss_enable(n->rss_data.hash_types, 2969 n->rss_data.indirections_len, 2970 sizeof(n->rss_data.key)); 2971 } else { 2972 trace_virtio_net_rss_disable(); 2973 } 2974 return 0; 2975 } 2976 2977 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 2978 { 2979 VirtIONet *n = VIRTIO_NET(vdev); 2980 /* 2981 * The actual needed state is now in saved_guest_offloads, 2982 * see virtio_net_post_load_device for detail. 2983 * Restore it back and apply the desired offloads. 2984 */ 2985 n->curr_guest_offloads = n->saved_guest_offloads; 2986 if (peer_has_vnet_hdr(n)) { 2987 virtio_net_apply_guest_offloads(n); 2988 } 2989 2990 return 0; 2991 } 2992 2993 /* tx_waiting field of a VirtIONetQueue */ 2994 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 2995 .name = "virtio-net-queue-tx_waiting", 2996 .fields = (VMStateField[]) { 2997 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 2998 VMSTATE_END_OF_LIST() 2999 }, 3000 }; 3001 3002 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3003 { 3004 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3005 } 3006 3007 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3008 { 3009 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3010 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3011 } 3012 3013 static bool mac_table_fits(void *opaque, int version_id) 3014 { 3015 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3016 } 3017 3018 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3019 { 3020 return !mac_table_fits(opaque, version_id); 3021 } 3022 3023 /* This temporary type is shared by all the WITH_TMP methods 3024 * although only some fields are used by each. 3025 */ 3026 struct VirtIONetMigTmp { 3027 VirtIONet *parent; 3028 VirtIONetQueue *vqs_1; 3029 uint16_t curr_queue_pairs_1; 3030 uint8_t has_ufo; 3031 uint32_t has_vnet_hdr; 3032 }; 3033 3034 /* The 2nd and subsequent tx_waiting flags are loaded later than 3035 * the 1st entry in the queue_pairs and only if there's more than one 3036 * entry. We use the tmp mechanism to calculate a temporary 3037 * pointer and count and also validate the count. 3038 */ 3039 3040 static int virtio_net_tx_waiting_pre_save(void *opaque) 3041 { 3042 struct VirtIONetMigTmp *tmp = opaque; 3043 3044 tmp->vqs_1 = tmp->parent->vqs + 1; 3045 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3046 if (tmp->parent->curr_queue_pairs == 0) { 3047 tmp->curr_queue_pairs_1 = 0; 3048 } 3049 3050 return 0; 3051 } 3052 3053 static int virtio_net_tx_waiting_pre_load(void *opaque) 3054 { 3055 struct VirtIONetMigTmp *tmp = opaque; 3056 3057 /* Reuse the pointer setup from save */ 3058 virtio_net_tx_waiting_pre_save(opaque); 3059 3060 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3061 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3062 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3063 3064 return -EINVAL; 3065 } 3066 3067 return 0; /* all good */ 3068 } 3069 3070 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3071 .name = "virtio-net-tx_waiting", 3072 .pre_load = virtio_net_tx_waiting_pre_load, 3073 .pre_save = virtio_net_tx_waiting_pre_save, 3074 .fields = (VMStateField[]) { 3075 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3076 curr_queue_pairs_1, 3077 vmstate_virtio_net_queue_tx_waiting, 3078 struct VirtIONetQueue), 3079 VMSTATE_END_OF_LIST() 3080 }, 3081 }; 3082 3083 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3084 * flag set we need to check that we have it 3085 */ 3086 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3087 { 3088 struct VirtIONetMigTmp *tmp = opaque; 3089 3090 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3091 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3092 return -EINVAL; 3093 } 3094 3095 return 0; 3096 } 3097 3098 static int virtio_net_ufo_pre_save(void *opaque) 3099 { 3100 struct VirtIONetMigTmp *tmp = opaque; 3101 3102 tmp->has_ufo = tmp->parent->has_ufo; 3103 3104 return 0; 3105 } 3106 3107 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3108 .name = "virtio-net-ufo", 3109 .post_load = virtio_net_ufo_post_load, 3110 .pre_save = virtio_net_ufo_pre_save, 3111 .fields = (VMStateField[]) { 3112 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3113 VMSTATE_END_OF_LIST() 3114 }, 3115 }; 3116 3117 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3118 * flag set we need to check that we have it 3119 */ 3120 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3121 { 3122 struct VirtIONetMigTmp *tmp = opaque; 3123 3124 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3125 error_report("virtio-net: saved image requires vnet_hdr=on"); 3126 return -EINVAL; 3127 } 3128 3129 return 0; 3130 } 3131 3132 static int virtio_net_vnet_pre_save(void *opaque) 3133 { 3134 struct VirtIONetMigTmp *tmp = opaque; 3135 3136 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3137 3138 return 0; 3139 } 3140 3141 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3142 .name = "virtio-net-vnet", 3143 .post_load = virtio_net_vnet_post_load, 3144 .pre_save = virtio_net_vnet_pre_save, 3145 .fields = (VMStateField[]) { 3146 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3147 VMSTATE_END_OF_LIST() 3148 }, 3149 }; 3150 3151 static bool virtio_net_rss_needed(void *opaque) 3152 { 3153 return VIRTIO_NET(opaque)->rss_data.enabled; 3154 } 3155 3156 static const VMStateDescription vmstate_virtio_net_rss = { 3157 .name = "virtio-net-device/rss", 3158 .version_id = 1, 3159 .minimum_version_id = 1, 3160 .needed = virtio_net_rss_needed, 3161 .fields = (VMStateField[]) { 3162 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3163 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3164 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3165 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3166 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3167 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3168 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3169 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3170 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3171 rss_data.indirections_len, 0, 3172 vmstate_info_uint16, uint16_t), 3173 VMSTATE_END_OF_LIST() 3174 }, 3175 }; 3176 3177 static const VMStateDescription vmstate_virtio_net_device = { 3178 .name = "virtio-net-device", 3179 .version_id = VIRTIO_NET_VM_VERSION, 3180 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3181 .post_load = virtio_net_post_load_device, 3182 .fields = (VMStateField[]) { 3183 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3184 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3185 vmstate_virtio_net_queue_tx_waiting, 3186 VirtIONetQueue), 3187 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3188 VMSTATE_UINT16(status, VirtIONet), 3189 VMSTATE_UINT8(promisc, VirtIONet), 3190 VMSTATE_UINT8(allmulti, VirtIONet), 3191 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3192 3193 /* Guarded pair: If it fits we load it, else we throw it away 3194 * - can happen if source has a larger MAC table.; post-load 3195 * sets flags in this case. 3196 */ 3197 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3198 0, mac_table_fits, mac_table.in_use, 3199 ETH_ALEN), 3200 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3201 mac_table.in_use, ETH_ALEN), 3202 3203 /* Note: This is an array of uint32's that's always been saved as a 3204 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3205 * but based on the uint. 3206 */ 3207 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3208 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3209 vmstate_virtio_net_has_vnet), 3210 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3211 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3212 VMSTATE_UINT8(alluni, VirtIONet), 3213 VMSTATE_UINT8(nomulti, VirtIONet), 3214 VMSTATE_UINT8(nouni, VirtIONet), 3215 VMSTATE_UINT8(nobcast, VirtIONet), 3216 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3217 vmstate_virtio_net_has_ufo), 3218 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3219 vmstate_info_uint16_equal, uint16_t), 3220 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3221 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3222 vmstate_virtio_net_tx_waiting), 3223 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3224 has_ctrl_guest_offloads), 3225 VMSTATE_END_OF_LIST() 3226 }, 3227 .subsections = (const VMStateDescription * []) { 3228 &vmstate_virtio_net_rss, 3229 NULL 3230 } 3231 }; 3232 3233 static NetClientInfo net_virtio_info = { 3234 .type = NET_CLIENT_DRIVER_NIC, 3235 .size = sizeof(NICState), 3236 .can_receive = virtio_net_can_receive, 3237 .receive = virtio_net_receive, 3238 .link_status_changed = virtio_net_set_link_status, 3239 .query_rx_filter = virtio_net_query_rxfilter, 3240 .announce = virtio_net_announce, 3241 }; 3242 3243 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3244 { 3245 VirtIONet *n = VIRTIO_NET(vdev); 3246 NetClientState *nc; 3247 assert(n->vhost_started); 3248 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3249 /* Must guard against invalid features and bogus queue index 3250 * from being set by malicious guest, or penetrated through 3251 * buggy migration stream. 3252 */ 3253 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3254 qemu_log_mask(LOG_GUEST_ERROR, 3255 "%s: bogus vq index ignored\n", __func__); 3256 return false; 3257 } 3258 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3259 } else { 3260 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3261 } 3262 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3263 } 3264 3265 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3266 bool mask) 3267 { 3268 VirtIONet *n = VIRTIO_NET(vdev); 3269 NetClientState *nc; 3270 assert(n->vhost_started); 3271 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3272 /* Must guard against invalid features and bogus queue index 3273 * from being set by malicious guest, or penetrated through 3274 * buggy migration stream. 3275 */ 3276 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3277 qemu_log_mask(LOG_GUEST_ERROR, 3278 "%s: bogus vq index ignored\n", __func__); 3279 return; 3280 } 3281 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3282 } else { 3283 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3284 } 3285 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3286 vdev, idx, mask); 3287 } 3288 3289 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3290 { 3291 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3292 3293 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3294 } 3295 3296 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3297 const char *type) 3298 { 3299 /* 3300 * The name can be NULL, the netclient name will be type.x. 3301 */ 3302 assert(type != NULL); 3303 3304 g_free(n->netclient_name); 3305 g_free(n->netclient_type); 3306 n->netclient_name = g_strdup(name); 3307 n->netclient_type = g_strdup(type); 3308 } 3309 3310 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3311 { 3312 HotplugHandler *hotplug_ctrl; 3313 PCIDevice *pci_dev; 3314 Error *err = NULL; 3315 3316 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3317 if (hotplug_ctrl) { 3318 pci_dev = PCI_DEVICE(dev); 3319 pci_dev->partially_hotplugged = true; 3320 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3321 if (err) { 3322 error_report_err(err); 3323 return false; 3324 } 3325 } else { 3326 return false; 3327 } 3328 return true; 3329 } 3330 3331 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3332 Error **errp) 3333 { 3334 Error *err = NULL; 3335 HotplugHandler *hotplug_ctrl; 3336 PCIDevice *pdev = PCI_DEVICE(dev); 3337 BusState *primary_bus; 3338 3339 if (!pdev->partially_hotplugged) { 3340 return true; 3341 } 3342 primary_bus = dev->parent_bus; 3343 if (!primary_bus) { 3344 error_setg(errp, "virtio_net: couldn't find primary bus"); 3345 return false; 3346 } 3347 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3348 qatomic_set(&n->failover_primary_hidden, false); 3349 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3350 if (hotplug_ctrl) { 3351 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3352 if (err) { 3353 goto out; 3354 } 3355 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3356 } 3357 pdev->partially_hotplugged = false; 3358 3359 out: 3360 error_propagate(errp, err); 3361 return !err; 3362 } 3363 3364 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3365 { 3366 bool should_be_hidden; 3367 Error *err = NULL; 3368 DeviceState *dev = failover_find_primary_device(n); 3369 3370 if (!dev) { 3371 return; 3372 } 3373 3374 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3375 3376 if (migration_in_setup(s) && !should_be_hidden) { 3377 if (failover_unplug_primary(n, dev)) { 3378 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3379 qapi_event_send_unplug_primary(dev->id); 3380 qatomic_set(&n->failover_primary_hidden, true); 3381 } else { 3382 warn_report("couldn't unplug primary device"); 3383 } 3384 } else if (migration_has_failed(s)) { 3385 /* We already unplugged the device let's plug it back */ 3386 if (!failover_replug_primary(n, dev, &err)) { 3387 if (err) { 3388 error_report_err(err); 3389 } 3390 } 3391 } 3392 } 3393 3394 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3395 { 3396 MigrationState *s = data; 3397 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3398 virtio_net_handle_migration_primary(n, s); 3399 } 3400 3401 static bool failover_hide_primary_device(DeviceListener *listener, 3402 const QDict *device_opts, 3403 bool from_json, 3404 Error **errp) 3405 { 3406 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3407 const char *standby_id; 3408 3409 if (!device_opts) { 3410 return false; 3411 } 3412 3413 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3414 return false; 3415 } 3416 3417 if (!qdict_haskey(device_opts, "id")) { 3418 error_setg(errp, "Device with failover_pair_id needs to have id"); 3419 return false; 3420 } 3421 3422 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3423 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3424 return false; 3425 } 3426 3427 /* 3428 * The hide helper can be called several times for a given device. 3429 * Check there is only one primary for a virtio-net device but 3430 * don't duplicate the qdict several times if it's called for the same 3431 * device. 3432 */ 3433 if (n->primary_opts) { 3434 const char *old, *new; 3435 /* devices with failover_pair_id always have an id */ 3436 old = qdict_get_str(n->primary_opts, "id"); 3437 new = qdict_get_str(device_opts, "id"); 3438 if (strcmp(old, new) != 0) { 3439 error_setg(errp, "Cannot attach more than one primary device to " 3440 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3441 return false; 3442 } 3443 } else { 3444 n->primary_opts = qdict_clone_shallow(device_opts); 3445 n->primary_opts_from_json = from_json; 3446 } 3447 3448 /* failover_primary_hidden is set during feature negotiation */ 3449 return qatomic_read(&n->failover_primary_hidden); 3450 } 3451 3452 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3453 { 3454 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3455 VirtIONet *n = VIRTIO_NET(dev); 3456 NetClientState *nc; 3457 int i; 3458 3459 if (n->net_conf.mtu) { 3460 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3461 } 3462 3463 if (n->net_conf.duplex_str) { 3464 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3465 n->net_conf.duplex = DUPLEX_HALF; 3466 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3467 n->net_conf.duplex = DUPLEX_FULL; 3468 } else { 3469 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3470 return; 3471 } 3472 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3473 } else { 3474 n->net_conf.duplex = DUPLEX_UNKNOWN; 3475 } 3476 3477 if (n->net_conf.speed < SPEED_UNKNOWN) { 3478 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3479 return; 3480 } 3481 if (n->net_conf.speed >= 0) { 3482 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3483 } 3484 3485 if (n->failover) { 3486 n->primary_listener.hide_device = failover_hide_primary_device; 3487 qatomic_set(&n->failover_primary_hidden, true); 3488 device_listener_register(&n->primary_listener); 3489 n->migration_state.notify = virtio_net_migration_state_notifier; 3490 add_migration_state_change_notifier(&n->migration_state); 3491 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3492 } 3493 3494 virtio_net_set_config_size(n, n->host_features); 3495 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3496 3497 /* 3498 * We set a lower limit on RX queue size to what it always was. 3499 * Guests that want a smaller ring can always resize it without 3500 * help from us (using virtio 1 and up). 3501 */ 3502 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3503 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3504 !is_power_of_2(n->net_conf.rx_queue_size)) { 3505 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3506 "must be a power of 2 between %d and %d.", 3507 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3508 VIRTQUEUE_MAX_SIZE); 3509 virtio_cleanup(vdev); 3510 return; 3511 } 3512 3513 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3514 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3515 !is_power_of_2(n->net_conf.tx_queue_size)) { 3516 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3517 "must be a power of 2 between %d and %d", 3518 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3519 VIRTQUEUE_MAX_SIZE); 3520 virtio_cleanup(vdev); 3521 return; 3522 } 3523 3524 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3525 3526 /* 3527 * Figure out the datapath queue pairs since the backend could 3528 * provide control queue via peers as well. 3529 */ 3530 if (n->nic_conf.peers.queues) { 3531 for (i = 0; i < n->max_ncs; i++) { 3532 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3533 ++n->max_queue_pairs; 3534 } 3535 } 3536 } 3537 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3538 3539 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3540 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3541 "must be a positive integer less than %d.", 3542 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3543 virtio_cleanup(vdev); 3544 return; 3545 } 3546 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3547 n->curr_queue_pairs = 1; 3548 n->tx_timeout = n->net_conf.txtimer; 3549 3550 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3551 && strcmp(n->net_conf.tx, "bh")) { 3552 warn_report("virtio-net: " 3553 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3554 n->net_conf.tx); 3555 error_printf("Defaulting to \"bh\""); 3556 } 3557 3558 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3559 n->net_conf.tx_queue_size); 3560 3561 for (i = 0; i < n->max_queue_pairs; i++) { 3562 virtio_net_add_queue(n, i); 3563 } 3564 3565 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3566 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3567 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3568 n->status = VIRTIO_NET_S_LINK_UP; 3569 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3570 QEMU_CLOCK_VIRTUAL, 3571 virtio_net_announce_timer, n); 3572 n->announce_timer.round = 0; 3573 3574 if (n->netclient_type) { 3575 /* 3576 * Happen when virtio_net_set_netclient_name has been called. 3577 */ 3578 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3579 n->netclient_type, n->netclient_name, n); 3580 } else { 3581 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3582 object_get_typename(OBJECT(dev)), dev->id, n); 3583 } 3584 3585 for (i = 0; i < n->max_queue_pairs; i++) { 3586 n->nic->ncs[i].do_not_pad = true; 3587 } 3588 3589 peer_test_vnet_hdr(n); 3590 if (peer_has_vnet_hdr(n)) { 3591 for (i = 0; i < n->max_queue_pairs; i++) { 3592 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3593 } 3594 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3595 } else { 3596 n->host_hdr_len = 0; 3597 } 3598 3599 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3600 3601 n->vqs[0].tx_waiting = 0; 3602 n->tx_burst = n->net_conf.txburst; 3603 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3604 n->promisc = 1; /* for compatibility */ 3605 3606 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3607 3608 n->vlans = g_malloc0(MAX_VLAN >> 3); 3609 3610 nc = qemu_get_queue(n->nic); 3611 nc->rxfilter_notify_enabled = 1; 3612 3613 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3614 struct virtio_net_config netcfg = {}; 3615 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3616 vhost_net_set_config(get_vhost_net(nc->peer), 3617 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3618 } 3619 QTAILQ_INIT(&n->rsc_chains); 3620 n->qdev = dev; 3621 3622 net_rx_pkt_init(&n->rx_pkt, false); 3623 3624 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3625 virtio_net_load_ebpf(n); 3626 } 3627 } 3628 3629 static void virtio_net_device_unrealize(DeviceState *dev) 3630 { 3631 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3632 VirtIONet *n = VIRTIO_NET(dev); 3633 int i, max_queue_pairs; 3634 3635 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3636 virtio_net_unload_ebpf(n); 3637 } 3638 3639 /* This will stop vhost backend if appropriate. */ 3640 virtio_net_set_status(vdev, 0); 3641 3642 g_free(n->netclient_name); 3643 n->netclient_name = NULL; 3644 g_free(n->netclient_type); 3645 n->netclient_type = NULL; 3646 3647 g_free(n->mac_table.macs); 3648 g_free(n->vlans); 3649 3650 if (n->failover) { 3651 qobject_unref(n->primary_opts); 3652 device_listener_unregister(&n->primary_listener); 3653 remove_migration_state_change_notifier(&n->migration_state); 3654 } else { 3655 assert(n->primary_opts == NULL); 3656 } 3657 3658 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3659 for (i = 0; i < max_queue_pairs; i++) { 3660 virtio_net_del_queue(n, i); 3661 } 3662 /* delete also control vq */ 3663 virtio_del_queue(vdev, max_queue_pairs * 2); 3664 qemu_announce_timer_del(&n->announce_timer, false); 3665 g_free(n->vqs); 3666 qemu_del_nic(n->nic); 3667 virtio_net_rsc_cleanup(n); 3668 g_free(n->rss_data.indirections_table); 3669 net_rx_pkt_uninit(n->rx_pkt); 3670 virtio_cleanup(vdev); 3671 } 3672 3673 static void virtio_net_instance_init(Object *obj) 3674 { 3675 VirtIONet *n = VIRTIO_NET(obj); 3676 3677 /* 3678 * The default config_size is sizeof(struct virtio_net_config). 3679 * Can be overriden with virtio_net_set_config_size. 3680 */ 3681 n->config_size = sizeof(struct virtio_net_config); 3682 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3683 "bootindex", "/ethernet-phy@0", 3684 DEVICE(n)); 3685 3686 ebpf_rss_init(&n->ebpf_rss); 3687 } 3688 3689 static int virtio_net_pre_save(void *opaque) 3690 { 3691 VirtIONet *n = opaque; 3692 3693 /* At this point, backend must be stopped, otherwise 3694 * it might keep writing to memory. */ 3695 assert(!n->vhost_started); 3696 3697 return 0; 3698 } 3699 3700 static bool primary_unplug_pending(void *opaque) 3701 { 3702 DeviceState *dev = opaque; 3703 DeviceState *primary; 3704 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3705 VirtIONet *n = VIRTIO_NET(vdev); 3706 3707 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3708 return false; 3709 } 3710 primary = failover_find_primary_device(n); 3711 return primary ? primary->pending_deleted_event : false; 3712 } 3713 3714 static bool dev_unplug_pending(void *opaque) 3715 { 3716 DeviceState *dev = opaque; 3717 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3718 3719 return vdc->primary_unplug_pending(dev); 3720 } 3721 3722 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3723 { 3724 VirtIONet *n = VIRTIO_NET(vdev); 3725 NetClientState *nc = qemu_get_queue(n->nic); 3726 struct vhost_net *net = get_vhost_net(nc->peer); 3727 return &net->dev; 3728 } 3729 3730 static const VMStateDescription vmstate_virtio_net = { 3731 .name = "virtio-net", 3732 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3733 .version_id = VIRTIO_NET_VM_VERSION, 3734 .fields = (VMStateField[]) { 3735 VMSTATE_VIRTIO_DEVICE, 3736 VMSTATE_END_OF_LIST() 3737 }, 3738 .pre_save = virtio_net_pre_save, 3739 .dev_unplug_pending = dev_unplug_pending, 3740 }; 3741 3742 static Property virtio_net_properties[] = { 3743 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3744 VIRTIO_NET_F_CSUM, true), 3745 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3746 VIRTIO_NET_F_GUEST_CSUM, true), 3747 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3748 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3749 VIRTIO_NET_F_GUEST_TSO4, true), 3750 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3751 VIRTIO_NET_F_GUEST_TSO6, true), 3752 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3753 VIRTIO_NET_F_GUEST_ECN, true), 3754 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3755 VIRTIO_NET_F_GUEST_UFO, true), 3756 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3757 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3758 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3759 VIRTIO_NET_F_HOST_TSO4, true), 3760 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3761 VIRTIO_NET_F_HOST_TSO6, true), 3762 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3763 VIRTIO_NET_F_HOST_ECN, true), 3764 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3765 VIRTIO_NET_F_HOST_UFO, true), 3766 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3767 VIRTIO_NET_F_MRG_RXBUF, true), 3768 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3769 VIRTIO_NET_F_STATUS, true), 3770 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3771 VIRTIO_NET_F_CTRL_VQ, true), 3772 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3773 VIRTIO_NET_F_CTRL_RX, true), 3774 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3775 VIRTIO_NET_F_CTRL_VLAN, true), 3776 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3777 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3778 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3779 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3780 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3781 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3782 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3783 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3784 VIRTIO_NET_F_RSS, false), 3785 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3786 VIRTIO_NET_F_HASH_REPORT, false), 3787 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3788 VIRTIO_NET_F_RSC_EXT, false), 3789 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3790 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3791 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3792 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3793 TX_TIMER_INTERVAL), 3794 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3795 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3796 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3797 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3798 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3799 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3800 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3801 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3802 true), 3803 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3804 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3805 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3806 DEFINE_PROP_END_OF_LIST(), 3807 }; 3808 3809 static void virtio_net_class_init(ObjectClass *klass, void *data) 3810 { 3811 DeviceClass *dc = DEVICE_CLASS(klass); 3812 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3813 3814 device_class_set_props(dc, virtio_net_properties); 3815 dc->vmsd = &vmstate_virtio_net; 3816 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3817 vdc->realize = virtio_net_device_realize; 3818 vdc->unrealize = virtio_net_device_unrealize; 3819 vdc->get_config = virtio_net_get_config; 3820 vdc->set_config = virtio_net_set_config; 3821 vdc->get_features = virtio_net_get_features; 3822 vdc->set_features = virtio_net_set_features; 3823 vdc->bad_features = virtio_net_bad_features; 3824 vdc->reset = virtio_net_reset; 3825 vdc->set_status = virtio_net_set_status; 3826 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3827 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3828 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3829 vdc->post_load = virtio_net_post_load_virtio; 3830 vdc->vmsd = &vmstate_virtio_net_device; 3831 vdc->primary_unplug_pending = primary_unplug_pending; 3832 vdc->get_vhost = virtio_net_get_vhost; 3833 } 3834 3835 static const TypeInfo virtio_net_info = { 3836 .name = TYPE_VIRTIO_NET, 3837 .parent = TYPE_VIRTIO_DEVICE, 3838 .instance_size = sizeof(VirtIONet), 3839 .instance_init = virtio_net_instance_init, 3840 .class_init = virtio_net_class_init, 3841 }; 3842 3843 static void virtio_register_types(void) 3844 { 3845 type_register_static(&virtio_net_info); 3846 } 3847 3848 type_init(virtio_register_types) 3849