1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 #define MAC_TABLE_ENTRIES 64 53 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ 54 55 /* previously fixed value */ 56 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 57 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 58 59 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 60 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 61 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 62 63 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 64 65 #define VIRTIO_NET_TCP_FLAG 0x3F 66 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 67 68 /* IPv4 max payload, 16 bits in the header */ 69 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 70 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 71 72 /* header length value in ip header without option */ 73 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 74 75 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 76 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 77 78 /* Purge coalesced packets timer interval, This value affects the performance 79 a lot, and should be tuned carefully, '300000'(300us) is the recommended 80 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 81 tso/gso/gro 'off'. */ 82 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 83 84 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 90 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 92 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 93 94 static const VirtIOFeature feature_sizes[] = { 95 {.flags = 1ULL << VIRTIO_NET_F_MAC, 96 .end = endof(struct virtio_net_config, mac)}, 97 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 98 .end = endof(struct virtio_net_config, status)}, 99 {.flags = 1ULL << VIRTIO_NET_F_MQ, 100 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 101 {.flags = 1ULL << VIRTIO_NET_F_MTU, 102 .end = endof(struct virtio_net_config, mtu)}, 103 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 104 .end = endof(struct virtio_net_config, duplex)}, 105 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 106 .end = endof(struct virtio_net_config, supported_hash_types)}, 107 {} 108 }; 109 110 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 111 { 112 VirtIONet *n = qemu_get_nic_opaque(nc); 113 114 return &n->vqs[nc->queue_index]; 115 } 116 117 static int vq2q(int queue_index) 118 { 119 return queue_index / 2; 120 } 121 122 /* TODO 123 * - we could suppress RX interrupt if we were so inclined. 124 */ 125 126 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 127 { 128 VirtIONet *n = VIRTIO_NET(vdev); 129 struct virtio_net_config netcfg; 130 NetClientState *nc = qemu_get_queue(n->nic); 131 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 132 133 int ret = 0; 134 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 135 virtio_stw_p(vdev, &netcfg.status, n->status); 136 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 137 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 138 memcpy(netcfg.mac, n->mac, ETH_ALEN); 139 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 140 netcfg.duplex = n->net_conf.duplex; 141 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 142 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 143 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 144 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 145 virtio_stl_p(vdev, &netcfg.supported_hash_types, 146 VIRTIO_NET_RSS_SUPPORTED_HASHES); 147 memcpy(config, &netcfg, n->config_size); 148 149 /* 150 * Is this VDPA? No peer means not VDPA: there's no way to 151 * disconnect/reconnect a VDPA peer. 152 */ 153 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 154 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 155 n->config_size); 156 if (ret != -1) { 157 /* 158 * Some NIC/kernel combinations present 0 as the mac address. As 159 * that is not a legal address, try to proceed with the 160 * address from the QEMU command line in the hope that the 161 * address has been configured correctly elsewhere - just not 162 * reported by the device. 163 */ 164 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 165 info_report("Zero hardware mac address detected. Ignoring."); 166 memcpy(netcfg.mac, n->mac, ETH_ALEN); 167 } 168 memcpy(config, &netcfg, n->config_size); 169 } 170 } 171 } 172 173 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 174 { 175 VirtIONet *n = VIRTIO_NET(vdev); 176 struct virtio_net_config netcfg = {}; 177 NetClientState *nc = qemu_get_queue(n->nic); 178 179 memcpy(&netcfg, config, n->config_size); 180 181 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 182 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 183 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 184 memcpy(n->mac, netcfg.mac, ETH_ALEN); 185 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 186 } 187 188 /* 189 * Is this VDPA? No peer means not VDPA: there's no way to 190 * disconnect/reconnect a VDPA peer. 191 */ 192 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 193 vhost_net_set_config(get_vhost_net(nc->peer), 194 (uint8_t *)&netcfg, 0, n->config_size, 195 VHOST_SET_CONFIG_TYPE_MASTER); 196 } 197 } 198 199 static bool virtio_net_started(VirtIONet *n, uint8_t status) 200 { 201 VirtIODevice *vdev = VIRTIO_DEVICE(n); 202 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 203 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 204 } 205 206 static void virtio_net_announce_notify(VirtIONet *net) 207 { 208 VirtIODevice *vdev = VIRTIO_DEVICE(net); 209 trace_virtio_net_announce_notify(); 210 211 net->status |= VIRTIO_NET_S_ANNOUNCE; 212 virtio_notify_config(vdev); 213 } 214 215 static void virtio_net_announce_timer(void *opaque) 216 { 217 VirtIONet *n = opaque; 218 trace_virtio_net_announce_timer(n->announce_timer.round); 219 220 n->announce_timer.round--; 221 virtio_net_announce_notify(n); 222 } 223 224 static void virtio_net_announce(NetClientState *nc) 225 { 226 VirtIONet *n = qemu_get_nic_opaque(nc); 227 VirtIODevice *vdev = VIRTIO_DEVICE(n); 228 229 /* 230 * Make sure the virtio migration announcement timer isn't running 231 * If it is, let it trigger announcement so that we do not cause 232 * confusion. 233 */ 234 if (n->announce_timer.round) { 235 return; 236 } 237 238 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 239 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 240 virtio_net_announce_notify(n); 241 } 242 } 243 244 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 245 { 246 VirtIODevice *vdev = VIRTIO_DEVICE(n); 247 NetClientState *nc = qemu_get_queue(n->nic); 248 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 249 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 250 n->max_ncs - n->max_queue_pairs : 0; 251 252 if (!get_vhost_net(nc->peer)) { 253 return; 254 } 255 256 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 257 !!n->vhost_started) { 258 return; 259 } 260 if (!n->vhost_started) { 261 int r, i; 262 263 if (n->needs_vnet_hdr_swap) { 264 error_report("backend does not support %s vnet headers; " 265 "falling back on userspace virtio", 266 virtio_is_big_endian(vdev) ? "BE" : "LE"); 267 return; 268 } 269 270 /* Any packets outstanding? Purge them to avoid touching rings 271 * when vhost is running. 272 */ 273 for (i = 0; i < queue_pairs; i++) { 274 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 275 276 /* Purge both directions: TX and RX. */ 277 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 278 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 279 } 280 281 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 282 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 283 if (r < 0) { 284 error_report("%uBytes MTU not supported by the backend", 285 n->net_conf.mtu); 286 287 return; 288 } 289 } 290 291 n->vhost_started = 1; 292 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 293 if (r < 0) { 294 error_report("unable to start vhost net: %d: " 295 "falling back on userspace virtio", -r); 296 n->vhost_started = 0; 297 } 298 } else { 299 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 300 n->vhost_started = 0; 301 } 302 } 303 304 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 305 NetClientState *peer, 306 bool enable) 307 { 308 if (virtio_is_big_endian(vdev)) { 309 return qemu_set_vnet_be(peer, enable); 310 } else { 311 return qemu_set_vnet_le(peer, enable); 312 } 313 } 314 315 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 316 int queue_pairs, bool enable) 317 { 318 int i; 319 320 for (i = 0; i < queue_pairs; i++) { 321 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 322 enable) { 323 while (--i >= 0) { 324 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 325 } 326 327 return true; 328 } 329 } 330 331 return false; 332 } 333 334 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 335 { 336 VirtIODevice *vdev = VIRTIO_DEVICE(n); 337 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 338 339 if (virtio_net_started(n, status)) { 340 /* Before using the device, we tell the network backend about the 341 * endianness to use when parsing vnet headers. If the backend 342 * can't do it, we fallback onto fixing the headers in the core 343 * virtio-net code. 344 */ 345 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 346 queue_pairs, true); 347 } else if (virtio_net_started(n, vdev->status)) { 348 /* After using the device, we need to reset the network backend to 349 * the default (guest native endianness), otherwise the guest may 350 * lose network connectivity if it is rebooted into a different 351 * endianness. 352 */ 353 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 354 } 355 } 356 357 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 358 { 359 unsigned int dropped = virtqueue_drop_all(vq); 360 if (dropped) { 361 virtio_notify(vdev, vq); 362 } 363 } 364 365 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 366 { 367 VirtIONet *n = VIRTIO_NET(vdev); 368 VirtIONetQueue *q; 369 int i; 370 uint8_t queue_status; 371 372 virtio_net_vnet_endian_status(n, status); 373 virtio_net_vhost_status(n, status); 374 375 for (i = 0; i < n->max_queue_pairs; i++) { 376 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 377 bool queue_started; 378 q = &n->vqs[i]; 379 380 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 381 queue_status = 0; 382 } else { 383 queue_status = status; 384 } 385 queue_started = 386 virtio_net_started(n, queue_status) && !n->vhost_started; 387 388 if (queue_started) { 389 qemu_flush_queued_packets(ncs); 390 } 391 392 if (!q->tx_waiting) { 393 continue; 394 } 395 396 if (queue_started) { 397 if (q->tx_timer) { 398 timer_mod(q->tx_timer, 399 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 400 } else { 401 qemu_bh_schedule(q->tx_bh); 402 } 403 } else { 404 if (q->tx_timer) { 405 timer_del(q->tx_timer); 406 } else { 407 qemu_bh_cancel(q->tx_bh); 408 } 409 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 410 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 411 vdev->vm_running) { 412 /* if tx is waiting we are likely have some packets in tx queue 413 * and disabled notification */ 414 q->tx_waiting = 0; 415 virtio_queue_set_notification(q->tx_vq, 1); 416 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 417 } 418 } 419 } 420 } 421 422 static void virtio_net_set_link_status(NetClientState *nc) 423 { 424 VirtIONet *n = qemu_get_nic_opaque(nc); 425 VirtIODevice *vdev = VIRTIO_DEVICE(n); 426 uint16_t old_status = n->status; 427 428 if (nc->link_down) 429 n->status &= ~VIRTIO_NET_S_LINK_UP; 430 else 431 n->status |= VIRTIO_NET_S_LINK_UP; 432 433 if (n->status != old_status) 434 virtio_notify_config(vdev); 435 436 virtio_net_set_status(vdev, vdev->status); 437 } 438 439 static void rxfilter_notify(NetClientState *nc) 440 { 441 VirtIONet *n = qemu_get_nic_opaque(nc); 442 443 if (nc->rxfilter_notify_enabled) { 444 char *path = object_get_canonical_path(OBJECT(n->qdev)); 445 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name, 446 n->netclient_name, path); 447 g_free(path); 448 449 /* disable event notification to avoid events flooding */ 450 nc->rxfilter_notify_enabled = 0; 451 } 452 } 453 454 static intList *get_vlan_table(VirtIONet *n) 455 { 456 intList *list; 457 int i, j; 458 459 list = NULL; 460 for (i = 0; i < MAX_VLAN >> 5; i++) { 461 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 462 if (n->vlans[i] & (1U << j)) { 463 QAPI_LIST_PREPEND(list, (i << 5) + j); 464 } 465 } 466 } 467 468 return list; 469 } 470 471 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 472 { 473 VirtIONet *n = qemu_get_nic_opaque(nc); 474 VirtIODevice *vdev = VIRTIO_DEVICE(n); 475 RxFilterInfo *info; 476 strList *str_list; 477 int i; 478 479 info = g_malloc0(sizeof(*info)); 480 info->name = g_strdup(nc->name); 481 info->promiscuous = n->promisc; 482 483 if (n->nouni) { 484 info->unicast = RX_STATE_NONE; 485 } else if (n->alluni) { 486 info->unicast = RX_STATE_ALL; 487 } else { 488 info->unicast = RX_STATE_NORMAL; 489 } 490 491 if (n->nomulti) { 492 info->multicast = RX_STATE_NONE; 493 } else if (n->allmulti) { 494 info->multicast = RX_STATE_ALL; 495 } else { 496 info->multicast = RX_STATE_NORMAL; 497 } 498 499 info->broadcast_allowed = n->nobcast; 500 info->multicast_overflow = n->mac_table.multi_overflow; 501 info->unicast_overflow = n->mac_table.uni_overflow; 502 503 info->main_mac = qemu_mac_strdup_printf(n->mac); 504 505 str_list = NULL; 506 for (i = 0; i < n->mac_table.first_multi; i++) { 507 QAPI_LIST_PREPEND(str_list, 508 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 509 } 510 info->unicast_table = str_list; 511 512 str_list = NULL; 513 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 514 QAPI_LIST_PREPEND(str_list, 515 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 516 } 517 info->multicast_table = str_list; 518 info->vlan_table = get_vlan_table(n); 519 520 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 521 info->vlan = RX_STATE_ALL; 522 } else if (!info->vlan_table) { 523 info->vlan = RX_STATE_NONE; 524 } else { 525 info->vlan = RX_STATE_NORMAL; 526 } 527 528 /* enable event notification after query */ 529 nc->rxfilter_notify_enabled = 1; 530 531 return info; 532 } 533 534 static void virtio_net_reset(VirtIODevice *vdev) 535 { 536 VirtIONet *n = VIRTIO_NET(vdev); 537 int i; 538 539 /* Reset back to compatibility mode */ 540 n->promisc = 1; 541 n->allmulti = 0; 542 n->alluni = 0; 543 n->nomulti = 0; 544 n->nouni = 0; 545 n->nobcast = 0; 546 /* multiqueue is disabled by default */ 547 n->curr_queue_pairs = 1; 548 timer_del(n->announce_timer.tm); 549 n->announce_timer.round = 0; 550 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 551 552 /* Flush any MAC and VLAN filter table state */ 553 n->mac_table.in_use = 0; 554 n->mac_table.first_multi = 0; 555 n->mac_table.multi_overflow = 0; 556 n->mac_table.uni_overflow = 0; 557 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 558 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 559 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 560 memset(n->vlans, 0, MAX_VLAN >> 3); 561 562 /* Flush any async TX */ 563 for (i = 0; i < n->max_queue_pairs; i++) { 564 NetClientState *nc = qemu_get_subqueue(n->nic, i); 565 566 if (nc->peer) { 567 qemu_flush_or_purge_queued_packets(nc->peer, true); 568 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 569 } 570 } 571 } 572 573 static void peer_test_vnet_hdr(VirtIONet *n) 574 { 575 NetClientState *nc = qemu_get_queue(n->nic); 576 if (!nc->peer) { 577 return; 578 } 579 580 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 581 } 582 583 static int peer_has_vnet_hdr(VirtIONet *n) 584 { 585 return n->has_vnet_hdr; 586 } 587 588 static int peer_has_ufo(VirtIONet *n) 589 { 590 if (!peer_has_vnet_hdr(n)) 591 return 0; 592 593 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 594 595 return n->has_ufo; 596 } 597 598 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 599 int version_1, int hash_report) 600 { 601 int i; 602 NetClientState *nc; 603 604 n->mergeable_rx_bufs = mergeable_rx_bufs; 605 606 if (version_1) { 607 n->guest_hdr_len = hash_report ? 608 sizeof(struct virtio_net_hdr_v1_hash) : 609 sizeof(struct virtio_net_hdr_mrg_rxbuf); 610 n->rss_data.populate_hash = !!hash_report; 611 } else { 612 n->guest_hdr_len = n->mergeable_rx_bufs ? 613 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 614 sizeof(struct virtio_net_hdr); 615 } 616 617 for (i = 0; i < n->max_queue_pairs; i++) { 618 nc = qemu_get_subqueue(n->nic, i); 619 620 if (peer_has_vnet_hdr(n) && 621 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 622 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 623 n->host_hdr_len = n->guest_hdr_len; 624 } 625 } 626 } 627 628 static int virtio_net_max_tx_queue_size(VirtIONet *n) 629 { 630 NetClientState *peer = n->nic_conf.peers.ncs[0]; 631 632 /* 633 * Backends other than vhost-user or vhost-vdpa don't support max queue 634 * size. 635 */ 636 if (!peer) { 637 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 638 } 639 640 switch(peer->info->type) { 641 case NET_CLIENT_DRIVER_VHOST_USER: 642 case NET_CLIENT_DRIVER_VHOST_VDPA: 643 return VIRTQUEUE_MAX_SIZE; 644 default: 645 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 646 }; 647 } 648 649 static int peer_attach(VirtIONet *n, int index) 650 { 651 NetClientState *nc = qemu_get_subqueue(n->nic, index); 652 653 if (!nc->peer) { 654 return 0; 655 } 656 657 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 658 vhost_set_vring_enable(nc->peer, 1); 659 } 660 661 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 662 return 0; 663 } 664 665 if (n->max_queue_pairs == 1) { 666 return 0; 667 } 668 669 return tap_enable(nc->peer); 670 } 671 672 static int peer_detach(VirtIONet *n, int index) 673 { 674 NetClientState *nc = qemu_get_subqueue(n->nic, index); 675 676 if (!nc->peer) { 677 return 0; 678 } 679 680 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 681 vhost_set_vring_enable(nc->peer, 0); 682 } 683 684 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 685 return 0; 686 } 687 688 return tap_disable(nc->peer); 689 } 690 691 static void virtio_net_set_queue_pairs(VirtIONet *n) 692 { 693 int i; 694 int r; 695 696 if (n->nic->peer_deleted) { 697 return; 698 } 699 700 for (i = 0; i < n->max_queue_pairs; i++) { 701 if (i < n->curr_queue_pairs) { 702 r = peer_attach(n, i); 703 assert(!r); 704 } else { 705 r = peer_detach(n, i); 706 assert(!r); 707 } 708 } 709 } 710 711 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 712 713 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 714 Error **errp) 715 { 716 VirtIONet *n = VIRTIO_NET(vdev); 717 NetClientState *nc = qemu_get_queue(n->nic); 718 719 /* Firstly sync all virtio-net possible supported features */ 720 features |= n->host_features; 721 722 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 723 724 if (!peer_has_vnet_hdr(n)) { 725 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 726 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 727 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 728 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 729 730 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 731 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 732 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 733 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 734 735 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 736 } 737 738 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 739 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 740 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 741 } 742 743 if (!get_vhost_net(nc->peer)) { 744 return features; 745 } 746 747 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 748 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 749 } 750 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 751 vdev->backend_features = features; 752 753 if (n->mtu_bypass_backend && 754 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 755 features |= (1ULL << VIRTIO_NET_F_MTU); 756 } 757 758 return features; 759 } 760 761 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 762 { 763 uint64_t features = 0; 764 765 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 766 * but also these: */ 767 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 768 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 769 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 770 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 771 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 772 773 return features; 774 } 775 776 static void virtio_net_apply_guest_offloads(VirtIONet *n) 777 { 778 qemu_set_offload(qemu_get_queue(n->nic)->peer, 779 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 780 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 781 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 782 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 783 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); 784 } 785 786 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) 787 { 788 static const uint64_t guest_offloads_mask = 789 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 790 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 791 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 792 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 793 (1ULL << VIRTIO_NET_F_GUEST_UFO); 794 795 return guest_offloads_mask & features; 796 } 797 798 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) 799 { 800 VirtIODevice *vdev = VIRTIO_DEVICE(n); 801 return virtio_net_guest_offloads_by_features(vdev->guest_features); 802 } 803 804 typedef struct { 805 VirtIONet *n; 806 DeviceState *dev; 807 } FailoverDevice; 808 809 /** 810 * Set the failover primary device 811 * 812 * @opaque: FailoverId to setup 813 * @opts: opts for device we are handling 814 * @errp: returns an error if this function fails 815 */ 816 static int failover_set_primary(DeviceState *dev, void *opaque) 817 { 818 FailoverDevice *fdev = opaque; 819 PCIDevice *pci_dev = (PCIDevice *) 820 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 821 822 if (!pci_dev) { 823 return 0; 824 } 825 826 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 827 fdev->dev = dev; 828 return 1; 829 } 830 831 return 0; 832 } 833 834 /** 835 * Find the primary device for this failover virtio-net 836 * 837 * @n: VirtIONet device 838 * @errp: returns an error if this function fails 839 */ 840 static DeviceState *failover_find_primary_device(VirtIONet *n) 841 { 842 FailoverDevice fdev = { 843 .n = n, 844 }; 845 846 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 847 NULL, NULL, &fdev); 848 return fdev.dev; 849 } 850 851 static void failover_add_primary(VirtIONet *n, Error **errp) 852 { 853 Error *err = NULL; 854 DeviceState *dev = failover_find_primary_device(n); 855 856 if (dev) { 857 return; 858 } 859 860 if (!n->primary_opts) { 861 error_setg(errp, "Primary device not found"); 862 error_append_hint(errp, "Virtio-net failover will not work. Make " 863 "sure primary device has parameter" 864 " failover_pair_id=%s\n", n->netclient_name); 865 return; 866 } 867 868 dev = qdev_device_add_from_qdict(n->primary_opts, 869 n->primary_opts_from_json, 870 &err); 871 if (err) { 872 qobject_unref(n->primary_opts); 873 n->primary_opts = NULL; 874 } else { 875 object_unref(OBJECT(dev)); 876 } 877 error_propagate(errp, err); 878 } 879 880 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 881 { 882 VirtIONet *n = VIRTIO_NET(vdev); 883 Error *err = NULL; 884 int i; 885 886 if (n->mtu_bypass_backend && 887 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 888 features &= ~(1ULL << VIRTIO_NET_F_MTU); 889 } 890 891 virtio_net_set_multiqueue(n, 892 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 893 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 894 895 virtio_net_set_mrg_rx_bufs(n, 896 virtio_has_feature(features, 897 VIRTIO_NET_F_MRG_RXBUF), 898 virtio_has_feature(features, 899 VIRTIO_F_VERSION_1), 900 virtio_has_feature(features, 901 VIRTIO_NET_F_HASH_REPORT)); 902 903 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 904 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 905 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 906 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 907 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 908 909 if (n->has_vnet_hdr) { 910 n->curr_guest_offloads = 911 virtio_net_guest_offloads_by_features(features); 912 virtio_net_apply_guest_offloads(n); 913 } 914 915 for (i = 0; i < n->max_queue_pairs; i++) { 916 NetClientState *nc = qemu_get_subqueue(n->nic, i); 917 918 if (!get_vhost_net(nc->peer)) { 919 continue; 920 } 921 vhost_net_ack_features(get_vhost_net(nc->peer), features); 922 } 923 924 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 925 memset(n->vlans, 0, MAX_VLAN >> 3); 926 } else { 927 memset(n->vlans, 0xff, MAX_VLAN >> 3); 928 } 929 930 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 931 qapi_event_send_failover_negotiated(n->netclient_name); 932 qatomic_set(&n->failover_primary_hidden, false); 933 failover_add_primary(n, &err); 934 if (err) { 935 if (!qtest_enabled()) { 936 warn_report_err(err); 937 } else { 938 error_free(err); 939 } 940 } 941 } 942 } 943 944 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 945 struct iovec *iov, unsigned int iov_cnt) 946 { 947 uint8_t on; 948 size_t s; 949 NetClientState *nc = qemu_get_queue(n->nic); 950 951 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 952 if (s != sizeof(on)) { 953 return VIRTIO_NET_ERR; 954 } 955 956 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 957 n->promisc = on; 958 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 959 n->allmulti = on; 960 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 961 n->alluni = on; 962 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 963 n->nomulti = on; 964 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 965 n->nouni = on; 966 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 967 n->nobcast = on; 968 } else { 969 return VIRTIO_NET_ERR; 970 } 971 972 rxfilter_notify(nc); 973 974 return VIRTIO_NET_OK; 975 } 976 977 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 978 struct iovec *iov, unsigned int iov_cnt) 979 { 980 VirtIODevice *vdev = VIRTIO_DEVICE(n); 981 uint64_t offloads; 982 size_t s; 983 984 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 985 return VIRTIO_NET_ERR; 986 } 987 988 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 989 if (s != sizeof(offloads)) { 990 return VIRTIO_NET_ERR; 991 } 992 993 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 994 uint64_t supported_offloads; 995 996 offloads = virtio_ldq_p(vdev, &offloads); 997 998 if (!n->has_vnet_hdr) { 999 return VIRTIO_NET_ERR; 1000 } 1001 1002 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1003 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1004 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1005 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1006 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1007 1008 supported_offloads = virtio_net_supported_guest_offloads(n); 1009 if (offloads & ~supported_offloads) { 1010 return VIRTIO_NET_ERR; 1011 } 1012 1013 n->curr_guest_offloads = offloads; 1014 virtio_net_apply_guest_offloads(n); 1015 1016 return VIRTIO_NET_OK; 1017 } else { 1018 return VIRTIO_NET_ERR; 1019 } 1020 } 1021 1022 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1023 struct iovec *iov, unsigned int iov_cnt) 1024 { 1025 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1026 struct virtio_net_ctrl_mac mac_data; 1027 size_t s; 1028 NetClientState *nc = qemu_get_queue(n->nic); 1029 1030 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1031 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1032 return VIRTIO_NET_ERR; 1033 } 1034 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1035 assert(s == sizeof(n->mac)); 1036 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1037 rxfilter_notify(nc); 1038 1039 return VIRTIO_NET_OK; 1040 } 1041 1042 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1043 return VIRTIO_NET_ERR; 1044 } 1045 1046 int in_use = 0; 1047 int first_multi = 0; 1048 uint8_t uni_overflow = 0; 1049 uint8_t multi_overflow = 0; 1050 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1051 1052 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1053 sizeof(mac_data.entries)); 1054 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1055 if (s != sizeof(mac_data.entries)) { 1056 goto error; 1057 } 1058 iov_discard_front(&iov, &iov_cnt, s); 1059 1060 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1061 goto error; 1062 } 1063 1064 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1065 s = iov_to_buf(iov, iov_cnt, 0, macs, 1066 mac_data.entries * ETH_ALEN); 1067 if (s != mac_data.entries * ETH_ALEN) { 1068 goto error; 1069 } 1070 in_use += mac_data.entries; 1071 } else { 1072 uni_overflow = 1; 1073 } 1074 1075 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1076 1077 first_multi = in_use; 1078 1079 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1080 sizeof(mac_data.entries)); 1081 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1082 if (s != sizeof(mac_data.entries)) { 1083 goto error; 1084 } 1085 1086 iov_discard_front(&iov, &iov_cnt, s); 1087 1088 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1089 goto error; 1090 } 1091 1092 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1093 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1094 mac_data.entries * ETH_ALEN); 1095 if (s != mac_data.entries * ETH_ALEN) { 1096 goto error; 1097 } 1098 in_use += mac_data.entries; 1099 } else { 1100 multi_overflow = 1; 1101 } 1102 1103 n->mac_table.in_use = in_use; 1104 n->mac_table.first_multi = first_multi; 1105 n->mac_table.uni_overflow = uni_overflow; 1106 n->mac_table.multi_overflow = multi_overflow; 1107 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1108 g_free(macs); 1109 rxfilter_notify(nc); 1110 1111 return VIRTIO_NET_OK; 1112 1113 error: 1114 g_free(macs); 1115 return VIRTIO_NET_ERR; 1116 } 1117 1118 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1119 struct iovec *iov, unsigned int iov_cnt) 1120 { 1121 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1122 uint16_t vid; 1123 size_t s; 1124 NetClientState *nc = qemu_get_queue(n->nic); 1125 1126 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1127 vid = virtio_lduw_p(vdev, &vid); 1128 if (s != sizeof(vid)) { 1129 return VIRTIO_NET_ERR; 1130 } 1131 1132 if (vid >= MAX_VLAN) 1133 return VIRTIO_NET_ERR; 1134 1135 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1136 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1137 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1138 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1139 else 1140 return VIRTIO_NET_ERR; 1141 1142 rxfilter_notify(nc); 1143 1144 return VIRTIO_NET_OK; 1145 } 1146 1147 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1148 struct iovec *iov, unsigned int iov_cnt) 1149 { 1150 trace_virtio_net_handle_announce(n->announce_timer.round); 1151 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1152 n->status & VIRTIO_NET_S_ANNOUNCE) { 1153 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1154 if (n->announce_timer.round) { 1155 qemu_announce_timer_step(&n->announce_timer); 1156 } 1157 return VIRTIO_NET_OK; 1158 } else { 1159 return VIRTIO_NET_ERR; 1160 } 1161 } 1162 1163 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1164 1165 static void virtio_net_disable_rss(VirtIONet *n) 1166 { 1167 if (n->rss_data.enabled) { 1168 trace_virtio_net_rss_disable(); 1169 } 1170 n->rss_data.enabled = false; 1171 1172 virtio_net_detach_epbf_rss(n); 1173 } 1174 1175 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1176 { 1177 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1178 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1179 return false; 1180 } 1181 1182 return nc->info->set_steering_ebpf(nc, prog_fd); 1183 } 1184 1185 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1186 struct EBPFRSSConfig *config) 1187 { 1188 config->redirect = data->redirect; 1189 config->populate_hash = data->populate_hash; 1190 config->hash_types = data->hash_types; 1191 config->indirections_len = data->indirections_len; 1192 config->default_queue = data->default_queue; 1193 } 1194 1195 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1196 { 1197 struct EBPFRSSConfig config = {}; 1198 1199 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1200 return false; 1201 } 1202 1203 rss_data_to_rss_config(&n->rss_data, &config); 1204 1205 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1206 n->rss_data.indirections_table, n->rss_data.key)) { 1207 return false; 1208 } 1209 1210 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1211 return false; 1212 } 1213 1214 return true; 1215 } 1216 1217 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1218 { 1219 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1220 } 1221 1222 static bool virtio_net_load_ebpf(VirtIONet *n) 1223 { 1224 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1225 /* backend does't support steering ebpf */ 1226 return false; 1227 } 1228 1229 return ebpf_rss_load(&n->ebpf_rss); 1230 } 1231 1232 static void virtio_net_unload_ebpf(VirtIONet *n) 1233 { 1234 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1235 ebpf_rss_unload(&n->ebpf_rss); 1236 } 1237 1238 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1239 struct iovec *iov, 1240 unsigned int iov_cnt, 1241 bool do_rss) 1242 { 1243 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1244 struct virtio_net_rss_config cfg; 1245 size_t s, offset = 0, size_get; 1246 uint16_t queue_pairs, i; 1247 struct { 1248 uint16_t us; 1249 uint8_t b; 1250 } QEMU_PACKED temp; 1251 const char *err_msg = ""; 1252 uint32_t err_value = 0; 1253 1254 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1255 err_msg = "RSS is not negotiated"; 1256 goto error; 1257 } 1258 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1259 err_msg = "Hash report is not negotiated"; 1260 goto error; 1261 } 1262 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1263 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1264 if (s != size_get) { 1265 err_msg = "Short command buffer"; 1266 err_value = (uint32_t)s; 1267 goto error; 1268 } 1269 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1270 n->rss_data.indirections_len = 1271 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1272 n->rss_data.indirections_len++; 1273 if (!do_rss) { 1274 n->rss_data.indirections_len = 1; 1275 } 1276 if (!is_power_of_2(n->rss_data.indirections_len)) { 1277 err_msg = "Invalid size of indirection table"; 1278 err_value = n->rss_data.indirections_len; 1279 goto error; 1280 } 1281 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1282 err_msg = "Too large indirection table"; 1283 err_value = n->rss_data.indirections_len; 1284 goto error; 1285 } 1286 n->rss_data.default_queue = do_rss ? 1287 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1288 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1289 err_msg = "Invalid default queue"; 1290 err_value = n->rss_data.default_queue; 1291 goto error; 1292 } 1293 offset += size_get; 1294 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1295 g_free(n->rss_data.indirections_table); 1296 n->rss_data.indirections_table = g_malloc(size_get); 1297 if (!n->rss_data.indirections_table) { 1298 err_msg = "Can't allocate indirections table"; 1299 err_value = n->rss_data.indirections_len; 1300 goto error; 1301 } 1302 s = iov_to_buf(iov, iov_cnt, offset, 1303 n->rss_data.indirections_table, size_get); 1304 if (s != size_get) { 1305 err_msg = "Short indirection table buffer"; 1306 err_value = (uint32_t)s; 1307 goto error; 1308 } 1309 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1310 uint16_t val = n->rss_data.indirections_table[i]; 1311 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1312 } 1313 offset += size_get; 1314 size_get = sizeof(temp); 1315 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1316 if (s != size_get) { 1317 err_msg = "Can't get queue_pairs"; 1318 err_value = (uint32_t)s; 1319 goto error; 1320 } 1321 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1322 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1323 err_msg = "Invalid number of queue_pairs"; 1324 err_value = queue_pairs; 1325 goto error; 1326 } 1327 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1328 err_msg = "Invalid key size"; 1329 err_value = temp.b; 1330 goto error; 1331 } 1332 if (!temp.b && n->rss_data.hash_types) { 1333 err_msg = "No key provided"; 1334 err_value = 0; 1335 goto error; 1336 } 1337 if (!temp.b && !n->rss_data.hash_types) { 1338 virtio_net_disable_rss(n); 1339 return queue_pairs; 1340 } 1341 offset += size_get; 1342 size_get = temp.b; 1343 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1344 if (s != size_get) { 1345 err_msg = "Can get key buffer"; 1346 err_value = (uint32_t)s; 1347 goto error; 1348 } 1349 n->rss_data.enabled = true; 1350 1351 if (!n->rss_data.populate_hash) { 1352 if (!virtio_net_attach_epbf_rss(n)) { 1353 /* EBPF must be loaded for vhost */ 1354 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1355 warn_report("Can't load eBPF RSS for vhost"); 1356 goto error; 1357 } 1358 /* fallback to software RSS */ 1359 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1360 n->rss_data.enabled_software_rss = true; 1361 } 1362 } else { 1363 /* use software RSS for hash populating */ 1364 /* and detach eBPF if was loaded before */ 1365 virtio_net_detach_epbf_rss(n); 1366 n->rss_data.enabled_software_rss = true; 1367 } 1368 1369 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1370 n->rss_data.indirections_len, 1371 temp.b); 1372 return queue_pairs; 1373 error: 1374 trace_virtio_net_rss_error(err_msg, err_value); 1375 virtio_net_disable_rss(n); 1376 return 0; 1377 } 1378 1379 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1380 struct iovec *iov, unsigned int iov_cnt) 1381 { 1382 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1383 uint16_t queue_pairs; 1384 NetClientState *nc = qemu_get_queue(n->nic); 1385 1386 virtio_net_disable_rss(n); 1387 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1388 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1389 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1390 } 1391 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1392 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1393 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1394 struct virtio_net_ctrl_mq mq; 1395 size_t s; 1396 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1397 return VIRTIO_NET_ERR; 1398 } 1399 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1400 if (s != sizeof(mq)) { 1401 return VIRTIO_NET_ERR; 1402 } 1403 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1404 1405 } else { 1406 return VIRTIO_NET_ERR; 1407 } 1408 1409 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1410 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1411 queue_pairs > n->max_queue_pairs || 1412 !n->multiqueue) { 1413 return VIRTIO_NET_ERR; 1414 } 1415 1416 /* Avoid changing the number of queue_pairs for vdpa device in 1417 * userspace handler. A future fix is needed to handle the mq 1418 * change in userspace handler with vhost-vdpa. Let's disable 1419 * the mq handling from userspace for now and only allow get 1420 * done through the kernel. Ripples may be seen when falling 1421 * back to userspace, but without doing it qemu process would 1422 * crash on a recursive entry to virtio_net_set_status(). 1423 */ 1424 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1425 return VIRTIO_NET_ERR; 1426 } 1427 1428 n->curr_queue_pairs = queue_pairs; 1429 /* stop the backend before changing the number of queue_pairs to avoid handling a 1430 * disabled queue */ 1431 virtio_net_set_status(vdev, vdev->status); 1432 virtio_net_set_queue_pairs(n); 1433 1434 return VIRTIO_NET_OK; 1435 } 1436 1437 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1438 { 1439 VirtIONet *n = VIRTIO_NET(vdev); 1440 struct virtio_net_ctrl_hdr ctrl; 1441 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1442 VirtQueueElement *elem; 1443 size_t s; 1444 struct iovec *iov, *iov2; 1445 unsigned int iov_cnt; 1446 1447 for (;;) { 1448 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1449 if (!elem) { 1450 break; 1451 } 1452 if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || 1453 iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { 1454 virtio_error(vdev, "virtio-net ctrl missing headers"); 1455 virtqueue_detach_element(vq, elem, 0); 1456 g_free(elem); 1457 break; 1458 } 1459 1460 iov_cnt = elem->out_num; 1461 iov2 = iov = g_memdup2(elem->out_sg, 1462 sizeof(struct iovec) * elem->out_num); 1463 s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); 1464 iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); 1465 if (s != sizeof(ctrl)) { 1466 status = VIRTIO_NET_ERR; 1467 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1468 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); 1469 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1470 status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); 1471 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1472 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); 1473 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1474 status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); 1475 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1476 status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); 1477 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1478 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); 1479 } 1480 1481 s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); 1482 assert(s == sizeof(status)); 1483 1484 virtqueue_push(vq, elem, sizeof(status)); 1485 virtio_notify(vdev, vq); 1486 g_free(iov2); 1487 g_free(elem); 1488 } 1489 } 1490 1491 /* RX */ 1492 1493 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1494 { 1495 VirtIONet *n = VIRTIO_NET(vdev); 1496 int queue_index = vq2q(virtio_get_queue_index(vq)); 1497 1498 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1499 } 1500 1501 static bool virtio_net_can_receive(NetClientState *nc) 1502 { 1503 VirtIONet *n = qemu_get_nic_opaque(nc); 1504 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1505 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1506 1507 if (!vdev->vm_running) { 1508 return false; 1509 } 1510 1511 if (nc->queue_index >= n->curr_queue_pairs) { 1512 return false; 1513 } 1514 1515 if (!virtio_queue_ready(q->rx_vq) || 1516 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1517 return false; 1518 } 1519 1520 return true; 1521 } 1522 1523 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1524 { 1525 VirtIONet *n = q->n; 1526 if (virtio_queue_empty(q->rx_vq) || 1527 (n->mergeable_rx_bufs && 1528 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1529 virtio_queue_set_notification(q->rx_vq, 1); 1530 1531 /* To avoid a race condition where the guest has made some buffers 1532 * available after the above check but before notification was 1533 * enabled, check for available buffers again. 1534 */ 1535 if (virtio_queue_empty(q->rx_vq) || 1536 (n->mergeable_rx_bufs && 1537 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1538 return 0; 1539 } 1540 } 1541 1542 virtio_queue_set_notification(q->rx_vq, 0); 1543 return 1; 1544 } 1545 1546 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1547 { 1548 virtio_tswap16s(vdev, &hdr->hdr_len); 1549 virtio_tswap16s(vdev, &hdr->gso_size); 1550 virtio_tswap16s(vdev, &hdr->csum_start); 1551 virtio_tswap16s(vdev, &hdr->csum_offset); 1552 } 1553 1554 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1555 * it never finds out that the packets don't have valid checksums. This 1556 * causes dhclient to get upset. Fedora's carried a patch for ages to 1557 * fix this with Xen but it hasn't appeared in an upstream release of 1558 * dhclient yet. 1559 * 1560 * To avoid breaking existing guests, we catch udp packets and add 1561 * checksums. This is terrible but it's better than hacking the guest 1562 * kernels. 1563 * 1564 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1565 * we should provide a mechanism to disable it to avoid polluting the host 1566 * cache. 1567 */ 1568 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1569 uint8_t *buf, size_t size) 1570 { 1571 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1572 (size > 27 && size < 1500) && /* normal sized MTU */ 1573 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1574 (buf[23] == 17) && /* ip.protocol == UDP */ 1575 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1576 net_checksum_calculate(buf, size, CSUM_UDP); 1577 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1578 } 1579 } 1580 1581 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1582 const void *buf, size_t size) 1583 { 1584 if (n->has_vnet_hdr) { 1585 /* FIXME this cast is evil */ 1586 void *wbuf = (void *)buf; 1587 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1588 size - n->host_hdr_len); 1589 1590 if (n->needs_vnet_hdr_swap) { 1591 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1592 } 1593 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1594 } else { 1595 struct virtio_net_hdr hdr = { 1596 .flags = 0, 1597 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1598 }; 1599 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1600 } 1601 } 1602 1603 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1604 { 1605 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1606 static const uint8_t vlan[] = {0x81, 0x00}; 1607 uint8_t *ptr = (uint8_t *)buf; 1608 int i; 1609 1610 if (n->promisc) 1611 return 1; 1612 1613 ptr += n->host_hdr_len; 1614 1615 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1616 int vid = lduw_be_p(ptr + 14) & 0xfff; 1617 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1618 return 0; 1619 } 1620 1621 if (ptr[0] & 1) { // multicast 1622 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1623 return !n->nobcast; 1624 } else if (n->nomulti) { 1625 return 0; 1626 } else if (n->allmulti || n->mac_table.multi_overflow) { 1627 return 1; 1628 } 1629 1630 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1631 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1632 return 1; 1633 } 1634 } 1635 } else { // unicast 1636 if (n->nouni) { 1637 return 0; 1638 } else if (n->alluni || n->mac_table.uni_overflow) { 1639 return 1; 1640 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1641 return 1; 1642 } 1643 1644 for (i = 0; i < n->mac_table.first_multi; i++) { 1645 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1646 return 1; 1647 } 1648 } 1649 } 1650 1651 return 0; 1652 } 1653 1654 static uint8_t virtio_net_get_hash_type(bool isip4, 1655 bool isip6, 1656 bool isudp, 1657 bool istcp, 1658 uint32_t types) 1659 { 1660 if (isip4) { 1661 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { 1662 return NetPktRssIpV4Tcp; 1663 } 1664 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { 1665 return NetPktRssIpV4Udp; 1666 } 1667 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1668 return NetPktRssIpV4; 1669 } 1670 } else if (isip6) { 1671 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 1672 VIRTIO_NET_RSS_HASH_TYPE_TCPv6; 1673 1674 if (istcp && (types & mask)) { 1675 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? 1676 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; 1677 } 1678 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; 1679 if (isudp && (types & mask)) { 1680 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? 1681 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; 1682 } 1683 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; 1684 if (types & mask) { 1685 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? 1686 NetPktRssIpV6Ex : NetPktRssIpV6; 1687 } 1688 } 1689 return 0xff; 1690 } 1691 1692 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1693 uint32_t hash) 1694 { 1695 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1696 hdr->hash_value = hash; 1697 hdr->hash_report = report; 1698 } 1699 1700 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1701 size_t size) 1702 { 1703 VirtIONet *n = qemu_get_nic_opaque(nc); 1704 unsigned int index = nc->queue_index, new_index = index; 1705 struct NetRxPkt *pkt = n->rx_pkt; 1706 uint8_t net_hash_type; 1707 uint32_t hash; 1708 bool isip4, isip6, isudp, istcp; 1709 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1710 VIRTIO_NET_HASH_REPORT_IPv4, 1711 VIRTIO_NET_HASH_REPORT_TCPv4, 1712 VIRTIO_NET_HASH_REPORT_TCPv6, 1713 VIRTIO_NET_HASH_REPORT_IPv6, 1714 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1715 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1716 VIRTIO_NET_HASH_REPORT_UDPv4, 1717 VIRTIO_NET_HASH_REPORT_UDPv6, 1718 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1719 }; 1720 1721 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, 1722 size - n->host_hdr_len); 1723 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1724 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { 1725 istcp = isudp = false; 1726 } 1727 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { 1728 istcp = isudp = false; 1729 } 1730 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, 1731 n->rss_data.hash_types); 1732 if (net_hash_type > NetPktRssIpV6UdpEx) { 1733 if (n->rss_data.populate_hash) { 1734 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1735 } 1736 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1737 } 1738 1739 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1740 1741 if (n->rss_data.populate_hash) { 1742 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1743 } 1744 1745 if (n->rss_data.redirect) { 1746 new_index = hash & (n->rss_data.indirections_len - 1); 1747 new_index = n->rss_data.indirections_table[new_index]; 1748 } 1749 1750 return (index == new_index) ? -1 : new_index; 1751 } 1752 1753 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1754 size_t size, bool no_rss) 1755 { 1756 VirtIONet *n = qemu_get_nic_opaque(nc); 1757 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1758 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1759 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1760 size_t lens[VIRTQUEUE_MAX_SIZE]; 1761 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1762 struct virtio_net_hdr_mrg_rxbuf mhdr; 1763 unsigned mhdr_cnt = 0; 1764 size_t offset, i, guest_offset, j; 1765 ssize_t err; 1766 1767 if (!virtio_net_can_receive(nc)) { 1768 return -1; 1769 } 1770 1771 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1772 int index = virtio_net_process_rss(nc, buf, size); 1773 if (index >= 0) { 1774 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1775 return virtio_net_receive_rcu(nc2, buf, size, true); 1776 } 1777 } 1778 1779 /* hdr_len refers to the header we supply to the guest */ 1780 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1781 return 0; 1782 } 1783 1784 if (!receive_filter(n, buf, size)) 1785 return size; 1786 1787 offset = i = 0; 1788 1789 while (offset < size) { 1790 VirtQueueElement *elem; 1791 int len, total; 1792 const struct iovec *sg; 1793 1794 total = 0; 1795 1796 if (i == VIRTQUEUE_MAX_SIZE) { 1797 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1798 err = size; 1799 goto err; 1800 } 1801 1802 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1803 if (!elem) { 1804 if (i) { 1805 virtio_error(vdev, "virtio-net unexpected empty queue: " 1806 "i %zd mergeable %d offset %zd, size %zd, " 1807 "guest hdr len %zd, host hdr len %zd " 1808 "guest features 0x%" PRIx64, 1809 i, n->mergeable_rx_bufs, offset, size, 1810 n->guest_hdr_len, n->host_hdr_len, 1811 vdev->guest_features); 1812 } 1813 err = -1; 1814 goto err; 1815 } 1816 1817 if (elem->in_num < 1) { 1818 virtio_error(vdev, 1819 "virtio-net receive queue contains no in buffers"); 1820 virtqueue_detach_element(q->rx_vq, elem, 0); 1821 g_free(elem); 1822 err = -1; 1823 goto err; 1824 } 1825 1826 sg = elem->in_sg; 1827 if (i == 0) { 1828 assert(offset == 0); 1829 if (n->mergeable_rx_bufs) { 1830 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1831 sg, elem->in_num, 1832 offsetof(typeof(mhdr), num_buffers), 1833 sizeof(mhdr.num_buffers)); 1834 } 1835 1836 receive_header(n, sg, elem->in_num, buf, size); 1837 if (n->rss_data.populate_hash) { 1838 offset = sizeof(mhdr); 1839 iov_from_buf(sg, elem->in_num, offset, 1840 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1841 } 1842 offset = n->host_hdr_len; 1843 total += n->guest_hdr_len; 1844 guest_offset = n->guest_hdr_len; 1845 } else { 1846 guest_offset = 0; 1847 } 1848 1849 /* copy in packet. ugh */ 1850 len = iov_from_buf(sg, elem->in_num, guest_offset, 1851 buf + offset, size - offset); 1852 total += len; 1853 offset += len; 1854 /* If buffers can't be merged, at this point we 1855 * must have consumed the complete packet. 1856 * Otherwise, drop it. */ 1857 if (!n->mergeable_rx_bufs && offset < size) { 1858 virtqueue_unpop(q->rx_vq, elem, total); 1859 g_free(elem); 1860 err = size; 1861 goto err; 1862 } 1863 1864 elems[i] = elem; 1865 lens[i] = total; 1866 i++; 1867 } 1868 1869 if (mhdr_cnt) { 1870 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1871 iov_from_buf(mhdr_sg, mhdr_cnt, 1872 0, 1873 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1874 } 1875 1876 for (j = 0; j < i; j++) { 1877 /* signal other side */ 1878 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1879 g_free(elems[j]); 1880 } 1881 1882 virtqueue_flush(q->rx_vq, i); 1883 virtio_notify(vdev, q->rx_vq); 1884 1885 return size; 1886 1887 err: 1888 for (j = 0; j < i; j++) { 1889 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1890 g_free(elems[j]); 1891 } 1892 1893 return err; 1894 } 1895 1896 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1897 size_t size) 1898 { 1899 RCU_READ_LOCK_GUARD(); 1900 1901 return virtio_net_receive_rcu(nc, buf, size, false); 1902 } 1903 1904 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1905 const uint8_t *buf, 1906 VirtioNetRscUnit *unit) 1907 { 1908 uint16_t ip_hdrlen; 1909 struct ip_header *ip; 1910 1911 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1912 + sizeof(struct eth_header)); 1913 unit->ip = (void *)ip; 1914 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1915 unit->ip_plen = &ip->ip_len; 1916 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1917 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1918 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1919 } 1920 1921 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1922 const uint8_t *buf, 1923 VirtioNetRscUnit *unit) 1924 { 1925 struct ip6_header *ip6; 1926 1927 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 1928 + sizeof(struct eth_header)); 1929 unit->ip = ip6; 1930 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 1931 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 1932 + sizeof(struct ip6_header)); 1933 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1934 1935 /* There is a difference between payload lenght in ipv4 and v6, 1936 ip header is excluded in ipv6 */ 1937 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 1938 } 1939 1940 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 1941 VirtioNetRscSeg *seg) 1942 { 1943 int ret; 1944 struct virtio_net_hdr_v1 *h; 1945 1946 h = (struct virtio_net_hdr_v1 *)seg->buf; 1947 h->flags = 0; 1948 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 1949 1950 if (seg->is_coalesced) { 1951 h->rsc.segments = seg->packets; 1952 h->rsc.dup_acks = seg->dup_ack; 1953 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 1954 if (chain->proto == ETH_P_IP) { 1955 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1956 } else { 1957 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1958 } 1959 } 1960 1961 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 1962 QTAILQ_REMOVE(&chain->buffers, seg, next); 1963 g_free(seg->buf); 1964 g_free(seg); 1965 1966 return ret; 1967 } 1968 1969 static void virtio_net_rsc_purge(void *opq) 1970 { 1971 VirtioNetRscSeg *seg, *rn; 1972 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 1973 1974 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 1975 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 1976 chain->stat.purge_failed++; 1977 continue; 1978 } 1979 } 1980 1981 chain->stat.timer++; 1982 if (!QTAILQ_EMPTY(&chain->buffers)) { 1983 timer_mod(chain->drain_timer, 1984 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 1985 } 1986 } 1987 1988 static void virtio_net_rsc_cleanup(VirtIONet *n) 1989 { 1990 VirtioNetRscChain *chain, *rn_chain; 1991 VirtioNetRscSeg *seg, *rn_seg; 1992 1993 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 1994 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 1995 QTAILQ_REMOVE(&chain->buffers, seg, next); 1996 g_free(seg->buf); 1997 g_free(seg); 1998 } 1999 2000 timer_free(chain->drain_timer); 2001 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2002 g_free(chain); 2003 } 2004 } 2005 2006 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2007 NetClientState *nc, 2008 const uint8_t *buf, size_t size) 2009 { 2010 uint16_t hdr_len; 2011 VirtioNetRscSeg *seg; 2012 2013 hdr_len = chain->n->guest_hdr_len; 2014 seg = g_new(VirtioNetRscSeg, 1); 2015 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2016 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2017 memcpy(seg->buf, buf, size); 2018 seg->size = size; 2019 seg->packets = 1; 2020 seg->dup_ack = 0; 2021 seg->is_coalesced = 0; 2022 seg->nc = nc; 2023 2024 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2025 chain->stat.cache++; 2026 2027 switch (chain->proto) { 2028 case ETH_P_IP: 2029 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2030 break; 2031 case ETH_P_IPV6: 2032 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2033 break; 2034 default: 2035 g_assert_not_reached(); 2036 } 2037 } 2038 2039 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2040 VirtioNetRscSeg *seg, 2041 const uint8_t *buf, 2042 struct tcp_header *n_tcp, 2043 struct tcp_header *o_tcp) 2044 { 2045 uint32_t nack, oack; 2046 uint16_t nwin, owin; 2047 2048 nack = htonl(n_tcp->th_ack); 2049 nwin = htons(n_tcp->th_win); 2050 oack = htonl(o_tcp->th_ack); 2051 owin = htons(o_tcp->th_win); 2052 2053 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2054 chain->stat.ack_out_of_win++; 2055 return RSC_FINAL; 2056 } else if (nack == oack) { 2057 /* duplicated ack or window probe */ 2058 if (nwin == owin) { 2059 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2060 chain->stat.dup_ack++; 2061 return RSC_FINAL; 2062 } else { 2063 /* Coalesce window update */ 2064 o_tcp->th_win = n_tcp->th_win; 2065 chain->stat.win_update++; 2066 return RSC_COALESCE; 2067 } 2068 } else { 2069 /* pure ack, go to 'C', finalize*/ 2070 chain->stat.pure_ack++; 2071 return RSC_FINAL; 2072 } 2073 } 2074 2075 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2076 VirtioNetRscSeg *seg, 2077 const uint8_t *buf, 2078 VirtioNetRscUnit *n_unit) 2079 { 2080 void *data; 2081 uint16_t o_ip_len; 2082 uint32_t nseq, oseq; 2083 VirtioNetRscUnit *o_unit; 2084 2085 o_unit = &seg->unit; 2086 o_ip_len = htons(*o_unit->ip_plen); 2087 nseq = htonl(n_unit->tcp->th_seq); 2088 oseq = htonl(o_unit->tcp->th_seq); 2089 2090 /* out of order or retransmitted. */ 2091 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2092 chain->stat.data_out_of_win++; 2093 return RSC_FINAL; 2094 } 2095 2096 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2097 if (nseq == oseq) { 2098 if ((o_unit->payload == 0) && n_unit->payload) { 2099 /* From no payload to payload, normal case, not a dup ack or etc */ 2100 chain->stat.data_after_pure_ack++; 2101 goto coalesce; 2102 } else { 2103 return virtio_net_rsc_handle_ack(chain, seg, buf, 2104 n_unit->tcp, o_unit->tcp); 2105 } 2106 } else if ((nseq - oseq) != o_unit->payload) { 2107 /* Not a consistent packet, out of order */ 2108 chain->stat.data_out_of_order++; 2109 return RSC_FINAL; 2110 } else { 2111 coalesce: 2112 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2113 chain->stat.over_size++; 2114 return RSC_FINAL; 2115 } 2116 2117 /* Here comes the right data, the payload length in v4/v6 is different, 2118 so use the field value to update and record the new data len */ 2119 o_unit->payload += n_unit->payload; /* update new data len */ 2120 2121 /* update field in ip header */ 2122 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2123 2124 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2125 for windows guest, while this may change the behavior for linux 2126 guest (only if it uses RSC feature). */ 2127 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2128 2129 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2130 o_unit->tcp->th_win = n_unit->tcp->th_win; 2131 2132 memmove(seg->buf + seg->size, data, n_unit->payload); 2133 seg->size += n_unit->payload; 2134 seg->packets++; 2135 chain->stat.coalesced++; 2136 return RSC_COALESCE; 2137 } 2138 } 2139 2140 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2141 VirtioNetRscSeg *seg, 2142 const uint8_t *buf, size_t size, 2143 VirtioNetRscUnit *unit) 2144 { 2145 struct ip_header *ip1, *ip2; 2146 2147 ip1 = (struct ip_header *)(unit->ip); 2148 ip2 = (struct ip_header *)(seg->unit.ip); 2149 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2150 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2151 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2152 chain->stat.no_match++; 2153 return RSC_NO_MATCH; 2154 } 2155 2156 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2157 } 2158 2159 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2160 VirtioNetRscSeg *seg, 2161 const uint8_t *buf, size_t size, 2162 VirtioNetRscUnit *unit) 2163 { 2164 struct ip6_header *ip1, *ip2; 2165 2166 ip1 = (struct ip6_header *)(unit->ip); 2167 ip2 = (struct ip6_header *)(seg->unit.ip); 2168 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2169 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2170 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2171 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2172 chain->stat.no_match++; 2173 return RSC_NO_MATCH; 2174 } 2175 2176 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2177 } 2178 2179 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2180 * to prevent out of order */ 2181 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2182 struct tcp_header *tcp) 2183 { 2184 uint16_t tcp_hdr; 2185 uint16_t tcp_flag; 2186 2187 tcp_flag = htons(tcp->th_offset_flags); 2188 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2189 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2190 if (tcp_flag & TH_SYN) { 2191 chain->stat.tcp_syn++; 2192 return RSC_BYPASS; 2193 } 2194 2195 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2196 chain->stat.tcp_ctrl_drain++; 2197 return RSC_FINAL; 2198 } 2199 2200 if (tcp_hdr > sizeof(struct tcp_header)) { 2201 chain->stat.tcp_all_opt++; 2202 return RSC_FINAL; 2203 } 2204 2205 return RSC_CANDIDATE; 2206 } 2207 2208 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2209 NetClientState *nc, 2210 const uint8_t *buf, size_t size, 2211 VirtioNetRscUnit *unit) 2212 { 2213 int ret; 2214 VirtioNetRscSeg *seg, *nseg; 2215 2216 if (QTAILQ_EMPTY(&chain->buffers)) { 2217 chain->stat.empty_cache++; 2218 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2219 timer_mod(chain->drain_timer, 2220 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2221 return size; 2222 } 2223 2224 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2225 if (chain->proto == ETH_P_IP) { 2226 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2227 } else { 2228 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2229 } 2230 2231 if (ret == RSC_FINAL) { 2232 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2233 /* Send failed */ 2234 chain->stat.final_failed++; 2235 return 0; 2236 } 2237 2238 /* Send current packet */ 2239 return virtio_net_do_receive(nc, buf, size); 2240 } else if (ret == RSC_NO_MATCH) { 2241 continue; 2242 } else { 2243 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2244 seg->is_coalesced = 1; 2245 return size; 2246 } 2247 } 2248 2249 chain->stat.no_match_cache++; 2250 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2251 return size; 2252 } 2253 2254 /* Drain a connection data, this is to avoid out of order segments */ 2255 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2256 NetClientState *nc, 2257 const uint8_t *buf, size_t size, 2258 uint16_t ip_start, uint16_t ip_size, 2259 uint16_t tcp_port) 2260 { 2261 VirtioNetRscSeg *seg, *nseg; 2262 uint32_t ppair1, ppair2; 2263 2264 ppair1 = *(uint32_t *)(buf + tcp_port); 2265 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2266 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2267 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2268 || (ppair1 != ppair2)) { 2269 continue; 2270 } 2271 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2272 chain->stat.drain_failed++; 2273 } 2274 2275 break; 2276 } 2277 2278 return virtio_net_do_receive(nc, buf, size); 2279 } 2280 2281 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2282 struct ip_header *ip, 2283 const uint8_t *buf, size_t size) 2284 { 2285 uint16_t ip_len; 2286 2287 /* Not an ipv4 packet */ 2288 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2289 chain->stat.ip_option++; 2290 return RSC_BYPASS; 2291 } 2292 2293 /* Don't handle packets with ip option */ 2294 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2295 chain->stat.ip_option++; 2296 return RSC_BYPASS; 2297 } 2298 2299 if (ip->ip_p != IPPROTO_TCP) { 2300 chain->stat.bypass_not_tcp++; 2301 return RSC_BYPASS; 2302 } 2303 2304 /* Don't handle packets with ip fragment */ 2305 if (!(htons(ip->ip_off) & IP_DF)) { 2306 chain->stat.ip_frag++; 2307 return RSC_BYPASS; 2308 } 2309 2310 /* Don't handle packets with ecn flag */ 2311 if (IPTOS_ECN(ip->ip_tos)) { 2312 chain->stat.ip_ecn++; 2313 return RSC_BYPASS; 2314 } 2315 2316 ip_len = htons(ip->ip_len); 2317 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2318 || ip_len > (size - chain->n->guest_hdr_len - 2319 sizeof(struct eth_header))) { 2320 chain->stat.ip_hacked++; 2321 return RSC_BYPASS; 2322 } 2323 2324 return RSC_CANDIDATE; 2325 } 2326 2327 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2328 NetClientState *nc, 2329 const uint8_t *buf, size_t size) 2330 { 2331 int32_t ret; 2332 uint16_t hdr_len; 2333 VirtioNetRscUnit unit; 2334 2335 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2336 2337 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2338 + sizeof(struct tcp_header))) { 2339 chain->stat.bypass_not_tcp++; 2340 return virtio_net_do_receive(nc, buf, size); 2341 } 2342 2343 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2344 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2345 != RSC_CANDIDATE) { 2346 return virtio_net_do_receive(nc, buf, size); 2347 } 2348 2349 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2350 if (ret == RSC_BYPASS) { 2351 return virtio_net_do_receive(nc, buf, size); 2352 } else if (ret == RSC_FINAL) { 2353 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2354 ((hdr_len + sizeof(struct eth_header)) + 12), 2355 VIRTIO_NET_IP4_ADDR_SIZE, 2356 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2357 } 2358 2359 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2360 } 2361 2362 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2363 struct ip6_header *ip6, 2364 const uint8_t *buf, size_t size) 2365 { 2366 uint16_t ip_len; 2367 2368 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2369 != IP_HEADER_VERSION_6) { 2370 return RSC_BYPASS; 2371 } 2372 2373 /* Both option and protocol is checked in this */ 2374 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2375 chain->stat.bypass_not_tcp++; 2376 return RSC_BYPASS; 2377 } 2378 2379 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2380 if (ip_len < sizeof(struct tcp_header) || 2381 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2382 - sizeof(struct ip6_header))) { 2383 chain->stat.ip_hacked++; 2384 return RSC_BYPASS; 2385 } 2386 2387 /* Don't handle packets with ecn flag */ 2388 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2389 chain->stat.ip_ecn++; 2390 return RSC_BYPASS; 2391 } 2392 2393 return RSC_CANDIDATE; 2394 } 2395 2396 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2397 const uint8_t *buf, size_t size) 2398 { 2399 int32_t ret; 2400 uint16_t hdr_len; 2401 VirtioNetRscChain *chain; 2402 VirtioNetRscUnit unit; 2403 2404 chain = (VirtioNetRscChain *)opq; 2405 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2406 2407 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2408 + sizeof(tcp_header))) { 2409 return virtio_net_do_receive(nc, buf, size); 2410 } 2411 2412 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2413 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2414 unit.ip, buf, size)) { 2415 return virtio_net_do_receive(nc, buf, size); 2416 } 2417 2418 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2419 if (ret == RSC_BYPASS) { 2420 return virtio_net_do_receive(nc, buf, size); 2421 } else if (ret == RSC_FINAL) { 2422 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2423 ((hdr_len + sizeof(struct eth_header)) + 8), 2424 VIRTIO_NET_IP6_ADDR_SIZE, 2425 hdr_len + sizeof(struct eth_header) 2426 + sizeof(struct ip6_header)); 2427 } 2428 2429 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2430 } 2431 2432 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2433 NetClientState *nc, 2434 uint16_t proto) 2435 { 2436 VirtioNetRscChain *chain; 2437 2438 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2439 return NULL; 2440 } 2441 2442 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2443 if (chain->proto == proto) { 2444 return chain; 2445 } 2446 } 2447 2448 chain = g_malloc(sizeof(*chain)); 2449 chain->n = n; 2450 chain->proto = proto; 2451 if (proto == (uint16_t)ETH_P_IP) { 2452 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2453 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2454 } else { 2455 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2456 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2457 } 2458 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2459 virtio_net_rsc_purge, chain); 2460 memset(&chain->stat, 0, sizeof(chain->stat)); 2461 2462 QTAILQ_INIT(&chain->buffers); 2463 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2464 2465 return chain; 2466 } 2467 2468 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2469 const uint8_t *buf, 2470 size_t size) 2471 { 2472 uint16_t proto; 2473 VirtioNetRscChain *chain; 2474 struct eth_header *eth; 2475 VirtIONet *n; 2476 2477 n = qemu_get_nic_opaque(nc); 2478 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2479 return virtio_net_do_receive(nc, buf, size); 2480 } 2481 2482 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2483 proto = htons(eth->h_proto); 2484 2485 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2486 if (chain) { 2487 chain->stat.received++; 2488 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2489 return virtio_net_rsc_receive4(chain, nc, buf, size); 2490 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2491 return virtio_net_rsc_receive6(chain, nc, buf, size); 2492 } 2493 } 2494 return virtio_net_do_receive(nc, buf, size); 2495 } 2496 2497 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2498 size_t size) 2499 { 2500 VirtIONet *n = qemu_get_nic_opaque(nc); 2501 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2502 return virtio_net_rsc_receive(nc, buf, size); 2503 } else { 2504 return virtio_net_do_receive(nc, buf, size); 2505 } 2506 } 2507 2508 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2509 2510 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2511 { 2512 VirtIONet *n = qemu_get_nic_opaque(nc); 2513 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2514 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2515 2516 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2517 virtio_notify(vdev, q->tx_vq); 2518 2519 g_free(q->async_tx.elem); 2520 q->async_tx.elem = NULL; 2521 2522 virtio_queue_set_notification(q->tx_vq, 1); 2523 virtio_net_flush_tx(q); 2524 } 2525 2526 /* TX */ 2527 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2528 { 2529 VirtIONet *n = q->n; 2530 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2531 VirtQueueElement *elem; 2532 int32_t num_packets = 0; 2533 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2534 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2535 return num_packets; 2536 } 2537 2538 if (q->async_tx.elem) { 2539 virtio_queue_set_notification(q->tx_vq, 0); 2540 return num_packets; 2541 } 2542 2543 for (;;) { 2544 ssize_t ret; 2545 unsigned int out_num; 2546 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2547 struct virtio_net_hdr_mrg_rxbuf mhdr; 2548 2549 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2550 if (!elem) { 2551 break; 2552 } 2553 2554 out_num = elem->out_num; 2555 out_sg = elem->out_sg; 2556 if (out_num < 1) { 2557 virtio_error(vdev, "virtio-net header not in first element"); 2558 virtqueue_detach_element(q->tx_vq, elem, 0); 2559 g_free(elem); 2560 return -EINVAL; 2561 } 2562 2563 if (n->has_vnet_hdr) { 2564 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2565 n->guest_hdr_len) { 2566 virtio_error(vdev, "virtio-net header incorrect"); 2567 virtqueue_detach_element(q->tx_vq, elem, 0); 2568 g_free(elem); 2569 return -EINVAL; 2570 } 2571 if (n->needs_vnet_hdr_swap) { 2572 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2573 sg2[0].iov_base = &mhdr; 2574 sg2[0].iov_len = n->guest_hdr_len; 2575 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2576 out_sg, out_num, 2577 n->guest_hdr_len, -1); 2578 if (out_num == VIRTQUEUE_MAX_SIZE) { 2579 goto drop; 2580 } 2581 out_num += 1; 2582 out_sg = sg2; 2583 } 2584 } 2585 /* 2586 * If host wants to see the guest header as is, we can 2587 * pass it on unchanged. Otherwise, copy just the parts 2588 * that host is interested in. 2589 */ 2590 assert(n->host_hdr_len <= n->guest_hdr_len); 2591 if (n->host_hdr_len != n->guest_hdr_len) { 2592 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2593 out_sg, out_num, 2594 0, n->host_hdr_len); 2595 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2596 out_sg, out_num, 2597 n->guest_hdr_len, -1); 2598 out_num = sg_num; 2599 out_sg = sg; 2600 } 2601 2602 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2603 out_sg, out_num, virtio_net_tx_complete); 2604 if (ret == 0) { 2605 virtio_queue_set_notification(q->tx_vq, 0); 2606 q->async_tx.elem = elem; 2607 return -EBUSY; 2608 } 2609 2610 drop: 2611 virtqueue_push(q->tx_vq, elem, 0); 2612 virtio_notify(vdev, q->tx_vq); 2613 g_free(elem); 2614 2615 if (++num_packets >= n->tx_burst) { 2616 break; 2617 } 2618 } 2619 return num_packets; 2620 } 2621 2622 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2623 { 2624 VirtIONet *n = VIRTIO_NET(vdev); 2625 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2626 2627 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2628 virtio_net_drop_tx_queue_data(vdev, vq); 2629 return; 2630 } 2631 2632 /* This happens when device was stopped but VCPU wasn't. */ 2633 if (!vdev->vm_running) { 2634 q->tx_waiting = 1; 2635 return; 2636 } 2637 2638 if (q->tx_waiting) { 2639 virtio_queue_set_notification(vq, 1); 2640 timer_del(q->tx_timer); 2641 q->tx_waiting = 0; 2642 if (virtio_net_flush_tx(q) == -EINVAL) { 2643 return; 2644 } 2645 } else { 2646 timer_mod(q->tx_timer, 2647 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2648 q->tx_waiting = 1; 2649 virtio_queue_set_notification(vq, 0); 2650 } 2651 } 2652 2653 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2654 { 2655 VirtIONet *n = VIRTIO_NET(vdev); 2656 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2657 2658 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2659 virtio_net_drop_tx_queue_data(vdev, vq); 2660 return; 2661 } 2662 2663 if (unlikely(q->tx_waiting)) { 2664 return; 2665 } 2666 q->tx_waiting = 1; 2667 /* This happens when device was stopped but VCPU wasn't. */ 2668 if (!vdev->vm_running) { 2669 return; 2670 } 2671 virtio_queue_set_notification(vq, 0); 2672 qemu_bh_schedule(q->tx_bh); 2673 } 2674 2675 static void virtio_net_tx_timer(void *opaque) 2676 { 2677 VirtIONetQueue *q = opaque; 2678 VirtIONet *n = q->n; 2679 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2680 /* This happens when device was stopped but BH wasn't. */ 2681 if (!vdev->vm_running) { 2682 /* Make sure tx waiting is set, so we'll run when restarted. */ 2683 assert(q->tx_waiting); 2684 return; 2685 } 2686 2687 q->tx_waiting = 0; 2688 2689 /* Just in case the driver is not ready on more */ 2690 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2691 return; 2692 } 2693 2694 virtio_queue_set_notification(q->tx_vq, 1); 2695 virtio_net_flush_tx(q); 2696 } 2697 2698 static void virtio_net_tx_bh(void *opaque) 2699 { 2700 VirtIONetQueue *q = opaque; 2701 VirtIONet *n = q->n; 2702 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2703 int32_t ret; 2704 2705 /* This happens when device was stopped but BH wasn't. */ 2706 if (!vdev->vm_running) { 2707 /* Make sure tx waiting is set, so we'll run when restarted. */ 2708 assert(q->tx_waiting); 2709 return; 2710 } 2711 2712 q->tx_waiting = 0; 2713 2714 /* Just in case the driver is not ready on more */ 2715 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2716 return; 2717 } 2718 2719 ret = virtio_net_flush_tx(q); 2720 if (ret == -EBUSY || ret == -EINVAL) { 2721 return; /* Notification re-enable handled by tx_complete or device 2722 * broken */ 2723 } 2724 2725 /* If we flush a full burst of packets, assume there are 2726 * more coming and immediately reschedule */ 2727 if (ret >= n->tx_burst) { 2728 qemu_bh_schedule(q->tx_bh); 2729 q->tx_waiting = 1; 2730 return; 2731 } 2732 2733 /* If less than a full burst, re-enable notification and flush 2734 * anything that may have come in while we weren't looking. If 2735 * we find something, assume the guest is still active and reschedule */ 2736 virtio_queue_set_notification(q->tx_vq, 1); 2737 ret = virtio_net_flush_tx(q); 2738 if (ret == -EINVAL) { 2739 return; 2740 } else if (ret > 0) { 2741 virtio_queue_set_notification(q->tx_vq, 0); 2742 qemu_bh_schedule(q->tx_bh); 2743 q->tx_waiting = 1; 2744 } 2745 } 2746 2747 static void virtio_net_add_queue(VirtIONet *n, int index) 2748 { 2749 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2750 2751 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2752 virtio_net_handle_rx); 2753 2754 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2755 n->vqs[index].tx_vq = 2756 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2757 virtio_net_handle_tx_timer); 2758 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2759 virtio_net_tx_timer, 2760 &n->vqs[index]); 2761 } else { 2762 n->vqs[index].tx_vq = 2763 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2764 virtio_net_handle_tx_bh); 2765 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2766 } 2767 2768 n->vqs[index].tx_waiting = 0; 2769 n->vqs[index].n = n; 2770 } 2771 2772 static void virtio_net_del_queue(VirtIONet *n, int index) 2773 { 2774 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2775 VirtIONetQueue *q = &n->vqs[index]; 2776 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2777 2778 qemu_purge_queued_packets(nc); 2779 2780 virtio_del_queue(vdev, index * 2); 2781 if (q->tx_timer) { 2782 timer_free(q->tx_timer); 2783 q->tx_timer = NULL; 2784 } else { 2785 qemu_bh_delete(q->tx_bh); 2786 q->tx_bh = NULL; 2787 } 2788 q->tx_waiting = 0; 2789 virtio_del_queue(vdev, index * 2 + 1); 2790 } 2791 2792 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2793 { 2794 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2795 int old_num_queues = virtio_get_num_queues(vdev); 2796 int new_num_queues = new_max_queue_pairs * 2 + 1; 2797 int i; 2798 2799 assert(old_num_queues >= 3); 2800 assert(old_num_queues % 2 == 1); 2801 2802 if (old_num_queues == new_num_queues) { 2803 return; 2804 } 2805 2806 /* 2807 * We always need to remove and add ctrl vq if 2808 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2809 * and then we only enter one of the following two loops. 2810 */ 2811 virtio_del_queue(vdev, old_num_queues - 1); 2812 2813 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2814 /* new_num_queues < old_num_queues */ 2815 virtio_net_del_queue(n, i / 2); 2816 } 2817 2818 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2819 /* new_num_queues > old_num_queues */ 2820 virtio_net_add_queue(n, i / 2); 2821 } 2822 2823 /* add ctrl_vq last */ 2824 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2825 } 2826 2827 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2828 { 2829 int max = multiqueue ? n->max_queue_pairs : 1; 2830 2831 n->multiqueue = multiqueue; 2832 virtio_net_change_num_queue_pairs(n, max); 2833 2834 virtio_net_set_queue_pairs(n); 2835 } 2836 2837 static int virtio_net_post_load_device(void *opaque, int version_id) 2838 { 2839 VirtIONet *n = opaque; 2840 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2841 int i, link_down; 2842 2843 trace_virtio_net_post_load_device(); 2844 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2845 virtio_vdev_has_feature(vdev, 2846 VIRTIO_F_VERSION_1), 2847 virtio_vdev_has_feature(vdev, 2848 VIRTIO_NET_F_HASH_REPORT)); 2849 2850 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2851 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2852 n->mac_table.in_use = 0; 2853 } 2854 2855 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2856 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2857 } 2858 2859 /* 2860 * curr_guest_offloads will be later overwritten by the 2861 * virtio_set_features_nocheck call done from the virtio_load. 2862 * Here we make sure it is preserved and restored accordingly 2863 * in the virtio_net_post_load_virtio callback. 2864 */ 2865 n->saved_guest_offloads = n->curr_guest_offloads; 2866 2867 virtio_net_set_queue_pairs(n); 2868 2869 /* Find the first multicast entry in the saved MAC filter */ 2870 for (i = 0; i < n->mac_table.in_use; i++) { 2871 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2872 break; 2873 } 2874 } 2875 n->mac_table.first_multi = i; 2876 2877 /* nc.link_down can't be migrated, so infer link_down according 2878 * to link status bit in n->status */ 2879 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2880 for (i = 0; i < n->max_queue_pairs; i++) { 2881 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2882 } 2883 2884 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2885 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 2886 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 2887 QEMU_CLOCK_VIRTUAL, 2888 virtio_net_announce_timer, n); 2889 if (n->announce_timer.round) { 2890 timer_mod(n->announce_timer.tm, 2891 qemu_clock_get_ms(n->announce_timer.type)); 2892 } else { 2893 qemu_announce_timer_del(&n->announce_timer, false); 2894 } 2895 } 2896 2897 if (n->rss_data.enabled) { 2898 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 2899 if (!n->rss_data.populate_hash) { 2900 if (!virtio_net_attach_epbf_rss(n)) { 2901 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 2902 warn_report("Can't post-load eBPF RSS for vhost"); 2903 } else { 2904 warn_report("Can't post-load eBPF RSS - " 2905 "fallback to software RSS"); 2906 n->rss_data.enabled_software_rss = true; 2907 } 2908 } 2909 } 2910 2911 trace_virtio_net_rss_enable(n->rss_data.hash_types, 2912 n->rss_data.indirections_len, 2913 sizeof(n->rss_data.key)); 2914 } else { 2915 trace_virtio_net_rss_disable(); 2916 } 2917 return 0; 2918 } 2919 2920 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 2921 { 2922 VirtIONet *n = VIRTIO_NET(vdev); 2923 /* 2924 * The actual needed state is now in saved_guest_offloads, 2925 * see virtio_net_post_load_device for detail. 2926 * Restore it back and apply the desired offloads. 2927 */ 2928 n->curr_guest_offloads = n->saved_guest_offloads; 2929 if (peer_has_vnet_hdr(n)) { 2930 virtio_net_apply_guest_offloads(n); 2931 } 2932 2933 return 0; 2934 } 2935 2936 /* tx_waiting field of a VirtIONetQueue */ 2937 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 2938 .name = "virtio-net-queue-tx_waiting", 2939 .fields = (VMStateField[]) { 2940 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 2941 VMSTATE_END_OF_LIST() 2942 }, 2943 }; 2944 2945 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 2946 { 2947 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 2948 } 2949 2950 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 2951 { 2952 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 2953 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 2954 } 2955 2956 static bool mac_table_fits(void *opaque, int version_id) 2957 { 2958 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 2959 } 2960 2961 static bool mac_table_doesnt_fit(void *opaque, int version_id) 2962 { 2963 return !mac_table_fits(opaque, version_id); 2964 } 2965 2966 /* This temporary type is shared by all the WITH_TMP methods 2967 * although only some fields are used by each. 2968 */ 2969 struct VirtIONetMigTmp { 2970 VirtIONet *parent; 2971 VirtIONetQueue *vqs_1; 2972 uint16_t curr_queue_pairs_1; 2973 uint8_t has_ufo; 2974 uint32_t has_vnet_hdr; 2975 }; 2976 2977 /* The 2nd and subsequent tx_waiting flags are loaded later than 2978 * the 1st entry in the queue_pairs and only if there's more than one 2979 * entry. We use the tmp mechanism to calculate a temporary 2980 * pointer and count and also validate the count. 2981 */ 2982 2983 static int virtio_net_tx_waiting_pre_save(void *opaque) 2984 { 2985 struct VirtIONetMigTmp *tmp = opaque; 2986 2987 tmp->vqs_1 = tmp->parent->vqs + 1; 2988 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 2989 if (tmp->parent->curr_queue_pairs == 0) { 2990 tmp->curr_queue_pairs_1 = 0; 2991 } 2992 2993 return 0; 2994 } 2995 2996 static int virtio_net_tx_waiting_pre_load(void *opaque) 2997 { 2998 struct VirtIONetMigTmp *tmp = opaque; 2999 3000 /* Reuse the pointer setup from save */ 3001 virtio_net_tx_waiting_pre_save(opaque); 3002 3003 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3004 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3005 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3006 3007 return -EINVAL; 3008 } 3009 3010 return 0; /* all good */ 3011 } 3012 3013 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3014 .name = "virtio-net-tx_waiting", 3015 .pre_load = virtio_net_tx_waiting_pre_load, 3016 .pre_save = virtio_net_tx_waiting_pre_save, 3017 .fields = (VMStateField[]) { 3018 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3019 curr_queue_pairs_1, 3020 vmstate_virtio_net_queue_tx_waiting, 3021 struct VirtIONetQueue), 3022 VMSTATE_END_OF_LIST() 3023 }, 3024 }; 3025 3026 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3027 * flag set we need to check that we have it 3028 */ 3029 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3030 { 3031 struct VirtIONetMigTmp *tmp = opaque; 3032 3033 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3034 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3035 return -EINVAL; 3036 } 3037 3038 return 0; 3039 } 3040 3041 static int virtio_net_ufo_pre_save(void *opaque) 3042 { 3043 struct VirtIONetMigTmp *tmp = opaque; 3044 3045 tmp->has_ufo = tmp->parent->has_ufo; 3046 3047 return 0; 3048 } 3049 3050 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3051 .name = "virtio-net-ufo", 3052 .post_load = virtio_net_ufo_post_load, 3053 .pre_save = virtio_net_ufo_pre_save, 3054 .fields = (VMStateField[]) { 3055 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3056 VMSTATE_END_OF_LIST() 3057 }, 3058 }; 3059 3060 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3061 * flag set we need to check that we have it 3062 */ 3063 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3064 { 3065 struct VirtIONetMigTmp *tmp = opaque; 3066 3067 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3068 error_report("virtio-net: saved image requires vnet_hdr=on"); 3069 return -EINVAL; 3070 } 3071 3072 return 0; 3073 } 3074 3075 static int virtio_net_vnet_pre_save(void *opaque) 3076 { 3077 struct VirtIONetMigTmp *tmp = opaque; 3078 3079 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3080 3081 return 0; 3082 } 3083 3084 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3085 .name = "virtio-net-vnet", 3086 .post_load = virtio_net_vnet_post_load, 3087 .pre_save = virtio_net_vnet_pre_save, 3088 .fields = (VMStateField[]) { 3089 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3090 VMSTATE_END_OF_LIST() 3091 }, 3092 }; 3093 3094 static bool virtio_net_rss_needed(void *opaque) 3095 { 3096 return VIRTIO_NET(opaque)->rss_data.enabled; 3097 } 3098 3099 static const VMStateDescription vmstate_virtio_net_rss = { 3100 .name = "virtio-net-device/rss", 3101 .version_id = 1, 3102 .minimum_version_id = 1, 3103 .needed = virtio_net_rss_needed, 3104 .fields = (VMStateField[]) { 3105 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3106 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3107 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3108 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3109 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3110 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3111 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3112 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3113 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3114 rss_data.indirections_len, 0, 3115 vmstate_info_uint16, uint16_t), 3116 VMSTATE_END_OF_LIST() 3117 }, 3118 }; 3119 3120 static const VMStateDescription vmstate_virtio_net_device = { 3121 .name = "virtio-net-device", 3122 .version_id = VIRTIO_NET_VM_VERSION, 3123 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3124 .post_load = virtio_net_post_load_device, 3125 .fields = (VMStateField[]) { 3126 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3127 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3128 vmstate_virtio_net_queue_tx_waiting, 3129 VirtIONetQueue), 3130 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3131 VMSTATE_UINT16(status, VirtIONet), 3132 VMSTATE_UINT8(promisc, VirtIONet), 3133 VMSTATE_UINT8(allmulti, VirtIONet), 3134 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3135 3136 /* Guarded pair: If it fits we load it, else we throw it away 3137 * - can happen if source has a larger MAC table.; post-load 3138 * sets flags in this case. 3139 */ 3140 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3141 0, mac_table_fits, mac_table.in_use, 3142 ETH_ALEN), 3143 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3144 mac_table.in_use, ETH_ALEN), 3145 3146 /* Note: This is an array of uint32's that's always been saved as a 3147 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3148 * but based on the uint. 3149 */ 3150 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3151 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3152 vmstate_virtio_net_has_vnet), 3153 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3154 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3155 VMSTATE_UINT8(alluni, VirtIONet), 3156 VMSTATE_UINT8(nomulti, VirtIONet), 3157 VMSTATE_UINT8(nouni, VirtIONet), 3158 VMSTATE_UINT8(nobcast, VirtIONet), 3159 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3160 vmstate_virtio_net_has_ufo), 3161 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3162 vmstate_info_uint16_equal, uint16_t), 3163 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3164 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3165 vmstate_virtio_net_tx_waiting), 3166 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3167 has_ctrl_guest_offloads), 3168 VMSTATE_END_OF_LIST() 3169 }, 3170 .subsections = (const VMStateDescription * []) { 3171 &vmstate_virtio_net_rss, 3172 NULL 3173 } 3174 }; 3175 3176 static NetClientInfo net_virtio_info = { 3177 .type = NET_CLIENT_DRIVER_NIC, 3178 .size = sizeof(NICState), 3179 .can_receive = virtio_net_can_receive, 3180 .receive = virtio_net_receive, 3181 .link_status_changed = virtio_net_set_link_status, 3182 .query_rx_filter = virtio_net_query_rxfilter, 3183 .announce = virtio_net_announce, 3184 }; 3185 3186 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3187 { 3188 VirtIONet *n = VIRTIO_NET(vdev); 3189 NetClientState *nc; 3190 assert(n->vhost_started); 3191 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3192 /* Must guard against invalid features and bogus queue index 3193 * from being set by malicious guest, or penetrated through 3194 * buggy migration stream. 3195 */ 3196 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3197 qemu_log_mask(LOG_GUEST_ERROR, 3198 "%s: bogus vq index ignored\n", __func__); 3199 return false; 3200 } 3201 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3202 } else { 3203 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3204 } 3205 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3206 } 3207 3208 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3209 bool mask) 3210 { 3211 VirtIONet *n = VIRTIO_NET(vdev); 3212 NetClientState *nc; 3213 assert(n->vhost_started); 3214 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3215 /* Must guard against invalid features and bogus queue index 3216 * from being set by malicious guest, or penetrated through 3217 * buggy migration stream. 3218 */ 3219 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3220 qemu_log_mask(LOG_GUEST_ERROR, 3221 "%s: bogus vq index ignored\n", __func__); 3222 return; 3223 } 3224 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3225 } else { 3226 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3227 } 3228 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3229 vdev, idx, mask); 3230 } 3231 3232 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3233 { 3234 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3235 3236 n->config_size = virtio_feature_get_config_size(feature_sizes, 3237 host_features); 3238 } 3239 3240 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3241 const char *type) 3242 { 3243 /* 3244 * The name can be NULL, the netclient name will be type.x. 3245 */ 3246 assert(type != NULL); 3247 3248 g_free(n->netclient_name); 3249 g_free(n->netclient_type); 3250 n->netclient_name = g_strdup(name); 3251 n->netclient_type = g_strdup(type); 3252 } 3253 3254 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3255 { 3256 HotplugHandler *hotplug_ctrl; 3257 PCIDevice *pci_dev; 3258 Error *err = NULL; 3259 3260 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3261 if (hotplug_ctrl) { 3262 pci_dev = PCI_DEVICE(dev); 3263 pci_dev->partially_hotplugged = true; 3264 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3265 if (err) { 3266 error_report_err(err); 3267 return false; 3268 } 3269 } else { 3270 return false; 3271 } 3272 return true; 3273 } 3274 3275 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3276 Error **errp) 3277 { 3278 Error *err = NULL; 3279 HotplugHandler *hotplug_ctrl; 3280 PCIDevice *pdev = PCI_DEVICE(dev); 3281 BusState *primary_bus; 3282 3283 if (!pdev->partially_hotplugged) { 3284 return true; 3285 } 3286 primary_bus = dev->parent_bus; 3287 if (!primary_bus) { 3288 error_setg(errp, "virtio_net: couldn't find primary bus"); 3289 return false; 3290 } 3291 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3292 qatomic_set(&n->failover_primary_hidden, false); 3293 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3294 if (hotplug_ctrl) { 3295 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3296 if (err) { 3297 goto out; 3298 } 3299 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3300 } 3301 pdev->partially_hotplugged = false; 3302 3303 out: 3304 error_propagate(errp, err); 3305 return !err; 3306 } 3307 3308 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3309 { 3310 bool should_be_hidden; 3311 Error *err = NULL; 3312 DeviceState *dev = failover_find_primary_device(n); 3313 3314 if (!dev) { 3315 return; 3316 } 3317 3318 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3319 3320 if (migration_in_setup(s) && !should_be_hidden) { 3321 if (failover_unplug_primary(n, dev)) { 3322 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3323 qapi_event_send_unplug_primary(dev->id); 3324 qatomic_set(&n->failover_primary_hidden, true); 3325 } else { 3326 warn_report("couldn't unplug primary device"); 3327 } 3328 } else if (migration_has_failed(s)) { 3329 /* We already unplugged the device let's plug it back */ 3330 if (!failover_replug_primary(n, dev, &err)) { 3331 if (err) { 3332 error_report_err(err); 3333 } 3334 } 3335 } 3336 } 3337 3338 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3339 { 3340 MigrationState *s = data; 3341 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3342 virtio_net_handle_migration_primary(n, s); 3343 } 3344 3345 static bool failover_hide_primary_device(DeviceListener *listener, 3346 const QDict *device_opts, 3347 bool from_json, 3348 Error **errp) 3349 { 3350 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3351 const char *standby_id; 3352 3353 if (!device_opts) { 3354 return false; 3355 } 3356 3357 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3358 return false; 3359 } 3360 3361 if (!qdict_haskey(device_opts, "id")) { 3362 error_setg(errp, "Device with failover_pair_id needs to have id"); 3363 return false; 3364 } 3365 3366 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3367 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3368 return false; 3369 } 3370 3371 /* 3372 * The hide helper can be called several times for a given device. 3373 * Check there is only one primary for a virtio-net device but 3374 * don't duplicate the qdict several times if it's called for the same 3375 * device. 3376 */ 3377 if (n->primary_opts) { 3378 const char *old, *new; 3379 /* devices with failover_pair_id always have an id */ 3380 old = qdict_get_str(n->primary_opts, "id"); 3381 new = qdict_get_str(device_opts, "id"); 3382 if (strcmp(old, new) != 0) { 3383 error_setg(errp, "Cannot attach more than one primary device to " 3384 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3385 return false; 3386 } 3387 } else { 3388 n->primary_opts = qdict_clone_shallow(device_opts); 3389 n->primary_opts_from_json = from_json; 3390 } 3391 3392 /* failover_primary_hidden is set during feature negotiation */ 3393 return qatomic_read(&n->failover_primary_hidden); 3394 } 3395 3396 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3397 { 3398 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3399 VirtIONet *n = VIRTIO_NET(dev); 3400 NetClientState *nc; 3401 int i; 3402 3403 if (n->net_conf.mtu) { 3404 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3405 } 3406 3407 if (n->net_conf.duplex_str) { 3408 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3409 n->net_conf.duplex = DUPLEX_HALF; 3410 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3411 n->net_conf.duplex = DUPLEX_FULL; 3412 } else { 3413 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3414 return; 3415 } 3416 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3417 } else { 3418 n->net_conf.duplex = DUPLEX_UNKNOWN; 3419 } 3420 3421 if (n->net_conf.speed < SPEED_UNKNOWN) { 3422 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3423 return; 3424 } 3425 if (n->net_conf.speed >= 0) { 3426 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3427 } 3428 3429 if (n->failover) { 3430 n->primary_listener.hide_device = failover_hide_primary_device; 3431 qatomic_set(&n->failover_primary_hidden, true); 3432 device_listener_register(&n->primary_listener); 3433 n->migration_state.notify = virtio_net_migration_state_notifier; 3434 add_migration_state_change_notifier(&n->migration_state); 3435 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3436 } 3437 3438 virtio_net_set_config_size(n, n->host_features); 3439 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3440 3441 /* 3442 * We set a lower limit on RX queue size to what it always was. 3443 * Guests that want a smaller ring can always resize it without 3444 * help from us (using virtio 1 and up). 3445 */ 3446 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3447 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3448 !is_power_of_2(n->net_conf.rx_queue_size)) { 3449 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3450 "must be a power of 2 between %d and %d.", 3451 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3452 VIRTQUEUE_MAX_SIZE); 3453 virtio_cleanup(vdev); 3454 return; 3455 } 3456 3457 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3458 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3459 !is_power_of_2(n->net_conf.tx_queue_size)) { 3460 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3461 "must be a power of 2 between %d and %d", 3462 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3463 VIRTQUEUE_MAX_SIZE); 3464 virtio_cleanup(vdev); 3465 return; 3466 } 3467 3468 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3469 3470 /* 3471 * Figure out the datapath queue pairs since the backend could 3472 * provide control queue via peers as well. 3473 */ 3474 if (n->nic_conf.peers.queues) { 3475 for (i = 0; i < n->max_ncs; i++) { 3476 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3477 ++n->max_queue_pairs; 3478 } 3479 } 3480 } 3481 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3482 3483 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3484 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3485 "must be a positive integer less than %d.", 3486 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3487 virtio_cleanup(vdev); 3488 return; 3489 } 3490 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3491 n->curr_queue_pairs = 1; 3492 n->tx_timeout = n->net_conf.txtimer; 3493 3494 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3495 && strcmp(n->net_conf.tx, "bh")) { 3496 warn_report("virtio-net: " 3497 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3498 n->net_conf.tx); 3499 error_printf("Defaulting to \"bh\""); 3500 } 3501 3502 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3503 n->net_conf.tx_queue_size); 3504 3505 for (i = 0; i < n->max_queue_pairs; i++) { 3506 virtio_net_add_queue(n, i); 3507 } 3508 3509 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3510 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3511 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3512 n->status = VIRTIO_NET_S_LINK_UP; 3513 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3514 QEMU_CLOCK_VIRTUAL, 3515 virtio_net_announce_timer, n); 3516 n->announce_timer.round = 0; 3517 3518 if (n->netclient_type) { 3519 /* 3520 * Happen when virtio_net_set_netclient_name has been called. 3521 */ 3522 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3523 n->netclient_type, n->netclient_name, n); 3524 } else { 3525 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3526 object_get_typename(OBJECT(dev)), dev->id, n); 3527 } 3528 3529 for (i = 0; i < n->max_queue_pairs; i++) { 3530 n->nic->ncs[i].do_not_pad = true; 3531 } 3532 3533 peer_test_vnet_hdr(n); 3534 if (peer_has_vnet_hdr(n)) { 3535 for (i = 0; i < n->max_queue_pairs; i++) { 3536 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3537 } 3538 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3539 } else { 3540 n->host_hdr_len = 0; 3541 } 3542 3543 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3544 3545 n->vqs[0].tx_waiting = 0; 3546 n->tx_burst = n->net_conf.txburst; 3547 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3548 n->promisc = 1; /* for compatibility */ 3549 3550 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3551 3552 n->vlans = g_malloc0(MAX_VLAN >> 3); 3553 3554 nc = qemu_get_queue(n->nic); 3555 nc->rxfilter_notify_enabled = 1; 3556 3557 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3558 struct virtio_net_config netcfg = {}; 3559 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3560 vhost_net_set_config(get_vhost_net(nc->peer), 3561 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3562 } 3563 QTAILQ_INIT(&n->rsc_chains); 3564 n->qdev = dev; 3565 3566 net_rx_pkt_init(&n->rx_pkt, false); 3567 3568 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3569 virtio_net_load_ebpf(n); 3570 } 3571 } 3572 3573 static void virtio_net_device_unrealize(DeviceState *dev) 3574 { 3575 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3576 VirtIONet *n = VIRTIO_NET(dev); 3577 int i, max_queue_pairs; 3578 3579 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3580 virtio_net_unload_ebpf(n); 3581 } 3582 3583 /* This will stop vhost backend if appropriate. */ 3584 virtio_net_set_status(vdev, 0); 3585 3586 g_free(n->netclient_name); 3587 n->netclient_name = NULL; 3588 g_free(n->netclient_type); 3589 n->netclient_type = NULL; 3590 3591 g_free(n->mac_table.macs); 3592 g_free(n->vlans); 3593 3594 if (n->failover) { 3595 qobject_unref(n->primary_opts); 3596 device_listener_unregister(&n->primary_listener); 3597 remove_migration_state_change_notifier(&n->migration_state); 3598 } else { 3599 assert(n->primary_opts == NULL); 3600 } 3601 3602 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3603 for (i = 0; i < max_queue_pairs; i++) { 3604 virtio_net_del_queue(n, i); 3605 } 3606 /* delete also control vq */ 3607 virtio_del_queue(vdev, max_queue_pairs * 2); 3608 qemu_announce_timer_del(&n->announce_timer, false); 3609 g_free(n->vqs); 3610 qemu_del_nic(n->nic); 3611 virtio_net_rsc_cleanup(n); 3612 g_free(n->rss_data.indirections_table); 3613 net_rx_pkt_uninit(n->rx_pkt); 3614 virtio_cleanup(vdev); 3615 } 3616 3617 static void virtio_net_instance_init(Object *obj) 3618 { 3619 VirtIONet *n = VIRTIO_NET(obj); 3620 3621 /* 3622 * The default config_size is sizeof(struct virtio_net_config). 3623 * Can be overriden with virtio_net_set_config_size. 3624 */ 3625 n->config_size = sizeof(struct virtio_net_config); 3626 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3627 "bootindex", "/ethernet-phy@0", 3628 DEVICE(n)); 3629 3630 ebpf_rss_init(&n->ebpf_rss); 3631 } 3632 3633 static int virtio_net_pre_save(void *opaque) 3634 { 3635 VirtIONet *n = opaque; 3636 3637 /* At this point, backend must be stopped, otherwise 3638 * it might keep writing to memory. */ 3639 assert(!n->vhost_started); 3640 3641 return 0; 3642 } 3643 3644 static bool primary_unplug_pending(void *opaque) 3645 { 3646 DeviceState *dev = opaque; 3647 DeviceState *primary; 3648 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3649 VirtIONet *n = VIRTIO_NET(vdev); 3650 3651 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3652 return false; 3653 } 3654 primary = failover_find_primary_device(n); 3655 return primary ? primary->pending_deleted_event : false; 3656 } 3657 3658 static bool dev_unplug_pending(void *opaque) 3659 { 3660 DeviceState *dev = opaque; 3661 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3662 3663 return vdc->primary_unplug_pending(dev); 3664 } 3665 3666 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3667 { 3668 VirtIONet *n = VIRTIO_NET(vdev); 3669 NetClientState *nc = qemu_get_queue(n->nic); 3670 struct vhost_net *net = get_vhost_net(nc->peer); 3671 return &net->dev; 3672 } 3673 3674 static const VMStateDescription vmstate_virtio_net = { 3675 .name = "virtio-net", 3676 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3677 .version_id = VIRTIO_NET_VM_VERSION, 3678 .fields = (VMStateField[]) { 3679 VMSTATE_VIRTIO_DEVICE, 3680 VMSTATE_END_OF_LIST() 3681 }, 3682 .pre_save = virtio_net_pre_save, 3683 .dev_unplug_pending = dev_unplug_pending, 3684 }; 3685 3686 static Property virtio_net_properties[] = { 3687 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3688 VIRTIO_NET_F_CSUM, true), 3689 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3690 VIRTIO_NET_F_GUEST_CSUM, true), 3691 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3692 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3693 VIRTIO_NET_F_GUEST_TSO4, true), 3694 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3695 VIRTIO_NET_F_GUEST_TSO6, true), 3696 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3697 VIRTIO_NET_F_GUEST_ECN, true), 3698 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3699 VIRTIO_NET_F_GUEST_UFO, true), 3700 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3701 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3702 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3703 VIRTIO_NET_F_HOST_TSO4, true), 3704 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3705 VIRTIO_NET_F_HOST_TSO6, true), 3706 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3707 VIRTIO_NET_F_HOST_ECN, true), 3708 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3709 VIRTIO_NET_F_HOST_UFO, true), 3710 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3711 VIRTIO_NET_F_MRG_RXBUF, true), 3712 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3713 VIRTIO_NET_F_STATUS, true), 3714 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3715 VIRTIO_NET_F_CTRL_VQ, true), 3716 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3717 VIRTIO_NET_F_CTRL_RX, true), 3718 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3719 VIRTIO_NET_F_CTRL_VLAN, true), 3720 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3721 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3722 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3723 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3724 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3725 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3726 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3727 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3728 VIRTIO_NET_F_RSS, false), 3729 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3730 VIRTIO_NET_F_HASH_REPORT, false), 3731 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3732 VIRTIO_NET_F_RSC_EXT, false), 3733 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3734 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3735 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3736 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3737 TX_TIMER_INTERVAL), 3738 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3739 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3740 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3741 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3742 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3743 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3744 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3745 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3746 true), 3747 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3748 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3749 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3750 DEFINE_PROP_END_OF_LIST(), 3751 }; 3752 3753 static void virtio_net_class_init(ObjectClass *klass, void *data) 3754 { 3755 DeviceClass *dc = DEVICE_CLASS(klass); 3756 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3757 3758 device_class_set_props(dc, virtio_net_properties); 3759 dc->vmsd = &vmstate_virtio_net; 3760 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3761 vdc->realize = virtio_net_device_realize; 3762 vdc->unrealize = virtio_net_device_unrealize; 3763 vdc->get_config = virtio_net_get_config; 3764 vdc->set_config = virtio_net_set_config; 3765 vdc->get_features = virtio_net_get_features; 3766 vdc->set_features = virtio_net_set_features; 3767 vdc->bad_features = virtio_net_bad_features; 3768 vdc->reset = virtio_net_reset; 3769 vdc->set_status = virtio_net_set_status; 3770 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3771 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3772 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3773 vdc->post_load = virtio_net_post_load_virtio; 3774 vdc->vmsd = &vmstate_virtio_net_device; 3775 vdc->primary_unplug_pending = primary_unplug_pending; 3776 vdc->get_vhost = virtio_net_get_vhost; 3777 } 3778 3779 static const TypeInfo virtio_net_info = { 3780 .name = TYPE_VIRTIO_NET, 3781 .parent = TYPE_VIRTIO_DEVICE, 3782 .instance_size = sizeof(VirtIONet), 3783 .instance_init = virtio_net_instance_init, 3784 .class_init = virtio_net_class_init, 3785 }; 3786 3787 static void virtio_register_types(void) 3788 { 3789 type_register_static(&virtio_net_info); 3790 } 3791 3792 type_init(virtio_register_types) 3793