/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: 300000 (300us) is the recommended
 * value to pass the WHQL test, while 50000 can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

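/*
 * Handle a guest write to config space.  Only a pre-virtio-1 guest that did
 * not negotiate VIRTIO_NET_F_CTRL_MAC_ADDR may change the MAC by writing it
 * here directly; newer guests use the control virtqueue instead.
 */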
Ignoring."); 165 memcpy(netcfg.mac, n->mac, ETH_ALEN); 166 } 167 memcpy(config, &netcfg, n->config_size); 168 } 169 } 170 } 171 172 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 173 { 174 VirtIONet *n = VIRTIO_NET(vdev); 175 struct virtio_net_config netcfg = {}; 176 NetClientState *nc = qemu_get_queue(n->nic); 177 178 memcpy(&netcfg, config, n->config_size); 179 180 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 181 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 182 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 183 memcpy(n->mac, netcfg.mac, ETH_ALEN); 184 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 185 } 186 187 /* 188 * Is this VDPA? No peer means not VDPA: there's no way to 189 * disconnect/reconnect a VDPA peer. 190 */ 191 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 192 vhost_net_set_config(get_vhost_net(nc->peer), 193 (uint8_t *)&netcfg, 0, n->config_size, 194 VHOST_SET_CONFIG_TYPE_MASTER); 195 } 196 } 197 198 static bool virtio_net_started(VirtIONet *n, uint8_t status) 199 { 200 VirtIODevice *vdev = VIRTIO_DEVICE(n); 201 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 202 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 203 } 204 205 static void virtio_net_announce_notify(VirtIONet *net) 206 { 207 VirtIODevice *vdev = VIRTIO_DEVICE(net); 208 trace_virtio_net_announce_notify(); 209 210 net->status |= VIRTIO_NET_S_ANNOUNCE; 211 virtio_notify_config(vdev); 212 } 213 214 static void virtio_net_announce_timer(void *opaque) 215 { 216 VirtIONet *n = opaque; 217 trace_virtio_net_announce_timer(n->announce_timer.round); 218 219 n->announce_timer.round--; 220 virtio_net_announce_notify(n); 221 } 222 223 static void virtio_net_announce(NetClientState *nc) 224 { 225 VirtIONet *n = qemu_get_nic_opaque(nc); 226 VirtIODevice *vdev = VIRTIO_DEVICE(n); 227 228 /* 229 * Make sure the virtio migration announcement timer isn't running 230 * If it is, let it trigger announcement so that we do not cause 231 * confusion. 232 */ 233 if (n->announce_timer.round) { 234 return; 235 } 236 237 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 238 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 239 virtio_net_announce_notify(n); 240 } 241 } 242 243 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 244 { 245 VirtIODevice *vdev = VIRTIO_DEVICE(n); 246 NetClientState *nc = qemu_get_queue(n->nic); 247 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 248 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 249 n->max_ncs - n->max_queue_pairs : 0; 250 251 if (!get_vhost_net(nc->peer)) { 252 return; 253 } 254 255 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 256 !!n->vhost_started) { 257 return; 258 } 259 if (!n->vhost_started) { 260 int r, i; 261 262 if (n->needs_vnet_hdr_swap) { 263 error_report("backend does not support %s vnet headers; " 264 "falling back on userspace virtio", 265 virtio_is_big_endian(vdev) ? "BE" : "LE"); 266 return; 267 } 268 269 /* Any packets outstanding? Purge them to avoid touching rings 270 * when vhost is running. 271 */ 272 for (i = 0; i < queue_pairs; i++) { 273 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 274 275 /* Purge both directions: TX and RX. 
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

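/*
 * Drop everything queued on a TX virtqueue and notify the guest so it can
 * reclaim its buffers; used when a queue is being stopped while packets
 * are still pending.
 */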
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

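/*
 * Build the QMP query-rx-filter response from the current device state:
 * rx-mode flags, main MAC, unicast/multicast MAC tables and the VLAN
 * bitmap.  Querying re-arms the one-shot RX_FILTER_CHANGED notification.
 */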
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

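/*
 * Pick the guest-visible header size: virtio-1 uses the mergeable-buffers
 * layout (extended with a hash field when hash reporting is negotiated);
 * legacy guests get the short header unless VIRTIO_NET_F_MRG_RXBUF was
 * negotiated.  The host header matches it only when the peer supports
 * that length.
 */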
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked
 * @opaque: the FailoverDevice to fill in
 *
 * Returns nonzero to stop the walk once the primary has been found.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

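/*
 * Plug in the failover primary device.  If a device with a matching
 * failover_pair_id already exists there is nothing to do; otherwise it is
 * created from the stashed -device options, which are dropped on failure.
 */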
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

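/*
 * VLAN filtering uses a 4096-bit bitmap (MAX_VLAN >> 5 words of 32 bits):
 * vid >> 5 picks the word and 1 << (vid & 0x1f) the bit, so e.g. VID 100
 * is bit 4 of word 3.
 */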
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

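/*
 * Parse a VIRTIO_NET_CTRL_MQ_{RSS,HASH}_CONFIG payload.  The layout mirrors
 * struct virtio_net_rss_config: a fixed header, a variable-length
 * indirection table, a 16-bit queue count (max_tx_vq) and the key length
 * byte followed by the hash key itself.  Returns the number of queue pairs
 * to use, or 0 on error.
 */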
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

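/*
 * VIRTIO_NET_CTRL_MQ: either a plain VQ_PAIRS_SET, or the RSS/hash-report
 * variants which derive the queue pair count from the RSS configuration.
 */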
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

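/*
 * Dispatch one control-queue command.  The out iovec starts with a
 * virtio_net_ctrl_hdr selecting class/command; the single-byte ack is
 * written back to the in iovec.
 */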
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

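/*
 * Check that the RX ring can hold a packet of @bufsize, re-enabling and
 * then disabling guest notifications around the check to close the race
 * with the guest adding buffers concurrently.
 */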
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

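/*
 * Apply the rx-mode, MAC and VLAN filters to an incoming frame; returns
 * nonzero if the packet should be delivered to the guest.
 */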
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

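/*
 * Classify the packet, compute the RSS hash via net_rx_pkt_calc_rss_hash()
 * over the negotiated hash types and map it through the indirection table.
 * Returns the target queue index, or -1 if the packet should stay on the
 * current queue.
 */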
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

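/*
 * Core RX path: after software RSS steering and filtering, copy the frame
 * into as many guest buffers as needed (when mergeable buffers are
 * negotiated), prepending the virtio-net header, then fill and flush the
 * virtqueue in one go.
 */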
*/ 1865 if (!n->mergeable_rx_bufs && offset < size) { 1866 virtqueue_unpop(q->rx_vq, elem, total); 1867 g_free(elem); 1868 err = size; 1869 goto err; 1870 } 1871 1872 elems[i] = elem; 1873 lens[i] = total; 1874 i++; 1875 } 1876 1877 if (mhdr_cnt) { 1878 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1879 iov_from_buf(mhdr_sg, mhdr_cnt, 1880 0, 1881 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1882 } 1883 1884 for (j = 0; j < i; j++) { 1885 /* signal other side */ 1886 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1887 g_free(elems[j]); 1888 } 1889 1890 virtqueue_flush(q->rx_vq, i); 1891 virtio_notify(vdev, q->rx_vq); 1892 1893 return size; 1894 1895 err: 1896 for (j = 0; j < i; j++) { 1897 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1898 g_free(elems[j]); 1899 } 1900 1901 return err; 1902 } 1903 1904 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1905 size_t size) 1906 { 1907 RCU_READ_LOCK_GUARD(); 1908 1909 return virtio_net_receive_rcu(nc, buf, size, false); 1910 } 1911 1912 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1913 const uint8_t *buf, 1914 VirtioNetRscUnit *unit) 1915 { 1916 uint16_t ip_hdrlen; 1917 struct ip_header *ip; 1918 1919 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1920 + sizeof(struct eth_header)); 1921 unit->ip = (void *)ip; 1922 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1923 unit->ip_plen = &ip->ip_len; 1924 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1925 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1926 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1927 } 1928 1929 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1930 const uint8_t *buf, 1931 VirtioNetRscUnit *unit) 1932 { 1933 struct ip6_header *ip6; 1934 1935 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 1936 + sizeof(struct eth_header)); 1937 unit->ip = ip6; 1938 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 1939 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 1940 + sizeof(struct ip6_header)); 1941 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1942 1943 /* There is a difference between payload lenght in ipv4 and v6, 1944 ip header is excluded in ipv6 */ 1945 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 1946 } 1947 1948 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 1949 VirtioNetRscSeg *seg) 1950 { 1951 int ret; 1952 struct virtio_net_hdr_v1 *h; 1953 1954 h = (struct virtio_net_hdr_v1 *)seg->buf; 1955 h->flags = 0; 1956 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 1957 1958 if (seg->is_coalesced) { 1959 h->rsc.segments = seg->packets; 1960 h->rsc.dup_acks = seg->dup_ack; 1961 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 1962 if (chain->proto == ETH_P_IP) { 1963 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1964 } else { 1965 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1966 } 1967 } 1968 1969 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 1970 QTAILQ_REMOVE(&chain->buffers, seg, next); 1971 g_free(seg->buf); 1972 g_free(seg); 1973 1974 return ret; 1975 } 1976 1977 static void virtio_net_rsc_purge(void *opq) 1978 { 1979 VirtioNetRscSeg *seg, *rn; 1980 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 1981 1982 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 1983 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 1984 chain->stat.purge_failed++; 1985 continue; 1986 } 1987 } 1988 1989 chain->stat.timer++; 1990 if (!QTAILQ_EMPTY(&chain->buffers)) { 1991 
timer_mod(chain->drain_timer, 1992 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 1993 } 1994 } 1995 1996 static void virtio_net_rsc_cleanup(VirtIONet *n) 1997 { 1998 VirtioNetRscChain *chain, *rn_chain; 1999 VirtioNetRscSeg *seg, *rn_seg; 2000 2001 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2002 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2003 QTAILQ_REMOVE(&chain->buffers, seg, next); 2004 g_free(seg->buf); 2005 g_free(seg); 2006 } 2007 2008 timer_free(chain->drain_timer); 2009 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2010 g_free(chain); 2011 } 2012 } 2013 2014 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2015 NetClientState *nc, 2016 const uint8_t *buf, size_t size) 2017 { 2018 uint16_t hdr_len; 2019 VirtioNetRscSeg *seg; 2020 2021 hdr_len = chain->n->guest_hdr_len; 2022 seg = g_new(VirtioNetRscSeg, 1); 2023 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2024 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2025 memcpy(seg->buf, buf, size); 2026 seg->size = size; 2027 seg->packets = 1; 2028 seg->dup_ack = 0; 2029 seg->is_coalesced = 0; 2030 seg->nc = nc; 2031 2032 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2033 chain->stat.cache++; 2034 2035 switch (chain->proto) { 2036 case ETH_P_IP: 2037 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2038 break; 2039 case ETH_P_IPV6: 2040 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2041 break; 2042 default: 2043 g_assert_not_reached(); 2044 } 2045 } 2046 2047 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2048 VirtioNetRscSeg *seg, 2049 const uint8_t *buf, 2050 struct tcp_header *n_tcp, 2051 struct tcp_header *o_tcp) 2052 { 2053 uint32_t nack, oack; 2054 uint16_t nwin, owin; 2055 2056 nack = htonl(n_tcp->th_ack); 2057 nwin = htons(n_tcp->th_win); 2058 oack = htonl(o_tcp->th_ack); 2059 owin = htons(o_tcp->th_win); 2060 2061 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2062 chain->stat.ack_out_of_win++; 2063 return RSC_FINAL; 2064 } else if (nack == oack) { 2065 /* duplicated ack or window probe */ 2066 if (nwin == owin) { 2067 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2068 chain->stat.dup_ack++; 2069 return RSC_FINAL; 2070 } else { 2071 /* Coalesce window update */ 2072 o_tcp->th_win = n_tcp->th_win; 2073 chain->stat.win_update++; 2074 return RSC_COALESCE; 2075 } 2076 } else { 2077 /* pure ack, go to 'C', finalize*/ 2078 chain->stat.pure_ack++; 2079 return RSC_FINAL; 2080 } 2081 } 2082 2083 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2084 VirtioNetRscSeg *seg, 2085 const uint8_t *buf, 2086 VirtioNetRscUnit *n_unit) 2087 { 2088 void *data; 2089 uint16_t o_ip_len; 2090 uint32_t nseq, oseq; 2091 VirtioNetRscUnit *o_unit; 2092 2093 o_unit = &seg->unit; 2094 o_ip_len = htons(*o_unit->ip_plen); 2095 nseq = htonl(n_unit->tcp->th_seq); 2096 oseq = htonl(o_unit->tcp->th_seq); 2097 2098 /* out of order or retransmitted. 
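     * The subtraction wraps as unsigned, so both a retransmitted (older)
     * sequence and one far in the future fall outside the window below
     * and finalize the chain.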
     */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* Going from no payload to payload is the normal case,
             * not a duplicate ack or the like. */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet: out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
    coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* This is the expected data. The payload length field differs
         * between v4 and v6, so use the recorded unit value to update
         * and track the new data length. */
        o_unit->payload += n_unit->payload; /* update the new data length */

        /* update the length field in the ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Take over the newer 'PUSH' flag: the WHQL test guide says 'PUSH'
         * can be coalesced for Windows guests, while this may change the
         * behavior for Linux guests (only if they use the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' set bypass coalescing; packets with any other control
 * flag are sent only after the flow is drained, to prevent reordering. */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if
(tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2204 chain->stat.tcp_ctrl_drain++; 2205 return RSC_FINAL; 2206 } 2207 2208 if (tcp_hdr > sizeof(struct tcp_header)) { 2209 chain->stat.tcp_all_opt++; 2210 return RSC_FINAL; 2211 } 2212 2213 return RSC_CANDIDATE; 2214 } 2215 2216 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2217 NetClientState *nc, 2218 const uint8_t *buf, size_t size, 2219 VirtioNetRscUnit *unit) 2220 { 2221 int ret; 2222 VirtioNetRscSeg *seg, *nseg; 2223 2224 if (QTAILQ_EMPTY(&chain->buffers)) { 2225 chain->stat.empty_cache++; 2226 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2227 timer_mod(chain->drain_timer, 2228 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2229 return size; 2230 } 2231 2232 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2233 if (chain->proto == ETH_P_IP) { 2234 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2235 } else { 2236 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2237 } 2238 2239 if (ret == RSC_FINAL) { 2240 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2241 /* Send failed */ 2242 chain->stat.final_failed++; 2243 return 0; 2244 } 2245 2246 /* Send current packet */ 2247 return virtio_net_do_receive(nc, buf, size); 2248 } else if (ret == RSC_NO_MATCH) { 2249 continue; 2250 } else { 2251 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2252 seg->is_coalesced = 1; 2253 return size; 2254 } 2255 } 2256 2257 chain->stat.no_match_cache++; 2258 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2259 return size; 2260 } 2261 2262 /* Drain a connection data, this is to avoid out of order segments */ 2263 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2264 NetClientState *nc, 2265 const uint8_t *buf, size_t size, 2266 uint16_t ip_start, uint16_t ip_size, 2267 uint16_t tcp_port) 2268 { 2269 VirtioNetRscSeg *seg, *nseg; 2270 uint32_t ppair1, ppair2; 2271 2272 ppair1 = *(uint32_t *)(buf + tcp_port); 2273 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2274 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2275 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2276 || (ppair1 != ppair2)) { 2277 continue; 2278 } 2279 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2280 chain->stat.drain_failed++; 2281 } 2282 2283 break; 2284 } 2285 2286 return virtio_net_do_receive(nc, buf, size); 2287 } 2288 2289 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2290 struct ip_header *ip, 2291 const uint8_t *buf, size_t size) 2292 { 2293 uint16_t ip_len; 2294 2295 /* Not an ipv4 packet */ 2296 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2297 chain->stat.ip_option++; 2298 return RSC_BYPASS; 2299 } 2300 2301 /* Don't handle packets with ip option */ 2302 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2303 chain->stat.ip_option++; 2304 return RSC_BYPASS; 2305 } 2306 2307 if (ip->ip_p != IPPROTO_TCP) { 2308 chain->stat.bypass_not_tcp++; 2309 return RSC_BYPASS; 2310 } 2311 2312 /* Don't handle packets with ip fragment */ 2313 if (!(htons(ip->ip_off) & IP_DF)) { 2314 chain->stat.ip_frag++; 2315 return RSC_BYPASS; 2316 } 2317 2318 /* Don't handle packets with ecn flag */ 2319 if (IPTOS_ECN(ip->ip_tos)) { 2320 chain->stat.ip_ecn++; 2321 return RSC_BYPASS; 2322 } 2323 2324 ip_len = htons(ip->ip_len); 2325 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2326 || ip_len > (size - chain->n->guest_hdr_len - 2327 sizeof(struct eth_header))) { 2328 chain->stat.ip_hacked++; 2329 return 
RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* 12 is the offset of the IPv4 source address within the IP
         * header; source and destination are compared as one block. */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and the protocol are checked here: any extension
     * header makes ip6_un1_nxt something other than TCP. */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* 8 is the offset of the IPv6 source address within the IP header. */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain
*chain; 2445 2446 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2447 return NULL; 2448 } 2449 2450 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2451 if (chain->proto == proto) { 2452 return chain; 2453 } 2454 } 2455 2456 chain = g_malloc(sizeof(*chain)); 2457 chain->n = n; 2458 chain->proto = proto; 2459 if (proto == (uint16_t)ETH_P_IP) { 2460 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2461 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2462 } else { 2463 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2464 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2465 } 2466 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2467 virtio_net_rsc_purge, chain); 2468 memset(&chain->stat, 0, sizeof(chain->stat)); 2469 2470 QTAILQ_INIT(&chain->buffers); 2471 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2472 2473 return chain; 2474 } 2475 2476 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2477 const uint8_t *buf, 2478 size_t size) 2479 { 2480 uint16_t proto; 2481 VirtioNetRscChain *chain; 2482 struct eth_header *eth; 2483 VirtIONet *n; 2484 2485 n = qemu_get_nic_opaque(nc); 2486 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2487 return virtio_net_do_receive(nc, buf, size); 2488 } 2489 2490 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2491 proto = htons(eth->h_proto); 2492 2493 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2494 if (chain) { 2495 chain->stat.received++; 2496 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2497 return virtio_net_rsc_receive4(chain, nc, buf, size); 2498 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2499 return virtio_net_rsc_receive6(chain, nc, buf, size); 2500 } 2501 } 2502 return virtio_net_do_receive(nc, buf, size); 2503 } 2504 2505 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2506 size_t size) 2507 { 2508 VirtIONet *n = qemu_get_nic_opaque(nc); 2509 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2510 return virtio_net_rsc_receive(nc, buf, size); 2511 } else { 2512 return virtio_net_do_receive(nc, buf, size); 2513 } 2514 } 2515 2516 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2517 2518 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2519 { 2520 VirtIONet *n = qemu_get_nic_opaque(nc); 2521 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2522 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2523 2524 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2525 virtio_notify(vdev, q->tx_vq); 2526 2527 g_free(q->async_tx.elem); 2528 q->async_tx.elem = NULL; 2529 2530 virtio_queue_set_notification(q->tx_vq, 1); 2531 virtio_net_flush_tx(q); 2532 } 2533 2534 /* TX */ 2535 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2536 { 2537 VirtIONet *n = q->n; 2538 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2539 VirtQueueElement *elem; 2540 int32_t num_packets = 0; 2541 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2542 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2543 return num_packets; 2544 } 2545 2546 if (q->async_tx.elem) { 2547 virtio_queue_set_notification(q->tx_vq, 0); 2548 return num_packets; 2549 } 2550 2551 for (;;) { 2552 ssize_t ret; 2553 unsigned int out_num; 2554 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2555 struct virtio_net_hdr_mrg_rxbuf mhdr; 2556 2557 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2558 if (!elem) { 2559 break; 2560 } 2561 2562 out_num = elem->out_num; 2563 out_sg = elem->out_sg; 2564 if (out_num < 1) { 2565 virtio_error(vdev, "virtio-net header not in first 
element"); 2566 virtqueue_detach_element(q->tx_vq, elem, 0); 2567 g_free(elem); 2568 return -EINVAL; 2569 } 2570 2571 if (n->has_vnet_hdr) { 2572 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2573 n->guest_hdr_len) { 2574 virtio_error(vdev, "virtio-net header incorrect"); 2575 virtqueue_detach_element(q->tx_vq, elem, 0); 2576 g_free(elem); 2577 return -EINVAL; 2578 } 2579 if (n->needs_vnet_hdr_swap) { 2580 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2581 sg2[0].iov_base = &mhdr; 2582 sg2[0].iov_len = n->guest_hdr_len; 2583 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2584 out_sg, out_num, 2585 n->guest_hdr_len, -1); 2586 if (out_num == VIRTQUEUE_MAX_SIZE) { 2587 goto drop; 2588 } 2589 out_num += 1; 2590 out_sg = sg2; 2591 } 2592 } 2593 /* 2594 * If host wants to see the guest header as is, we can 2595 * pass it on unchanged. Otherwise, copy just the parts 2596 * that host is interested in. 2597 */ 2598 assert(n->host_hdr_len <= n->guest_hdr_len); 2599 if (n->host_hdr_len != n->guest_hdr_len) { 2600 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2601 out_sg, out_num, 2602 0, n->host_hdr_len); 2603 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2604 out_sg, out_num, 2605 n->guest_hdr_len, -1); 2606 out_num = sg_num; 2607 out_sg = sg; 2608 } 2609 2610 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2611 out_sg, out_num, virtio_net_tx_complete); 2612 if (ret == 0) { 2613 virtio_queue_set_notification(q->tx_vq, 0); 2614 q->async_tx.elem = elem; 2615 return -EBUSY; 2616 } 2617 2618 drop: 2619 virtqueue_push(q->tx_vq, elem, 0); 2620 virtio_notify(vdev, q->tx_vq); 2621 g_free(elem); 2622 2623 if (++num_packets >= n->tx_burst) { 2624 break; 2625 } 2626 } 2627 return num_packets; 2628 } 2629 2630 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2631 { 2632 VirtIONet *n = VIRTIO_NET(vdev); 2633 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2634 2635 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2636 virtio_net_drop_tx_queue_data(vdev, vq); 2637 return; 2638 } 2639 2640 /* This happens when device was stopped but VCPU wasn't. */ 2641 if (!vdev->vm_running) { 2642 q->tx_waiting = 1; 2643 return; 2644 } 2645 2646 if (q->tx_waiting) { 2647 virtio_queue_set_notification(vq, 1); 2648 timer_del(q->tx_timer); 2649 q->tx_waiting = 0; 2650 if (virtio_net_flush_tx(q) == -EINVAL) { 2651 return; 2652 } 2653 } else { 2654 timer_mod(q->tx_timer, 2655 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2656 q->tx_waiting = 1; 2657 virtio_queue_set_notification(vq, 0); 2658 } 2659 } 2660 2661 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2662 { 2663 VirtIONet *n = VIRTIO_NET(vdev); 2664 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2665 2666 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2667 virtio_net_drop_tx_queue_data(vdev, vq); 2668 return; 2669 } 2670 2671 if (unlikely(q->tx_waiting)) { 2672 return; 2673 } 2674 q->tx_waiting = 1; 2675 /* This happens when device was stopped but VCPU wasn't. */ 2676 if (!vdev->vm_running) { 2677 return; 2678 } 2679 virtio_queue_set_notification(vq, 0); 2680 qemu_bh_schedule(q->tx_bh); 2681 } 2682 2683 static void virtio_net_tx_timer(void *opaque) 2684 { 2685 VirtIONetQueue *q = opaque; 2686 VirtIONet *n = q->n; 2687 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2688 /* This happens when device was stopped but BH wasn't. */ 2689 if (!vdev->vm_running) { 2690 /* Make sure tx waiting is set, so we'll run when restarted. 
         */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete, or the
                 * device is broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    /* RX sits at the even vq index, TX at the following odd one; see vq2q() */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

static void virtio_net_change_num_queue_pairs(VirtIONet *n,
                                              int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always
need to remove and add ctrl vq if 2816 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2817 * and then we only enter one of the following two loops. 2818 */ 2819 virtio_del_queue(vdev, old_num_queues - 1); 2820 2821 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2822 /* new_num_queues < old_num_queues */ 2823 virtio_net_del_queue(n, i / 2); 2824 } 2825 2826 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2827 /* new_num_queues > old_num_queues */ 2828 virtio_net_add_queue(n, i / 2); 2829 } 2830 2831 /* add ctrl_vq last */ 2832 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2833 } 2834 2835 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2836 { 2837 int max = multiqueue ? n->max_queue_pairs : 1; 2838 2839 n->multiqueue = multiqueue; 2840 virtio_net_change_num_queue_pairs(n, max); 2841 2842 virtio_net_set_queue_pairs(n); 2843 } 2844 2845 static int virtio_net_post_load_device(void *opaque, int version_id) 2846 { 2847 VirtIONet *n = opaque; 2848 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2849 int i, link_down; 2850 2851 trace_virtio_net_post_load_device(); 2852 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2853 virtio_vdev_has_feature(vdev, 2854 VIRTIO_F_VERSION_1), 2855 virtio_vdev_has_feature(vdev, 2856 VIRTIO_NET_F_HASH_REPORT)); 2857 2858 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2859 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2860 n->mac_table.in_use = 0; 2861 } 2862 2863 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2864 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2865 } 2866 2867 /* 2868 * curr_guest_offloads will be later overwritten by the 2869 * virtio_set_features_nocheck call done from the virtio_load. 2870 * Here we make sure it is preserved and restored accordingly 2871 * in the virtio_net_post_load_virtio callback. 
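     * (saved_guest_offloads is only a staging copy; it has no meaning
     * outside this load sequence.)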
2872 */ 2873 n->saved_guest_offloads = n->curr_guest_offloads; 2874 2875 virtio_net_set_queue_pairs(n); 2876 2877 /* Find the first multicast entry in the saved MAC filter */ 2878 for (i = 0; i < n->mac_table.in_use; i++) { 2879 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2880 break; 2881 } 2882 } 2883 n->mac_table.first_multi = i; 2884 2885 /* nc.link_down can't be migrated, so infer link_down according 2886 * to link status bit in n->status */ 2887 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2888 for (i = 0; i < n->max_queue_pairs; i++) { 2889 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2890 } 2891 2892 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2893 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 2894 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 2895 QEMU_CLOCK_VIRTUAL, 2896 virtio_net_announce_timer, n); 2897 if (n->announce_timer.round) { 2898 timer_mod(n->announce_timer.tm, 2899 qemu_clock_get_ms(n->announce_timer.type)); 2900 } else { 2901 qemu_announce_timer_del(&n->announce_timer, false); 2902 } 2903 } 2904 2905 if (n->rss_data.enabled) { 2906 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 2907 if (!n->rss_data.populate_hash) { 2908 if (!virtio_net_attach_epbf_rss(n)) { 2909 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 2910 warn_report("Can't post-load eBPF RSS for vhost"); 2911 } else { 2912 warn_report("Can't post-load eBPF RSS - " 2913 "fallback to software RSS"); 2914 n->rss_data.enabled_software_rss = true; 2915 } 2916 } 2917 } 2918 2919 trace_virtio_net_rss_enable(n->rss_data.hash_types, 2920 n->rss_data.indirections_len, 2921 sizeof(n->rss_data.key)); 2922 } else { 2923 trace_virtio_net_rss_disable(); 2924 } 2925 return 0; 2926 } 2927 2928 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 2929 { 2930 VirtIONet *n = VIRTIO_NET(vdev); 2931 /* 2932 * The actual needed state is now in saved_guest_offloads, 2933 * see virtio_net_post_load_device for detail. 2934 * Restore it back and apply the desired offloads. 2935 */ 2936 n->curr_guest_offloads = n->saved_guest_offloads; 2937 if (peer_has_vnet_hdr(n)) { 2938 virtio_net_apply_guest_offloads(n); 2939 } 2940 2941 return 0; 2942 } 2943 2944 /* tx_waiting field of a VirtIONetQueue */ 2945 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 2946 .name = "virtio-net-queue-tx_waiting", 2947 .fields = (VMStateField[]) { 2948 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 2949 VMSTATE_END_OF_LIST() 2950 }, 2951 }; 2952 2953 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 2954 { 2955 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 2956 } 2957 2958 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 2959 { 2960 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 2961 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 2962 } 2963 2964 static bool mac_table_fits(void *opaque, int version_id) 2965 { 2966 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 2967 } 2968 2969 static bool mac_table_doesnt_fit(void *opaque, int version_id) 2970 { 2971 return !mac_table_fits(opaque, version_id); 2972 } 2973 2974 /* This temporary type is shared by all the WITH_TMP methods 2975 * although only some fields are used by each. 
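 * VMSTATE_WITH_TMP allocates the struct, points .parent at the device
 * and runs the wrapped vmsd only for the duration of the save/load.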
2976 */ 2977 struct VirtIONetMigTmp { 2978 VirtIONet *parent; 2979 VirtIONetQueue *vqs_1; 2980 uint16_t curr_queue_pairs_1; 2981 uint8_t has_ufo; 2982 uint32_t has_vnet_hdr; 2983 }; 2984 2985 /* The 2nd and subsequent tx_waiting flags are loaded later than 2986 * the 1st entry in the queue_pairs and only if there's more than one 2987 * entry. We use the tmp mechanism to calculate a temporary 2988 * pointer and count and also validate the count. 2989 */ 2990 2991 static int virtio_net_tx_waiting_pre_save(void *opaque) 2992 { 2993 struct VirtIONetMigTmp *tmp = opaque; 2994 2995 tmp->vqs_1 = tmp->parent->vqs + 1; 2996 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 2997 if (tmp->parent->curr_queue_pairs == 0) { 2998 tmp->curr_queue_pairs_1 = 0; 2999 } 3000 3001 return 0; 3002 } 3003 3004 static int virtio_net_tx_waiting_pre_load(void *opaque) 3005 { 3006 struct VirtIONetMigTmp *tmp = opaque; 3007 3008 /* Reuse the pointer setup from save */ 3009 virtio_net_tx_waiting_pre_save(opaque); 3010 3011 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3012 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3013 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3014 3015 return -EINVAL; 3016 } 3017 3018 return 0; /* all good */ 3019 } 3020 3021 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3022 .name = "virtio-net-tx_waiting", 3023 .pre_load = virtio_net_tx_waiting_pre_load, 3024 .pre_save = virtio_net_tx_waiting_pre_save, 3025 .fields = (VMStateField[]) { 3026 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3027 curr_queue_pairs_1, 3028 vmstate_virtio_net_queue_tx_waiting, 3029 struct VirtIONetQueue), 3030 VMSTATE_END_OF_LIST() 3031 }, 3032 }; 3033 3034 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3035 * flag set we need to check that we have it 3036 */ 3037 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3038 { 3039 struct VirtIONetMigTmp *tmp = opaque; 3040 3041 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3042 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3043 return -EINVAL; 3044 } 3045 3046 return 0; 3047 } 3048 3049 static int virtio_net_ufo_pre_save(void *opaque) 3050 { 3051 struct VirtIONetMigTmp *tmp = opaque; 3052 3053 tmp->has_ufo = tmp->parent->has_ufo; 3054 3055 return 0; 3056 } 3057 3058 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3059 .name = "virtio-net-ufo", 3060 .post_load = virtio_net_ufo_post_load, 3061 .pre_save = virtio_net_ufo_pre_save, 3062 .fields = (VMStateField[]) { 3063 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3064 VMSTATE_END_OF_LIST() 3065 }, 3066 }; 3067 3068 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3069 * flag set we need to check that we have it 3070 */ 3071 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3072 { 3073 struct VirtIONetMigTmp *tmp = opaque; 3074 3075 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3076 error_report("virtio-net: saved image requires vnet_hdr=on"); 3077 return -EINVAL; 3078 } 3079 3080 return 0; 3081 } 3082 3083 static int virtio_net_vnet_pre_save(void *opaque) 3084 { 3085 struct VirtIONetMigTmp *tmp = opaque; 3086 3087 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3088 3089 return 0; 3090 } 3091 3092 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3093 .name = "virtio-net-vnet", 3094 .post_load = virtio_net_vnet_post_load, 3095 .pre_save = virtio_net_vnet_pre_save, 
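    /* has_vnet_hdr is a uint32_t in the tmp struct, matching the width
     * this flag has always had in the migration stream. */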
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}

static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: if it fits we load it, else we throw it away
         * - can happen if the source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
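         * Each of the MAX_VLAN (4096) possible VLAN ids maps to one bit,
         * hence the MAX_VLAN >> 3 byte buffer below.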
3157 */ 3158 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3159 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3160 vmstate_virtio_net_has_vnet), 3161 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3162 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3163 VMSTATE_UINT8(alluni, VirtIONet), 3164 VMSTATE_UINT8(nomulti, VirtIONet), 3165 VMSTATE_UINT8(nouni, VirtIONet), 3166 VMSTATE_UINT8(nobcast, VirtIONet), 3167 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3168 vmstate_virtio_net_has_ufo), 3169 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3170 vmstate_info_uint16_equal, uint16_t), 3171 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3172 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3173 vmstate_virtio_net_tx_waiting), 3174 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3175 has_ctrl_guest_offloads), 3176 VMSTATE_END_OF_LIST() 3177 }, 3178 .subsections = (const VMStateDescription * []) { 3179 &vmstate_virtio_net_rss, 3180 NULL 3181 } 3182 }; 3183 3184 static NetClientInfo net_virtio_info = { 3185 .type = NET_CLIENT_DRIVER_NIC, 3186 .size = sizeof(NICState), 3187 .can_receive = virtio_net_can_receive, 3188 .receive = virtio_net_receive, 3189 .link_status_changed = virtio_net_set_link_status, 3190 .query_rx_filter = virtio_net_query_rxfilter, 3191 .announce = virtio_net_announce, 3192 }; 3193 3194 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3195 { 3196 VirtIONet *n = VIRTIO_NET(vdev); 3197 NetClientState *nc; 3198 assert(n->vhost_started); 3199 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3200 /* Must guard against invalid features and bogus queue index 3201 * from being set by malicious guest, or penetrated through 3202 * buggy migration stream. 3203 */ 3204 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3205 qemu_log_mask(LOG_GUEST_ERROR, 3206 "%s: bogus vq index ignored\n", __func__); 3207 return false; 3208 } 3209 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3210 } else { 3211 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3212 } 3213 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3214 } 3215 3216 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3217 bool mask) 3218 { 3219 VirtIONet *n = VIRTIO_NET(vdev); 3220 NetClientState *nc; 3221 assert(n->vhost_started); 3222 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3223 /* Must guard against invalid features and bogus queue index 3224 * from being set by malicious guest, or penetrated through 3225 * buggy migration stream. 3226 */ 3227 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3228 qemu_log_mask(LOG_GUEST_ERROR, 3229 "%s: bogus vq index ignored\n", __func__); 3230 return; 3231 } 3232 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3233 } else { 3234 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3235 } 3236 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3237 vdev, idx, mask); 3238 } 3239 3240 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3241 { 3242 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3243 3244 n->config_size = virtio_feature_get_config_size(feature_sizes, 3245 host_features); 3246 } 3247 3248 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3249 const char *type) 3250 { 3251 /* 3252 * The name can be NULL, the netclient name will be type.x. 
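     * (x is an index the net layer picks to keep client names unique.)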
3253 */ 3254 assert(type != NULL); 3255 3256 g_free(n->netclient_name); 3257 g_free(n->netclient_type); 3258 n->netclient_name = g_strdup(name); 3259 n->netclient_type = g_strdup(type); 3260 } 3261 3262 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3263 { 3264 HotplugHandler *hotplug_ctrl; 3265 PCIDevice *pci_dev; 3266 Error *err = NULL; 3267 3268 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3269 if (hotplug_ctrl) { 3270 pci_dev = PCI_DEVICE(dev); 3271 pci_dev->partially_hotplugged = true; 3272 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3273 if (err) { 3274 error_report_err(err); 3275 return false; 3276 } 3277 } else { 3278 return false; 3279 } 3280 return true; 3281 } 3282 3283 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3284 Error **errp) 3285 { 3286 Error *err = NULL; 3287 HotplugHandler *hotplug_ctrl; 3288 PCIDevice *pdev = PCI_DEVICE(dev); 3289 BusState *primary_bus; 3290 3291 if (!pdev->partially_hotplugged) { 3292 return true; 3293 } 3294 primary_bus = dev->parent_bus; 3295 if (!primary_bus) { 3296 error_setg(errp, "virtio_net: couldn't find primary bus"); 3297 return false; 3298 } 3299 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3300 qatomic_set(&n->failover_primary_hidden, false); 3301 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3302 if (hotplug_ctrl) { 3303 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3304 if (err) { 3305 goto out; 3306 } 3307 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3308 } 3309 pdev->partially_hotplugged = false; 3310 3311 out: 3312 error_propagate(errp, err); 3313 return !err; 3314 } 3315 3316 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3317 { 3318 bool should_be_hidden; 3319 Error *err = NULL; 3320 DeviceState *dev = failover_find_primary_device(n); 3321 3322 if (!dev) { 3323 return; 3324 } 3325 3326 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3327 3328 if (migration_in_setup(s) && !should_be_hidden) { 3329 if (failover_unplug_primary(n, dev)) { 3330 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3331 qapi_event_send_unplug_primary(dev->id); 3332 qatomic_set(&n->failover_primary_hidden, true); 3333 } else { 3334 warn_report("couldn't unplug primary device"); 3335 } 3336 } else if (migration_has_failed(s)) { 3337 /* We already unplugged the device let's plug it back */ 3338 if (!failover_replug_primary(n, dev, &err)) { 3339 if (err) { 3340 error_report_err(err); 3341 } 3342 } 3343 } 3344 } 3345 3346 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3347 { 3348 MigrationState *s = data; 3349 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3350 virtio_net_handle_migration_primary(n, s); 3351 } 3352 3353 static bool failover_hide_primary_device(DeviceListener *listener, 3354 const QDict *device_opts, 3355 bool from_json, 3356 Error **errp) 3357 { 3358 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3359 const char *standby_id; 3360 3361 if (!device_opts) { 3362 return false; 3363 } 3364 3365 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3366 return false; 3367 } 3368 3369 if (!qdict_haskey(device_opts, "id")) { 3370 error_setg(errp, "Device with failover_pair_id needs to have id"); 3371 return false; 3372 } 3373 3374 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3375 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3376 return false; 3377 } 3378 3379 /* 3380 * The hide helper can be called several times for a given 
device. 3381 * Check there is only one primary for a virtio-net device but 3382 * don't duplicate the qdict several times if it's called for the same 3383 * device. 3384 */ 3385 if (n->primary_opts) { 3386 const char *old, *new; 3387 /* devices with failover_pair_id always have an id */ 3388 old = qdict_get_str(n->primary_opts, "id"); 3389 new = qdict_get_str(device_opts, "id"); 3390 if (strcmp(old, new) != 0) { 3391 error_setg(errp, "Cannot attach more than one primary device to " 3392 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3393 return false; 3394 } 3395 } else { 3396 n->primary_opts = qdict_clone_shallow(device_opts); 3397 n->primary_opts_from_json = from_json; 3398 } 3399 3400 /* failover_primary_hidden is set during feature negotiation */ 3401 return qatomic_read(&n->failover_primary_hidden); 3402 } 3403 3404 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3405 { 3406 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3407 VirtIONet *n = VIRTIO_NET(dev); 3408 NetClientState *nc; 3409 int i; 3410 3411 if (n->net_conf.mtu) { 3412 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3413 } 3414 3415 if (n->net_conf.duplex_str) { 3416 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3417 n->net_conf.duplex = DUPLEX_HALF; 3418 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3419 n->net_conf.duplex = DUPLEX_FULL; 3420 } else { 3421 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3422 return; 3423 } 3424 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3425 } else { 3426 n->net_conf.duplex = DUPLEX_UNKNOWN; 3427 } 3428 3429 if (n->net_conf.speed < SPEED_UNKNOWN) { 3430 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3431 return; 3432 } 3433 if (n->net_conf.speed >= 0) { 3434 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3435 } 3436 3437 if (n->failover) { 3438 n->primary_listener.hide_device = failover_hide_primary_device; 3439 qatomic_set(&n->failover_primary_hidden, true); 3440 device_listener_register(&n->primary_listener); 3441 n->migration_state.notify = virtio_net_migration_state_notifier; 3442 add_migration_state_change_notifier(&n->migration_state); 3443 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3444 } 3445 3446 virtio_net_set_config_size(n, n->host_features); 3447 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3448 3449 /* 3450 * We set a lower limit on RX queue size to what it always was. 3451 * Guests that want a smaller ring can always resize it without 3452 * help from us (using virtio 1 and up). 
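     * (Legacy virtio gives the driver no way to shrink a ring; with
     * VIRTIO_F_VERSION_1 the queue size register is driver-writable.)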
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the number of datapath queue pairs, since the backend
     * could provide a control queue via its peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
        && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
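         * In that case the caller-chosen type/name pair is used instead
         * of the defaults derived from the qdev typename and id below.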
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}

static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* also delete the control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
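     * (realize does exactly that, recomputing it from the negotiated
     * host features.)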
3632 */ 3633 n->config_size = sizeof(struct virtio_net_config); 3634 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3635 "bootindex", "/ethernet-phy@0", 3636 DEVICE(n)); 3637 3638 ebpf_rss_init(&n->ebpf_rss); 3639 } 3640 3641 static int virtio_net_pre_save(void *opaque) 3642 { 3643 VirtIONet *n = opaque; 3644 3645 /* At this point, backend must be stopped, otherwise 3646 * it might keep writing to memory. */ 3647 assert(!n->vhost_started); 3648 3649 return 0; 3650 } 3651 3652 static bool primary_unplug_pending(void *opaque) 3653 { 3654 DeviceState *dev = opaque; 3655 DeviceState *primary; 3656 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3657 VirtIONet *n = VIRTIO_NET(vdev); 3658 3659 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3660 return false; 3661 } 3662 primary = failover_find_primary_device(n); 3663 return primary ? primary->pending_deleted_event : false; 3664 } 3665 3666 static bool dev_unplug_pending(void *opaque) 3667 { 3668 DeviceState *dev = opaque; 3669 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3670 3671 return vdc->primary_unplug_pending(dev); 3672 } 3673 3674 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3675 { 3676 VirtIONet *n = VIRTIO_NET(vdev); 3677 NetClientState *nc = qemu_get_queue(n->nic); 3678 struct vhost_net *net = get_vhost_net(nc->peer); 3679 return &net->dev; 3680 } 3681 3682 static const VMStateDescription vmstate_virtio_net = { 3683 .name = "virtio-net", 3684 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3685 .version_id = VIRTIO_NET_VM_VERSION, 3686 .fields = (VMStateField[]) { 3687 VMSTATE_VIRTIO_DEVICE, 3688 VMSTATE_END_OF_LIST() 3689 }, 3690 .pre_save = virtio_net_pre_save, 3691 .dev_unplug_pending = dev_unplug_pending, 3692 }; 3693 3694 static Property virtio_net_properties[] = { 3695 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3696 VIRTIO_NET_F_CSUM, true), 3697 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3698 VIRTIO_NET_F_GUEST_CSUM, true), 3699 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3700 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3701 VIRTIO_NET_F_GUEST_TSO4, true), 3702 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3703 VIRTIO_NET_F_GUEST_TSO6, true), 3704 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3705 VIRTIO_NET_F_GUEST_ECN, true), 3706 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3707 VIRTIO_NET_F_GUEST_UFO, true), 3708 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3709 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3710 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3711 VIRTIO_NET_F_HOST_TSO4, true), 3712 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3713 VIRTIO_NET_F_HOST_TSO6, true), 3714 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3715 VIRTIO_NET_F_HOST_ECN, true), 3716 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3717 VIRTIO_NET_F_HOST_UFO, true), 3718 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3719 VIRTIO_NET_F_MRG_RXBUF, true), 3720 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3721 VIRTIO_NET_F_STATUS, true), 3722 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3723 VIRTIO_NET_F_CTRL_VQ, true), 3724 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3725 VIRTIO_NET_F_CTRL_RX, true), 3726 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3727 VIRTIO_NET_F_CTRL_VLAN, true), 3728 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3729 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3730 
DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3731 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3732 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3733 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3734 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3735 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3736 VIRTIO_NET_F_RSS, false), 3737 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3738 VIRTIO_NET_F_HASH_REPORT, false), 3739 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3740 VIRTIO_NET_F_RSC_EXT, false), 3741 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3742 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3743 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3744 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3745 TX_TIMER_INTERVAL), 3746 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3747 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3748 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3749 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3750 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3751 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3752 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3753 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3754 true), 3755 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3756 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3757 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3758 DEFINE_PROP_END_OF_LIST(), 3759 }; 3760 3761 static void virtio_net_class_init(ObjectClass *klass, void *data) 3762 { 3763 DeviceClass *dc = DEVICE_CLASS(klass); 3764 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3765 3766 device_class_set_props(dc, virtio_net_properties); 3767 dc->vmsd = &vmstate_virtio_net; 3768 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3769 vdc->realize = virtio_net_device_realize; 3770 vdc->unrealize = virtio_net_device_unrealize; 3771 vdc->get_config = virtio_net_get_config; 3772 vdc->set_config = virtio_net_set_config; 3773 vdc->get_features = virtio_net_get_features; 3774 vdc->set_features = virtio_net_set_features; 3775 vdc->bad_features = virtio_net_bad_features; 3776 vdc->reset = virtio_net_reset; 3777 vdc->set_status = virtio_net_set_status; 3778 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3779 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3780 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3781 vdc->post_load = virtio_net_post_load_virtio; 3782 vdc->vmsd = &vmstate_virtio_net_device; 3783 vdc->primary_unplug_pending = primary_unplug_pending; 3784 vdc->get_vhost = virtio_net_get_vhost; 3785 } 3786 3787 static const TypeInfo virtio_net_info = { 3788 .name = TYPE_VIRTIO_NET, 3789 .parent = TYPE_VIRTIO_DEVICE, 3790 .instance_size = sizeof(VirtIONet), 3791 .instance_init = virtio_net_instance_init, 3792 .class_init = virtio_net_class_init, 3793 }; 3794 3795 static void virtio_register_types(void) 3796 { 3797 type_register_static(&virtio_net_info); 3798 } 3799 3800 type_init(virtio_register_types) 3801