1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ 53 54 /* previously fixed value */ 55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 57 58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 61 62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 63 64 #define VIRTIO_NET_TCP_FLAG 0x3F 65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 66 67 /* IPv4 max payload, 16 bits in the header */ 68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 70 71 /* header length value in ip header without option */ 72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 73 74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 76 77 /* Purge coalesced packets timer interval, This value affects the performance 78 a lot, and should be tuned carefully, '300000'(300us) is the recommended 79 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 80 tso/gso/gro 'off'. */ 81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 82 83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 92 93 static const VirtIOFeature feature_sizes[] = { 94 {.flags = 1ULL << VIRTIO_NET_F_MAC, 95 .end = endof(struct virtio_net_config, mac)}, 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 97 .end = endof(struct virtio_net_config, status)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MQ, 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 100 {.flags = 1ULL << VIRTIO_NET_F_MTU, 101 .end = endof(struct virtio_net_config, mtu)}, 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 103 .end = endof(struct virtio_net_config, duplex)}, 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 105 .end = endof(struct virtio_net_config, supported_hash_types)}, 106 {} 107 }; 108 109 static const VirtIOConfigSizeParams cfg_size_params = { 110 .min_size = endof(struct virtio_net_config, mac), 111 .max_size = sizeof(struct virtio_net_config), 112 .feature_sizes = feature_sizes 113 }; 114 115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 116 { 117 VirtIONet *n = qemu_get_nic_opaque(nc); 118 119 return &n->vqs[nc->queue_index]; 120 } 121 122 static int vq2q(int queue_index) 123 { 124 return queue_index / 2; 125 } 126 127 /* TODO 128 * - we could suppress RX interrupt if we were so inclined. 129 */ 130 131 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 132 { 133 VirtIONet *n = VIRTIO_NET(vdev); 134 struct virtio_net_config netcfg; 135 NetClientState *nc = qemu_get_queue(n->nic); 136 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 137 138 int ret = 0; 139 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 140 virtio_stw_p(vdev, &netcfg.status, n->status); 141 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 142 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 143 memcpy(netcfg.mac, n->mac, ETH_ALEN); 144 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 145 netcfg.duplex = n->net_conf.duplex; 146 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 147 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 148 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 149 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 150 virtio_stl_p(vdev, &netcfg.supported_hash_types, 151 VIRTIO_NET_RSS_SUPPORTED_HASHES); 152 memcpy(config, &netcfg, n->config_size); 153 154 /* 155 * Is this VDPA? No peer means not VDPA: there's no way to 156 * disconnect/reconnect a VDPA peer. 157 */ 158 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 159 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 160 n->config_size); 161 if (ret != -1) { 162 /* 163 * Some NIC/kernel combinations present 0 as the mac address. As 164 * that is not a legal address, try to proceed with the 165 * address from the QEMU command line in the hope that the 166 * address has been configured correctly elsewhere - just not 167 * reported by the device. 168 */ 169 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 170 info_report("Zero hardware mac address detected. Ignoring."); 171 memcpy(netcfg.mac, n->mac, ETH_ALEN); 172 } 173 memcpy(config, &netcfg, n->config_size); 174 } 175 } 176 } 177 178 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 179 { 180 VirtIONet *n = VIRTIO_NET(vdev); 181 struct virtio_net_config netcfg = {}; 182 NetClientState *nc = qemu_get_queue(n->nic); 183 184 memcpy(&netcfg, config, n->config_size); 185 186 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 187 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 188 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 189 memcpy(n->mac, netcfg.mac, ETH_ALEN); 190 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 191 } 192 193 /* 194 * Is this VDPA? No peer means not VDPA: there's no way to 195 * disconnect/reconnect a VDPA peer. 196 */ 197 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 198 vhost_net_set_config(get_vhost_net(nc->peer), 199 (uint8_t *)&netcfg, 0, n->config_size, 200 VHOST_SET_CONFIG_TYPE_MASTER); 201 } 202 } 203 204 static bool virtio_net_started(VirtIONet *n, uint8_t status) 205 { 206 VirtIODevice *vdev = VIRTIO_DEVICE(n); 207 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 208 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 209 } 210 211 static void virtio_net_announce_notify(VirtIONet *net) 212 { 213 VirtIODevice *vdev = VIRTIO_DEVICE(net); 214 trace_virtio_net_announce_notify(); 215 216 net->status |= VIRTIO_NET_S_ANNOUNCE; 217 virtio_notify_config(vdev); 218 } 219 220 static void virtio_net_announce_timer(void *opaque) 221 { 222 VirtIONet *n = opaque; 223 trace_virtio_net_announce_timer(n->announce_timer.round); 224 225 n->announce_timer.round--; 226 virtio_net_announce_notify(n); 227 } 228 229 static void virtio_net_announce(NetClientState *nc) 230 { 231 VirtIONet *n = qemu_get_nic_opaque(nc); 232 VirtIODevice *vdev = VIRTIO_DEVICE(n); 233 234 /* 235 * Make sure the virtio migration announcement timer isn't running 236 * If it is, let it trigger announcement so that we do not cause 237 * confusion. 238 */ 239 if (n->announce_timer.round) { 240 return; 241 } 242 243 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 244 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 245 virtio_net_announce_notify(n); 246 } 247 } 248 249 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 250 { 251 VirtIODevice *vdev = VIRTIO_DEVICE(n); 252 NetClientState *nc = qemu_get_queue(n->nic); 253 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 254 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 255 n->max_ncs - n->max_queue_pairs : 0; 256 257 if (!get_vhost_net(nc->peer)) { 258 return; 259 } 260 261 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 262 !!n->vhost_started) { 263 return; 264 } 265 if (!n->vhost_started) { 266 int r, i; 267 268 if (n->needs_vnet_hdr_swap) { 269 error_report("backend does not support %s vnet headers; " 270 "falling back on userspace virtio", 271 virtio_is_big_endian(vdev) ? "BE" : "LE"); 272 return; 273 } 274 275 /* Any packets outstanding? Purge them to avoid touching rings 276 * when vhost is running. 277 */ 278 for (i = 0; i < queue_pairs; i++) { 279 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 280 281 /* Purge both directions: TX and RX. */ 282 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 283 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 284 } 285 286 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 287 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 288 if (r < 0) { 289 error_report("%uBytes MTU not supported by the backend", 290 n->net_conf.mtu); 291 292 return; 293 } 294 } 295 296 n->vhost_started = 1; 297 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 298 if (r < 0) { 299 error_report("unable to start vhost net: %d: " 300 "falling back on userspace virtio", -r); 301 n->vhost_started = 0; 302 } 303 } else { 304 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 305 n->vhost_started = 0; 306 } 307 } 308 309 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 310 NetClientState *peer, 311 bool enable) 312 { 313 if (virtio_is_big_endian(vdev)) { 314 return qemu_set_vnet_be(peer, enable); 315 } else { 316 return qemu_set_vnet_le(peer, enable); 317 } 318 } 319 320 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 321 int queue_pairs, bool enable) 322 { 323 int i; 324 325 for (i = 0; i < queue_pairs; i++) { 326 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 327 enable) { 328 while (--i >= 0) { 329 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 330 } 331 332 return true; 333 } 334 } 335 336 return false; 337 } 338 339 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 340 { 341 VirtIODevice *vdev = VIRTIO_DEVICE(n); 342 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 343 344 if (virtio_net_started(n, status)) { 345 /* Before using the device, we tell the network backend about the 346 * endianness to use when parsing vnet headers. If the backend 347 * can't do it, we fallback onto fixing the headers in the core 348 * virtio-net code. 349 */ 350 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 351 queue_pairs, true); 352 } else if (virtio_net_started(n, vdev->status)) { 353 /* After using the device, we need to reset the network backend to 354 * the default (guest native endianness), otherwise the guest may 355 * lose network connectivity if it is rebooted into a different 356 * endianness. 357 */ 358 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 359 } 360 } 361 362 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 363 { 364 unsigned int dropped = virtqueue_drop_all(vq); 365 if (dropped) { 366 virtio_notify(vdev, vq); 367 } 368 } 369 370 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 371 { 372 VirtIONet *n = VIRTIO_NET(vdev); 373 VirtIONetQueue *q; 374 int i; 375 uint8_t queue_status; 376 377 virtio_net_vnet_endian_status(n, status); 378 virtio_net_vhost_status(n, status); 379 380 for (i = 0; i < n->max_queue_pairs; i++) { 381 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 382 bool queue_started; 383 q = &n->vqs[i]; 384 385 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 386 queue_status = 0; 387 } else { 388 queue_status = status; 389 } 390 queue_started = 391 virtio_net_started(n, queue_status) && !n->vhost_started; 392 393 if (queue_started) { 394 qemu_flush_queued_packets(ncs); 395 } 396 397 if (!q->tx_waiting) { 398 continue; 399 } 400 401 if (queue_started) { 402 if (q->tx_timer) { 403 timer_mod(q->tx_timer, 404 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 405 } else { 406 qemu_bh_schedule(q->tx_bh); 407 } 408 } else { 409 if (q->tx_timer) { 410 timer_del(q->tx_timer); 411 } else { 412 qemu_bh_cancel(q->tx_bh); 413 } 414 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 415 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 416 vdev->vm_running) { 417 /* if tx is waiting we are likely have some packets in tx queue 418 * and disabled notification */ 419 q->tx_waiting = 0; 420 virtio_queue_set_notification(q->tx_vq, 1); 421 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 422 } 423 } 424 } 425 } 426 427 static void virtio_net_set_link_status(NetClientState *nc) 428 { 429 VirtIONet *n = qemu_get_nic_opaque(nc); 430 VirtIODevice *vdev = VIRTIO_DEVICE(n); 431 uint16_t old_status = n->status; 432 433 if (nc->link_down) 434 n->status &= ~VIRTIO_NET_S_LINK_UP; 435 else 436 n->status |= VIRTIO_NET_S_LINK_UP; 437 438 if (n->status != old_status) 439 virtio_notify_config(vdev); 440 441 virtio_net_set_status(vdev, vdev->status); 442 } 443 444 static void rxfilter_notify(NetClientState *nc) 445 { 446 VirtIONet *n = qemu_get_nic_opaque(nc); 447 448 if (nc->rxfilter_notify_enabled) { 449 char *path = object_get_canonical_path(OBJECT(n->qdev)); 450 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name, 451 n->netclient_name, path); 452 g_free(path); 453 454 /* disable event notification to avoid events flooding */ 455 nc->rxfilter_notify_enabled = 0; 456 } 457 } 458 459 static intList *get_vlan_table(VirtIONet *n) 460 { 461 intList *list; 462 int i, j; 463 464 list = NULL; 465 for (i = 0; i < MAX_VLAN >> 5; i++) { 466 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 467 if (n->vlans[i] & (1U << j)) { 468 QAPI_LIST_PREPEND(list, (i << 5) + j); 469 } 470 } 471 } 472 473 return list; 474 } 475 476 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 477 { 478 VirtIONet *n = qemu_get_nic_opaque(nc); 479 VirtIODevice *vdev = VIRTIO_DEVICE(n); 480 RxFilterInfo *info; 481 strList *str_list; 482 int i; 483 484 info = g_malloc0(sizeof(*info)); 485 info->name = g_strdup(nc->name); 486 info->promiscuous = n->promisc; 487 488 if (n->nouni) { 489 info->unicast = RX_STATE_NONE; 490 } else if (n->alluni) { 491 info->unicast = RX_STATE_ALL; 492 } else { 493 info->unicast = RX_STATE_NORMAL; 494 } 495 496 if (n->nomulti) { 497 info->multicast = RX_STATE_NONE; 498 } else if (n->allmulti) { 499 info->multicast = RX_STATE_ALL; 500 } else { 501 info->multicast = RX_STATE_NORMAL; 502 } 503 504 info->broadcast_allowed = n->nobcast; 505 info->multicast_overflow = n->mac_table.multi_overflow; 506 info->unicast_overflow = n->mac_table.uni_overflow; 507 508 info->main_mac = qemu_mac_strdup_printf(n->mac); 509 510 str_list = NULL; 511 for (i = 0; i < n->mac_table.first_multi; i++) { 512 QAPI_LIST_PREPEND(str_list, 513 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 514 } 515 info->unicast_table = str_list; 516 517 str_list = NULL; 518 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 519 QAPI_LIST_PREPEND(str_list, 520 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 521 } 522 info->multicast_table = str_list; 523 info->vlan_table = get_vlan_table(n); 524 525 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 526 info->vlan = RX_STATE_ALL; 527 } else if (!info->vlan_table) { 528 info->vlan = RX_STATE_NONE; 529 } else { 530 info->vlan = RX_STATE_NORMAL; 531 } 532 533 /* enable event notification after query */ 534 nc->rxfilter_notify_enabled = 1; 535 536 return info; 537 } 538 539 static void virtio_net_reset(VirtIODevice *vdev) 540 { 541 VirtIONet *n = VIRTIO_NET(vdev); 542 int i; 543 544 /* Reset back to compatibility mode */ 545 n->promisc = 1; 546 n->allmulti = 0; 547 n->alluni = 0; 548 n->nomulti = 0; 549 n->nouni = 0; 550 n->nobcast = 0; 551 /* multiqueue is disabled by default */ 552 n->curr_queue_pairs = 1; 553 timer_del(n->announce_timer.tm); 554 n->announce_timer.round = 0; 555 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 556 557 /* Flush any MAC and VLAN filter table state */ 558 n->mac_table.in_use = 0; 559 n->mac_table.first_multi = 0; 560 n->mac_table.multi_overflow = 0; 561 n->mac_table.uni_overflow = 0; 562 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 563 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 564 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 565 memset(n->vlans, 0, MAX_VLAN >> 3); 566 567 /* Flush any async TX */ 568 for (i = 0; i < n->max_queue_pairs; i++) { 569 NetClientState *nc = qemu_get_subqueue(n->nic, i); 570 571 if (nc->peer) { 572 qemu_flush_or_purge_queued_packets(nc->peer, true); 573 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 574 } 575 } 576 } 577 578 static void peer_test_vnet_hdr(VirtIONet *n) 579 { 580 NetClientState *nc = qemu_get_queue(n->nic); 581 if (!nc->peer) { 582 return; 583 } 584 585 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 586 } 587 588 static int peer_has_vnet_hdr(VirtIONet *n) 589 { 590 return n->has_vnet_hdr; 591 } 592 593 static int peer_has_ufo(VirtIONet *n) 594 { 595 if (!peer_has_vnet_hdr(n)) 596 return 0; 597 598 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 599 600 return n->has_ufo; 601 } 602 603 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 604 int version_1, int hash_report) 605 { 606 int i; 607 NetClientState *nc; 608 609 n->mergeable_rx_bufs = mergeable_rx_bufs; 610 611 if (version_1) { 612 n->guest_hdr_len = hash_report ? 613 sizeof(struct virtio_net_hdr_v1_hash) : 614 sizeof(struct virtio_net_hdr_mrg_rxbuf); 615 n->rss_data.populate_hash = !!hash_report; 616 } else { 617 n->guest_hdr_len = n->mergeable_rx_bufs ? 618 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 619 sizeof(struct virtio_net_hdr); 620 } 621 622 for (i = 0; i < n->max_queue_pairs; i++) { 623 nc = qemu_get_subqueue(n->nic, i); 624 625 if (peer_has_vnet_hdr(n) && 626 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 627 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 628 n->host_hdr_len = n->guest_hdr_len; 629 } 630 } 631 } 632 633 static int virtio_net_max_tx_queue_size(VirtIONet *n) 634 { 635 NetClientState *peer = n->nic_conf.peers.ncs[0]; 636 637 /* 638 * Backends other than vhost-user or vhost-vdpa don't support max queue 639 * size. 640 */ 641 if (!peer) { 642 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 643 } 644 645 switch(peer->info->type) { 646 case NET_CLIENT_DRIVER_VHOST_USER: 647 case NET_CLIENT_DRIVER_VHOST_VDPA: 648 return VIRTQUEUE_MAX_SIZE; 649 default: 650 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 651 }; 652 } 653 654 static int peer_attach(VirtIONet *n, int index) 655 { 656 NetClientState *nc = qemu_get_subqueue(n->nic, index); 657 658 if (!nc->peer) { 659 return 0; 660 } 661 662 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 663 vhost_set_vring_enable(nc->peer, 1); 664 } 665 666 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 667 return 0; 668 } 669 670 if (n->max_queue_pairs == 1) { 671 return 0; 672 } 673 674 return tap_enable(nc->peer); 675 } 676 677 static int peer_detach(VirtIONet *n, int index) 678 { 679 NetClientState *nc = qemu_get_subqueue(n->nic, index); 680 681 if (!nc->peer) { 682 return 0; 683 } 684 685 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 686 vhost_set_vring_enable(nc->peer, 0); 687 } 688 689 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 690 return 0; 691 } 692 693 return tap_disable(nc->peer); 694 } 695 696 static void virtio_net_set_queue_pairs(VirtIONet *n) 697 { 698 int i; 699 int r; 700 701 if (n->nic->peer_deleted) { 702 return; 703 } 704 705 for (i = 0; i < n->max_queue_pairs; i++) { 706 if (i < n->curr_queue_pairs) { 707 r = peer_attach(n, i); 708 assert(!r); 709 } else { 710 r = peer_detach(n, i); 711 assert(!r); 712 } 713 } 714 } 715 716 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 717 718 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 719 Error **errp) 720 { 721 VirtIONet *n = VIRTIO_NET(vdev); 722 NetClientState *nc = qemu_get_queue(n->nic); 723 724 /* Firstly sync all virtio-net possible supported features */ 725 features |= n->host_features; 726 727 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 728 729 if (!peer_has_vnet_hdr(n)) { 730 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 731 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 732 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 733 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 734 735 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 736 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 737 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 738 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 739 740 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 741 } 742 743 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 744 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 745 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 746 } 747 748 if (!get_vhost_net(nc->peer)) { 749 return features; 750 } 751 752 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 753 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 754 } 755 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 756 vdev->backend_features = features; 757 758 if (n->mtu_bypass_backend && 759 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 760 features |= (1ULL << VIRTIO_NET_F_MTU); 761 } 762 763 return features; 764 } 765 766 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 767 { 768 uint64_t features = 0; 769 770 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 771 * but also these: */ 772 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 773 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 774 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 775 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 776 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 777 778 return features; 779 } 780 781 static void virtio_net_apply_guest_offloads(VirtIONet *n) 782 { 783 qemu_set_offload(qemu_get_queue(n->nic)->peer, 784 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 785 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 786 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 787 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 788 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); 789 } 790 791 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) 792 { 793 static const uint64_t guest_offloads_mask = 794 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 795 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 796 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 797 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 798 (1ULL << VIRTIO_NET_F_GUEST_UFO); 799 800 return guest_offloads_mask & features; 801 } 802 803 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) 804 { 805 VirtIODevice *vdev = VIRTIO_DEVICE(n); 806 return virtio_net_guest_offloads_by_features(vdev->guest_features); 807 } 808 809 typedef struct { 810 VirtIONet *n; 811 DeviceState *dev; 812 } FailoverDevice; 813 814 /** 815 * Set the failover primary device 816 * 817 * @opaque: FailoverId to setup 818 * @opts: opts for device we are handling 819 * @errp: returns an error if this function fails 820 */ 821 static int failover_set_primary(DeviceState *dev, void *opaque) 822 { 823 FailoverDevice *fdev = opaque; 824 PCIDevice *pci_dev = (PCIDevice *) 825 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 826 827 if (!pci_dev) { 828 return 0; 829 } 830 831 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 832 fdev->dev = dev; 833 return 1; 834 } 835 836 return 0; 837 } 838 839 /** 840 * Find the primary device for this failover virtio-net 841 * 842 * @n: VirtIONet device 843 * @errp: returns an error if this function fails 844 */ 845 static DeviceState *failover_find_primary_device(VirtIONet *n) 846 { 847 FailoverDevice fdev = { 848 .n = n, 849 }; 850 851 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 852 NULL, NULL, &fdev); 853 return fdev.dev; 854 } 855 856 static void failover_add_primary(VirtIONet *n, Error **errp) 857 { 858 Error *err = NULL; 859 DeviceState *dev = failover_find_primary_device(n); 860 861 if (dev) { 862 return; 863 } 864 865 if (!n->primary_opts) { 866 error_setg(errp, "Primary device not found"); 867 error_append_hint(errp, "Virtio-net failover will not work. Make " 868 "sure primary device has parameter" 869 " failover_pair_id=%s\n", n->netclient_name); 870 return; 871 } 872 873 dev = qdev_device_add_from_qdict(n->primary_opts, 874 n->primary_opts_from_json, 875 &err); 876 if (err) { 877 qobject_unref(n->primary_opts); 878 n->primary_opts = NULL; 879 } else { 880 object_unref(OBJECT(dev)); 881 } 882 error_propagate(errp, err); 883 } 884 885 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 886 { 887 VirtIONet *n = VIRTIO_NET(vdev); 888 Error *err = NULL; 889 int i; 890 891 if (n->mtu_bypass_backend && 892 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 893 features &= ~(1ULL << VIRTIO_NET_F_MTU); 894 } 895 896 virtio_net_set_multiqueue(n, 897 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 898 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 899 900 virtio_net_set_mrg_rx_bufs(n, 901 virtio_has_feature(features, 902 VIRTIO_NET_F_MRG_RXBUF), 903 virtio_has_feature(features, 904 VIRTIO_F_VERSION_1), 905 virtio_has_feature(features, 906 VIRTIO_NET_F_HASH_REPORT)); 907 908 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 909 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 910 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 911 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 912 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 913 914 if (n->has_vnet_hdr) { 915 n->curr_guest_offloads = 916 virtio_net_guest_offloads_by_features(features); 917 virtio_net_apply_guest_offloads(n); 918 } 919 920 for (i = 0; i < n->max_queue_pairs; i++) { 921 NetClientState *nc = qemu_get_subqueue(n->nic, i); 922 923 if (!get_vhost_net(nc->peer)) { 924 continue; 925 } 926 vhost_net_ack_features(get_vhost_net(nc->peer), features); 927 } 928 929 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 930 memset(n->vlans, 0, MAX_VLAN >> 3); 931 } else { 932 memset(n->vlans, 0xff, MAX_VLAN >> 3); 933 } 934 935 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 936 qapi_event_send_failover_negotiated(n->netclient_name); 937 qatomic_set(&n->failover_primary_hidden, false); 938 failover_add_primary(n, &err); 939 if (err) { 940 if (!qtest_enabled()) { 941 warn_report_err(err); 942 } else { 943 error_free(err); 944 } 945 } 946 } 947 } 948 949 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 950 struct iovec *iov, unsigned int iov_cnt) 951 { 952 uint8_t on; 953 size_t s; 954 NetClientState *nc = qemu_get_queue(n->nic); 955 956 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 957 if (s != sizeof(on)) { 958 return VIRTIO_NET_ERR; 959 } 960 961 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 962 n->promisc = on; 963 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 964 n->allmulti = on; 965 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 966 n->alluni = on; 967 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 968 n->nomulti = on; 969 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 970 n->nouni = on; 971 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 972 n->nobcast = on; 973 } else { 974 return VIRTIO_NET_ERR; 975 } 976 977 rxfilter_notify(nc); 978 979 return VIRTIO_NET_OK; 980 } 981 982 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 983 struct iovec *iov, unsigned int iov_cnt) 984 { 985 VirtIODevice *vdev = VIRTIO_DEVICE(n); 986 uint64_t offloads; 987 size_t s; 988 989 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 990 return VIRTIO_NET_ERR; 991 } 992 993 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 994 if (s != sizeof(offloads)) { 995 return VIRTIO_NET_ERR; 996 } 997 998 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 999 uint64_t supported_offloads; 1000 1001 offloads = virtio_ldq_p(vdev, &offloads); 1002 1003 if (!n->has_vnet_hdr) { 1004 return VIRTIO_NET_ERR; 1005 } 1006 1007 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1008 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1009 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1010 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1011 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1012 1013 supported_offloads = virtio_net_supported_guest_offloads(n); 1014 if (offloads & ~supported_offloads) { 1015 return VIRTIO_NET_ERR; 1016 } 1017 1018 n->curr_guest_offloads = offloads; 1019 virtio_net_apply_guest_offloads(n); 1020 1021 return VIRTIO_NET_OK; 1022 } else { 1023 return VIRTIO_NET_ERR; 1024 } 1025 } 1026 1027 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1028 struct iovec *iov, unsigned int iov_cnt) 1029 { 1030 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1031 struct virtio_net_ctrl_mac mac_data; 1032 size_t s; 1033 NetClientState *nc = qemu_get_queue(n->nic); 1034 1035 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1036 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1037 return VIRTIO_NET_ERR; 1038 } 1039 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1040 assert(s == sizeof(n->mac)); 1041 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1042 rxfilter_notify(nc); 1043 1044 return VIRTIO_NET_OK; 1045 } 1046 1047 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1048 return VIRTIO_NET_ERR; 1049 } 1050 1051 int in_use = 0; 1052 int first_multi = 0; 1053 uint8_t uni_overflow = 0; 1054 uint8_t multi_overflow = 0; 1055 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1056 1057 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1058 sizeof(mac_data.entries)); 1059 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1060 if (s != sizeof(mac_data.entries)) { 1061 goto error; 1062 } 1063 iov_discard_front(&iov, &iov_cnt, s); 1064 1065 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1066 goto error; 1067 } 1068 1069 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1070 s = iov_to_buf(iov, iov_cnt, 0, macs, 1071 mac_data.entries * ETH_ALEN); 1072 if (s != mac_data.entries * ETH_ALEN) { 1073 goto error; 1074 } 1075 in_use += mac_data.entries; 1076 } else { 1077 uni_overflow = 1; 1078 } 1079 1080 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1081 1082 first_multi = in_use; 1083 1084 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1085 sizeof(mac_data.entries)); 1086 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1087 if (s != sizeof(mac_data.entries)) { 1088 goto error; 1089 } 1090 1091 iov_discard_front(&iov, &iov_cnt, s); 1092 1093 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1094 goto error; 1095 } 1096 1097 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1098 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1099 mac_data.entries * ETH_ALEN); 1100 if (s != mac_data.entries * ETH_ALEN) { 1101 goto error; 1102 } 1103 in_use += mac_data.entries; 1104 } else { 1105 multi_overflow = 1; 1106 } 1107 1108 n->mac_table.in_use = in_use; 1109 n->mac_table.first_multi = first_multi; 1110 n->mac_table.uni_overflow = uni_overflow; 1111 n->mac_table.multi_overflow = multi_overflow; 1112 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1113 g_free(macs); 1114 rxfilter_notify(nc); 1115 1116 return VIRTIO_NET_OK; 1117 1118 error: 1119 g_free(macs); 1120 return VIRTIO_NET_ERR; 1121 } 1122 1123 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1124 struct iovec *iov, unsigned int iov_cnt) 1125 { 1126 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1127 uint16_t vid; 1128 size_t s; 1129 NetClientState *nc = qemu_get_queue(n->nic); 1130 1131 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1132 vid = virtio_lduw_p(vdev, &vid); 1133 if (s != sizeof(vid)) { 1134 return VIRTIO_NET_ERR; 1135 } 1136 1137 if (vid >= MAX_VLAN) 1138 return VIRTIO_NET_ERR; 1139 1140 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1141 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1142 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1143 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1144 else 1145 return VIRTIO_NET_ERR; 1146 1147 rxfilter_notify(nc); 1148 1149 return VIRTIO_NET_OK; 1150 } 1151 1152 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1153 struct iovec *iov, unsigned int iov_cnt) 1154 { 1155 trace_virtio_net_handle_announce(n->announce_timer.round); 1156 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1157 n->status & VIRTIO_NET_S_ANNOUNCE) { 1158 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1159 if (n->announce_timer.round) { 1160 qemu_announce_timer_step(&n->announce_timer); 1161 } 1162 return VIRTIO_NET_OK; 1163 } else { 1164 return VIRTIO_NET_ERR; 1165 } 1166 } 1167 1168 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1169 1170 static void virtio_net_disable_rss(VirtIONet *n) 1171 { 1172 if (n->rss_data.enabled) { 1173 trace_virtio_net_rss_disable(); 1174 } 1175 n->rss_data.enabled = false; 1176 1177 virtio_net_detach_epbf_rss(n); 1178 } 1179 1180 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1181 { 1182 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1183 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1184 return false; 1185 } 1186 1187 return nc->info->set_steering_ebpf(nc, prog_fd); 1188 } 1189 1190 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1191 struct EBPFRSSConfig *config) 1192 { 1193 config->redirect = data->redirect; 1194 config->populate_hash = data->populate_hash; 1195 config->hash_types = data->hash_types; 1196 config->indirections_len = data->indirections_len; 1197 config->default_queue = data->default_queue; 1198 } 1199 1200 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1201 { 1202 struct EBPFRSSConfig config = {}; 1203 1204 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1205 return false; 1206 } 1207 1208 rss_data_to_rss_config(&n->rss_data, &config); 1209 1210 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1211 n->rss_data.indirections_table, n->rss_data.key)) { 1212 return false; 1213 } 1214 1215 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1216 return false; 1217 } 1218 1219 return true; 1220 } 1221 1222 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1223 { 1224 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1225 } 1226 1227 static bool virtio_net_load_ebpf(VirtIONet *n) 1228 { 1229 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1230 /* backend does't support steering ebpf */ 1231 return false; 1232 } 1233 1234 return ebpf_rss_load(&n->ebpf_rss); 1235 } 1236 1237 static void virtio_net_unload_ebpf(VirtIONet *n) 1238 { 1239 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1240 ebpf_rss_unload(&n->ebpf_rss); 1241 } 1242 1243 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1244 struct iovec *iov, 1245 unsigned int iov_cnt, 1246 bool do_rss) 1247 { 1248 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1249 struct virtio_net_rss_config cfg; 1250 size_t s, offset = 0, size_get; 1251 uint16_t queue_pairs, i; 1252 struct { 1253 uint16_t us; 1254 uint8_t b; 1255 } QEMU_PACKED temp; 1256 const char *err_msg = ""; 1257 uint32_t err_value = 0; 1258 1259 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1260 err_msg = "RSS is not negotiated"; 1261 goto error; 1262 } 1263 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1264 err_msg = "Hash report is not negotiated"; 1265 goto error; 1266 } 1267 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1268 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1269 if (s != size_get) { 1270 err_msg = "Short command buffer"; 1271 err_value = (uint32_t)s; 1272 goto error; 1273 } 1274 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1275 n->rss_data.indirections_len = 1276 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1277 n->rss_data.indirections_len++; 1278 if (!do_rss) { 1279 n->rss_data.indirections_len = 1; 1280 } 1281 if (!is_power_of_2(n->rss_data.indirections_len)) { 1282 err_msg = "Invalid size of indirection table"; 1283 err_value = n->rss_data.indirections_len; 1284 goto error; 1285 } 1286 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1287 err_msg = "Too large indirection table"; 1288 err_value = n->rss_data.indirections_len; 1289 goto error; 1290 } 1291 n->rss_data.default_queue = do_rss ? 1292 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1293 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1294 err_msg = "Invalid default queue"; 1295 err_value = n->rss_data.default_queue; 1296 goto error; 1297 } 1298 offset += size_get; 1299 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1300 g_free(n->rss_data.indirections_table); 1301 n->rss_data.indirections_table = g_malloc(size_get); 1302 if (!n->rss_data.indirections_table) { 1303 err_msg = "Can't allocate indirections table"; 1304 err_value = n->rss_data.indirections_len; 1305 goto error; 1306 } 1307 s = iov_to_buf(iov, iov_cnt, offset, 1308 n->rss_data.indirections_table, size_get); 1309 if (s != size_get) { 1310 err_msg = "Short indirection table buffer"; 1311 err_value = (uint32_t)s; 1312 goto error; 1313 } 1314 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1315 uint16_t val = n->rss_data.indirections_table[i]; 1316 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1317 } 1318 offset += size_get; 1319 size_get = sizeof(temp); 1320 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1321 if (s != size_get) { 1322 err_msg = "Can't get queue_pairs"; 1323 err_value = (uint32_t)s; 1324 goto error; 1325 } 1326 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1327 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1328 err_msg = "Invalid number of queue_pairs"; 1329 err_value = queue_pairs; 1330 goto error; 1331 } 1332 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1333 err_msg = "Invalid key size"; 1334 err_value = temp.b; 1335 goto error; 1336 } 1337 if (!temp.b && n->rss_data.hash_types) { 1338 err_msg = "No key provided"; 1339 err_value = 0; 1340 goto error; 1341 } 1342 if (!temp.b && !n->rss_data.hash_types) { 1343 virtio_net_disable_rss(n); 1344 return queue_pairs; 1345 } 1346 offset += size_get; 1347 size_get = temp.b; 1348 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1349 if (s != size_get) { 1350 err_msg = "Can get key buffer"; 1351 err_value = (uint32_t)s; 1352 goto error; 1353 } 1354 n->rss_data.enabled = true; 1355 1356 if (!n->rss_data.populate_hash) { 1357 if (!virtio_net_attach_epbf_rss(n)) { 1358 /* EBPF must be loaded for vhost */ 1359 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1360 warn_report("Can't load eBPF RSS for vhost"); 1361 goto error; 1362 } 1363 /* fallback to software RSS */ 1364 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1365 n->rss_data.enabled_software_rss = true; 1366 } 1367 } else { 1368 /* use software RSS for hash populating */ 1369 /* and detach eBPF if was loaded before */ 1370 virtio_net_detach_epbf_rss(n); 1371 n->rss_data.enabled_software_rss = true; 1372 } 1373 1374 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1375 n->rss_data.indirections_len, 1376 temp.b); 1377 return queue_pairs; 1378 error: 1379 trace_virtio_net_rss_error(err_msg, err_value); 1380 virtio_net_disable_rss(n); 1381 return 0; 1382 } 1383 1384 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1385 struct iovec *iov, unsigned int iov_cnt) 1386 { 1387 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1388 uint16_t queue_pairs; 1389 NetClientState *nc = qemu_get_queue(n->nic); 1390 1391 virtio_net_disable_rss(n); 1392 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1393 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1394 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1395 } 1396 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1397 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1398 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1399 struct virtio_net_ctrl_mq mq; 1400 size_t s; 1401 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1402 return VIRTIO_NET_ERR; 1403 } 1404 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1405 if (s != sizeof(mq)) { 1406 return VIRTIO_NET_ERR; 1407 } 1408 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1409 1410 } else { 1411 return VIRTIO_NET_ERR; 1412 } 1413 1414 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1415 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1416 queue_pairs > n->max_queue_pairs || 1417 !n->multiqueue) { 1418 return VIRTIO_NET_ERR; 1419 } 1420 1421 n->curr_queue_pairs = queue_pairs; 1422 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1423 /* 1424 * Avoid updating the backend for a vdpa device: We're only interested 1425 * in updating the device model queues. 1426 */ 1427 return VIRTIO_NET_OK; 1428 } 1429 /* stop the backend before changing the number of queue_pairs to avoid handling a 1430 * disabled queue */ 1431 virtio_net_set_status(vdev, vdev->status); 1432 virtio_net_set_queue_pairs(n); 1433 1434 return VIRTIO_NET_OK; 1435 } 1436 1437 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1438 const struct iovec *in_sg, unsigned in_num, 1439 const struct iovec *out_sg, 1440 unsigned out_num) 1441 { 1442 VirtIONet *n = VIRTIO_NET(vdev); 1443 struct virtio_net_ctrl_hdr ctrl; 1444 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1445 size_t s; 1446 struct iovec *iov, *iov2; 1447 1448 if (iov_size(in_sg, in_num) < sizeof(status) || 1449 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1450 virtio_error(vdev, "virtio-net ctrl missing headers"); 1451 return 0; 1452 } 1453 1454 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1455 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1456 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1457 if (s != sizeof(ctrl)) { 1458 status = VIRTIO_NET_ERR; 1459 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1460 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1461 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1462 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1463 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1464 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1465 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1466 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1467 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1468 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1469 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1470 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1471 } 1472 1473 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1474 assert(s == sizeof(status)); 1475 1476 g_free(iov2); 1477 return sizeof(status); 1478 } 1479 1480 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1481 { 1482 VirtQueueElement *elem; 1483 1484 for (;;) { 1485 size_t written; 1486 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1487 if (!elem) { 1488 break; 1489 } 1490 1491 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1492 elem->out_sg, elem->out_num); 1493 if (written > 0) { 1494 virtqueue_push(vq, elem, written); 1495 virtio_notify(vdev, vq); 1496 g_free(elem); 1497 } else { 1498 virtqueue_detach_element(vq, elem, 0); 1499 g_free(elem); 1500 break; 1501 } 1502 } 1503 } 1504 1505 /* RX */ 1506 1507 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1508 { 1509 VirtIONet *n = VIRTIO_NET(vdev); 1510 int queue_index = vq2q(virtio_get_queue_index(vq)); 1511 1512 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1513 } 1514 1515 static bool virtio_net_can_receive(NetClientState *nc) 1516 { 1517 VirtIONet *n = qemu_get_nic_opaque(nc); 1518 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1519 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1520 1521 if (!vdev->vm_running) { 1522 return false; 1523 } 1524 1525 if (nc->queue_index >= n->curr_queue_pairs) { 1526 return false; 1527 } 1528 1529 if (!virtio_queue_ready(q->rx_vq) || 1530 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1531 return false; 1532 } 1533 1534 return true; 1535 } 1536 1537 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1538 { 1539 VirtIONet *n = q->n; 1540 if (virtio_queue_empty(q->rx_vq) || 1541 (n->mergeable_rx_bufs && 1542 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1543 virtio_queue_set_notification(q->rx_vq, 1); 1544 1545 /* To avoid a race condition where the guest has made some buffers 1546 * available after the above check but before notification was 1547 * enabled, check for available buffers again. 1548 */ 1549 if (virtio_queue_empty(q->rx_vq) || 1550 (n->mergeable_rx_bufs && 1551 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1552 return 0; 1553 } 1554 } 1555 1556 virtio_queue_set_notification(q->rx_vq, 0); 1557 return 1; 1558 } 1559 1560 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1561 { 1562 virtio_tswap16s(vdev, &hdr->hdr_len); 1563 virtio_tswap16s(vdev, &hdr->gso_size); 1564 virtio_tswap16s(vdev, &hdr->csum_start); 1565 virtio_tswap16s(vdev, &hdr->csum_offset); 1566 } 1567 1568 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1569 * it never finds out that the packets don't have valid checksums. This 1570 * causes dhclient to get upset. Fedora's carried a patch for ages to 1571 * fix this with Xen but it hasn't appeared in an upstream release of 1572 * dhclient yet. 1573 * 1574 * To avoid breaking existing guests, we catch udp packets and add 1575 * checksums. This is terrible but it's better than hacking the guest 1576 * kernels. 1577 * 1578 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1579 * we should provide a mechanism to disable it to avoid polluting the host 1580 * cache. 1581 */ 1582 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1583 uint8_t *buf, size_t size) 1584 { 1585 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1586 (size > 27 && size < 1500) && /* normal sized MTU */ 1587 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1588 (buf[23] == 17) && /* ip.protocol == UDP */ 1589 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1590 net_checksum_calculate(buf, size, CSUM_UDP); 1591 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1592 } 1593 } 1594 1595 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1596 const void *buf, size_t size) 1597 { 1598 if (n->has_vnet_hdr) { 1599 /* FIXME this cast is evil */ 1600 void *wbuf = (void *)buf; 1601 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1602 size - n->host_hdr_len); 1603 1604 if (n->needs_vnet_hdr_swap) { 1605 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1606 } 1607 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1608 } else { 1609 struct virtio_net_hdr hdr = { 1610 .flags = 0, 1611 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1612 }; 1613 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1614 } 1615 } 1616 1617 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1618 { 1619 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1620 static const uint8_t vlan[] = {0x81, 0x00}; 1621 uint8_t *ptr = (uint8_t *)buf; 1622 int i; 1623 1624 if (n->promisc) 1625 return 1; 1626 1627 ptr += n->host_hdr_len; 1628 1629 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1630 int vid = lduw_be_p(ptr + 14) & 0xfff; 1631 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1632 return 0; 1633 } 1634 1635 if (ptr[0] & 1) { // multicast 1636 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1637 return !n->nobcast; 1638 } else if (n->nomulti) { 1639 return 0; 1640 } else if (n->allmulti || n->mac_table.multi_overflow) { 1641 return 1; 1642 } 1643 1644 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1645 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1646 return 1; 1647 } 1648 } 1649 } else { // unicast 1650 if (n->nouni) { 1651 return 0; 1652 } else if (n->alluni || n->mac_table.uni_overflow) { 1653 return 1; 1654 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1655 return 1; 1656 } 1657 1658 for (i = 0; i < n->mac_table.first_multi; i++) { 1659 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1660 return 1; 1661 } 1662 } 1663 } 1664 1665 return 0; 1666 } 1667 1668 static uint8_t virtio_net_get_hash_type(bool isip4, 1669 bool isip6, 1670 bool isudp, 1671 bool istcp, 1672 uint32_t types) 1673 { 1674 if (isip4) { 1675 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { 1676 return NetPktRssIpV4Tcp; 1677 } 1678 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { 1679 return NetPktRssIpV4Udp; 1680 } 1681 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1682 return NetPktRssIpV4; 1683 } 1684 } else if (isip6) { 1685 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 1686 VIRTIO_NET_RSS_HASH_TYPE_TCPv6; 1687 1688 if (istcp && (types & mask)) { 1689 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? 1690 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; 1691 } 1692 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; 1693 if (isudp && (types & mask)) { 1694 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? 1695 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; 1696 } 1697 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; 1698 if (types & mask) { 1699 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? 1700 NetPktRssIpV6Ex : NetPktRssIpV6; 1701 } 1702 } 1703 return 0xff; 1704 } 1705 1706 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1707 uint32_t hash) 1708 { 1709 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1710 hdr->hash_value = hash; 1711 hdr->hash_report = report; 1712 } 1713 1714 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1715 size_t size) 1716 { 1717 VirtIONet *n = qemu_get_nic_opaque(nc); 1718 unsigned int index = nc->queue_index, new_index = index; 1719 struct NetRxPkt *pkt = n->rx_pkt; 1720 uint8_t net_hash_type; 1721 uint32_t hash; 1722 bool isip4, isip6, isudp, istcp; 1723 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1724 VIRTIO_NET_HASH_REPORT_IPv4, 1725 VIRTIO_NET_HASH_REPORT_TCPv4, 1726 VIRTIO_NET_HASH_REPORT_TCPv6, 1727 VIRTIO_NET_HASH_REPORT_IPv6, 1728 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1729 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1730 VIRTIO_NET_HASH_REPORT_UDPv4, 1731 VIRTIO_NET_HASH_REPORT_UDPv6, 1732 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1733 }; 1734 1735 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, 1736 size - n->host_hdr_len); 1737 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1738 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { 1739 istcp = isudp = false; 1740 } 1741 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { 1742 istcp = isudp = false; 1743 } 1744 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, 1745 n->rss_data.hash_types); 1746 if (net_hash_type > NetPktRssIpV6UdpEx) { 1747 if (n->rss_data.populate_hash) { 1748 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1749 } 1750 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1751 } 1752 1753 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1754 1755 if (n->rss_data.populate_hash) { 1756 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1757 } 1758 1759 if (n->rss_data.redirect) { 1760 new_index = hash & (n->rss_data.indirections_len - 1); 1761 new_index = n->rss_data.indirections_table[new_index]; 1762 } 1763 1764 return (index == new_index) ? -1 : new_index; 1765 } 1766 1767 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1768 size_t size, bool no_rss) 1769 { 1770 VirtIONet *n = qemu_get_nic_opaque(nc); 1771 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1772 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1773 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1774 size_t lens[VIRTQUEUE_MAX_SIZE]; 1775 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1776 struct virtio_net_hdr_mrg_rxbuf mhdr; 1777 unsigned mhdr_cnt = 0; 1778 size_t offset, i, guest_offset, j; 1779 ssize_t err; 1780 1781 if (!virtio_net_can_receive(nc)) { 1782 return -1; 1783 } 1784 1785 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1786 int index = virtio_net_process_rss(nc, buf, size); 1787 if (index >= 0) { 1788 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1789 return virtio_net_receive_rcu(nc2, buf, size, true); 1790 } 1791 } 1792 1793 /* hdr_len refers to the header we supply to the guest */ 1794 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1795 return 0; 1796 } 1797 1798 if (!receive_filter(n, buf, size)) 1799 return size; 1800 1801 offset = i = 0; 1802 1803 while (offset < size) { 1804 VirtQueueElement *elem; 1805 int len, total; 1806 const struct iovec *sg; 1807 1808 total = 0; 1809 1810 if (i == VIRTQUEUE_MAX_SIZE) { 1811 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1812 err = size; 1813 goto err; 1814 } 1815 1816 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1817 if (!elem) { 1818 if (i) { 1819 virtio_error(vdev, "virtio-net unexpected empty queue: " 1820 "i %zd mergeable %d offset %zd, size %zd, " 1821 "guest hdr len %zd, host hdr len %zd " 1822 "guest features 0x%" PRIx64, 1823 i, n->mergeable_rx_bufs, offset, size, 1824 n->guest_hdr_len, n->host_hdr_len, 1825 vdev->guest_features); 1826 } 1827 err = -1; 1828 goto err; 1829 } 1830 1831 if (elem->in_num < 1) { 1832 virtio_error(vdev, 1833 "virtio-net receive queue contains no in buffers"); 1834 virtqueue_detach_element(q->rx_vq, elem, 0); 1835 g_free(elem); 1836 err = -1; 1837 goto err; 1838 } 1839 1840 sg = elem->in_sg; 1841 if (i == 0) { 1842 assert(offset == 0); 1843 if (n->mergeable_rx_bufs) { 1844 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1845 sg, elem->in_num, 1846 offsetof(typeof(mhdr), num_buffers), 1847 sizeof(mhdr.num_buffers)); 1848 } 1849 1850 receive_header(n, sg, elem->in_num, buf, size); 1851 if (n->rss_data.populate_hash) { 1852 offset = sizeof(mhdr); 1853 iov_from_buf(sg, elem->in_num, offset, 1854 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1855 } 1856 offset = n->host_hdr_len; 1857 total += n->guest_hdr_len; 1858 guest_offset = n->guest_hdr_len; 1859 } else { 1860 guest_offset = 0; 1861 } 1862 1863 /* copy in packet. ugh */ 1864 len = iov_from_buf(sg, elem->in_num, guest_offset, 1865 buf + offset, size - offset); 1866 total += len; 1867 offset += len; 1868 /* If buffers can't be merged, at this point we 1869 * must have consumed the complete packet. 1870 * Otherwise, drop it. */ 1871 if (!n->mergeable_rx_bufs && offset < size) { 1872 virtqueue_unpop(q->rx_vq, elem, total); 1873 g_free(elem); 1874 err = size; 1875 goto err; 1876 } 1877 1878 elems[i] = elem; 1879 lens[i] = total; 1880 i++; 1881 } 1882 1883 if (mhdr_cnt) { 1884 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1885 iov_from_buf(mhdr_sg, mhdr_cnt, 1886 0, 1887 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1888 } 1889 1890 for (j = 0; j < i; j++) { 1891 /* signal other side */ 1892 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1893 g_free(elems[j]); 1894 } 1895 1896 virtqueue_flush(q->rx_vq, i); 1897 virtio_notify(vdev, q->rx_vq); 1898 1899 return size; 1900 1901 err: 1902 for (j = 0; j < i; j++) { 1903 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1904 g_free(elems[j]); 1905 } 1906 1907 return err; 1908 } 1909 1910 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1911 size_t size) 1912 { 1913 RCU_READ_LOCK_GUARD(); 1914 1915 return virtio_net_receive_rcu(nc, buf, size, false); 1916 } 1917 1918 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1919 const uint8_t *buf, 1920 VirtioNetRscUnit *unit) 1921 { 1922 uint16_t ip_hdrlen; 1923 struct ip_header *ip; 1924 1925 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1926 + sizeof(struct eth_header)); 1927 unit->ip = (void *)ip; 1928 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1929 unit->ip_plen = &ip->ip_len; 1930 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1931 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1932 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1933 } 1934 1935 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1936 const uint8_t *buf, 1937 VirtioNetRscUnit *unit) 1938 { 1939 struct ip6_header *ip6; 1940 1941 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 1942 + sizeof(struct eth_header)); 1943 unit->ip = ip6; 1944 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 1945 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 1946 + sizeof(struct ip6_header)); 1947 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1948 1949 /* There is a difference between payload lenght in ipv4 and v6, 1950 ip header is excluded in ipv6 */ 1951 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 1952 } 1953 1954 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 1955 VirtioNetRscSeg *seg) 1956 { 1957 int ret; 1958 struct virtio_net_hdr_v1 *h; 1959 1960 h = (struct virtio_net_hdr_v1 *)seg->buf; 1961 h->flags = 0; 1962 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 1963 1964 if (seg->is_coalesced) { 1965 h->rsc.segments = seg->packets; 1966 h->rsc.dup_acks = seg->dup_ack; 1967 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 1968 if (chain->proto == ETH_P_IP) { 1969 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1970 } else { 1971 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1972 } 1973 } 1974 1975 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 1976 QTAILQ_REMOVE(&chain->buffers, seg, next); 1977 g_free(seg->buf); 1978 g_free(seg); 1979 1980 return ret; 1981 } 1982 1983 static void virtio_net_rsc_purge(void *opq) 1984 { 1985 VirtioNetRscSeg *seg, *rn; 1986 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 1987 1988 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 1989 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 1990 chain->stat.purge_failed++; 1991 continue; 1992 } 1993 } 1994 1995 chain->stat.timer++; 1996 if (!QTAILQ_EMPTY(&chain->buffers)) { 1997 timer_mod(chain->drain_timer, 1998 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 1999 } 2000 } 2001 2002 static void virtio_net_rsc_cleanup(VirtIONet *n) 2003 { 2004 VirtioNetRscChain *chain, *rn_chain; 2005 VirtioNetRscSeg *seg, *rn_seg; 2006 2007 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2008 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2009 QTAILQ_REMOVE(&chain->buffers, seg, next); 2010 g_free(seg->buf); 2011 g_free(seg); 2012 } 2013 2014 timer_free(chain->drain_timer); 2015 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2016 g_free(chain); 2017 } 2018 } 2019 2020 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2021 NetClientState *nc, 2022 const uint8_t *buf, size_t size) 2023 { 2024 uint16_t hdr_len; 2025 VirtioNetRscSeg *seg; 2026 2027 hdr_len = chain->n->guest_hdr_len; 2028 seg = g_new(VirtioNetRscSeg, 1); 2029 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2030 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2031 memcpy(seg->buf, buf, size); 2032 seg->size = size; 2033 seg->packets = 1; 2034 seg->dup_ack = 0; 2035 seg->is_coalesced = 0; 2036 seg->nc = nc; 2037 2038 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2039 chain->stat.cache++; 2040 2041 switch (chain->proto) { 2042 case ETH_P_IP: 2043 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2044 break; 2045 case ETH_P_IPV6: 2046 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2047 break; 2048 default: 2049 g_assert_not_reached(); 2050 } 2051 } 2052 2053 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2054 VirtioNetRscSeg *seg, 2055 const uint8_t *buf, 2056 struct tcp_header *n_tcp, 2057 struct tcp_header *o_tcp) 2058 { 2059 uint32_t nack, oack; 2060 uint16_t nwin, owin; 2061 2062 nack = htonl(n_tcp->th_ack); 2063 nwin = htons(n_tcp->th_win); 2064 oack = htonl(o_tcp->th_ack); 2065 owin = htons(o_tcp->th_win); 2066 2067 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2068 chain->stat.ack_out_of_win++; 2069 return RSC_FINAL; 2070 } else if (nack == oack) { 2071 /* duplicated ack or window probe */ 2072 if (nwin == owin) { 2073 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2074 chain->stat.dup_ack++; 2075 return RSC_FINAL; 2076 } else { 2077 /* Coalesce window update */ 2078 o_tcp->th_win = n_tcp->th_win; 2079 chain->stat.win_update++; 2080 return RSC_COALESCE; 2081 } 2082 } else { 2083 /* pure ack, go to 'C', finalize*/ 2084 chain->stat.pure_ack++; 2085 return RSC_FINAL; 2086 } 2087 } 2088 2089 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2090 VirtioNetRscSeg *seg, 2091 const uint8_t *buf, 2092 VirtioNetRscUnit *n_unit) 2093 { 2094 void *data; 2095 uint16_t o_ip_len; 2096 uint32_t nseq, oseq; 2097 VirtioNetRscUnit *o_unit; 2098 2099 o_unit = &seg->unit; 2100 o_ip_len = htons(*o_unit->ip_plen); 2101 nseq = htonl(n_unit->tcp->th_seq); 2102 oseq = htonl(o_unit->tcp->th_seq); 2103 2104 /* out of order or retransmitted. */ 2105 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2106 chain->stat.data_out_of_win++; 2107 return RSC_FINAL; 2108 } 2109 2110 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2111 if (nseq == oseq) { 2112 if ((o_unit->payload == 0) && n_unit->payload) { 2113 /* From no payload to payload, normal case, not a dup ack or etc */ 2114 chain->stat.data_after_pure_ack++; 2115 goto coalesce; 2116 } else { 2117 return virtio_net_rsc_handle_ack(chain, seg, buf, 2118 n_unit->tcp, o_unit->tcp); 2119 } 2120 } else if ((nseq - oseq) != o_unit->payload) { 2121 /* Not a consistent packet, out of order */ 2122 chain->stat.data_out_of_order++; 2123 return RSC_FINAL; 2124 } else { 2125 coalesce: 2126 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2127 chain->stat.over_size++; 2128 return RSC_FINAL; 2129 } 2130 2131 /* Here comes the right data, the payload length in v4/v6 is different, 2132 so use the field value to update and record the new data len */ 2133 o_unit->payload += n_unit->payload; /* update new data len */ 2134 2135 /* update field in ip header */ 2136 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2137 2138 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2139 for windows guest, while this may change the behavior for linux 2140 guest (only if it uses RSC feature). */ 2141 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2142 2143 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2144 o_unit->tcp->th_win = n_unit->tcp->th_win; 2145 2146 memmove(seg->buf + seg->size, data, n_unit->payload); 2147 seg->size += n_unit->payload; 2148 seg->packets++; 2149 chain->stat.coalesced++; 2150 return RSC_COALESCE; 2151 } 2152 } 2153 2154 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2155 VirtioNetRscSeg *seg, 2156 const uint8_t *buf, size_t size, 2157 VirtioNetRscUnit *unit) 2158 { 2159 struct ip_header *ip1, *ip2; 2160 2161 ip1 = (struct ip_header *)(unit->ip); 2162 ip2 = (struct ip_header *)(seg->unit.ip); 2163 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2164 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2165 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2166 chain->stat.no_match++; 2167 return RSC_NO_MATCH; 2168 } 2169 2170 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2171 } 2172 2173 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2174 VirtioNetRscSeg *seg, 2175 const uint8_t *buf, size_t size, 2176 VirtioNetRscUnit *unit) 2177 { 2178 struct ip6_header *ip1, *ip2; 2179 2180 ip1 = (struct ip6_header *)(unit->ip); 2181 ip2 = (struct ip6_header *)(seg->unit.ip); 2182 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2183 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2184 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2185 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2186 chain->stat.no_match++; 2187 return RSC_NO_MATCH; 2188 } 2189 2190 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2191 } 2192 2193 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2194 * to prevent out of order */ 2195 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2196 struct tcp_header *tcp) 2197 { 2198 uint16_t tcp_hdr; 2199 uint16_t tcp_flag; 2200 2201 tcp_flag = htons(tcp->th_offset_flags); 2202 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2203 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2204 if (tcp_flag & TH_SYN) { 2205 chain->stat.tcp_syn++; 2206 return RSC_BYPASS; 2207 } 2208 2209 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2210 chain->stat.tcp_ctrl_drain++; 2211 return RSC_FINAL; 2212 } 2213 2214 if (tcp_hdr > sizeof(struct tcp_header)) { 2215 chain->stat.tcp_all_opt++; 2216 return RSC_FINAL; 2217 } 2218 2219 return RSC_CANDIDATE; 2220 } 2221 2222 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2223 NetClientState *nc, 2224 const uint8_t *buf, size_t size, 2225 VirtioNetRscUnit *unit) 2226 { 2227 int ret; 2228 VirtioNetRscSeg *seg, *nseg; 2229 2230 if (QTAILQ_EMPTY(&chain->buffers)) { 2231 chain->stat.empty_cache++; 2232 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2233 timer_mod(chain->drain_timer, 2234 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2235 return size; 2236 } 2237 2238 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2239 if (chain->proto == ETH_P_IP) { 2240 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2241 } else { 2242 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2243 } 2244 2245 if (ret == RSC_FINAL) { 2246 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2247 /* Send failed */ 2248 chain->stat.final_failed++; 2249 return 0; 2250 } 2251 2252 /* Send current packet */ 2253 return virtio_net_do_receive(nc, buf, size); 2254 } else if (ret == RSC_NO_MATCH) { 2255 continue; 2256 } else { 2257 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2258 seg->is_coalesced = 1; 2259 return size; 2260 } 2261 } 2262 2263 chain->stat.no_match_cache++; 2264 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2265 return size; 2266 } 2267 2268 /* Drain a connection data, this is to avoid out of order segments */ 2269 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2270 NetClientState *nc, 2271 const uint8_t *buf, size_t size, 2272 uint16_t ip_start, uint16_t ip_size, 2273 uint16_t tcp_port) 2274 { 2275 VirtioNetRscSeg *seg, *nseg; 2276 uint32_t ppair1, ppair2; 2277 2278 ppair1 = *(uint32_t *)(buf + tcp_port); 2279 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2280 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2281 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2282 || (ppair1 != ppair2)) { 2283 continue; 2284 } 2285 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2286 chain->stat.drain_failed++; 2287 } 2288 2289 break; 2290 } 2291 2292 return virtio_net_do_receive(nc, buf, size); 2293 } 2294 2295 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2296 struct ip_header *ip, 2297 const uint8_t *buf, size_t size) 2298 { 2299 uint16_t ip_len; 2300 2301 /* Not an ipv4 packet */ 2302 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2303 chain->stat.ip_option++; 2304 return RSC_BYPASS; 2305 } 2306 2307 /* Don't handle packets with ip option */ 2308 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2309 chain->stat.ip_option++; 2310 return RSC_BYPASS; 2311 } 2312 2313 if (ip->ip_p != IPPROTO_TCP) { 2314 chain->stat.bypass_not_tcp++; 2315 return RSC_BYPASS; 2316 } 2317 2318 /* Don't handle packets with ip fragment */ 2319 if (!(htons(ip->ip_off) & IP_DF)) { 2320 chain->stat.ip_frag++; 2321 return RSC_BYPASS; 2322 } 2323 2324 /* Don't handle packets with ecn flag */ 2325 if (IPTOS_ECN(ip->ip_tos)) { 2326 chain->stat.ip_ecn++; 2327 return RSC_BYPASS; 2328 } 2329 2330 ip_len = htons(ip->ip_len); 2331 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2332 || ip_len > (size - chain->n->guest_hdr_len - 2333 sizeof(struct eth_header))) { 2334 chain->stat.ip_hacked++; 2335 return RSC_BYPASS; 2336 } 2337 2338 return RSC_CANDIDATE; 2339 } 2340 2341 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2342 NetClientState *nc, 2343 const uint8_t *buf, size_t size) 2344 { 2345 int32_t ret; 2346 uint16_t hdr_len; 2347 VirtioNetRscUnit unit; 2348 2349 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2350 2351 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2352 + sizeof(struct tcp_header))) { 2353 chain->stat.bypass_not_tcp++; 2354 return virtio_net_do_receive(nc, buf, size); 2355 } 2356 2357 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2358 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2359 != RSC_CANDIDATE) { 2360 return virtio_net_do_receive(nc, buf, size); 2361 } 2362 2363 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2364 if (ret == RSC_BYPASS) { 2365 return virtio_net_do_receive(nc, buf, size); 2366 } else if (ret == RSC_FINAL) { 2367 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2368 ((hdr_len + sizeof(struct eth_header)) + 12), 2369 VIRTIO_NET_IP4_ADDR_SIZE, 2370 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2371 } 2372 2373 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2374 } 2375 2376 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2377 struct ip6_header *ip6, 2378 const uint8_t *buf, size_t size) 2379 { 2380 uint16_t ip_len; 2381 2382 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2383 != IP_HEADER_VERSION_6) { 2384 return RSC_BYPASS; 2385 } 2386 2387 /* Both option and protocol is checked in this */ 2388 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2389 chain->stat.bypass_not_tcp++; 2390 return RSC_BYPASS; 2391 } 2392 2393 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2394 if (ip_len < sizeof(struct tcp_header) || 2395 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2396 - sizeof(struct ip6_header))) { 2397 chain->stat.ip_hacked++; 2398 return RSC_BYPASS; 2399 } 2400 2401 /* Don't handle packets with ecn flag */ 2402 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2403 chain->stat.ip_ecn++; 2404 return RSC_BYPASS; 2405 } 2406 2407 return RSC_CANDIDATE; 2408 } 2409 2410 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2411 const uint8_t *buf, size_t size) 2412 { 2413 int32_t ret; 2414 uint16_t hdr_len; 2415 VirtioNetRscChain *chain; 2416 VirtioNetRscUnit unit; 2417 2418 chain = (VirtioNetRscChain *)opq; 2419 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2420 2421 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2422 + sizeof(tcp_header))) { 2423 return virtio_net_do_receive(nc, buf, size); 2424 } 2425 2426 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2427 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2428 unit.ip, buf, size)) { 2429 return virtio_net_do_receive(nc, buf, size); 2430 } 2431 2432 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2433 if (ret == RSC_BYPASS) { 2434 return virtio_net_do_receive(nc, buf, size); 2435 } else if (ret == RSC_FINAL) { 2436 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2437 ((hdr_len + sizeof(struct eth_header)) + 8), 2438 VIRTIO_NET_IP6_ADDR_SIZE, 2439 hdr_len + sizeof(struct eth_header) 2440 + sizeof(struct ip6_header)); 2441 } 2442 2443 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2444 } 2445 2446 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2447 NetClientState *nc, 2448 uint16_t proto) 2449 { 2450 VirtioNetRscChain *chain; 2451 2452 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2453 return NULL; 2454 } 2455 2456 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2457 if (chain->proto == proto) { 2458 return chain; 2459 } 2460 } 2461 2462 chain = g_malloc(sizeof(*chain)); 2463 chain->n = n; 2464 chain->proto = proto; 2465 if (proto == (uint16_t)ETH_P_IP) { 2466 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2467 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2468 } else { 2469 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2470 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2471 } 2472 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2473 virtio_net_rsc_purge, chain); 2474 memset(&chain->stat, 0, sizeof(chain->stat)); 2475 2476 QTAILQ_INIT(&chain->buffers); 2477 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2478 2479 return chain; 2480 } 2481 2482 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2483 const uint8_t *buf, 2484 size_t size) 2485 { 2486 uint16_t proto; 2487 VirtioNetRscChain *chain; 2488 struct eth_header *eth; 2489 VirtIONet *n; 2490 2491 n = qemu_get_nic_opaque(nc); 2492 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2493 return virtio_net_do_receive(nc, buf, size); 2494 } 2495 2496 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2497 proto = htons(eth->h_proto); 2498 2499 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2500 if (chain) { 2501 chain->stat.received++; 2502 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2503 return virtio_net_rsc_receive4(chain, nc, buf, size); 2504 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2505 return virtio_net_rsc_receive6(chain, nc, buf, size); 2506 } 2507 } 2508 return virtio_net_do_receive(nc, buf, size); 2509 } 2510 2511 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2512 size_t size) 2513 { 2514 VirtIONet *n = qemu_get_nic_opaque(nc); 2515 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2516 return virtio_net_rsc_receive(nc, buf, size); 2517 } else { 2518 return virtio_net_do_receive(nc, buf, size); 2519 } 2520 } 2521 2522 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2523 2524 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2525 { 2526 VirtIONet *n = qemu_get_nic_opaque(nc); 2527 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2528 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2529 2530 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2531 virtio_notify(vdev, q->tx_vq); 2532 2533 g_free(q->async_tx.elem); 2534 q->async_tx.elem = NULL; 2535 2536 virtio_queue_set_notification(q->tx_vq, 1); 2537 virtio_net_flush_tx(q); 2538 } 2539 2540 /* TX */ 2541 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2542 { 2543 VirtIONet *n = q->n; 2544 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2545 VirtQueueElement *elem; 2546 int32_t num_packets = 0; 2547 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2548 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2549 return num_packets; 2550 } 2551 2552 if (q->async_tx.elem) { 2553 virtio_queue_set_notification(q->tx_vq, 0); 2554 return num_packets; 2555 } 2556 2557 for (;;) { 2558 ssize_t ret; 2559 unsigned int out_num; 2560 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2561 struct virtio_net_hdr_mrg_rxbuf mhdr; 2562 2563 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2564 if (!elem) { 2565 break; 2566 } 2567 2568 out_num = elem->out_num; 2569 out_sg = elem->out_sg; 2570 if (out_num < 1) { 2571 virtio_error(vdev, "virtio-net header not in first element"); 2572 virtqueue_detach_element(q->tx_vq, elem, 0); 2573 g_free(elem); 2574 return -EINVAL; 2575 } 2576 2577 if (n->has_vnet_hdr) { 2578 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2579 n->guest_hdr_len) { 2580 virtio_error(vdev, "virtio-net header incorrect"); 2581 virtqueue_detach_element(q->tx_vq, elem, 0); 2582 g_free(elem); 2583 return -EINVAL; 2584 } 2585 if (n->needs_vnet_hdr_swap) { 2586 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2587 sg2[0].iov_base = &mhdr; 2588 sg2[0].iov_len = n->guest_hdr_len; 2589 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2590 out_sg, out_num, 2591 n->guest_hdr_len, -1); 2592 if (out_num == VIRTQUEUE_MAX_SIZE) { 2593 goto drop; 2594 } 2595 out_num += 1; 2596 out_sg = sg2; 2597 } 2598 } 2599 /* 2600 * If host wants to see the guest header as is, we can 2601 * pass it on unchanged. Otherwise, copy just the parts 2602 * that host is interested in. 2603 */ 2604 assert(n->host_hdr_len <= n->guest_hdr_len); 2605 if (n->host_hdr_len != n->guest_hdr_len) { 2606 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2607 out_sg, out_num, 2608 0, n->host_hdr_len); 2609 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2610 out_sg, out_num, 2611 n->guest_hdr_len, -1); 2612 out_num = sg_num; 2613 out_sg = sg; 2614 } 2615 2616 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2617 out_sg, out_num, virtio_net_tx_complete); 2618 if (ret == 0) { 2619 virtio_queue_set_notification(q->tx_vq, 0); 2620 q->async_tx.elem = elem; 2621 return -EBUSY; 2622 } 2623 2624 drop: 2625 virtqueue_push(q->tx_vq, elem, 0); 2626 virtio_notify(vdev, q->tx_vq); 2627 g_free(elem); 2628 2629 if (++num_packets >= n->tx_burst) { 2630 break; 2631 } 2632 } 2633 return num_packets; 2634 } 2635 2636 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2637 { 2638 VirtIONet *n = VIRTIO_NET(vdev); 2639 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2640 2641 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2642 virtio_net_drop_tx_queue_data(vdev, vq); 2643 return; 2644 } 2645 2646 /* This happens when device was stopped but VCPU wasn't. */ 2647 if (!vdev->vm_running) { 2648 q->tx_waiting = 1; 2649 return; 2650 } 2651 2652 if (q->tx_waiting) { 2653 virtio_queue_set_notification(vq, 1); 2654 timer_del(q->tx_timer); 2655 q->tx_waiting = 0; 2656 if (virtio_net_flush_tx(q) == -EINVAL) { 2657 return; 2658 } 2659 } else { 2660 timer_mod(q->tx_timer, 2661 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2662 q->tx_waiting = 1; 2663 virtio_queue_set_notification(vq, 0); 2664 } 2665 } 2666 2667 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2668 { 2669 VirtIONet *n = VIRTIO_NET(vdev); 2670 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2671 2672 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2673 virtio_net_drop_tx_queue_data(vdev, vq); 2674 return; 2675 } 2676 2677 if (unlikely(q->tx_waiting)) { 2678 return; 2679 } 2680 q->tx_waiting = 1; 2681 /* This happens when device was stopped but VCPU wasn't. */ 2682 if (!vdev->vm_running) { 2683 return; 2684 } 2685 virtio_queue_set_notification(vq, 0); 2686 qemu_bh_schedule(q->tx_bh); 2687 } 2688 2689 static void virtio_net_tx_timer(void *opaque) 2690 { 2691 VirtIONetQueue *q = opaque; 2692 VirtIONet *n = q->n; 2693 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2694 /* This happens when device was stopped but BH wasn't. */ 2695 if (!vdev->vm_running) { 2696 /* Make sure tx waiting is set, so we'll run when restarted. */ 2697 assert(q->tx_waiting); 2698 return; 2699 } 2700 2701 q->tx_waiting = 0; 2702 2703 /* Just in case the driver is not ready on more */ 2704 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2705 return; 2706 } 2707 2708 virtio_queue_set_notification(q->tx_vq, 1); 2709 virtio_net_flush_tx(q); 2710 } 2711 2712 static void virtio_net_tx_bh(void *opaque) 2713 { 2714 VirtIONetQueue *q = opaque; 2715 VirtIONet *n = q->n; 2716 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2717 int32_t ret; 2718 2719 /* This happens when device was stopped but BH wasn't. */ 2720 if (!vdev->vm_running) { 2721 /* Make sure tx waiting is set, so we'll run when restarted. */ 2722 assert(q->tx_waiting); 2723 return; 2724 } 2725 2726 q->tx_waiting = 0; 2727 2728 /* Just in case the driver is not ready on more */ 2729 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2730 return; 2731 } 2732 2733 ret = virtio_net_flush_tx(q); 2734 if (ret == -EBUSY || ret == -EINVAL) { 2735 return; /* Notification re-enable handled by tx_complete or device 2736 * broken */ 2737 } 2738 2739 /* If we flush a full burst of packets, assume there are 2740 * more coming and immediately reschedule */ 2741 if (ret >= n->tx_burst) { 2742 qemu_bh_schedule(q->tx_bh); 2743 q->tx_waiting = 1; 2744 return; 2745 } 2746 2747 /* If less than a full burst, re-enable notification and flush 2748 * anything that may have come in while we weren't looking. If 2749 * we find something, assume the guest is still active and reschedule */ 2750 virtio_queue_set_notification(q->tx_vq, 1); 2751 ret = virtio_net_flush_tx(q); 2752 if (ret == -EINVAL) { 2753 return; 2754 } else if (ret > 0) { 2755 virtio_queue_set_notification(q->tx_vq, 0); 2756 qemu_bh_schedule(q->tx_bh); 2757 q->tx_waiting = 1; 2758 } 2759 } 2760 2761 static void virtio_net_add_queue(VirtIONet *n, int index) 2762 { 2763 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2764 2765 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2766 virtio_net_handle_rx); 2767 2768 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2769 n->vqs[index].tx_vq = 2770 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2771 virtio_net_handle_tx_timer); 2772 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2773 virtio_net_tx_timer, 2774 &n->vqs[index]); 2775 } else { 2776 n->vqs[index].tx_vq = 2777 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2778 virtio_net_handle_tx_bh); 2779 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2780 } 2781 2782 n->vqs[index].tx_waiting = 0; 2783 n->vqs[index].n = n; 2784 } 2785 2786 static void virtio_net_del_queue(VirtIONet *n, int index) 2787 { 2788 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2789 VirtIONetQueue *q = &n->vqs[index]; 2790 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2791 2792 qemu_purge_queued_packets(nc); 2793 2794 virtio_del_queue(vdev, index * 2); 2795 if (q->tx_timer) { 2796 timer_free(q->tx_timer); 2797 q->tx_timer = NULL; 2798 } else { 2799 qemu_bh_delete(q->tx_bh); 2800 q->tx_bh = NULL; 2801 } 2802 q->tx_waiting = 0; 2803 virtio_del_queue(vdev, index * 2 + 1); 2804 } 2805 2806 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2807 { 2808 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2809 int old_num_queues = virtio_get_num_queues(vdev); 2810 int new_num_queues = new_max_queue_pairs * 2 + 1; 2811 int i; 2812 2813 assert(old_num_queues >= 3); 2814 assert(old_num_queues % 2 == 1); 2815 2816 if (old_num_queues == new_num_queues) { 2817 return; 2818 } 2819 2820 /* 2821 * We always need to remove and add ctrl vq if 2822 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2823 * and then we only enter one of the following two loops. 2824 */ 2825 virtio_del_queue(vdev, old_num_queues - 1); 2826 2827 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2828 /* new_num_queues < old_num_queues */ 2829 virtio_net_del_queue(n, i / 2); 2830 } 2831 2832 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2833 /* new_num_queues > old_num_queues */ 2834 virtio_net_add_queue(n, i / 2); 2835 } 2836 2837 /* add ctrl_vq last */ 2838 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2839 } 2840 2841 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2842 { 2843 int max = multiqueue ? n->max_queue_pairs : 1; 2844 2845 n->multiqueue = multiqueue; 2846 virtio_net_change_num_queue_pairs(n, max); 2847 2848 virtio_net_set_queue_pairs(n); 2849 } 2850 2851 static int virtio_net_post_load_device(void *opaque, int version_id) 2852 { 2853 VirtIONet *n = opaque; 2854 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2855 int i, link_down; 2856 2857 trace_virtio_net_post_load_device(); 2858 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2859 virtio_vdev_has_feature(vdev, 2860 VIRTIO_F_VERSION_1), 2861 virtio_vdev_has_feature(vdev, 2862 VIRTIO_NET_F_HASH_REPORT)); 2863 2864 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2865 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2866 n->mac_table.in_use = 0; 2867 } 2868 2869 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2870 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2871 } 2872 2873 /* 2874 * curr_guest_offloads will be later overwritten by the 2875 * virtio_set_features_nocheck call done from the virtio_load. 2876 * Here we make sure it is preserved and restored accordingly 2877 * in the virtio_net_post_load_virtio callback. 2878 */ 2879 n->saved_guest_offloads = n->curr_guest_offloads; 2880 2881 virtio_net_set_queue_pairs(n); 2882 2883 /* Find the first multicast entry in the saved MAC filter */ 2884 for (i = 0; i < n->mac_table.in_use; i++) { 2885 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2886 break; 2887 } 2888 } 2889 n->mac_table.first_multi = i; 2890 2891 /* nc.link_down can't be migrated, so infer link_down according 2892 * to link status bit in n->status */ 2893 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2894 for (i = 0; i < n->max_queue_pairs; i++) { 2895 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2896 } 2897 2898 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 2899 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 2900 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 2901 QEMU_CLOCK_VIRTUAL, 2902 virtio_net_announce_timer, n); 2903 if (n->announce_timer.round) { 2904 timer_mod(n->announce_timer.tm, 2905 qemu_clock_get_ms(n->announce_timer.type)); 2906 } else { 2907 qemu_announce_timer_del(&n->announce_timer, false); 2908 } 2909 } 2910 2911 if (n->rss_data.enabled) { 2912 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 2913 if (!n->rss_data.populate_hash) { 2914 if (!virtio_net_attach_epbf_rss(n)) { 2915 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 2916 warn_report("Can't post-load eBPF RSS for vhost"); 2917 } else { 2918 warn_report("Can't post-load eBPF RSS - " 2919 "fallback to software RSS"); 2920 n->rss_data.enabled_software_rss = true; 2921 } 2922 } 2923 } 2924 2925 trace_virtio_net_rss_enable(n->rss_data.hash_types, 2926 n->rss_data.indirections_len, 2927 sizeof(n->rss_data.key)); 2928 } else { 2929 trace_virtio_net_rss_disable(); 2930 } 2931 return 0; 2932 } 2933 2934 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 2935 { 2936 VirtIONet *n = VIRTIO_NET(vdev); 2937 /* 2938 * The actual needed state is now in saved_guest_offloads, 2939 * see virtio_net_post_load_device for detail. 2940 * Restore it back and apply the desired offloads. 2941 */ 2942 n->curr_guest_offloads = n->saved_guest_offloads; 2943 if (peer_has_vnet_hdr(n)) { 2944 virtio_net_apply_guest_offloads(n); 2945 } 2946 2947 return 0; 2948 } 2949 2950 /* tx_waiting field of a VirtIONetQueue */ 2951 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 2952 .name = "virtio-net-queue-tx_waiting", 2953 .fields = (VMStateField[]) { 2954 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 2955 VMSTATE_END_OF_LIST() 2956 }, 2957 }; 2958 2959 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 2960 { 2961 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 2962 } 2963 2964 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 2965 { 2966 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 2967 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 2968 } 2969 2970 static bool mac_table_fits(void *opaque, int version_id) 2971 { 2972 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 2973 } 2974 2975 static bool mac_table_doesnt_fit(void *opaque, int version_id) 2976 { 2977 return !mac_table_fits(opaque, version_id); 2978 } 2979 2980 /* This temporary type is shared by all the WITH_TMP methods 2981 * although only some fields are used by each. 2982 */ 2983 struct VirtIONetMigTmp { 2984 VirtIONet *parent; 2985 VirtIONetQueue *vqs_1; 2986 uint16_t curr_queue_pairs_1; 2987 uint8_t has_ufo; 2988 uint32_t has_vnet_hdr; 2989 }; 2990 2991 /* The 2nd and subsequent tx_waiting flags are loaded later than 2992 * the 1st entry in the queue_pairs and only if there's more than one 2993 * entry. We use the tmp mechanism to calculate a temporary 2994 * pointer and count and also validate the count. 2995 */ 2996 2997 static int virtio_net_tx_waiting_pre_save(void *opaque) 2998 { 2999 struct VirtIONetMigTmp *tmp = opaque; 3000 3001 tmp->vqs_1 = tmp->parent->vqs + 1; 3002 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3003 if (tmp->parent->curr_queue_pairs == 0) { 3004 tmp->curr_queue_pairs_1 = 0; 3005 } 3006 3007 return 0; 3008 } 3009 3010 static int virtio_net_tx_waiting_pre_load(void *opaque) 3011 { 3012 struct VirtIONetMigTmp *tmp = opaque; 3013 3014 /* Reuse the pointer setup from save */ 3015 virtio_net_tx_waiting_pre_save(opaque); 3016 3017 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3018 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3019 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3020 3021 return -EINVAL; 3022 } 3023 3024 return 0; /* all good */ 3025 } 3026 3027 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3028 .name = "virtio-net-tx_waiting", 3029 .pre_load = virtio_net_tx_waiting_pre_load, 3030 .pre_save = virtio_net_tx_waiting_pre_save, 3031 .fields = (VMStateField[]) { 3032 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3033 curr_queue_pairs_1, 3034 vmstate_virtio_net_queue_tx_waiting, 3035 struct VirtIONetQueue), 3036 VMSTATE_END_OF_LIST() 3037 }, 3038 }; 3039 3040 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3041 * flag set we need to check that we have it 3042 */ 3043 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3044 { 3045 struct VirtIONetMigTmp *tmp = opaque; 3046 3047 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3048 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3049 return -EINVAL; 3050 } 3051 3052 return 0; 3053 } 3054 3055 static int virtio_net_ufo_pre_save(void *opaque) 3056 { 3057 struct VirtIONetMigTmp *tmp = opaque; 3058 3059 tmp->has_ufo = tmp->parent->has_ufo; 3060 3061 return 0; 3062 } 3063 3064 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3065 .name = "virtio-net-ufo", 3066 .post_load = virtio_net_ufo_post_load, 3067 .pre_save = virtio_net_ufo_pre_save, 3068 .fields = (VMStateField[]) { 3069 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3070 VMSTATE_END_OF_LIST() 3071 }, 3072 }; 3073 3074 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3075 * flag set we need to check that we have it 3076 */ 3077 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3078 { 3079 struct VirtIONetMigTmp *tmp = opaque; 3080 3081 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3082 error_report("virtio-net: saved image requires vnet_hdr=on"); 3083 return -EINVAL; 3084 } 3085 3086 return 0; 3087 } 3088 3089 static int virtio_net_vnet_pre_save(void *opaque) 3090 { 3091 struct VirtIONetMigTmp *tmp = opaque; 3092 3093 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3094 3095 return 0; 3096 } 3097 3098 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3099 .name = "virtio-net-vnet", 3100 .post_load = virtio_net_vnet_post_load, 3101 .pre_save = virtio_net_vnet_pre_save, 3102 .fields = (VMStateField[]) { 3103 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3104 VMSTATE_END_OF_LIST() 3105 }, 3106 }; 3107 3108 static bool virtio_net_rss_needed(void *opaque) 3109 { 3110 return VIRTIO_NET(opaque)->rss_data.enabled; 3111 } 3112 3113 static const VMStateDescription vmstate_virtio_net_rss = { 3114 .name = "virtio-net-device/rss", 3115 .version_id = 1, 3116 .minimum_version_id = 1, 3117 .needed = virtio_net_rss_needed, 3118 .fields = (VMStateField[]) { 3119 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3120 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3121 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3122 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3123 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3124 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3125 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3126 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3127 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3128 rss_data.indirections_len, 0, 3129 vmstate_info_uint16, uint16_t), 3130 VMSTATE_END_OF_LIST() 3131 }, 3132 }; 3133 3134 static const VMStateDescription vmstate_virtio_net_device = { 3135 .name = "virtio-net-device", 3136 .version_id = VIRTIO_NET_VM_VERSION, 3137 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3138 .post_load = virtio_net_post_load_device, 3139 .fields = (VMStateField[]) { 3140 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3141 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3142 vmstate_virtio_net_queue_tx_waiting, 3143 VirtIONetQueue), 3144 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3145 VMSTATE_UINT16(status, VirtIONet), 3146 VMSTATE_UINT8(promisc, VirtIONet), 3147 VMSTATE_UINT8(allmulti, VirtIONet), 3148 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3149 3150 /* Guarded pair: If it fits we load it, else we throw it away 3151 * - can happen if source has a larger MAC table.; post-load 3152 * sets flags in this case. 3153 */ 3154 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3155 0, mac_table_fits, mac_table.in_use, 3156 ETH_ALEN), 3157 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3158 mac_table.in_use, ETH_ALEN), 3159 3160 /* Note: This is an array of uint32's that's always been saved as a 3161 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3162 * but based on the uint. 3163 */ 3164 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3165 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3166 vmstate_virtio_net_has_vnet), 3167 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3168 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3169 VMSTATE_UINT8(alluni, VirtIONet), 3170 VMSTATE_UINT8(nomulti, VirtIONet), 3171 VMSTATE_UINT8(nouni, VirtIONet), 3172 VMSTATE_UINT8(nobcast, VirtIONet), 3173 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3174 vmstate_virtio_net_has_ufo), 3175 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3176 vmstate_info_uint16_equal, uint16_t), 3177 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3178 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3179 vmstate_virtio_net_tx_waiting), 3180 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3181 has_ctrl_guest_offloads), 3182 VMSTATE_END_OF_LIST() 3183 }, 3184 .subsections = (const VMStateDescription * []) { 3185 &vmstate_virtio_net_rss, 3186 NULL 3187 } 3188 }; 3189 3190 static NetClientInfo net_virtio_info = { 3191 .type = NET_CLIENT_DRIVER_NIC, 3192 .size = sizeof(NICState), 3193 .can_receive = virtio_net_can_receive, 3194 .receive = virtio_net_receive, 3195 .link_status_changed = virtio_net_set_link_status, 3196 .query_rx_filter = virtio_net_query_rxfilter, 3197 .announce = virtio_net_announce, 3198 }; 3199 3200 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3201 { 3202 VirtIONet *n = VIRTIO_NET(vdev); 3203 NetClientState *nc; 3204 assert(n->vhost_started); 3205 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3206 /* Must guard against invalid features and bogus queue index 3207 * from being set by malicious guest, or penetrated through 3208 * buggy migration stream. 3209 */ 3210 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3211 qemu_log_mask(LOG_GUEST_ERROR, 3212 "%s: bogus vq index ignored\n", __func__); 3213 return false; 3214 } 3215 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3216 } else { 3217 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3218 } 3219 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3220 } 3221 3222 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3223 bool mask) 3224 { 3225 VirtIONet *n = VIRTIO_NET(vdev); 3226 NetClientState *nc; 3227 assert(n->vhost_started); 3228 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3229 /* Must guard against invalid features and bogus queue index 3230 * from being set by malicious guest, or penetrated through 3231 * buggy migration stream. 3232 */ 3233 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3234 qemu_log_mask(LOG_GUEST_ERROR, 3235 "%s: bogus vq index ignored\n", __func__); 3236 return; 3237 } 3238 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3239 } else { 3240 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3241 } 3242 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3243 vdev, idx, mask); 3244 } 3245 3246 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3247 { 3248 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3249 3250 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3251 } 3252 3253 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3254 const char *type) 3255 { 3256 /* 3257 * The name can be NULL, the netclient name will be type.x. 3258 */ 3259 assert(type != NULL); 3260 3261 g_free(n->netclient_name); 3262 g_free(n->netclient_type); 3263 n->netclient_name = g_strdup(name); 3264 n->netclient_type = g_strdup(type); 3265 } 3266 3267 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3268 { 3269 HotplugHandler *hotplug_ctrl; 3270 PCIDevice *pci_dev; 3271 Error *err = NULL; 3272 3273 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3274 if (hotplug_ctrl) { 3275 pci_dev = PCI_DEVICE(dev); 3276 pci_dev->partially_hotplugged = true; 3277 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3278 if (err) { 3279 error_report_err(err); 3280 return false; 3281 } 3282 } else { 3283 return false; 3284 } 3285 return true; 3286 } 3287 3288 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3289 Error **errp) 3290 { 3291 Error *err = NULL; 3292 HotplugHandler *hotplug_ctrl; 3293 PCIDevice *pdev = PCI_DEVICE(dev); 3294 BusState *primary_bus; 3295 3296 if (!pdev->partially_hotplugged) { 3297 return true; 3298 } 3299 primary_bus = dev->parent_bus; 3300 if (!primary_bus) { 3301 error_setg(errp, "virtio_net: couldn't find primary bus"); 3302 return false; 3303 } 3304 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3305 qatomic_set(&n->failover_primary_hidden, false); 3306 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3307 if (hotplug_ctrl) { 3308 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3309 if (err) { 3310 goto out; 3311 } 3312 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3313 } 3314 pdev->partially_hotplugged = false; 3315 3316 out: 3317 error_propagate(errp, err); 3318 return !err; 3319 } 3320 3321 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3322 { 3323 bool should_be_hidden; 3324 Error *err = NULL; 3325 DeviceState *dev = failover_find_primary_device(n); 3326 3327 if (!dev) { 3328 return; 3329 } 3330 3331 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3332 3333 if (migration_in_setup(s) && !should_be_hidden) { 3334 if (failover_unplug_primary(n, dev)) { 3335 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3336 qapi_event_send_unplug_primary(dev->id); 3337 qatomic_set(&n->failover_primary_hidden, true); 3338 } else { 3339 warn_report("couldn't unplug primary device"); 3340 } 3341 } else if (migration_has_failed(s)) { 3342 /* We already unplugged the device let's plug it back */ 3343 if (!failover_replug_primary(n, dev, &err)) { 3344 if (err) { 3345 error_report_err(err); 3346 } 3347 } 3348 } 3349 } 3350 3351 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3352 { 3353 MigrationState *s = data; 3354 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3355 virtio_net_handle_migration_primary(n, s); 3356 } 3357 3358 static bool failover_hide_primary_device(DeviceListener *listener, 3359 const QDict *device_opts, 3360 bool from_json, 3361 Error **errp) 3362 { 3363 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3364 const char *standby_id; 3365 3366 if (!device_opts) { 3367 return false; 3368 } 3369 3370 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3371 return false; 3372 } 3373 3374 if (!qdict_haskey(device_opts, "id")) { 3375 error_setg(errp, "Device with failover_pair_id needs to have id"); 3376 return false; 3377 } 3378 3379 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3380 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3381 return false; 3382 } 3383 3384 /* 3385 * The hide helper can be called several times for a given device. 3386 * Check there is only one primary for a virtio-net device but 3387 * don't duplicate the qdict several times if it's called for the same 3388 * device. 3389 */ 3390 if (n->primary_opts) { 3391 const char *old, *new; 3392 /* devices with failover_pair_id always have an id */ 3393 old = qdict_get_str(n->primary_opts, "id"); 3394 new = qdict_get_str(device_opts, "id"); 3395 if (strcmp(old, new) != 0) { 3396 error_setg(errp, "Cannot attach more than one primary device to " 3397 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3398 return false; 3399 } 3400 } else { 3401 n->primary_opts = qdict_clone_shallow(device_opts); 3402 n->primary_opts_from_json = from_json; 3403 } 3404 3405 /* failover_primary_hidden is set during feature negotiation */ 3406 return qatomic_read(&n->failover_primary_hidden); 3407 } 3408 3409 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3410 { 3411 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3412 VirtIONet *n = VIRTIO_NET(dev); 3413 NetClientState *nc; 3414 int i; 3415 3416 if (n->net_conf.mtu) { 3417 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3418 } 3419 3420 if (n->net_conf.duplex_str) { 3421 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3422 n->net_conf.duplex = DUPLEX_HALF; 3423 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3424 n->net_conf.duplex = DUPLEX_FULL; 3425 } else { 3426 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3427 return; 3428 } 3429 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3430 } else { 3431 n->net_conf.duplex = DUPLEX_UNKNOWN; 3432 } 3433 3434 if (n->net_conf.speed < SPEED_UNKNOWN) { 3435 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3436 return; 3437 } 3438 if (n->net_conf.speed >= 0) { 3439 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3440 } 3441 3442 if (n->failover) { 3443 n->primary_listener.hide_device = failover_hide_primary_device; 3444 qatomic_set(&n->failover_primary_hidden, true); 3445 device_listener_register(&n->primary_listener); 3446 n->migration_state.notify = virtio_net_migration_state_notifier; 3447 add_migration_state_change_notifier(&n->migration_state); 3448 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3449 } 3450 3451 virtio_net_set_config_size(n, n->host_features); 3452 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3453 3454 /* 3455 * We set a lower limit on RX queue size to what it always was. 3456 * Guests that want a smaller ring can always resize it without 3457 * help from us (using virtio 1 and up). 3458 */ 3459 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3460 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3461 !is_power_of_2(n->net_conf.rx_queue_size)) { 3462 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3463 "must be a power of 2 between %d and %d.", 3464 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3465 VIRTQUEUE_MAX_SIZE); 3466 virtio_cleanup(vdev); 3467 return; 3468 } 3469 3470 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3471 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3472 !is_power_of_2(n->net_conf.tx_queue_size)) { 3473 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3474 "must be a power of 2 between %d and %d", 3475 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3476 VIRTQUEUE_MAX_SIZE); 3477 virtio_cleanup(vdev); 3478 return; 3479 } 3480 3481 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3482 3483 /* 3484 * Figure out the datapath queue pairs since the backend could 3485 * provide control queue via peers as well. 3486 */ 3487 if (n->nic_conf.peers.queues) { 3488 for (i = 0; i < n->max_ncs; i++) { 3489 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3490 ++n->max_queue_pairs; 3491 } 3492 } 3493 } 3494 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3495 3496 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3497 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3498 "must be a positive integer less than %d.", 3499 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3500 virtio_cleanup(vdev); 3501 return; 3502 } 3503 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3504 n->curr_queue_pairs = 1; 3505 n->tx_timeout = n->net_conf.txtimer; 3506 3507 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3508 && strcmp(n->net_conf.tx, "bh")) { 3509 warn_report("virtio-net: " 3510 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3511 n->net_conf.tx); 3512 error_printf("Defaulting to \"bh\""); 3513 } 3514 3515 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3516 n->net_conf.tx_queue_size); 3517 3518 for (i = 0; i < n->max_queue_pairs; i++) { 3519 virtio_net_add_queue(n, i); 3520 } 3521 3522 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3523 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3524 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3525 n->status = VIRTIO_NET_S_LINK_UP; 3526 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3527 QEMU_CLOCK_VIRTUAL, 3528 virtio_net_announce_timer, n); 3529 n->announce_timer.round = 0; 3530 3531 if (n->netclient_type) { 3532 /* 3533 * Happen when virtio_net_set_netclient_name has been called. 3534 */ 3535 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3536 n->netclient_type, n->netclient_name, n); 3537 } else { 3538 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3539 object_get_typename(OBJECT(dev)), dev->id, n); 3540 } 3541 3542 for (i = 0; i < n->max_queue_pairs; i++) { 3543 n->nic->ncs[i].do_not_pad = true; 3544 } 3545 3546 peer_test_vnet_hdr(n); 3547 if (peer_has_vnet_hdr(n)) { 3548 for (i = 0; i < n->max_queue_pairs; i++) { 3549 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3550 } 3551 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3552 } else { 3553 n->host_hdr_len = 0; 3554 } 3555 3556 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3557 3558 n->vqs[0].tx_waiting = 0; 3559 n->tx_burst = n->net_conf.txburst; 3560 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3561 n->promisc = 1; /* for compatibility */ 3562 3563 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3564 3565 n->vlans = g_malloc0(MAX_VLAN >> 3); 3566 3567 nc = qemu_get_queue(n->nic); 3568 nc->rxfilter_notify_enabled = 1; 3569 3570 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3571 struct virtio_net_config netcfg = {}; 3572 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3573 vhost_net_set_config(get_vhost_net(nc->peer), 3574 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3575 } 3576 QTAILQ_INIT(&n->rsc_chains); 3577 n->qdev = dev; 3578 3579 net_rx_pkt_init(&n->rx_pkt, false); 3580 3581 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3582 virtio_net_load_ebpf(n); 3583 } 3584 } 3585 3586 static void virtio_net_device_unrealize(DeviceState *dev) 3587 { 3588 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3589 VirtIONet *n = VIRTIO_NET(dev); 3590 int i, max_queue_pairs; 3591 3592 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3593 virtio_net_unload_ebpf(n); 3594 } 3595 3596 /* This will stop vhost backend if appropriate. */ 3597 virtio_net_set_status(vdev, 0); 3598 3599 g_free(n->netclient_name); 3600 n->netclient_name = NULL; 3601 g_free(n->netclient_type); 3602 n->netclient_type = NULL; 3603 3604 g_free(n->mac_table.macs); 3605 g_free(n->vlans); 3606 3607 if (n->failover) { 3608 qobject_unref(n->primary_opts); 3609 device_listener_unregister(&n->primary_listener); 3610 remove_migration_state_change_notifier(&n->migration_state); 3611 } else { 3612 assert(n->primary_opts == NULL); 3613 } 3614 3615 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3616 for (i = 0; i < max_queue_pairs; i++) { 3617 virtio_net_del_queue(n, i); 3618 } 3619 /* delete also control vq */ 3620 virtio_del_queue(vdev, max_queue_pairs * 2); 3621 qemu_announce_timer_del(&n->announce_timer, false); 3622 g_free(n->vqs); 3623 qemu_del_nic(n->nic); 3624 virtio_net_rsc_cleanup(n); 3625 g_free(n->rss_data.indirections_table); 3626 net_rx_pkt_uninit(n->rx_pkt); 3627 virtio_cleanup(vdev); 3628 } 3629 3630 static void virtio_net_instance_init(Object *obj) 3631 { 3632 VirtIONet *n = VIRTIO_NET(obj); 3633 3634 /* 3635 * The default config_size is sizeof(struct virtio_net_config). 3636 * Can be overriden with virtio_net_set_config_size. 3637 */ 3638 n->config_size = sizeof(struct virtio_net_config); 3639 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3640 "bootindex", "/ethernet-phy@0", 3641 DEVICE(n)); 3642 3643 ebpf_rss_init(&n->ebpf_rss); 3644 } 3645 3646 static int virtio_net_pre_save(void *opaque) 3647 { 3648 VirtIONet *n = opaque; 3649 3650 /* At this point, backend must be stopped, otherwise 3651 * it might keep writing to memory. */ 3652 assert(!n->vhost_started); 3653 3654 return 0; 3655 } 3656 3657 static bool primary_unplug_pending(void *opaque) 3658 { 3659 DeviceState *dev = opaque; 3660 DeviceState *primary; 3661 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3662 VirtIONet *n = VIRTIO_NET(vdev); 3663 3664 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3665 return false; 3666 } 3667 primary = failover_find_primary_device(n); 3668 return primary ? primary->pending_deleted_event : false; 3669 } 3670 3671 static bool dev_unplug_pending(void *opaque) 3672 { 3673 DeviceState *dev = opaque; 3674 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3675 3676 return vdc->primary_unplug_pending(dev); 3677 } 3678 3679 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3680 { 3681 VirtIONet *n = VIRTIO_NET(vdev); 3682 NetClientState *nc = qemu_get_queue(n->nic); 3683 struct vhost_net *net = get_vhost_net(nc->peer); 3684 return &net->dev; 3685 } 3686 3687 static const VMStateDescription vmstate_virtio_net = { 3688 .name = "virtio-net", 3689 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3690 .version_id = VIRTIO_NET_VM_VERSION, 3691 .fields = (VMStateField[]) { 3692 VMSTATE_VIRTIO_DEVICE, 3693 VMSTATE_END_OF_LIST() 3694 }, 3695 .pre_save = virtio_net_pre_save, 3696 .dev_unplug_pending = dev_unplug_pending, 3697 }; 3698 3699 static Property virtio_net_properties[] = { 3700 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3701 VIRTIO_NET_F_CSUM, true), 3702 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3703 VIRTIO_NET_F_GUEST_CSUM, true), 3704 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3705 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3706 VIRTIO_NET_F_GUEST_TSO4, true), 3707 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3708 VIRTIO_NET_F_GUEST_TSO6, true), 3709 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3710 VIRTIO_NET_F_GUEST_ECN, true), 3711 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3712 VIRTIO_NET_F_GUEST_UFO, true), 3713 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3714 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3715 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3716 VIRTIO_NET_F_HOST_TSO4, true), 3717 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3718 VIRTIO_NET_F_HOST_TSO6, true), 3719 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3720 VIRTIO_NET_F_HOST_ECN, true), 3721 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3722 VIRTIO_NET_F_HOST_UFO, true), 3723 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3724 VIRTIO_NET_F_MRG_RXBUF, true), 3725 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3726 VIRTIO_NET_F_STATUS, true), 3727 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3728 VIRTIO_NET_F_CTRL_VQ, true), 3729 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3730 VIRTIO_NET_F_CTRL_RX, true), 3731 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3732 VIRTIO_NET_F_CTRL_VLAN, true), 3733 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3734 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3735 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3736 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3737 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3738 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3739 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3740 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3741 VIRTIO_NET_F_RSS, false), 3742 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3743 VIRTIO_NET_F_HASH_REPORT, false), 3744 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3745 VIRTIO_NET_F_RSC_EXT, false), 3746 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3747 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3748 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3749 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3750 TX_TIMER_INTERVAL), 3751 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3752 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3753 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3754 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3755 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3756 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3757 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3758 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3759 true), 3760 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3761 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3762 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3763 DEFINE_PROP_END_OF_LIST(), 3764 }; 3765 3766 static void virtio_net_class_init(ObjectClass *klass, void *data) 3767 { 3768 DeviceClass *dc = DEVICE_CLASS(klass); 3769 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3770 3771 device_class_set_props(dc, virtio_net_properties); 3772 dc->vmsd = &vmstate_virtio_net; 3773 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3774 vdc->realize = virtio_net_device_realize; 3775 vdc->unrealize = virtio_net_device_unrealize; 3776 vdc->get_config = virtio_net_get_config; 3777 vdc->set_config = virtio_net_set_config; 3778 vdc->get_features = virtio_net_get_features; 3779 vdc->set_features = virtio_net_set_features; 3780 vdc->bad_features = virtio_net_bad_features; 3781 vdc->reset = virtio_net_reset; 3782 vdc->set_status = virtio_net_set_status; 3783 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3784 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3785 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3786 vdc->post_load = virtio_net_post_load_virtio; 3787 vdc->vmsd = &vmstate_virtio_net_device; 3788 vdc->primary_unplug_pending = primary_unplug_pending; 3789 vdc->get_vhost = virtio_net_get_vhost; 3790 } 3791 3792 static const TypeInfo virtio_net_info = { 3793 .name = TYPE_VIRTIO_NET, 3794 .parent = TYPE_VIRTIO_DEVICE, 3795 .instance_size = sizeof(VirtIONet), 3796 .instance_init = virtio_net_instance_init, 3797 .class_init = virtio_net_class_init, 3798 }; 3799 3800 static void virtio_register_types(void) 3801 { 3802 type_register_static(&virtio_net_info); 3803 } 3804 3805 type_init(virtio_register_types) 3806