/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval. This value affects performance a
 * lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
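
/* All hash types this device can calculate and advertise for RSS. */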
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
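
/* Flush the peer's queued packets, purging any that cannot be sent right
 * away; afterwards no async TX may remain in flight on this subqueue. */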
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address. As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
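
/* Bring the vhost backend up or down so that it matches
 * virtio_net_started(); on start failure, fall back to the userspace
 * virtio implementation. */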
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back on fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
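
/* Complete and discard everything queued on a TX virtqueue, notifying the
 * guest so it can reclaim the buffers. */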
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}
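
/* Build the QMP RxFilterInfo from the current rx-mode, MAC table and VLAN
 * filter state, and re-enable the rx-filter-changed event. */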
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
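
/* Pick the guest-visible vnet header layout implied by the negotiated
 * features and propagate the new length to backends that support it. */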
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
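
/* Offer the device's feature set, minus whatever the peer backend cannot
 * support. */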
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be offered even
     * when the backend has not enabled it. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest announce its new location even with vDPA devices
     * that do not support a CVQ.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
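
/* Push the currently negotiated guest offloads (csum/TSO/ECN/UFO) down to
 * the backend. */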
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked
 * @opaque: the FailoverDevice to fill in when the primary is found
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
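
/* Hotplug the configured failover primary device unless it is already
 * present. */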
Make " 942 "sure primary device has parameter" 943 " failover_pair_id=%s\n", n->netclient_name); 944 return; 945 } 946 947 dev = qdev_device_add_from_qdict(n->primary_opts, 948 n->primary_opts_from_json, 949 &err); 950 if (err) { 951 qobject_unref(n->primary_opts); 952 n->primary_opts = NULL; 953 } else { 954 object_unref(OBJECT(dev)); 955 } 956 error_propagate(errp, err); 957 } 958 959 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 960 { 961 VirtIONet *n = VIRTIO_NET(vdev); 962 Error *err = NULL; 963 int i; 964 965 if (n->mtu_bypass_backend && 966 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 967 features &= ~(1ULL << VIRTIO_NET_F_MTU); 968 } 969 970 virtio_net_set_multiqueue(n, 971 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 972 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 973 974 virtio_net_set_mrg_rx_bufs(n, 975 virtio_has_feature(features, 976 VIRTIO_NET_F_MRG_RXBUF), 977 virtio_has_feature(features, 978 VIRTIO_F_VERSION_1), 979 virtio_has_feature(features, 980 VIRTIO_NET_F_HASH_REPORT)); 981 982 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 983 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 984 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 985 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 986 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 987 988 if (n->has_vnet_hdr) { 989 n->curr_guest_offloads = 990 virtio_net_guest_offloads_by_features(features); 991 virtio_net_apply_guest_offloads(n); 992 } 993 994 for (i = 0; i < n->max_queue_pairs; i++) { 995 NetClientState *nc = qemu_get_subqueue(n->nic, i); 996 997 if (!get_vhost_net(nc->peer)) { 998 continue; 999 } 1000 vhost_net_ack_features(get_vhost_net(nc->peer), features); 1001 1002 /* 1003 * keep acked_features in NetVhostUserState up-to-date so it 1004 * can't miss any features configured by guest virtio driver. 
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so that it
         * does not miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
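
/* VIRTIO_NET_CTRL_MAC: set the primary MAC address, or replace the whole
 * filter table, which arrives as two counted lists (unicast, multicast). */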
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov,
                                        unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}
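
/* Pass the steering eBPF program fd (or -1 to detach) to the backend of
 * the first queue, if it supports steering eBPF at all. */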
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
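
    /* The variable-length indirection table follows the fixed part of the
     * command; read it and convert each entry from virtio endianness. */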
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating, and detach eBPF
         * if it was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}
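
/* VIRTIO_NET_CTRL_MQ: change the number of active queue pairs, or
 * configure hash reporting / RSS. */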
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
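
/* Execute a single control-queue command: the out descriptors carry the
 * class/cmd header plus its payload, the in descriptors receive the ack. */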
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}
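
/* Return 1 if the RX ring has room for a packet of bufsize bytes; otherwise
 * re-enable guest notifications, re-check to close the race, and return 0. */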
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
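
/* Map the parsed L3/L4 protocol combination to a NetPktRss* hash type,
 * honouring only the hash types enabled in 'types'; 0xff means no hash. */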
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
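
/* Core RX path, called with the RCU read lock held: reroute through
 * software RSS when enabled, then copy the packet and its virtio-net
 * header into the guest's RX descriptor chain(s). */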
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it.
         */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
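
/*
 * Receive-Side Coalescing (RSC, VIRTIO_NET_F_RSC_EXT): merge consecutive
 * TCP segments of the same flow into one larger segment before handing it
 * to the guest.
 */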
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /*
         * If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it.
         */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /*
     * There is a difference between the payload length in IPv4 and IPv6:
     * the IP header is excluded in IPv6.
     */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
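/*
 * Drain timer callback: try to deliver every segment still cached on
 * this chain to the guest.  Failed deliveries are only counted in
 * purge_failed; if anything is still queued afterwards, the timer is
 * re-armed for another rsc_timeout interval.
 */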
static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_new(VirtioNetRscSeg, 1);
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header)
                        + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; bump dup_ack (the WHQL test allows up to 1) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}
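/*
 * Merge a new in-order segment into the cached one, or decide to stop:
 * returns RSC_COALESCE when the payload (or a pure window update) was
 * folded into the cached segment, and RSC_FINAL when the cached segment
 * must be flushed first (out-of-window data, dup ack, pure ack,
 * out-of-order data, or the coalesced size exceeding max_payload).
 */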
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload: the normal case, not a dup ack */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /*
         * This is the expected data; the payload length field differs
         * between IPv4 and IPv6, so use the extracted value to update
         * and record the new data length.
         */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /*
         * Take the 'PUSH' bit from the incoming packet: the WHQL test
         * guide says 'PUSH' can be coalesced for a Windows guest, while
         * this may change the behavior for a Linux guest (only if it
         * uses the RSC feature).
         */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/*
 * Packets with 'SYN' should bypass; any other control flag forces a
 * drain first to prevent out-of-order delivery.
 */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}
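/*
 * Walk the chain's cached segments looking for the flow this packet
 * belongs to.  RSC_FINAL flushes the matching cached segment and then
 * delivers the current packet unmodified; RSC_NO_MATCH keeps scanning;
 * any other result means the packet has been folded into the cache and
 * is consumed by reporting its full size back to the caller.
 */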
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced; set the flag so the cksum is recalculated for IPv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this avoids out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
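/*
 * Per-packet IPv4 entry point for RSC: short frames and anything that
 * fails the sanity check bypass coalescing entirely; control flags
 * (FIN/RST/...) first drain the flow via virtio_net_rsc_drain_flow(),
 * keyed on the IPv4 address pair (offset 12 into the IP header) and the
 * TCP port pair; only clean data segments reach
 * virtio_net_rsc_do_coalesce().
 */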
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both extension headers and the protocol are checked by this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
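/*
 * One VirtioNetRscChain exists per L3 protocol (ETH_P_IP / ETH_P_IPV6);
 * chains are created lazily on first use, and each carries its own
 * drain timer, statistics and payload limit.
 */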
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret >= n->tx_burst) {
        /*
         * The flush has been stopped by tx_burst; we will not receive
         * a notification for the remaining part, so re-schedule.
         */
        virtio_queue_set_notification(q->tx_vq, 0);
        if (q->tx_bh) {
            qemu_bh_schedule(q->tx_bh);
        } else {
            timer_mod(q->tx_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        }
        q->tx_waiting = 1;
    }
}

/* TX */
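/*
 * Pop up to n->tx_burst element chains from the TX ring and hand them
 * to the backend.  Return values: the number of packets flushed on
 * success, -EBUSY when the backend queue is full (completion resumes
 * via virtio_net_tx_complete), and -EINVAL when the device has been
 * marked broken by virtio_error().
 */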
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1],
                     *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged.  Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

static void virtio_net_tx_timer(void *opaque);
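/*
 * Two TX mitigation strategies share the code below: "timer" delays the
 * flush by tx_timeout ns to batch guest kicks, while "bh" (the default)
 * flushes from a bottom half scheduled right after the kick.  Both
 * disable further queue notifications while a flush is pending.
 */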
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when the device was stopped but the VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when the device was stopped but the VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when the device was stopped but the BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when the device was stopped but the BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule
     */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh,
                                                  &n->vqs[index],
                                                  &DEVICE(vdev)->mem_reentrancy_guard);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
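/*
 * Virtqueue layout: pair i uses vq index i * 2 for RX and i * 2 + 1 for
 * TX (hence vq2q() divides by two), and the control queue always sits
 * last.  With 4 queue pairs, for instance, indices 0..7 carry data and
 * index 8 is the control queue, 9 virtqueues in total.
 */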
static void virtio_net_change_num_queue_pairs(VirtIONet *n,
                                              int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues.  Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queue_pairs : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queue_pairs(n, max);

    virtio_net_set_queue_pairs(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /*
     * nc.link_down can't be migrated, so infer link_down according
     * to the link status bit in n->status
     */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queue_pairs_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/*
 * This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queue_pairs_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/*
 * The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queue_pairs and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */
static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
    if (tmp->parent->curr_queue_pairs == 0) {
        tmp->curr_queue_pairs_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
                     tmp->parent->curr_queue_pairs,
                     tmp->parent->max_queue_pairs);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queue_pairs_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/*
 * The 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/*
 * The 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}
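/*
 * RSS state travels as a migration subsection: it is only put on the
 * wire when virtio_net_rss_needed() returns true, so streams from
 * sources without RSS enabled keep loading on destinations that know
 * nothing about this subsection.
 */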
static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /*
         * Guarded pair: if it fits we load it, else we throw it away
         * - can happen if the source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /*
         * Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a
         * bitmap but stored in uint32_t units.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /*
         * Must guard against invalid features and bogus queue index
         * from being set by a malicious guest, or penetrating through
         * a buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
     * used as the configure interrupt's index.  If the driver does not
     * support it, the function returns false.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return vhost_net_config_pending(get_vhost_net(nc->peer));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /*
         * Must guard against invalid features and bogus queue index
         * from being set by a malicious guest, or penetrating through
         * a buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
     * used as the configure interrupt's index.  If the driver does not
     * support it, the function simply returns.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
        return;
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL; the netclient name will then be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
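/*
 * Failover during migration: when migration enters setup, the primary
 * device (typically a passthrough NIC) is unplugged from the guest and
 * marked partially_hotplugged so its state is not migrated; if
 * migration fails, it is plugged back via failover_replug_primary().
 */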
static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device; let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}

static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, s);
}

static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device, but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
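/*
 * Realize order matters: features decide the config space size before
 * virtio_init(), queue sizes are validated before any queue exists,
 * every data queue pair is added before the control queue, and the
 * NIC is created last so the peer's vnet header support can be probed.
 */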
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on the RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;
    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, ETH_ALEN,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}

static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also the control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /*
     * At this point the backend must be stopped, otherwise
     * it might keep writing to memory.
     */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    DeviceState *primary;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    primary = failover_find_primary_device(n);
    return primary ? primary->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct vhost_net *net = get_vhost_net(nc->peer);
    return &net->dev;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3898 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3899 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3900 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3901 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3902 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3903 VIRTIO_NET_F_RSS, false), 3904 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3905 VIRTIO_NET_F_HASH_REPORT, false), 3906 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3907 VIRTIO_NET_F_RSC_EXT, false), 3908 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3909 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3910 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3911 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3912 TX_TIMER_INTERVAL), 3913 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3914 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3915 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3916 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3917 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3918 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3919 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3920 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3921 true), 3922 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3923 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3924 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3925 DEFINE_PROP_END_OF_LIST(), 3926 }; 3927 3928 static void virtio_net_class_init(ObjectClass *klass, void *data) 3929 { 3930 DeviceClass *dc = DEVICE_CLASS(klass); 3931 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3932 3933 device_class_set_props(dc, virtio_net_properties); 3934 dc->vmsd = &vmstate_virtio_net; 3935 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3936 vdc->realize = virtio_net_device_realize; 3937 vdc->unrealize = virtio_net_device_unrealize; 3938 vdc->get_config = virtio_net_get_config; 3939 vdc->set_config = virtio_net_set_config; 3940 vdc->get_features = virtio_net_get_features; 3941 vdc->set_features = virtio_net_set_features; 3942 vdc->bad_features = virtio_net_bad_features; 3943 vdc->reset = virtio_net_reset; 3944 vdc->queue_reset = virtio_net_queue_reset; 3945 vdc->queue_enable = virtio_net_queue_enable; 3946 vdc->set_status = virtio_net_set_status; 3947 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3948 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3949 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3950 vdc->post_load = virtio_net_post_load_virtio; 3951 vdc->vmsd = &vmstate_virtio_net_device; 3952 vdc->primary_unplug_pending = primary_unplug_pending; 3953 vdc->get_vhost = virtio_net_get_vhost; 3954 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb; 3955 } 3956 3957 static const TypeInfo virtio_net_info = { 3958 .name = TYPE_VIRTIO_NET, 3959 .parent = TYPE_VIRTIO_DEVICE, 3960 .instance_size = sizeof(VirtIONet), 3961 .instance_init = virtio_net_instance_init, 3962 .class_init = virtio_net_class_init, 3963 }; 3964 3965 static void virtio_register_types(void) 3966 { 3967 type_register_static(&virtio_net_info); 3968 } 3969 3970 type_init(virtio_register_types) 3971