/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value greatly affects
 * performance and should be tuned carefully: 300000 (300us) is the
 * recommended value to pass the WHQL test, while 50000 can gain 2x
 * netperf throughput with tso/gso/gro off.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
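/*
 * The guest-visible config size is derived from the negotiated feature
 * bits: each entry below extends the config up to 'end' when one of its
 * flag bits is offered.  For example, with only VIRTIO_NET_F_MAC the
 * config ends after the 6-byte mac field, while VIRTIO_NET_F_MQ extends
 * it through max_virtqueue_pairs.
 */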
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

/* Each queue pair owns two virtqueues: index 2n is RX, index 2n + 1 is TX. */
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.
         * As that is not a legal address, try to proceed with the address
         * from the QEMU command line in the hope that the address has been
         * configured correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
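    /*
     * Past this point the desired state (device started and link up)
     * differs from n->vhost_started, so vhost is either brought up or
     * torn down; the early return above filters redundant transitions.
     */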
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("MTU of %u bytes is not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
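/*
 * Example of the endianness fallback above: a big-endian guest on a
 * little-endian host first tries qemu_set_vnet_be() on the backend; if
 * that fails, needs_vnet_hdr_swap is set and virtio_net_hdr_swap()
 * byte-swaps the vnet header fields in the device model instead.
 */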
371 */ 372 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 373 } 374 } 375 376 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 377 { 378 unsigned int dropped = virtqueue_drop_all(vq); 379 if (dropped) { 380 virtio_notify(vdev, vq); 381 } 382 } 383 384 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 385 { 386 VirtIONet *n = VIRTIO_NET(vdev); 387 VirtIONetQueue *q; 388 int i; 389 uint8_t queue_status; 390 391 virtio_net_vnet_endian_status(n, status); 392 virtio_net_vhost_status(n, status); 393 394 for (i = 0; i < n->max_queue_pairs; i++) { 395 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 396 bool queue_started; 397 q = &n->vqs[i]; 398 399 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 400 queue_status = 0; 401 } else { 402 queue_status = status; 403 } 404 queue_started = 405 virtio_net_started(n, queue_status) && !n->vhost_started; 406 407 if (queue_started) { 408 qemu_flush_queued_packets(ncs); 409 } 410 411 if (!q->tx_waiting) { 412 continue; 413 } 414 415 if (queue_started) { 416 if (q->tx_timer) { 417 timer_mod(q->tx_timer, 418 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 419 } else { 420 qemu_bh_schedule(q->tx_bh); 421 } 422 } else { 423 if (q->tx_timer) { 424 timer_del(q->tx_timer); 425 } else { 426 qemu_bh_cancel(q->tx_bh); 427 } 428 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 429 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 430 vdev->vm_running) { 431 /* if tx is waiting we are likely have some packets in tx queue 432 * and disabled notification */ 433 q->tx_waiting = 0; 434 virtio_queue_set_notification(q->tx_vq, 1); 435 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 436 } 437 } 438 } 439 } 440 441 static void virtio_net_set_link_status(NetClientState *nc) 442 { 443 VirtIONet *n = qemu_get_nic_opaque(nc); 444 VirtIODevice *vdev = VIRTIO_DEVICE(n); 445 uint16_t old_status = n->status; 446 447 if (nc->link_down) 448 n->status &= ~VIRTIO_NET_S_LINK_UP; 449 else 450 n->status |= VIRTIO_NET_S_LINK_UP; 451 452 if (n->status != old_status) 453 virtio_notify_config(vdev); 454 455 virtio_net_set_status(vdev, vdev->status); 456 } 457 458 static void rxfilter_notify(NetClientState *nc) 459 { 460 VirtIONet *n = qemu_get_nic_opaque(nc); 461 462 if (nc->rxfilter_notify_enabled) { 463 char *path = object_get_canonical_path(OBJECT(n->qdev)); 464 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path); 465 g_free(path); 466 467 /* disable event notification to avoid events flooding */ 468 nc->rxfilter_notify_enabled = 0; 469 } 470 } 471 472 static intList *get_vlan_table(VirtIONet *n) 473 { 474 intList *list; 475 int i, j; 476 477 list = NULL; 478 for (i = 0; i < MAX_VLAN >> 5; i++) { 479 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 480 if (n->vlans[i] & (1U << j)) { 481 QAPI_LIST_PREPEND(list, (i << 5) + j); 482 } 483 } 484 } 485 486 return list; 487 } 488 489 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 490 { 491 VirtIONet *n = qemu_get_nic_opaque(nc); 492 VirtIODevice *vdev = VIRTIO_DEVICE(n); 493 RxFilterInfo *info; 494 strList *str_list; 495 int i; 496 497 info = g_malloc0(sizeof(*info)); 498 info->name = g_strdup(nc->name); 499 info->promiscuous = n->promisc; 500 501 if (n->nouni) { 502 info->unicast = RX_STATE_NONE; 503 } else if (n->alluni) { 504 info->unicast = RX_STATE_ALL; 505 } else { 506 info->unicast = RX_STATE_NORMAL; 507 } 508 509 if (n->nomulti) { 510 info->multicast = RX_STATE_NONE; 511 } else if (n->allmulti) { 512 info->multicast = 
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* n->nobcast means "no broadcast", so the QAPI field is its inverse. */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue %d "
                         "while resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
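/*
 * Guest header sizes selected above (per the virtio spec):
 *   !VERSION_1, !MRG_RXBUF:     10 bytes (struct virtio_net_hdr)
 *   !VERSION_1,  MRG_RXBUF:     12 bytes (struct virtio_net_hdr_mrg_rxbuf)
 *   VERSION_1 without HASH_REPORT: 12 bytes
 *   VERSION_1 with HASH_REPORT:    20 bytes (struct virtio_net_hdr_v1_hash)
 */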
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possibly supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        virtio_add_feature(&features, VIRTIO_F_RING_RESET);
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
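/*
 * Example: if the guest acks only VIRTIO_NET_F_GUEST_CSUM and
 * VIRTIO_NET_F_GUEST_TSO4, virtio_net_apply_guest_offloads() enables rx
 * checksum and TSO4 on the peer and leaves TSO6/ECN/UFO off; bits
 * outside guest_offloads_mask never reach qemu_set_offload().
 */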
typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device currently being walked
 * @opaque: the FailoverDevice to fill in
 *
 * Returns 1 to stop the walk once the primary device is found.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the primary device, or NULL if none is found.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
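/*
 * Control-queue commands arrive as
 *   struct virtio_net_ctrl_hdr { u8 class; u8 cmd; } + command payload,
 * and the device answers with a one-byte VIRTIO_NET_OK/VIRTIO_NET_ERR
 * ack.  For the RX class below the payload is a single "on" byte.
 */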
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;
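    /*
     * The command carries two virtio_net_ctrl_mac tables back to back:
     * unicast first, then multicast, each a 32-bit entry count followed
     * by entries * ETH_ALEN addresses.  Parse the multicast table next;
     * it must consume the remainder of the buffer exactly.
     */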
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}
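/*
 * Passing prog_fd == -1 to the backend detaches any steering program;
 * virtio_net_load_ebpf() uses the same call as a capability probe
 * before actually loading the RSS program.
 */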
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
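/*
 * Wire layout parsed below (cf. struct virtio_net_rss_config in the
 * virtio spec):
 *   le32 hash_types; le16 indirection_table_mask; le16 unclassified_queue;
 *   le16 indirection_table[mask + 1]; le16 max_tx_vq; u8 hash_key_length;
 *   u8 hash_key_data[hash_key_length];
 * For plain hash reporting (do_rss == false) the indirection table
 * collapses to a single entry.
 */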
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* eBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fall back to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
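/*
 * Each control request is a descriptor chain: the driver-readable part
 * (out_sg) holds the ctrl header and command payload, while the
 * device-writable part (in_sg) receives the one-byte status ack.
 */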
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
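/*
 * Queue selection example: with a 128-entry indirection table,
 * new_index = indirections_table[hash & 127].  The masking below relies
 * on indirections_len being a power of two, which handle_rss enforced.
 */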
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
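/*
 * Receive path: with software RSS enabled the packet may first be
 * redirected to another subqueue (no_rss stops the recursion), then it
 * is copied into as many rx descriptors as needed; for mergeable
 * buffers the number of descriptors used is patched into the header's
 * num_buffers field afterwards.
 */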
*/ 1931 if (!n->mergeable_rx_bufs && offset < size) { 1932 virtqueue_unpop(q->rx_vq, elem, total); 1933 g_free(elem); 1934 err = size; 1935 goto err; 1936 } 1937 1938 elems[i] = elem; 1939 lens[i] = total; 1940 i++; 1941 } 1942 1943 if (mhdr_cnt) { 1944 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1945 iov_from_buf(mhdr_sg, mhdr_cnt, 1946 0, 1947 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1948 } 1949 1950 for (j = 0; j < i; j++) { 1951 /* signal other side */ 1952 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1953 g_free(elems[j]); 1954 } 1955 1956 virtqueue_flush(q->rx_vq, i); 1957 virtio_notify(vdev, q->rx_vq); 1958 1959 return size; 1960 1961 err: 1962 for (j = 0; j < i; j++) { 1963 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1964 g_free(elems[j]); 1965 } 1966 1967 return err; 1968 } 1969 1970 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1971 size_t size) 1972 { 1973 RCU_READ_LOCK_GUARD(); 1974 1975 return virtio_net_receive_rcu(nc, buf, size, false); 1976 } 1977 1978 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1979 const uint8_t *buf, 1980 VirtioNetRscUnit *unit) 1981 { 1982 uint16_t ip_hdrlen; 1983 struct ip_header *ip; 1984 1985 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1986 + sizeof(struct eth_header)); 1987 unit->ip = (void *)ip; 1988 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1989 unit->ip_plen = &ip->ip_len; 1990 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1991 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1992 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1993 } 1994 1995 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1996 const uint8_t *buf, 1997 VirtioNetRscUnit *unit) 1998 { 1999 struct ip6_header *ip6; 2000 2001 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2002 + sizeof(struct eth_header)); 2003 unit->ip = ip6; 2004 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2005 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2006 + sizeof(struct ip6_header)); 2007 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2008 2009 /* There is a difference between the payload length in ipv4 and v6: 2010 the ip header is excluded in ipv6 */ 2011 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2012 } 2013 2014 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2015 VirtioNetRscSeg *seg) 2016 { 2017 int ret; 2018 struct virtio_net_hdr_v1 *h; 2019 2020 h = (struct virtio_net_hdr_v1 *)seg->buf; 2021 h->flags = 0; 2022 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2023 2024 if (seg->is_coalesced) { 2025 h->rsc.segments = seg->packets; 2026 h->rsc.dup_acks = seg->dup_ack; 2027 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2028 if (chain->proto == ETH_P_IP) { 2029 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2030 } else { 2031 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2032 } 2033 } 2034 2035 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2036 QTAILQ_REMOVE(&chain->buffers, seg, next); 2037 g_free(seg->buf); 2038 g_free(seg); 2039 2040 return ret; 2041 } 2042 2043 static void virtio_net_rsc_purge(void *opq) 2044 { 2045 VirtioNetRscSeg *seg, *rn; 2046 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2047 2048 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2049 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2050 chain->stat.purge_failed++; 2051 continue; 2052 } 2053 } 2054 2055 chain->stat.timer++; 2056 if (!QTAILQ_EMPTY(&chain->buffers)) { 2057
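/* Re-arm the timer while any segments remain cached. */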
timer_mod(chain->drain_timer, 2058 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2059 } 2060 } 2061 2062 static void virtio_net_rsc_cleanup(VirtIONet *n) 2063 { 2064 VirtioNetRscChain *chain, *rn_chain; 2065 VirtioNetRscSeg *seg, *rn_seg; 2066 2067 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2068 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2069 QTAILQ_REMOVE(&chain->buffers, seg, next); 2070 g_free(seg->buf); 2071 g_free(seg); 2072 } 2073 2074 timer_free(chain->drain_timer); 2075 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2076 g_free(chain); 2077 } 2078 } 2079 2080 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2081 NetClientState *nc, 2082 const uint8_t *buf, size_t size) 2083 { 2084 uint16_t hdr_len; 2085 VirtioNetRscSeg *seg; 2086 2087 hdr_len = chain->n->guest_hdr_len; 2088 seg = g_new(VirtioNetRscSeg, 1); 2089 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2090 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2091 memcpy(seg->buf, buf, size); 2092 seg->size = size; 2093 seg->packets = 1; 2094 seg->dup_ack = 0; 2095 seg->is_coalesced = 0; 2096 seg->nc = nc; 2097 2098 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2099 chain->stat.cache++; 2100 2101 switch (chain->proto) { 2102 case ETH_P_IP: 2103 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2104 break; 2105 case ETH_P_IPV6: 2106 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2107 break; 2108 default: 2109 g_assert_not_reached(); 2110 } 2111 } 2112 2113 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2114 VirtioNetRscSeg *seg, 2115 const uint8_t *buf, 2116 struct tcp_header *n_tcp, 2117 struct tcp_header *o_tcp) 2118 { 2119 uint32_t nack, oack; 2120 uint16_t nwin, owin; 2121 2122 nack = htonl(n_tcp->th_ack); 2123 nwin = htons(n_tcp->th_win); 2124 oack = htonl(o_tcp->th_ack); 2125 owin = htons(o_tcp->th_win); 2126 2127 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2128 chain->stat.ack_out_of_win++; 2129 return RSC_FINAL; 2130 } else if (nack == oack) { 2131 /* duplicated ack or window probe */ 2132 if (nwin == owin) { 2133 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2134 chain->stat.dup_ack++; 2135 return RSC_FINAL; 2136 } else { 2137 /* Coalesce window update */ 2138 o_tcp->th_win = n_tcp->th_win; 2139 chain->stat.win_update++; 2140 return RSC_COALESCE; 2141 } 2142 } else { 2143 /* pure ack, go to 'C', finalize*/ 2144 chain->stat.pure_ack++; 2145 return RSC_FINAL; 2146 } 2147 } 2148 2149 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2150 VirtioNetRscSeg *seg, 2151 const uint8_t *buf, 2152 VirtioNetRscUnit *n_unit) 2153 { 2154 void *data; 2155 uint16_t o_ip_len; 2156 uint32_t nseq, oseq; 2157 VirtioNetRscUnit *o_unit; 2158 2159 o_unit = &seg->unit; 2160 o_ip_len = htons(*o_unit->ip_plen); 2161 nseq = htonl(n_unit->tcp->th_seq); 2162 oseq = htonl(o_unit->tcp->th_seq); 2163 2164 /* out of order or retransmitted. 
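 * The unsigned subtraction turns a sequence number behind oseq into a
 * huge value, so this single compare against the maximum coalesced
 * payload rejects both directions.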
*/ 2165 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2166 chain->stat.data_out_of_win++; 2167 return RSC_FINAL; 2168 } 2169 2170 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2171 if (nseq == oseq) { 2172 if ((o_unit->payload == 0) && n_unit->payload) { 2173 /* From no payload to payload, normal case, not a dup ack or etc */ 2174 chain->stat.data_after_pure_ack++; 2175 goto coalesce; 2176 } else { 2177 return virtio_net_rsc_handle_ack(chain, seg, buf, 2178 n_unit->tcp, o_unit->tcp); 2179 } 2180 } else if ((nseq - oseq) != o_unit->payload) { 2181 /* Not a consistent packet, out of order */ 2182 chain->stat.data_out_of_order++; 2183 return RSC_FINAL; 2184 } else { 2185 coalesce: 2186 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2187 chain->stat.over_size++; 2188 return RSC_FINAL; 2189 } 2190 2191 /* The right data has arrived; the payload length field differs between 2192 v4 and v6, so use the field value to update and record the new data len */ 2193 o_unit->payload += n_unit->payload; /* update new data len */ 2194 2195 /* update field in ip header */ 2196 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2197 2198 /* Carry the 'PUSH' flag over; the whql test guide says 'PUSH' can be 2199 coalesced for windows guests, while this may change the behavior for 2200 linux guests (only if they use the RSC feature). */ 2201 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2202 2203 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2204 o_unit->tcp->th_win = n_unit->tcp->th_win; 2205 2206 memmove(seg->buf + seg->size, data, n_unit->payload); 2207 seg->size += n_unit->payload; 2208 seg->packets++; 2209 chain->stat.coalesced++; 2210 return RSC_COALESCE; 2211 } 2212 } 2213 2214 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2215 VirtioNetRscSeg *seg, 2216 const uint8_t *buf, size_t size, 2217 VirtioNetRscUnit *unit) 2218 { 2219 struct ip_header *ip1, *ip2; 2220 2221 ip1 = (struct ip_header *)(unit->ip); 2222 ip2 = (struct ip_header *)(seg->unit.ip); 2223 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2224 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2225 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2226 chain->stat.no_match++; 2227 return RSC_NO_MATCH; 2228 } 2229 2230 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2231 } 2232 2233 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2234 VirtioNetRscSeg *seg, 2235 const uint8_t *buf, size_t size, 2236 VirtioNetRscUnit *unit) 2237 { 2238 struct ip6_header *ip1, *ip2; 2239 2240 ip1 = (struct ip6_header *)(unit->ip); 2241 ip2 = (struct ip6_header *)(seg->unit.ip); 2242 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2243 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2244 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2245 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2246 chain->stat.no_match++; 2247 return RSC_NO_MATCH; 2248 } 2249 2250 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2251 } 2252 2253 /* Packets with 'SYN' should bypass; any other control flag means the 2254 * pending data is drained first, to prevent out-of-order delivery */ 2255 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2256 struct tcp_header *tcp) 2257 { 2258 uint16_t tcp_hdr; 2259 uint16_t tcp_flag; 2260 2261 tcp_flag = htons(tcp->th_offset_flags); 2262 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2263 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2264 if (tcp_flag & TH_SYN) { 2265 chain->stat.tcp_syn++; 2266 return RSC_BYPASS; 2267 } 2268 2269 if
(tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2270 chain->stat.tcp_ctrl_drain++; 2271 return RSC_FINAL; 2272 } 2273 2274 if (tcp_hdr > sizeof(struct tcp_header)) { 2275 chain->stat.tcp_all_opt++; 2276 return RSC_FINAL; 2277 } 2278 2279 return RSC_CANDIDATE; 2280 } 2281 2282 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2283 NetClientState *nc, 2284 const uint8_t *buf, size_t size, 2285 VirtioNetRscUnit *unit) 2286 { 2287 int ret; 2288 VirtioNetRscSeg *seg, *nseg; 2289 2290 if (QTAILQ_EMPTY(&chain->buffers)) { 2291 chain->stat.empty_cache++; 2292 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2293 timer_mod(chain->drain_timer, 2294 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2295 return size; 2296 } 2297 2298 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2299 if (chain->proto == ETH_P_IP) { 2300 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2301 } else { 2302 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2303 } 2304 2305 if (ret == RSC_FINAL) { 2306 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2307 /* Send failed */ 2308 chain->stat.final_failed++; 2309 return 0; 2310 } 2311 2312 /* Send current packet */ 2313 return virtio_net_do_receive(nc, buf, size); 2314 } else if (ret == RSC_NO_MATCH) { 2315 continue; 2316 } else { 2317 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2318 seg->is_coalesced = 1; 2319 return size; 2320 } 2321 } 2322 2323 chain->stat.no_match_cache++; 2324 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2325 return size; 2326 } 2327 2328 /* Drain a connection data, this is to avoid out of order segments */ 2329 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2330 NetClientState *nc, 2331 const uint8_t *buf, size_t size, 2332 uint16_t ip_start, uint16_t ip_size, 2333 uint16_t tcp_port) 2334 { 2335 VirtioNetRscSeg *seg, *nseg; 2336 uint32_t ppair1, ppair2; 2337 2338 ppair1 = *(uint32_t *)(buf + tcp_port); 2339 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2340 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2341 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2342 || (ppair1 != ppair2)) { 2343 continue; 2344 } 2345 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2346 chain->stat.drain_failed++; 2347 } 2348 2349 break; 2350 } 2351 2352 return virtio_net_do_receive(nc, buf, size); 2353 } 2354 2355 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2356 struct ip_header *ip, 2357 const uint8_t *buf, size_t size) 2358 { 2359 uint16_t ip_len; 2360 2361 /* Not an ipv4 packet */ 2362 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2363 chain->stat.ip_option++; 2364 return RSC_BYPASS; 2365 } 2366 2367 /* Don't handle packets with ip option */ 2368 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2369 chain->stat.ip_option++; 2370 return RSC_BYPASS; 2371 } 2372 2373 if (ip->ip_p != IPPROTO_TCP) { 2374 chain->stat.bypass_not_tcp++; 2375 return RSC_BYPASS; 2376 } 2377 2378 /* Don't handle packets with ip fragment */ 2379 if (!(htons(ip->ip_off) & IP_DF)) { 2380 chain->stat.ip_frag++; 2381 return RSC_BYPASS; 2382 } 2383 2384 /* Don't handle packets with ecn flag */ 2385 if (IPTOS_ECN(ip->ip_tos)) { 2386 chain->stat.ip_ecn++; 2387 return RSC_BYPASS; 2388 } 2389 2390 ip_len = htons(ip->ip_len); 2391 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2392 || ip_len > (size - chain->n->guest_hdr_len - 2393 sizeof(struct eth_header))) { 2394 chain->stat.ip_hacked++; 2395 return 
RSC_BYPASS; 2396 } 2397 2398 return RSC_CANDIDATE; 2399 } 2400 2401 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2402 NetClientState *nc, 2403 const uint8_t *buf, size_t size) 2404 { 2405 int32_t ret; 2406 uint16_t hdr_len; 2407 VirtioNetRscUnit unit; 2408 2409 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2410 2411 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2412 + sizeof(struct tcp_header))) { 2413 chain->stat.bypass_not_tcp++; 2414 return virtio_net_do_receive(nc, buf, size); 2415 } 2416 2417 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2418 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2419 != RSC_CANDIDATE) { 2420 return virtio_net_do_receive(nc, buf, size); 2421 } 2422 2423 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2424 if (ret == RSC_BYPASS) { 2425 return virtio_net_do_receive(nc, buf, size); 2426 } else if (ret == RSC_FINAL) { 2427 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2428 ((hdr_len + sizeof(struct eth_header)) + 12), 2429 VIRTIO_NET_IP4_ADDR_SIZE, 2430 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2431 } 2432 2433 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2434 } 2435 2436 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2437 struct ip6_header *ip6, 2438 const uint8_t *buf, size_t size) 2439 { 2440 uint16_t ip_len; 2441 2442 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2443 != IP_HEADER_VERSION_6) { 2444 return RSC_BYPASS; 2445 } 2446 2447 /* Both options and the protocol are checked here */ 2448 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2449 chain->stat.bypass_not_tcp++; 2450 return RSC_BYPASS; 2451 } 2452 2453 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2454 if (ip_len < sizeof(struct tcp_header) || 2455 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2456 - sizeof(struct ip6_header))) { 2457 chain->stat.ip_hacked++; 2458 return RSC_BYPASS; 2459 } 2460 2461 /* Don't handle packets with ecn flag */ 2462 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2463 chain->stat.ip_ecn++; 2464 return RSC_BYPASS; 2465 } 2466 2467 return RSC_CANDIDATE; 2468 } 2469 2470 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2471 const uint8_t *buf, size_t size) 2472 { 2473 int32_t ret; 2474 uint16_t hdr_len; 2475 VirtioNetRscChain *chain; 2476 VirtioNetRscUnit unit; 2477 2478 chain = opq; 2479 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2480 2481 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2482 + sizeof(struct tcp_header))) { 2483 return virtio_net_do_receive(nc, buf, size); 2484 } 2485 2486 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2487 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2488 unit.ip, buf, size)) { 2489 return virtio_net_do_receive(nc, buf, size); 2490 } 2491 2492 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2493 if (ret == RSC_BYPASS) { 2494 return virtio_net_do_receive(nc, buf, size); 2495 } else if (ret == RSC_FINAL) { 2496 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2497 ((hdr_len + sizeof(struct eth_header)) + 8), 2498 VIRTIO_NET_IP6_ADDR_SIZE, 2499 hdr_len + sizeof(struct eth_header) 2500 + sizeof(struct ip6_header)); 2501 } 2502 2503 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2504 } 2505 2506 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2507 NetClientState *nc, 2508 uint16_t proto) 2509 { 2510 VirtioNetRscChain *chain; 2511 2512 if
((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2513 return NULL; 2514 } 2515 2516 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2517 if (chain->proto == proto) { 2518 return chain; 2519 } 2520 } 2521 2522 chain = g_malloc(sizeof(*chain)); 2523 chain->n = n; 2524 chain->proto = proto; 2525 if (proto == (uint16_t)ETH_P_IP) { 2526 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2527 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2528 } else { 2529 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2530 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2531 } 2532 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2533 virtio_net_rsc_purge, chain); 2534 memset(&chain->stat, 0, sizeof(chain->stat)); 2535 2536 QTAILQ_INIT(&chain->buffers); 2537 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2538 2539 return chain; 2540 } 2541 2542 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2543 const uint8_t *buf, 2544 size_t size) 2545 { 2546 uint16_t proto; 2547 VirtioNetRscChain *chain; 2548 struct eth_header *eth; 2549 VirtIONet *n; 2550 2551 n = qemu_get_nic_opaque(nc); 2552 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2553 return virtio_net_do_receive(nc, buf, size); 2554 } 2555 2556 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2557 proto = htons(eth->h_proto); 2558 2559 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2560 if (chain) { 2561 chain->stat.received++; 2562 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2563 return virtio_net_rsc_receive4(chain, nc, buf, size); 2564 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2565 return virtio_net_rsc_receive6(chain, nc, buf, size); 2566 } 2567 } 2568 return virtio_net_do_receive(nc, buf, size); 2569 } 2570 2571 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2572 size_t size) 2573 { 2574 VirtIONet *n = qemu_get_nic_opaque(nc); 2575 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2576 return virtio_net_rsc_receive(nc, buf, size); 2577 } else { 2578 return virtio_net_do_receive(nc, buf, size); 2579 } 2580 } 2581 2582 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2583 2584 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2585 { 2586 VirtIONet *n = qemu_get_nic_opaque(nc); 2587 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2588 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2589 int ret; 2590 2591 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2592 virtio_notify(vdev, q->tx_vq); 2593 2594 g_free(q->async_tx.elem); 2595 q->async_tx.elem = NULL; 2596 2597 virtio_queue_set_notification(q->tx_vq, 1); 2598 ret = virtio_net_flush_tx(q); 2599 if (ret >= n->tx_burst) { 2600 /* 2601 * the flush has been stopped by tx_burst; 2602 * we will not receive a notification for the 2603 * remaining part, so re-schedule 2604 */ 2605 virtio_queue_set_notification(q->tx_vq, 0); 2606 if (q->tx_bh) { 2607 qemu_bh_schedule(q->tx_bh); 2608 } else { 2609 timer_mod(q->tx_timer, 2610 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2611 } 2612 q->tx_waiting = 1; 2613 } 2614 } 2615 2616 /* TX */ 2617 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2618 { 2619 VirtIONet *n = q->n; 2620 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2621 VirtQueueElement *elem; 2622 int32_t num_packets = 0; 2623 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2624 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2625 return num_packets; 2626 } 2627 2628 if (q->async_tx.elem) { 2629 virtio_queue_set_notification(q->tx_vq, 0); 2630 return num_packets; 2631 } 2632 2633 for
(;;) { 2634 ssize_t ret; 2635 unsigned int out_num; 2636 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2637 struct virtio_net_hdr_mrg_rxbuf mhdr; 2638 2639 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2640 if (!elem) { 2641 break; 2642 } 2643 2644 out_num = elem->out_num; 2645 out_sg = elem->out_sg; 2646 if (out_num < 1) { 2647 virtio_error(vdev, "virtio-net header not in first element"); 2648 virtqueue_detach_element(q->tx_vq, elem, 0); 2649 g_free(elem); 2650 return -EINVAL; 2651 } 2652 2653 if (n->has_vnet_hdr) { 2654 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2655 n->guest_hdr_len) { 2656 virtio_error(vdev, "virtio-net header incorrect"); 2657 virtqueue_detach_element(q->tx_vq, elem, 0); 2658 g_free(elem); 2659 return -EINVAL; 2660 } 2661 if (n->needs_vnet_hdr_swap) { 2662 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2663 sg2[0].iov_base = &mhdr; 2664 sg2[0].iov_len = n->guest_hdr_len; 2665 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2666 out_sg, out_num, 2667 n->guest_hdr_len, -1); 2668 if (out_num == VIRTQUEUE_MAX_SIZE) { 2669 goto drop; 2670 } 2671 out_num += 1; 2672 out_sg = sg2; 2673 } 2674 } 2675 /* 2676 * If host wants to see the guest header as is, we can 2677 * pass it on unchanged. Otherwise, copy just the parts 2678 * that host is interested in. 2679 */ 2680 assert(n->host_hdr_len <= n->guest_hdr_len); 2681 if (n->host_hdr_len != n->guest_hdr_len) { 2682 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2683 out_sg, out_num, 2684 0, n->host_hdr_len); 2685 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2686 out_sg, out_num, 2687 n->guest_hdr_len, -1); 2688 out_num = sg_num; 2689 out_sg = sg; 2690 } 2691 2692 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2693 out_sg, out_num, virtio_net_tx_complete); 2694 if (ret == 0) { 2695 virtio_queue_set_notification(q->tx_vq, 0); 2696 q->async_tx.elem = elem; 2697 return -EBUSY; 2698 } 2699 2700 drop: 2701 virtqueue_push(q->tx_vq, elem, 0); 2702 virtio_notify(vdev, q->tx_vq); 2703 g_free(elem); 2704 2705 if (++num_packets >= n->tx_burst) { 2706 break; 2707 } 2708 } 2709 return num_packets; 2710 } 2711 2712 static void virtio_net_tx_timer(void *opaque); 2713 2714 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2715 { 2716 VirtIONet *n = VIRTIO_NET(vdev); 2717 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2718 2719 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2720 virtio_net_drop_tx_queue_data(vdev, vq); 2721 return; 2722 } 2723 2724 /* This happens when device was stopped but VCPU wasn't. 
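 * Record tx_waiting so the flush happens once the VM resumes.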
*/ 2725 if (!vdev->vm_running) { 2726 q->tx_waiting = 1; 2727 return; 2728 } 2729 2730 if (q->tx_waiting) { 2731 /* We already have queued packets, immediately flush */ 2732 timer_del(q->tx_timer); 2733 virtio_net_tx_timer(q); 2734 } else { 2735 /* re-arm timer to flush it (and more) on next tick */ 2736 timer_mod(q->tx_timer, 2737 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2738 q->tx_waiting = 1; 2739 virtio_queue_set_notification(vq, 0); 2740 } 2741 } 2742 2743 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2744 { 2745 VirtIONet *n = VIRTIO_NET(vdev); 2746 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2747 2748 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2749 virtio_net_drop_tx_queue_data(vdev, vq); 2750 return; 2751 } 2752 2753 if (unlikely(q->tx_waiting)) { 2754 return; 2755 } 2756 q->tx_waiting = 1; 2757 /* This happens when device was stopped but VCPU wasn't. */ 2758 if (!vdev->vm_running) { 2759 return; 2760 } 2761 virtio_queue_set_notification(vq, 0); 2762 qemu_bh_schedule(q->tx_bh); 2763 } 2764 2765 static void virtio_net_tx_timer(void *opaque) 2766 { 2767 VirtIONetQueue *q = opaque; 2768 VirtIONet *n = q->n; 2769 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2770 int ret; 2771 2772 /* This happens when device was stopped but BH wasn't. */ 2773 if (!vdev->vm_running) { 2774 /* Make sure tx waiting is set, so we'll run when restarted. */ 2775 assert(q->tx_waiting); 2776 return; 2777 } 2778 2779 q->tx_waiting = 0; 2780 2781 /* Just in case the driver is not ready any more */ 2782 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2783 return; 2784 } 2785 2786 ret = virtio_net_flush_tx(q); 2787 if (ret == -EBUSY || ret == -EINVAL) { 2788 return; 2789 } 2790 /* 2791 * If we flush a full burst of packets, assume there are 2792 * more coming and immediately rearm 2793 */ 2794 if (ret >= n->tx_burst) { 2795 q->tx_waiting = 1; 2796 timer_mod(q->tx_timer, 2797 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2798 return; 2799 } 2800 /* 2801 * If less than a full burst, re-enable notification and flush 2802 * anything that may have come in while we weren't looking. If 2803 * we find something, assume the guest is still active and rearm 2804 */ 2805 virtio_queue_set_notification(q->tx_vq, 1); 2806 ret = virtio_net_flush_tx(q); 2807 if (ret > 0) { 2808 virtio_queue_set_notification(q->tx_vq, 0); 2809 q->tx_waiting = 1; 2810 timer_mod(q->tx_timer, 2811 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2812 } 2813 } 2814 2815 static void virtio_net_tx_bh(void *opaque) 2816 { 2817 VirtIONetQueue *q = opaque; 2818 VirtIONet *n = q->n; 2819 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2820 int32_t ret; 2821 2822 /* This happens when device was stopped but BH wasn't. */ 2823 if (!vdev->vm_running) { 2824 /* Make sure tx waiting is set, so we'll run when restarted.
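 * The BH is only ever scheduled with tx_waiting set.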
*/ 2825 assert(q->tx_waiting); 2826 return; 2827 } 2828 2829 q->tx_waiting = 0; 2830 2831 /* Just in case the driver is not ready any more */ 2832 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2833 return; 2834 } 2835 2836 ret = virtio_net_flush_tx(q); 2837 if (ret == -EBUSY || ret == -EINVAL) { 2838 return; /* Notification re-enable handled by tx_complete or device 2839 * broken */ 2840 } 2841 2842 /* If we flush a full burst of packets, assume there are 2843 * more coming and immediately reschedule */ 2844 if (ret >= n->tx_burst) { 2845 qemu_bh_schedule(q->tx_bh); 2846 q->tx_waiting = 1; 2847 return; 2848 } 2849 2850 /* If less than a full burst, re-enable notification and flush 2851 * anything that may have come in while we weren't looking. If 2852 * we find something, assume the guest is still active and reschedule */ 2853 virtio_queue_set_notification(q->tx_vq, 1); 2854 ret = virtio_net_flush_tx(q); 2855 if (ret == -EINVAL) { 2856 return; 2857 } else if (ret > 0) { 2858 virtio_queue_set_notification(q->tx_vq, 0); 2859 qemu_bh_schedule(q->tx_bh); 2860 q->tx_waiting = 1; 2861 } 2862 } 2863 2864 static void virtio_net_add_queue(VirtIONet *n, int index) 2865 { 2866 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2867 2868 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2869 virtio_net_handle_rx); 2870 2871 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2872 n->vqs[index].tx_vq = 2873 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2874 virtio_net_handle_tx_timer); 2875 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2876 virtio_net_tx_timer, 2877 &n->vqs[index]); 2878 } else { 2879 n->vqs[index].tx_vq = 2880 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2881 virtio_net_handle_tx_bh); 2882 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2883 } 2884 2885 n->vqs[index].tx_waiting = 0; 2886 n->vqs[index].n = n; 2887 } 2888 2889 static void virtio_net_del_queue(VirtIONet *n, int index) 2890 { 2891 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2892 VirtIONetQueue *q = &n->vqs[index]; 2893 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2894 2895 qemu_purge_queued_packets(nc); 2896 2897 virtio_del_queue(vdev, index * 2); 2898 if (q->tx_timer) { 2899 timer_free(q->tx_timer); 2900 q->tx_timer = NULL; 2901 } else { 2902 qemu_bh_delete(q->tx_bh); 2903 q->tx_bh = NULL; 2904 } 2905 q->tx_waiting = 0; 2906 virtio_del_queue(vdev, index * 2 + 1); 2907 } 2908 2909 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2910 { 2911 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2912 int old_num_queues = virtio_get_num_queues(vdev); 2913 int new_num_queues = new_max_queue_pairs * 2 + 1; 2914 int i; 2915 2916 assert(old_num_queues >= 3); 2917 assert(old_num_queues % 2 == 1); 2918 2919 if (old_num_queues == new_num_queues) { 2920 return; 2921 } 2922 2923 /* 2924 * We always need to remove and add ctrl vq if 2925 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2926 * and then we only enter one of the following two loops.
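 * (Data queues occupy indices 2 * i and 2 * i + 1, with the ctrl vq at
 * the very end, so it has to be re-added at the new tail position.)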
2927 */ 2928 virtio_del_queue(vdev, old_num_queues - 1); 2929 2930 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2931 /* new_num_queues < old_num_queues */ 2932 virtio_net_del_queue(n, i / 2); 2933 } 2934 2935 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2936 /* new_num_queues > old_num_queues */ 2937 virtio_net_add_queue(n, i / 2); 2938 } 2939 2940 /* add ctrl_vq last */ 2941 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2942 } 2943 2944 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2945 { 2946 int max = multiqueue ? n->max_queue_pairs : 1; 2947 2948 n->multiqueue = multiqueue; 2949 virtio_net_change_num_queue_pairs(n, max); 2950 2951 virtio_net_set_queue_pairs(n); 2952 } 2953 2954 static int virtio_net_post_load_device(void *opaque, int version_id) 2955 { 2956 VirtIONet *n = opaque; 2957 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2958 int i, link_down; 2959 2960 trace_virtio_net_post_load_device(); 2961 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2962 virtio_vdev_has_feature(vdev, 2963 VIRTIO_F_VERSION_1), 2964 virtio_vdev_has_feature(vdev, 2965 VIRTIO_NET_F_HASH_REPORT)); 2966 2967 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2968 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2969 n->mac_table.in_use = 0; 2970 } 2971 2972 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2973 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2974 } 2975 2976 /* 2977 * curr_guest_offloads will be later overwritten by the 2978 * virtio_set_features_nocheck call done from the virtio_load. 2979 * Here we make sure it is preserved and restored accordingly 2980 * in the virtio_net_post_load_virtio callback. 2981 */ 2982 n->saved_guest_offloads = n->curr_guest_offloads; 2983 2984 virtio_net_set_queue_pairs(n); 2985 2986 /* Find the first multicast entry in the saved MAC filter */ 2987 for (i = 0; i < n->mac_table.in_use; i++) { 2988 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2989 break; 2990 } 2991 } 2992 n->mac_table.first_multi = i; 2993 2994 /* nc.link_down can't be migrated, so infer link_down according 2995 * to link status bit in n->status */ 2996 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2997 for (i = 0; i < n->max_queue_pairs; i++) { 2998 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2999 } 3000 3001 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3002 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3003 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3004 QEMU_CLOCK_VIRTUAL, 3005 virtio_net_announce_timer, n); 3006 if (n->announce_timer.round) { 3007 timer_mod(n->announce_timer.tm, 3008 qemu_clock_get_ms(n->announce_timer.type)); 3009 } else { 3010 qemu_announce_timer_del(&n->announce_timer, false); 3011 } 3012 } 3013 3014 if (n->rss_data.enabled) { 3015 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 3016 if (!n->rss_data.populate_hash) { 3017 if (!virtio_net_attach_epbf_rss(n)) { 3018 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 3019 warn_report("Can't post-load eBPF RSS for vhost"); 3020 } else { 3021 warn_report("Can't post-load eBPF RSS - " 3022 "fallback to software RSS"); 3023 n->rss_data.enabled_software_rss = true; 3024 } 3025 } 3026 } 3027 3028 trace_virtio_net_rss_enable(n->rss_data.hash_types, 3029 n->rss_data.indirections_len, 3030 sizeof(n->rss_data.key)); 3031 } else { 3032 trace_virtio_net_rss_disable(); 3033 } 3034 return 0; 3035 } 3036 3037 static int 
virtio_net_post_load_virtio(VirtIODevice *vdev) 3038 { 3039 VirtIONet *n = VIRTIO_NET(vdev); 3040 /* 3041 * The actual needed state is now in saved_guest_offloads, 3042 * see virtio_net_post_load_device for detail. 3043 * Restore it back and apply the desired offloads. 3044 */ 3045 n->curr_guest_offloads = n->saved_guest_offloads; 3046 if (peer_has_vnet_hdr(n)) { 3047 virtio_net_apply_guest_offloads(n); 3048 } 3049 3050 return 0; 3051 } 3052 3053 /* tx_waiting field of a VirtIONetQueue */ 3054 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3055 .name = "virtio-net-queue-tx_waiting", 3056 .fields = (VMStateField[]) { 3057 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3058 VMSTATE_END_OF_LIST() 3059 }, 3060 }; 3061 3062 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3063 { 3064 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3065 } 3066 3067 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3068 { 3069 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3070 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3071 } 3072 3073 static bool mac_table_fits(void *opaque, int version_id) 3074 { 3075 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3076 } 3077 3078 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3079 { 3080 return !mac_table_fits(opaque, version_id); 3081 } 3082 3083 /* This temporary type is shared by all the WITH_TMP methods 3084 * although only some fields are used by each. 3085 */ 3086 struct VirtIONetMigTmp { 3087 VirtIONet *parent; 3088 VirtIONetQueue *vqs_1; 3089 uint16_t curr_queue_pairs_1; 3090 uint8_t has_ufo; 3091 uint32_t has_vnet_hdr; 3092 }; 3093 3094 /* The 2nd and subsequent tx_waiting flags are loaded later than 3095 * the 1st entry in the queue_pairs and only if there's more than one 3096 * entry. We use the tmp mechanism to calculate a temporary 3097 * pointer and count and also validate the count. 
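 * (The first queue's tx_waiting travels via the vqs field of
 * vmstate_virtio_net_device; the varray below starts at vqs[1].)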
3098 */ 3099 3100 static int virtio_net_tx_waiting_pre_save(void *opaque) 3101 { 3102 struct VirtIONetMigTmp *tmp = opaque; 3103 3104 tmp->vqs_1 = tmp->parent->vqs + 1; 3105 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3106 if (tmp->parent->curr_queue_pairs == 0) { 3107 tmp->curr_queue_pairs_1 = 0; 3108 } 3109 3110 return 0; 3111 } 3112 3113 static int virtio_net_tx_waiting_pre_load(void *opaque) 3114 { 3115 struct VirtIONetMigTmp *tmp = opaque; 3116 3117 /* Reuse the pointer setup from save */ 3118 virtio_net_tx_waiting_pre_save(opaque); 3119 3120 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3121 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3122 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3123 3124 return -EINVAL; 3125 } 3126 3127 return 0; /* all good */ 3128 } 3129 3130 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3131 .name = "virtio-net-tx_waiting", 3132 .pre_load = virtio_net_tx_waiting_pre_load, 3133 .pre_save = virtio_net_tx_waiting_pre_save, 3134 .fields = (VMStateField[]) { 3135 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3136 curr_queue_pairs_1, 3137 vmstate_virtio_net_queue_tx_waiting, 3138 struct VirtIONetQueue), 3139 VMSTATE_END_OF_LIST() 3140 }, 3141 }; 3142 3143 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3144 * flag set we need to check that we have it 3145 */ 3146 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3147 { 3148 struct VirtIONetMigTmp *tmp = opaque; 3149 3150 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3151 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3152 return -EINVAL; 3153 } 3154 3155 return 0; 3156 } 3157 3158 static int virtio_net_ufo_pre_save(void *opaque) 3159 { 3160 struct VirtIONetMigTmp *tmp = opaque; 3161 3162 tmp->has_ufo = tmp->parent->has_ufo; 3163 3164 return 0; 3165 } 3166 3167 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3168 .name = "virtio-net-ufo", 3169 .post_load = virtio_net_ufo_post_load, 3170 .pre_save = virtio_net_ufo_pre_save, 3171 .fields = (VMStateField[]) { 3172 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3173 VMSTATE_END_OF_LIST() 3174 }, 3175 }; 3176 3177 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3178 * flag set we need to check that we have it 3179 */ 3180 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3181 { 3182 struct VirtIONetMigTmp *tmp = opaque; 3183 3184 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3185 error_report("virtio-net: saved image requires vnet_hdr=on"); 3186 return -EINVAL; 3187 } 3188 3189 return 0; 3190 } 3191 3192 static int virtio_net_vnet_pre_save(void *opaque) 3193 { 3194 struct VirtIONetMigTmp *tmp = opaque; 3195 3196 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3197 3198 return 0; 3199 } 3200 3201 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3202 .name = "virtio-net-vnet", 3203 .post_load = virtio_net_vnet_post_load, 3204 .pre_save = virtio_net_vnet_pre_save, 3205 .fields = (VMStateField[]) { 3206 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3207 VMSTATE_END_OF_LIST() 3208 }, 3209 }; 3210 3211 static bool virtio_net_rss_needed(void *opaque) 3212 { 3213 return VIRTIO_NET(opaque)->rss_data.enabled; 3214 } 3215 3216 static const VMStateDescription vmstate_virtio_net_rss = { 3217 .name = "virtio-net-device/rss", 3218 .version_id = 1, 3219 .minimum_version_id = 1, 3220 .needed = 
virtio_net_rss_needed, 3221 .fields = (VMStateField[]) { 3222 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3223 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3224 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3225 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3226 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3227 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3228 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3229 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3230 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3231 rss_data.indirections_len, 0, 3232 vmstate_info_uint16, uint16_t), 3233 VMSTATE_END_OF_LIST() 3234 }, 3235 }; 3236 3237 static const VMStateDescription vmstate_virtio_net_device = { 3238 .name = "virtio-net-device", 3239 .version_id = VIRTIO_NET_VM_VERSION, 3240 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3241 .post_load = virtio_net_post_load_device, 3242 .fields = (VMStateField[]) { 3243 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3244 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3245 vmstate_virtio_net_queue_tx_waiting, 3246 VirtIONetQueue), 3247 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3248 VMSTATE_UINT16(status, VirtIONet), 3249 VMSTATE_UINT8(promisc, VirtIONet), 3250 VMSTATE_UINT8(allmulti, VirtIONet), 3251 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3252 3253 /* Guarded pair: If it fits we load it, else we throw it away 3254 * - can happen if the source has a larger MAC table; post-load 3255 * sets flags in this case. 3256 */ 3257 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3258 0, mac_table_fits, mac_table.in_use, 3259 ETH_ALEN), 3260 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3261 mac_table.in_use, ETH_ALEN), 3262 3263 /* Note: This is an array of uint32's that's always been saved as a 3264 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3265 * but based on the uint.
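 * One bit per possible VLAN id, hence the MAX_VLAN >> 3 byte buffer.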
3266 */ 3267 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3268 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3269 vmstate_virtio_net_has_vnet), 3270 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3271 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3272 VMSTATE_UINT8(alluni, VirtIONet), 3273 VMSTATE_UINT8(nomulti, VirtIONet), 3274 VMSTATE_UINT8(nouni, VirtIONet), 3275 VMSTATE_UINT8(nobcast, VirtIONet), 3276 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3277 vmstate_virtio_net_has_ufo), 3278 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3279 vmstate_info_uint16_equal, uint16_t), 3280 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3281 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3282 vmstate_virtio_net_tx_waiting), 3283 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3284 has_ctrl_guest_offloads), 3285 VMSTATE_END_OF_LIST() 3286 }, 3287 .subsections = (const VMStateDescription * []) { 3288 &vmstate_virtio_net_rss, 3289 NULL 3290 } 3291 }; 3292 3293 static NetClientInfo net_virtio_info = { 3294 .type = NET_CLIENT_DRIVER_NIC, 3295 .size = sizeof(NICState), 3296 .can_receive = virtio_net_can_receive, 3297 .receive = virtio_net_receive, 3298 .link_status_changed = virtio_net_set_link_status, 3299 .query_rx_filter = virtio_net_query_rxfilter, 3300 .announce = virtio_net_announce, 3301 }; 3302 3303 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3304 { 3305 VirtIONet *n = VIRTIO_NET(vdev); 3306 NetClientState *nc; 3307 assert(n->vhost_started); 3308 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3309 /* Must guard against invalid features and bogus queue index 3310 * from being set by malicious guest, or penetrated through 3311 * buggy migration stream. 3312 */ 3313 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3314 qemu_log_mask(LOG_GUEST_ERROR, 3315 "%s: bogus vq index ignored\n", __func__); 3316 return false; 3317 } 3318 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3319 } else { 3320 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3321 } 3322 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3323 } 3324 3325 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3326 bool mask) 3327 { 3328 VirtIONet *n = VIRTIO_NET(vdev); 3329 NetClientState *nc; 3330 assert(n->vhost_started); 3331 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3332 /* Must guard against invalid features and bogus queue index 3333 * from being set by malicious guest, or penetrated through 3334 * buggy migration stream. 3335 */ 3336 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3337 qemu_log_mask(LOG_GUEST_ERROR, 3338 "%s: bogus vq index ignored\n", __func__); 3339 return; 3340 } 3341 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3342 } else { 3343 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3344 } 3345 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3346 vdev, idx, mask); 3347 } 3348 3349 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3350 { 3351 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3352 3353 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3354 } 3355 3356 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3357 const char *type) 3358 { 3359 /* 3360 * The name can be NULL, the netclient name will be type.x. 
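 * (qemu_new_nic then falls back to a generated name of that form.)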
3361 */ 3362 assert(type != NULL); 3363 3364 g_free(n->netclient_name); 3365 g_free(n->netclient_type); 3366 n->netclient_name = g_strdup(name); 3367 n->netclient_type = g_strdup(type); 3368 } 3369 3370 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3371 { 3372 HotplugHandler *hotplug_ctrl; 3373 PCIDevice *pci_dev; 3374 Error *err = NULL; 3375 3376 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3377 if (hotplug_ctrl) { 3378 pci_dev = PCI_DEVICE(dev); 3379 pci_dev->partially_hotplugged = true; 3380 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3381 if (err) { 3382 error_report_err(err); 3383 return false; 3384 } 3385 } else { 3386 return false; 3387 } 3388 return true; 3389 } 3390 3391 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3392 Error **errp) 3393 { 3394 Error *err = NULL; 3395 HotplugHandler *hotplug_ctrl; 3396 PCIDevice *pdev = PCI_DEVICE(dev); 3397 BusState *primary_bus; 3398 3399 if (!pdev->partially_hotplugged) { 3400 return true; 3401 } 3402 primary_bus = dev->parent_bus; 3403 if (!primary_bus) { 3404 error_setg(errp, "virtio_net: couldn't find primary bus"); 3405 return false; 3406 } 3407 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3408 qatomic_set(&n->failover_primary_hidden, false); 3409 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3410 if (hotplug_ctrl) { 3411 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3412 if (err) { 3413 goto out; 3414 } 3415 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3416 } 3417 pdev->partially_hotplugged = false; 3418 3419 out: 3420 error_propagate(errp, err); 3421 return !err; 3422 } 3423 3424 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3425 { 3426 bool should_be_hidden; 3427 Error *err = NULL; 3428 DeviceState *dev = failover_find_primary_device(n); 3429 3430 if (!dev) { 3431 return; 3432 } 3433 3434 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3435 3436 if (migration_in_setup(s) && !should_be_hidden) { 3437 if (failover_unplug_primary(n, dev)) { 3438 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3439 qapi_event_send_unplug_primary(dev->id); 3440 qatomic_set(&n->failover_primary_hidden, true); 3441 } else { 3442 warn_report("couldn't unplug primary device"); 3443 } 3444 } else if (migration_has_failed(s)) { 3445 /* We already unplugged the device let's plug it back */ 3446 if (!failover_replug_primary(n, dev, &err)) { 3447 if (err) { 3448 error_report_err(err); 3449 } 3450 } 3451 } 3452 } 3453 3454 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3455 { 3456 MigrationState *s = data; 3457 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3458 virtio_net_handle_migration_primary(n, s); 3459 } 3460 3461 static bool failover_hide_primary_device(DeviceListener *listener, 3462 const QDict *device_opts, 3463 bool from_json, 3464 Error **errp) 3465 { 3466 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3467 const char *standby_id; 3468 3469 if (!device_opts) { 3470 return false; 3471 } 3472 3473 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3474 return false; 3475 } 3476 3477 if (!qdict_haskey(device_opts, "id")) { 3478 error_setg(errp, "Device with failover_pair_id needs to have id"); 3479 return false; 3480 } 3481 3482 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3483 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3484 return false; 3485 } 3486 3487 /* 3488 * The hide helper can be called several times for a given 
device. 3489 * Check there is only one primary for a virtio-net device but 3490 * don't duplicate the qdict several times if it's called for the same 3491 * device. 3492 */ 3493 if (n->primary_opts) { 3494 const char *old, *new; 3495 /* devices with failover_pair_id always have an id */ 3496 old = qdict_get_str(n->primary_opts, "id"); 3497 new = qdict_get_str(device_opts, "id"); 3498 if (strcmp(old, new) != 0) { 3499 error_setg(errp, "Cannot attach more than one primary device to " 3500 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3501 return false; 3502 } 3503 } else { 3504 n->primary_opts = qdict_clone_shallow(device_opts); 3505 n->primary_opts_from_json = from_json; 3506 } 3507 3508 /* failover_primary_hidden is set during feature negotiation */ 3509 return qatomic_read(&n->failover_primary_hidden); 3510 } 3511 3512 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3513 { 3514 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3515 VirtIONet *n = VIRTIO_NET(dev); 3516 NetClientState *nc; 3517 int i; 3518 3519 if (n->net_conf.mtu) { 3520 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3521 } 3522 3523 if (n->net_conf.duplex_str) { 3524 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3525 n->net_conf.duplex = DUPLEX_HALF; 3526 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3527 n->net_conf.duplex = DUPLEX_FULL; 3528 } else { 3529 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3530 return; 3531 } 3532 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3533 } else { 3534 n->net_conf.duplex = DUPLEX_UNKNOWN; 3535 } 3536 3537 if (n->net_conf.speed < SPEED_UNKNOWN) { 3538 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3539 return; 3540 } 3541 if (n->net_conf.speed >= 0) { 3542 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3543 } 3544 3545 if (n->failover) { 3546 n->primary_listener.hide_device = failover_hide_primary_device; 3547 qatomic_set(&n->failover_primary_hidden, true); 3548 device_listener_register(&n->primary_listener); 3549 n->migration_state.notify = virtio_net_migration_state_notifier; 3550 add_migration_state_change_notifier(&n->migration_state); 3551 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3552 } 3553 3554 virtio_net_set_config_size(n, n->host_features); 3555 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3556 3557 /* 3558 * We set a lower limit on RX queue size to what it always was. 3559 * Guests that want a smaller ring can always resize it without 3560 * help from us (using virtio 1 and up). 
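 * (The power-of-2 checks below mirror the virtio split-ring rule that
 * ring sizes are powers of two.)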
3561 */ 3562 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3563 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3564 !is_power_of_2(n->net_conf.rx_queue_size)) { 3565 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3566 "must be a power of 2 between %d and %d.", 3567 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3568 VIRTQUEUE_MAX_SIZE); 3569 virtio_cleanup(vdev); 3570 return; 3571 } 3572 3573 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3574 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3575 !is_power_of_2(n->net_conf.tx_queue_size)) { 3576 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3577 "must be a power of 2 between %d and %d", 3578 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3579 VIRTQUEUE_MAX_SIZE); 3580 virtio_cleanup(vdev); 3581 return; 3582 } 3583 3584 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3585 3586 /* 3587 * Figure out the datapath queue pairs since the backend could 3588 * provide control queue via peers as well. 3589 */ 3590 if (n->nic_conf.peers.queues) { 3591 for (i = 0; i < n->max_ncs; i++) { 3592 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3593 ++n->max_queue_pairs; 3594 } 3595 } 3596 } 3597 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3598 3599 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3600 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3601 "must be a positive integer less than %d.", 3602 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3603 virtio_cleanup(vdev); 3604 return; 3605 } 3606 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3607 n->curr_queue_pairs = 1; 3608 n->tx_timeout = n->net_conf.txtimer; 3609 3610 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3611 && strcmp(n->net_conf.tx, "bh")) { 3612 warn_report("virtio-net: " 3613 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3614 n->net_conf.tx); 3615 error_printf("Defaulting to \"bh\""); 3616 } 3617 3618 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3619 n->net_conf.tx_queue_size); 3620 3621 for (i = 0; i < n->max_queue_pairs; i++) { 3622 virtio_net_add_queue(n, i); 3623 } 3624 3625 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3626 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3627 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3628 n->status = VIRTIO_NET_S_LINK_UP; 3629 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3630 QEMU_CLOCK_VIRTUAL, 3631 virtio_net_announce_timer, n); 3632 n->announce_timer.round = 0; 3633 3634 if (n->netclient_type) { 3635 /* 3636 * Happen when virtio_net_set_netclient_name has been called. 
3637 */ 3638 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3639 n->netclient_type, n->netclient_name, n); 3640 } else { 3641 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3642 object_get_typename(OBJECT(dev)), dev->id, n); 3643 } 3644 3645 for (i = 0; i < n->max_queue_pairs; i++) { 3646 n->nic->ncs[i].do_not_pad = true; 3647 } 3648 3649 peer_test_vnet_hdr(n); 3650 if (peer_has_vnet_hdr(n)) { 3651 for (i = 0; i < n->max_queue_pairs; i++) { 3652 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3653 } 3654 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3655 } else { 3656 n->host_hdr_len = 0; 3657 } 3658 3659 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3660 3661 n->vqs[0].tx_waiting = 0; 3662 n->tx_burst = n->net_conf.txburst; 3663 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3664 n->promisc = 1; /* for compatibility */ 3665 3666 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3667 3668 n->vlans = g_malloc0(MAX_VLAN >> 3); 3669 3670 nc = qemu_get_queue(n->nic); 3671 nc->rxfilter_notify_enabled = 1; 3672 3673 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3674 struct virtio_net_config netcfg = {}; 3675 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3676 vhost_net_set_config(get_vhost_net(nc->peer), 3677 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3678 } 3679 QTAILQ_INIT(&n->rsc_chains); 3680 n->qdev = dev; 3681 3682 net_rx_pkt_init(&n->rx_pkt, false); 3683 3684 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3685 virtio_net_load_ebpf(n); 3686 } 3687 } 3688 3689 static void virtio_net_device_unrealize(DeviceState *dev) 3690 { 3691 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3692 VirtIONet *n = VIRTIO_NET(dev); 3693 int i, max_queue_pairs; 3694 3695 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3696 virtio_net_unload_ebpf(n); 3697 } 3698 3699 /* This will stop vhost backend if appropriate. */ 3700 virtio_net_set_status(vdev, 0); 3701 3702 g_free(n->netclient_name); 3703 n->netclient_name = NULL; 3704 g_free(n->netclient_type); 3705 n->netclient_type = NULL; 3706 3707 g_free(n->mac_table.macs); 3708 g_free(n->vlans); 3709 3710 if (n->failover) { 3711 qobject_unref(n->primary_opts); 3712 device_listener_unregister(&n->primary_listener); 3713 remove_migration_state_change_notifier(&n->migration_state); 3714 } else { 3715 assert(n->primary_opts == NULL); 3716 } 3717 3718 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3719 for (i = 0; i < max_queue_pairs; i++) { 3720 virtio_net_del_queue(n, i); 3721 } 3722 /* delete also control vq */ 3723 virtio_del_queue(vdev, max_queue_pairs * 2); 3724 qemu_announce_timer_del(&n->announce_timer, false); 3725 g_free(n->vqs); 3726 qemu_del_nic(n->nic); 3727 virtio_net_rsc_cleanup(n); 3728 g_free(n->rss_data.indirections_table); 3729 net_rx_pkt_uninit(n->rx_pkt); 3730 virtio_cleanup(vdev); 3731 } 3732 3733 static void virtio_net_instance_init(Object *obj) 3734 { 3735 VirtIONet *n = VIRTIO_NET(obj); 3736 3737 /* 3738 * The default config_size is sizeof(struct virtio_net_config). 3739 * Can be overriden with virtio_net_set_config_size. 
3740 */ 3741 n->config_size = sizeof(struct virtio_net_config); 3742 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3743 "bootindex", "/ethernet-phy@0", 3744 DEVICE(n)); 3745 3746 ebpf_rss_init(&n->ebpf_rss); 3747 } 3748 3749 static int virtio_net_pre_save(void *opaque) 3750 { 3751 VirtIONet *n = opaque; 3752 3753 /* At this point, backend must be stopped, otherwise 3754 * it might keep writing to memory. */ 3755 assert(!n->vhost_started); 3756 3757 return 0; 3758 } 3759 3760 static bool primary_unplug_pending(void *opaque) 3761 { 3762 DeviceState *dev = opaque; 3763 DeviceState *primary; 3764 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3765 VirtIONet *n = VIRTIO_NET(vdev); 3766 3767 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3768 return false; 3769 } 3770 primary = failover_find_primary_device(n); 3771 return primary ? primary->pending_deleted_event : false; 3772 } 3773 3774 static bool dev_unplug_pending(void *opaque) 3775 { 3776 DeviceState *dev = opaque; 3777 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3778 3779 return vdc->primary_unplug_pending(dev); 3780 } 3781 3782 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3783 { 3784 VirtIONet *n = VIRTIO_NET(vdev); 3785 NetClientState *nc = qemu_get_queue(n->nic); 3786 struct vhost_net *net = get_vhost_net(nc->peer); 3787 return &net->dev; 3788 } 3789 3790 static const VMStateDescription vmstate_virtio_net = { 3791 .name = "virtio-net", 3792 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3793 .version_id = VIRTIO_NET_VM_VERSION, 3794 .fields = (VMStateField[]) { 3795 VMSTATE_VIRTIO_DEVICE, 3796 VMSTATE_END_OF_LIST() 3797 }, 3798 .pre_save = virtio_net_pre_save, 3799 .dev_unplug_pending = dev_unplug_pending, 3800 }; 3801 3802 static Property virtio_net_properties[] = { 3803 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3804 VIRTIO_NET_F_CSUM, true), 3805 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3806 VIRTIO_NET_F_GUEST_CSUM, true), 3807 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3808 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3809 VIRTIO_NET_F_GUEST_TSO4, true), 3810 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3811 VIRTIO_NET_F_GUEST_TSO6, true), 3812 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3813 VIRTIO_NET_F_GUEST_ECN, true), 3814 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3815 VIRTIO_NET_F_GUEST_UFO, true), 3816 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3817 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3818 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3819 VIRTIO_NET_F_HOST_TSO4, true), 3820 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3821 VIRTIO_NET_F_HOST_TSO6, true), 3822 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3823 VIRTIO_NET_F_HOST_ECN, true), 3824 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3825 VIRTIO_NET_F_HOST_UFO, true), 3826 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3827 VIRTIO_NET_F_MRG_RXBUF, true), 3828 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3829 VIRTIO_NET_F_STATUS, true), 3830 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3831 VIRTIO_NET_F_CTRL_VQ, true), 3832 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3833 VIRTIO_NET_F_CTRL_RX, true), 3834 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3835 VIRTIO_NET_F_CTRL_VLAN, true), 3836 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3837 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3838 
DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3839 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3840 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3841 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3842 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3843 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3844 VIRTIO_NET_F_RSS, false), 3845 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3846 VIRTIO_NET_F_HASH_REPORT, false), 3847 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3848 VIRTIO_NET_F_RSC_EXT, false), 3849 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3850 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3851 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3852 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3853 TX_TIMER_INTERVAL), 3854 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3855 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3856 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3857 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3858 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3859 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3860 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3861 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3862 true), 3863 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3864 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3865 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3866 DEFINE_PROP_END_OF_LIST(), 3867 }; 3868 3869 static void virtio_net_class_init(ObjectClass *klass, void *data) 3870 { 3871 DeviceClass *dc = DEVICE_CLASS(klass); 3872 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3873 3874 device_class_set_props(dc, virtio_net_properties); 3875 dc->vmsd = &vmstate_virtio_net; 3876 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3877 vdc->realize = virtio_net_device_realize; 3878 vdc->unrealize = virtio_net_device_unrealize; 3879 vdc->get_config = virtio_net_get_config; 3880 vdc->set_config = virtio_net_set_config; 3881 vdc->get_features = virtio_net_get_features; 3882 vdc->set_features = virtio_net_set_features; 3883 vdc->bad_features = virtio_net_bad_features; 3884 vdc->reset = virtio_net_reset; 3885 vdc->queue_reset = virtio_net_queue_reset; 3886 vdc->queue_enable = virtio_net_queue_enable; 3887 vdc->set_status = virtio_net_set_status; 3888 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3889 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3890 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3891 vdc->post_load = virtio_net_post_load_virtio; 3892 vdc->vmsd = &vmstate_virtio_net_device; 3893 vdc->primary_unplug_pending = primary_unplug_pending; 3894 vdc->get_vhost = virtio_net_get_vhost; 3895 } 3896 3897 static const TypeInfo virtio_net_info = { 3898 .name = TYPE_VIRTIO_NET, 3899 .parent = TYPE_VIRTIO_DEVICE, 3900 .instance_size = sizeof(VirtIONet), 3901 .instance_init = virtio_net_instance_init, 3902 .class_init = virtio_net_class_init, 3903 }; 3904 3905 static void virtio_register_types(void) 3906 { 3907 type_register_static(&virtio_net_info); 3908 } 3909 3910 type_init(virtio_register_types) 3911