1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ 53 54 /* previously fixed value */ 55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 57 58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 61 62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 63 64 #define VIRTIO_NET_TCP_FLAG 0x3F 65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 66 67 /* IPv4 max payload, 16 bits in the header */ 68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 70 71 /* header length value in ip header without option */ 72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 73 74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 76 77 /* Purge coalesced packets timer interval, This value affects the performance 78 a lot, and should be tuned carefully, '300000'(300us) is the recommended 79 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 80 tso/gso/gro 'off'. */ 81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 82 83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 92 93 static const VirtIOFeature feature_sizes[] = { 94 {.flags = 1ULL << VIRTIO_NET_F_MAC, 95 .end = endof(struct virtio_net_config, mac)}, 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 97 .end = endof(struct virtio_net_config, status)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MQ, 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 100 {.flags = 1ULL << VIRTIO_NET_F_MTU, 101 .end = endof(struct virtio_net_config, mtu)}, 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 103 .end = endof(struct virtio_net_config, duplex)}, 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 105 .end = endof(struct virtio_net_config, supported_hash_types)}, 106 {} 107 }; 108 109 static const VirtIOConfigSizeParams cfg_size_params = { 110 .min_size = endof(struct virtio_net_config, mac), 111 .max_size = sizeof(struct virtio_net_config), 112 .feature_sizes = feature_sizes 113 }; 114 115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 116 { 117 VirtIONet *n = qemu_get_nic_opaque(nc); 118 119 return &n->vqs[nc->queue_index]; 120 } 121 122 static int vq2q(int queue_index) 123 { 124 return queue_index / 2; 125 } 126 127 static void flush_or_purge_queued_packets(NetClientState *nc) 128 { 129 if (!nc->peer) { 130 return; 131 } 132 133 qemu_flush_or_purge_queued_packets(nc->peer, true); 134 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 135 } 136 137 /* TODO 138 * - we could suppress RX interrupt if we were so inclined. 139 */ 140 141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 142 { 143 VirtIONet *n = VIRTIO_NET(vdev); 144 struct virtio_net_config netcfg; 145 NetClientState *nc = qemu_get_queue(n->nic); 146 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 147 148 int ret = 0; 149 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 150 virtio_stw_p(vdev, &netcfg.status, n->status); 151 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 152 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 153 memcpy(netcfg.mac, n->mac, ETH_ALEN); 154 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 155 netcfg.duplex = n->net_conf.duplex; 156 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 157 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 158 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 159 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 160 virtio_stl_p(vdev, &netcfg.supported_hash_types, 161 VIRTIO_NET_RSS_SUPPORTED_HASHES); 162 memcpy(config, &netcfg, n->config_size); 163 164 /* 165 * Is this VDPA? No peer means not VDPA: there's no way to 166 * disconnect/reconnect a VDPA peer. 167 */ 168 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 169 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 170 n->config_size); 171 if (ret == -1) { 172 return; 173 } 174 175 /* 176 * Some NIC/kernel combinations present 0 as the mac address. As that 177 * is not a legal address, try to proceed with the address from the 178 * QEMU command line in the hope that the address has been configured 179 * correctly elsewhere - just not reported by the device. 180 */ 181 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 182 info_report("Zero hardware mac address detected. Ignoring."); 183 memcpy(netcfg.mac, n->mac, ETH_ALEN); 184 } 185 186 memcpy(config, &netcfg, n->config_size); 187 } 188 } 189 190 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 191 { 192 VirtIONet *n = VIRTIO_NET(vdev); 193 struct virtio_net_config netcfg = {}; 194 NetClientState *nc = qemu_get_queue(n->nic); 195 196 memcpy(&netcfg, config, n->config_size); 197 198 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 199 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 200 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 201 memcpy(n->mac, netcfg.mac, ETH_ALEN); 202 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 203 } 204 205 /* 206 * Is this VDPA? No peer means not VDPA: there's no way to 207 * disconnect/reconnect a VDPA peer. 208 */ 209 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 210 vhost_net_set_config(get_vhost_net(nc->peer), 211 (uint8_t *)&netcfg, 0, n->config_size, 212 VHOST_SET_CONFIG_TYPE_MASTER); 213 } 214 } 215 216 static bool virtio_net_started(VirtIONet *n, uint8_t status) 217 { 218 VirtIODevice *vdev = VIRTIO_DEVICE(n); 219 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 220 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 221 } 222 223 static void virtio_net_announce_notify(VirtIONet *net) 224 { 225 VirtIODevice *vdev = VIRTIO_DEVICE(net); 226 trace_virtio_net_announce_notify(); 227 228 net->status |= VIRTIO_NET_S_ANNOUNCE; 229 virtio_notify_config(vdev); 230 } 231 232 static void virtio_net_announce_timer(void *opaque) 233 { 234 VirtIONet *n = opaque; 235 trace_virtio_net_announce_timer(n->announce_timer.round); 236 237 n->announce_timer.round--; 238 virtio_net_announce_notify(n); 239 } 240 241 static void virtio_net_announce(NetClientState *nc) 242 { 243 VirtIONet *n = qemu_get_nic_opaque(nc); 244 VirtIODevice *vdev = VIRTIO_DEVICE(n); 245 246 /* 247 * Make sure the virtio migration announcement timer isn't running 248 * If it is, let it trigger announcement so that we do not cause 249 * confusion. 250 */ 251 if (n->announce_timer.round) { 252 return; 253 } 254 255 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 256 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 257 virtio_net_announce_notify(n); 258 } 259 } 260 261 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 262 { 263 VirtIODevice *vdev = VIRTIO_DEVICE(n); 264 NetClientState *nc = qemu_get_queue(n->nic); 265 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 266 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 267 n->max_ncs - n->max_queue_pairs : 0; 268 269 if (!get_vhost_net(nc->peer)) { 270 return; 271 } 272 273 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 274 !!n->vhost_started) { 275 return; 276 } 277 if (!n->vhost_started) { 278 int r, i; 279 280 if (n->needs_vnet_hdr_swap) { 281 error_report("backend does not support %s vnet headers; " 282 "falling back on userspace virtio", 283 virtio_is_big_endian(vdev) ? "BE" : "LE"); 284 return; 285 } 286 287 /* Any packets outstanding? Purge them to avoid touching rings 288 * when vhost is running. 289 */ 290 for (i = 0; i < queue_pairs; i++) { 291 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 292 293 /* Purge both directions: TX and RX. */ 294 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 295 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 296 } 297 298 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 299 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 300 if (r < 0) { 301 error_report("%uBytes MTU not supported by the backend", 302 n->net_conf.mtu); 303 304 return; 305 } 306 } 307 308 n->vhost_started = 1; 309 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 310 if (r < 0) { 311 error_report("unable to start vhost net: %d: " 312 "falling back on userspace virtio", -r); 313 n->vhost_started = 0; 314 } 315 } else { 316 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 317 n->vhost_started = 0; 318 } 319 } 320 321 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 322 NetClientState *peer, 323 bool enable) 324 { 325 if (virtio_is_big_endian(vdev)) { 326 return qemu_set_vnet_be(peer, enable); 327 } else { 328 return qemu_set_vnet_le(peer, enable); 329 } 330 } 331 332 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 333 int queue_pairs, bool enable) 334 { 335 int i; 336 337 for (i = 0; i < queue_pairs; i++) { 338 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 339 enable) { 340 while (--i >= 0) { 341 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 342 } 343 344 return true; 345 } 346 } 347 348 return false; 349 } 350 351 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 352 { 353 VirtIODevice *vdev = VIRTIO_DEVICE(n); 354 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 355 356 if (virtio_net_started(n, status)) { 357 /* Before using the device, we tell the network backend about the 358 * endianness to use when parsing vnet headers. If the backend 359 * can't do it, we fallback onto fixing the headers in the core 360 * virtio-net code. 361 */ 362 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 363 queue_pairs, true); 364 } else if (virtio_net_started(n, vdev->status)) { 365 /* After using the device, we need to reset the network backend to 366 * the default (guest native endianness), otherwise the guest may 367 * lose network connectivity if it is rebooted into a different 368 * endianness. 369 */ 370 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 371 } 372 } 373 374 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 375 { 376 unsigned int dropped = virtqueue_drop_all(vq); 377 if (dropped) { 378 virtio_notify(vdev, vq); 379 } 380 } 381 382 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 383 { 384 VirtIONet *n = VIRTIO_NET(vdev); 385 VirtIONetQueue *q; 386 int i; 387 uint8_t queue_status; 388 389 virtio_net_vnet_endian_status(n, status); 390 virtio_net_vhost_status(n, status); 391 392 for (i = 0; i < n->max_queue_pairs; i++) { 393 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 394 bool queue_started; 395 q = &n->vqs[i]; 396 397 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 398 queue_status = 0; 399 } else { 400 queue_status = status; 401 } 402 queue_started = 403 virtio_net_started(n, queue_status) && !n->vhost_started; 404 405 if (queue_started) { 406 qemu_flush_queued_packets(ncs); 407 } 408 409 if (!q->tx_waiting) { 410 continue; 411 } 412 413 if (queue_started) { 414 if (q->tx_timer) { 415 timer_mod(q->tx_timer, 416 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 417 } else { 418 qemu_bh_schedule(q->tx_bh); 419 } 420 } else { 421 if (q->tx_timer) { 422 timer_del(q->tx_timer); 423 } else { 424 qemu_bh_cancel(q->tx_bh); 425 } 426 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 427 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 428 vdev->vm_running) { 429 /* if tx is waiting we are likely have some packets in tx queue 430 * and disabled notification */ 431 q->tx_waiting = 0; 432 virtio_queue_set_notification(q->tx_vq, 1); 433 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 434 } 435 } 436 } 437 } 438 439 static void virtio_net_set_link_status(NetClientState *nc) 440 { 441 VirtIONet *n = qemu_get_nic_opaque(nc); 442 VirtIODevice *vdev = VIRTIO_DEVICE(n); 443 uint16_t old_status = n->status; 444 445 if (nc->link_down) 446 n->status &= ~VIRTIO_NET_S_LINK_UP; 447 else 448 n->status |= VIRTIO_NET_S_LINK_UP; 449 450 if (n->status != old_status) 451 virtio_notify_config(vdev); 452 453 virtio_net_set_status(vdev, vdev->status); 454 } 455 456 static void rxfilter_notify(NetClientState *nc) 457 { 458 VirtIONet *n = qemu_get_nic_opaque(nc); 459 460 if (nc->rxfilter_notify_enabled) { 461 char *path = object_get_canonical_path(OBJECT(n->qdev)); 462 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path); 463 g_free(path); 464 465 /* disable event notification to avoid events flooding */ 466 nc->rxfilter_notify_enabled = 0; 467 } 468 } 469 470 static intList *get_vlan_table(VirtIONet *n) 471 { 472 intList *list; 473 int i, j; 474 475 list = NULL; 476 for (i = 0; i < MAX_VLAN >> 5; i++) { 477 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 478 if (n->vlans[i] & (1U << j)) { 479 QAPI_LIST_PREPEND(list, (i << 5) + j); 480 } 481 } 482 } 483 484 return list; 485 } 486 487 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 488 { 489 VirtIONet *n = qemu_get_nic_opaque(nc); 490 VirtIODevice *vdev = VIRTIO_DEVICE(n); 491 RxFilterInfo *info; 492 strList *str_list; 493 int i; 494 495 info = g_malloc0(sizeof(*info)); 496 info->name = g_strdup(nc->name); 497 info->promiscuous = n->promisc; 498 499 if (n->nouni) { 500 info->unicast = RX_STATE_NONE; 501 } else if (n->alluni) { 502 info->unicast = RX_STATE_ALL; 503 } else { 504 info->unicast = RX_STATE_NORMAL; 505 } 506 507 if (n->nomulti) { 508 info->multicast = RX_STATE_NONE; 509 } else if (n->allmulti) { 510 info->multicast = RX_STATE_ALL; 511 } else { 512 info->multicast = RX_STATE_NORMAL; 513 } 514 515 info->broadcast_allowed = n->nobcast; 516 info->multicast_overflow = n->mac_table.multi_overflow; 517 info->unicast_overflow = n->mac_table.uni_overflow; 518 519 info->main_mac = qemu_mac_strdup_printf(n->mac); 520 521 str_list = NULL; 522 for (i = 0; i < n->mac_table.first_multi; i++) { 523 QAPI_LIST_PREPEND(str_list, 524 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 525 } 526 info->unicast_table = str_list; 527 528 str_list = NULL; 529 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 530 QAPI_LIST_PREPEND(str_list, 531 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 532 } 533 info->multicast_table = str_list; 534 info->vlan_table = get_vlan_table(n); 535 536 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 537 info->vlan = RX_STATE_ALL; 538 } else if (!info->vlan_table) { 539 info->vlan = RX_STATE_NONE; 540 } else { 541 info->vlan = RX_STATE_NORMAL; 542 } 543 544 /* enable event notification after query */ 545 nc->rxfilter_notify_enabled = 1; 546 547 return info; 548 } 549 550 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index) 551 { 552 VirtIONet *n = VIRTIO_NET(vdev); 553 NetClientState *nc; 554 555 /* validate queue_index and skip for cvq */ 556 if (queue_index >= n->max_queue_pairs * 2) { 557 return; 558 } 559 560 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 561 562 if (!nc->peer) { 563 return; 564 } 565 566 if (get_vhost_net(nc->peer) && 567 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 568 vhost_net_virtqueue_reset(vdev, nc, queue_index); 569 } 570 571 flush_or_purge_queued_packets(nc); 572 } 573 574 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index) 575 { 576 VirtIONet *n = VIRTIO_NET(vdev); 577 NetClientState *nc; 578 int r; 579 580 /* validate queue_index and skip for cvq */ 581 if (queue_index >= n->max_queue_pairs * 2) { 582 return; 583 } 584 585 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 586 587 if (!nc->peer || !vdev->vhost_started) { 588 return; 589 } 590 591 if (get_vhost_net(nc->peer) && 592 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 593 r = vhost_net_virtqueue_restart(vdev, nc, queue_index); 594 if (r < 0) { 595 error_report("unable to restart vhost net virtqueue: %d, " 596 "when resetting the queue", queue_index); 597 } 598 } 599 } 600 601 static void virtio_net_reset(VirtIODevice *vdev) 602 { 603 VirtIONet *n = VIRTIO_NET(vdev); 604 int i; 605 606 /* Reset back to compatibility mode */ 607 n->promisc = 1; 608 n->allmulti = 0; 609 n->alluni = 0; 610 n->nomulti = 0; 611 n->nouni = 0; 612 n->nobcast = 0; 613 /* multiqueue is disabled by default */ 614 n->curr_queue_pairs = 1; 615 timer_del(n->announce_timer.tm); 616 n->announce_timer.round = 0; 617 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 618 619 /* Flush any MAC and VLAN filter table state */ 620 n->mac_table.in_use = 0; 621 n->mac_table.first_multi = 0; 622 n->mac_table.multi_overflow = 0; 623 n->mac_table.uni_overflow = 0; 624 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 625 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 626 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 627 memset(n->vlans, 0, MAX_VLAN >> 3); 628 629 /* Flush any async TX */ 630 for (i = 0; i < n->max_queue_pairs; i++) { 631 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 632 } 633 } 634 635 static void peer_test_vnet_hdr(VirtIONet *n) 636 { 637 NetClientState *nc = qemu_get_queue(n->nic); 638 if (!nc->peer) { 639 return; 640 } 641 642 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 643 } 644 645 static int peer_has_vnet_hdr(VirtIONet *n) 646 { 647 return n->has_vnet_hdr; 648 } 649 650 static int peer_has_ufo(VirtIONet *n) 651 { 652 if (!peer_has_vnet_hdr(n)) 653 return 0; 654 655 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 656 657 return n->has_ufo; 658 } 659 660 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 661 int version_1, int hash_report) 662 { 663 int i; 664 NetClientState *nc; 665 666 n->mergeable_rx_bufs = mergeable_rx_bufs; 667 668 if (version_1) { 669 n->guest_hdr_len = hash_report ? 670 sizeof(struct virtio_net_hdr_v1_hash) : 671 sizeof(struct virtio_net_hdr_mrg_rxbuf); 672 n->rss_data.populate_hash = !!hash_report; 673 } else { 674 n->guest_hdr_len = n->mergeable_rx_bufs ? 675 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 676 sizeof(struct virtio_net_hdr); 677 } 678 679 for (i = 0; i < n->max_queue_pairs; i++) { 680 nc = qemu_get_subqueue(n->nic, i); 681 682 if (peer_has_vnet_hdr(n) && 683 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 684 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 685 n->host_hdr_len = n->guest_hdr_len; 686 } 687 } 688 } 689 690 static int virtio_net_max_tx_queue_size(VirtIONet *n) 691 { 692 NetClientState *peer = n->nic_conf.peers.ncs[0]; 693 694 /* 695 * Backends other than vhost-user or vhost-vdpa don't support max queue 696 * size. 697 */ 698 if (!peer) { 699 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 700 } 701 702 switch(peer->info->type) { 703 case NET_CLIENT_DRIVER_VHOST_USER: 704 case NET_CLIENT_DRIVER_VHOST_VDPA: 705 return VIRTQUEUE_MAX_SIZE; 706 default: 707 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 708 }; 709 } 710 711 static int peer_attach(VirtIONet *n, int index) 712 { 713 NetClientState *nc = qemu_get_subqueue(n->nic, index); 714 715 if (!nc->peer) { 716 return 0; 717 } 718 719 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 720 vhost_set_vring_enable(nc->peer, 1); 721 } 722 723 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 724 return 0; 725 } 726 727 if (n->max_queue_pairs == 1) { 728 return 0; 729 } 730 731 return tap_enable(nc->peer); 732 } 733 734 static int peer_detach(VirtIONet *n, int index) 735 { 736 NetClientState *nc = qemu_get_subqueue(n->nic, index); 737 738 if (!nc->peer) { 739 return 0; 740 } 741 742 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 743 vhost_set_vring_enable(nc->peer, 0); 744 } 745 746 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 747 return 0; 748 } 749 750 return tap_disable(nc->peer); 751 } 752 753 static void virtio_net_set_queue_pairs(VirtIONet *n) 754 { 755 int i; 756 int r; 757 758 if (n->nic->peer_deleted) { 759 return; 760 } 761 762 for (i = 0; i < n->max_queue_pairs; i++) { 763 if (i < n->curr_queue_pairs) { 764 r = peer_attach(n, i); 765 assert(!r); 766 } else { 767 r = peer_detach(n, i); 768 assert(!r); 769 } 770 } 771 } 772 773 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 774 775 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 776 Error **errp) 777 { 778 VirtIONet *n = VIRTIO_NET(vdev); 779 NetClientState *nc = qemu_get_queue(n->nic); 780 781 /* Firstly sync all virtio-net possible supported features */ 782 features |= n->host_features; 783 784 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 785 786 if (!peer_has_vnet_hdr(n)) { 787 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 788 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 789 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 790 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 791 792 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 793 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 794 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 795 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 796 797 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 798 } 799 800 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 801 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 802 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 803 } 804 805 if (!get_vhost_net(nc->peer)) { 806 virtio_add_feature(&features, VIRTIO_F_RING_RESET); 807 return features; 808 } 809 810 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 811 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 812 } 813 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 814 vdev->backend_features = features; 815 816 if (n->mtu_bypass_backend && 817 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 818 features |= (1ULL << VIRTIO_NET_F_MTU); 819 } 820 821 return features; 822 } 823 824 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 825 { 826 uint64_t features = 0; 827 828 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 829 * but also these: */ 830 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 831 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 832 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 833 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 834 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 835 836 return features; 837 } 838 839 static void virtio_net_apply_guest_offloads(VirtIONet *n) 840 { 841 qemu_set_offload(qemu_get_queue(n->nic)->peer, 842 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 843 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 844 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 845 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 846 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); 847 } 848 849 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) 850 { 851 static const uint64_t guest_offloads_mask = 852 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 853 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 854 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 855 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 856 (1ULL << VIRTIO_NET_F_GUEST_UFO); 857 858 return guest_offloads_mask & features; 859 } 860 861 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) 862 { 863 VirtIODevice *vdev = VIRTIO_DEVICE(n); 864 return virtio_net_guest_offloads_by_features(vdev->guest_features); 865 } 866 867 typedef struct { 868 VirtIONet *n; 869 DeviceState *dev; 870 } FailoverDevice; 871 872 /** 873 * Set the failover primary device 874 * 875 * @opaque: FailoverId to setup 876 * @opts: opts for device we are handling 877 * @errp: returns an error if this function fails 878 */ 879 static int failover_set_primary(DeviceState *dev, void *opaque) 880 { 881 FailoverDevice *fdev = opaque; 882 PCIDevice *pci_dev = (PCIDevice *) 883 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 884 885 if (!pci_dev) { 886 return 0; 887 } 888 889 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 890 fdev->dev = dev; 891 return 1; 892 } 893 894 return 0; 895 } 896 897 /** 898 * Find the primary device for this failover virtio-net 899 * 900 * @n: VirtIONet device 901 * @errp: returns an error if this function fails 902 */ 903 static DeviceState *failover_find_primary_device(VirtIONet *n) 904 { 905 FailoverDevice fdev = { 906 .n = n, 907 }; 908 909 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 910 NULL, NULL, &fdev); 911 return fdev.dev; 912 } 913 914 static void failover_add_primary(VirtIONet *n, Error **errp) 915 { 916 Error *err = NULL; 917 DeviceState *dev = failover_find_primary_device(n); 918 919 if (dev) { 920 return; 921 } 922 923 if (!n->primary_opts) { 924 error_setg(errp, "Primary device not found"); 925 error_append_hint(errp, "Virtio-net failover will not work. Make " 926 "sure primary device has parameter" 927 " failover_pair_id=%s\n", n->netclient_name); 928 return; 929 } 930 931 dev = qdev_device_add_from_qdict(n->primary_opts, 932 n->primary_opts_from_json, 933 &err); 934 if (err) { 935 qobject_unref(n->primary_opts); 936 n->primary_opts = NULL; 937 } else { 938 object_unref(OBJECT(dev)); 939 } 940 error_propagate(errp, err); 941 } 942 943 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 944 { 945 VirtIONet *n = VIRTIO_NET(vdev); 946 Error *err = NULL; 947 int i; 948 949 if (n->mtu_bypass_backend && 950 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 951 features &= ~(1ULL << VIRTIO_NET_F_MTU); 952 } 953 954 virtio_net_set_multiqueue(n, 955 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 956 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 957 958 virtio_net_set_mrg_rx_bufs(n, 959 virtio_has_feature(features, 960 VIRTIO_NET_F_MRG_RXBUF), 961 virtio_has_feature(features, 962 VIRTIO_F_VERSION_1), 963 virtio_has_feature(features, 964 VIRTIO_NET_F_HASH_REPORT)); 965 966 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 967 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 968 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 969 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 970 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 971 972 if (n->has_vnet_hdr) { 973 n->curr_guest_offloads = 974 virtio_net_guest_offloads_by_features(features); 975 virtio_net_apply_guest_offloads(n); 976 } 977 978 for (i = 0; i < n->max_queue_pairs; i++) { 979 NetClientState *nc = qemu_get_subqueue(n->nic, i); 980 981 if (!get_vhost_net(nc->peer)) { 982 continue; 983 } 984 vhost_net_ack_features(get_vhost_net(nc->peer), features); 985 } 986 987 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 988 memset(n->vlans, 0, MAX_VLAN >> 3); 989 } else { 990 memset(n->vlans, 0xff, MAX_VLAN >> 3); 991 } 992 993 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 994 qapi_event_send_failover_negotiated(n->netclient_name); 995 qatomic_set(&n->failover_primary_hidden, false); 996 failover_add_primary(n, &err); 997 if (err) { 998 if (!qtest_enabled()) { 999 warn_report_err(err); 1000 } else { 1001 error_free(err); 1002 } 1003 } 1004 } 1005 } 1006 1007 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 1008 struct iovec *iov, unsigned int iov_cnt) 1009 { 1010 uint8_t on; 1011 size_t s; 1012 NetClientState *nc = qemu_get_queue(n->nic); 1013 1014 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 1015 if (s != sizeof(on)) { 1016 return VIRTIO_NET_ERR; 1017 } 1018 1019 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 1020 n->promisc = on; 1021 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 1022 n->allmulti = on; 1023 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 1024 n->alluni = on; 1025 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 1026 n->nomulti = on; 1027 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 1028 n->nouni = on; 1029 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 1030 n->nobcast = on; 1031 } else { 1032 return VIRTIO_NET_ERR; 1033 } 1034 1035 rxfilter_notify(nc); 1036 1037 return VIRTIO_NET_OK; 1038 } 1039 1040 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 1041 struct iovec *iov, unsigned int iov_cnt) 1042 { 1043 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1044 uint64_t offloads; 1045 size_t s; 1046 1047 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 1048 return VIRTIO_NET_ERR; 1049 } 1050 1051 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 1052 if (s != sizeof(offloads)) { 1053 return VIRTIO_NET_ERR; 1054 } 1055 1056 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 1057 uint64_t supported_offloads; 1058 1059 offloads = virtio_ldq_p(vdev, &offloads); 1060 1061 if (!n->has_vnet_hdr) { 1062 return VIRTIO_NET_ERR; 1063 } 1064 1065 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1066 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1067 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1068 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1069 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1070 1071 supported_offloads = virtio_net_supported_guest_offloads(n); 1072 if (offloads & ~supported_offloads) { 1073 return VIRTIO_NET_ERR; 1074 } 1075 1076 n->curr_guest_offloads = offloads; 1077 virtio_net_apply_guest_offloads(n); 1078 1079 return VIRTIO_NET_OK; 1080 } else { 1081 return VIRTIO_NET_ERR; 1082 } 1083 } 1084 1085 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1086 struct iovec *iov, unsigned int iov_cnt) 1087 { 1088 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1089 struct virtio_net_ctrl_mac mac_data; 1090 size_t s; 1091 NetClientState *nc = qemu_get_queue(n->nic); 1092 1093 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1094 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1095 return VIRTIO_NET_ERR; 1096 } 1097 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1098 assert(s == sizeof(n->mac)); 1099 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1100 rxfilter_notify(nc); 1101 1102 return VIRTIO_NET_OK; 1103 } 1104 1105 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1106 return VIRTIO_NET_ERR; 1107 } 1108 1109 int in_use = 0; 1110 int first_multi = 0; 1111 uint8_t uni_overflow = 0; 1112 uint8_t multi_overflow = 0; 1113 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1114 1115 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1116 sizeof(mac_data.entries)); 1117 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1118 if (s != sizeof(mac_data.entries)) { 1119 goto error; 1120 } 1121 iov_discard_front(&iov, &iov_cnt, s); 1122 1123 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1124 goto error; 1125 } 1126 1127 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1128 s = iov_to_buf(iov, iov_cnt, 0, macs, 1129 mac_data.entries * ETH_ALEN); 1130 if (s != mac_data.entries * ETH_ALEN) { 1131 goto error; 1132 } 1133 in_use += mac_data.entries; 1134 } else { 1135 uni_overflow = 1; 1136 } 1137 1138 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1139 1140 first_multi = in_use; 1141 1142 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1143 sizeof(mac_data.entries)); 1144 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1145 if (s != sizeof(mac_data.entries)) { 1146 goto error; 1147 } 1148 1149 iov_discard_front(&iov, &iov_cnt, s); 1150 1151 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1152 goto error; 1153 } 1154 1155 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1156 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1157 mac_data.entries * ETH_ALEN); 1158 if (s != mac_data.entries * ETH_ALEN) { 1159 goto error; 1160 } 1161 in_use += mac_data.entries; 1162 } else { 1163 multi_overflow = 1; 1164 } 1165 1166 n->mac_table.in_use = in_use; 1167 n->mac_table.first_multi = first_multi; 1168 n->mac_table.uni_overflow = uni_overflow; 1169 n->mac_table.multi_overflow = multi_overflow; 1170 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1171 g_free(macs); 1172 rxfilter_notify(nc); 1173 1174 return VIRTIO_NET_OK; 1175 1176 error: 1177 g_free(macs); 1178 return VIRTIO_NET_ERR; 1179 } 1180 1181 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1182 struct iovec *iov, unsigned int iov_cnt) 1183 { 1184 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1185 uint16_t vid; 1186 size_t s; 1187 NetClientState *nc = qemu_get_queue(n->nic); 1188 1189 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1190 vid = virtio_lduw_p(vdev, &vid); 1191 if (s != sizeof(vid)) { 1192 return VIRTIO_NET_ERR; 1193 } 1194 1195 if (vid >= MAX_VLAN) 1196 return VIRTIO_NET_ERR; 1197 1198 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1199 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1200 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1201 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1202 else 1203 return VIRTIO_NET_ERR; 1204 1205 rxfilter_notify(nc); 1206 1207 return VIRTIO_NET_OK; 1208 } 1209 1210 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1211 struct iovec *iov, unsigned int iov_cnt) 1212 { 1213 trace_virtio_net_handle_announce(n->announce_timer.round); 1214 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1215 n->status & VIRTIO_NET_S_ANNOUNCE) { 1216 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1217 if (n->announce_timer.round) { 1218 qemu_announce_timer_step(&n->announce_timer); 1219 } 1220 return VIRTIO_NET_OK; 1221 } else { 1222 return VIRTIO_NET_ERR; 1223 } 1224 } 1225 1226 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1227 1228 static void virtio_net_disable_rss(VirtIONet *n) 1229 { 1230 if (n->rss_data.enabled) { 1231 trace_virtio_net_rss_disable(); 1232 } 1233 n->rss_data.enabled = false; 1234 1235 virtio_net_detach_epbf_rss(n); 1236 } 1237 1238 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1239 { 1240 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1241 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1242 return false; 1243 } 1244 1245 return nc->info->set_steering_ebpf(nc, prog_fd); 1246 } 1247 1248 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1249 struct EBPFRSSConfig *config) 1250 { 1251 config->redirect = data->redirect; 1252 config->populate_hash = data->populate_hash; 1253 config->hash_types = data->hash_types; 1254 config->indirections_len = data->indirections_len; 1255 config->default_queue = data->default_queue; 1256 } 1257 1258 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1259 { 1260 struct EBPFRSSConfig config = {}; 1261 1262 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1263 return false; 1264 } 1265 1266 rss_data_to_rss_config(&n->rss_data, &config); 1267 1268 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1269 n->rss_data.indirections_table, n->rss_data.key)) { 1270 return false; 1271 } 1272 1273 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1274 return false; 1275 } 1276 1277 return true; 1278 } 1279 1280 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1281 { 1282 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1283 } 1284 1285 static bool virtio_net_load_ebpf(VirtIONet *n) 1286 { 1287 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1288 /* backend does't support steering ebpf */ 1289 return false; 1290 } 1291 1292 return ebpf_rss_load(&n->ebpf_rss); 1293 } 1294 1295 static void virtio_net_unload_ebpf(VirtIONet *n) 1296 { 1297 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1298 ebpf_rss_unload(&n->ebpf_rss); 1299 } 1300 1301 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1302 struct iovec *iov, 1303 unsigned int iov_cnt, 1304 bool do_rss) 1305 { 1306 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1307 struct virtio_net_rss_config cfg; 1308 size_t s, offset = 0, size_get; 1309 uint16_t queue_pairs, i; 1310 struct { 1311 uint16_t us; 1312 uint8_t b; 1313 } QEMU_PACKED temp; 1314 const char *err_msg = ""; 1315 uint32_t err_value = 0; 1316 1317 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1318 err_msg = "RSS is not negotiated"; 1319 goto error; 1320 } 1321 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1322 err_msg = "Hash report is not negotiated"; 1323 goto error; 1324 } 1325 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1326 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1327 if (s != size_get) { 1328 err_msg = "Short command buffer"; 1329 err_value = (uint32_t)s; 1330 goto error; 1331 } 1332 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1333 n->rss_data.indirections_len = 1334 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1335 n->rss_data.indirections_len++; 1336 if (!do_rss) { 1337 n->rss_data.indirections_len = 1; 1338 } 1339 if (!is_power_of_2(n->rss_data.indirections_len)) { 1340 err_msg = "Invalid size of indirection table"; 1341 err_value = n->rss_data.indirections_len; 1342 goto error; 1343 } 1344 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1345 err_msg = "Too large indirection table"; 1346 err_value = n->rss_data.indirections_len; 1347 goto error; 1348 } 1349 n->rss_data.default_queue = do_rss ? 1350 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1351 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1352 err_msg = "Invalid default queue"; 1353 err_value = n->rss_data.default_queue; 1354 goto error; 1355 } 1356 offset += size_get; 1357 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1358 g_free(n->rss_data.indirections_table); 1359 n->rss_data.indirections_table = g_malloc(size_get); 1360 if (!n->rss_data.indirections_table) { 1361 err_msg = "Can't allocate indirections table"; 1362 err_value = n->rss_data.indirections_len; 1363 goto error; 1364 } 1365 s = iov_to_buf(iov, iov_cnt, offset, 1366 n->rss_data.indirections_table, size_get); 1367 if (s != size_get) { 1368 err_msg = "Short indirection table buffer"; 1369 err_value = (uint32_t)s; 1370 goto error; 1371 } 1372 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1373 uint16_t val = n->rss_data.indirections_table[i]; 1374 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1375 } 1376 offset += size_get; 1377 size_get = sizeof(temp); 1378 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1379 if (s != size_get) { 1380 err_msg = "Can't get queue_pairs"; 1381 err_value = (uint32_t)s; 1382 goto error; 1383 } 1384 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1385 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1386 err_msg = "Invalid number of queue_pairs"; 1387 err_value = queue_pairs; 1388 goto error; 1389 } 1390 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1391 err_msg = "Invalid key size"; 1392 err_value = temp.b; 1393 goto error; 1394 } 1395 if (!temp.b && n->rss_data.hash_types) { 1396 err_msg = "No key provided"; 1397 err_value = 0; 1398 goto error; 1399 } 1400 if (!temp.b && !n->rss_data.hash_types) { 1401 virtio_net_disable_rss(n); 1402 return queue_pairs; 1403 } 1404 offset += size_get; 1405 size_get = temp.b; 1406 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1407 if (s != size_get) { 1408 err_msg = "Can get key buffer"; 1409 err_value = (uint32_t)s; 1410 goto error; 1411 } 1412 n->rss_data.enabled = true; 1413 1414 if (!n->rss_data.populate_hash) { 1415 if (!virtio_net_attach_epbf_rss(n)) { 1416 /* EBPF must be loaded for vhost */ 1417 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1418 warn_report("Can't load eBPF RSS for vhost"); 1419 goto error; 1420 } 1421 /* fallback to software RSS */ 1422 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1423 n->rss_data.enabled_software_rss = true; 1424 } 1425 } else { 1426 /* use software RSS for hash populating */ 1427 /* and detach eBPF if was loaded before */ 1428 virtio_net_detach_epbf_rss(n); 1429 n->rss_data.enabled_software_rss = true; 1430 } 1431 1432 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1433 n->rss_data.indirections_len, 1434 temp.b); 1435 return queue_pairs; 1436 error: 1437 trace_virtio_net_rss_error(err_msg, err_value); 1438 virtio_net_disable_rss(n); 1439 return 0; 1440 } 1441 1442 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1443 struct iovec *iov, unsigned int iov_cnt) 1444 { 1445 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1446 uint16_t queue_pairs; 1447 NetClientState *nc = qemu_get_queue(n->nic); 1448 1449 virtio_net_disable_rss(n); 1450 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1451 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1452 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1453 } 1454 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1455 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1456 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1457 struct virtio_net_ctrl_mq mq; 1458 size_t s; 1459 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1460 return VIRTIO_NET_ERR; 1461 } 1462 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1463 if (s != sizeof(mq)) { 1464 return VIRTIO_NET_ERR; 1465 } 1466 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1467 1468 } else { 1469 return VIRTIO_NET_ERR; 1470 } 1471 1472 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1473 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1474 queue_pairs > n->max_queue_pairs || 1475 !n->multiqueue) { 1476 return VIRTIO_NET_ERR; 1477 } 1478 1479 n->curr_queue_pairs = queue_pairs; 1480 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1481 /* 1482 * Avoid updating the backend for a vdpa device: We're only interested 1483 * in updating the device model queues. 1484 */ 1485 return VIRTIO_NET_OK; 1486 } 1487 /* stop the backend before changing the number of queue_pairs to avoid handling a 1488 * disabled queue */ 1489 virtio_net_set_status(vdev, vdev->status); 1490 virtio_net_set_queue_pairs(n); 1491 1492 return VIRTIO_NET_OK; 1493 } 1494 1495 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1496 const struct iovec *in_sg, unsigned in_num, 1497 const struct iovec *out_sg, 1498 unsigned out_num) 1499 { 1500 VirtIONet *n = VIRTIO_NET(vdev); 1501 struct virtio_net_ctrl_hdr ctrl; 1502 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1503 size_t s; 1504 struct iovec *iov, *iov2; 1505 1506 if (iov_size(in_sg, in_num) < sizeof(status) || 1507 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1508 virtio_error(vdev, "virtio-net ctrl missing headers"); 1509 return 0; 1510 } 1511 1512 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1513 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1514 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1515 if (s != sizeof(ctrl)) { 1516 status = VIRTIO_NET_ERR; 1517 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1518 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1519 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1520 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1521 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1522 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1523 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1524 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1525 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1526 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1527 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1528 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1529 } 1530 1531 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1532 assert(s == sizeof(status)); 1533 1534 g_free(iov2); 1535 return sizeof(status); 1536 } 1537 1538 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1539 { 1540 VirtQueueElement *elem; 1541 1542 for (;;) { 1543 size_t written; 1544 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1545 if (!elem) { 1546 break; 1547 } 1548 1549 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1550 elem->out_sg, elem->out_num); 1551 if (written > 0) { 1552 virtqueue_push(vq, elem, written); 1553 virtio_notify(vdev, vq); 1554 g_free(elem); 1555 } else { 1556 virtqueue_detach_element(vq, elem, 0); 1557 g_free(elem); 1558 break; 1559 } 1560 } 1561 } 1562 1563 /* RX */ 1564 1565 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1566 { 1567 VirtIONet *n = VIRTIO_NET(vdev); 1568 int queue_index = vq2q(virtio_get_queue_index(vq)); 1569 1570 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1571 } 1572 1573 static bool virtio_net_can_receive(NetClientState *nc) 1574 { 1575 VirtIONet *n = qemu_get_nic_opaque(nc); 1576 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1577 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1578 1579 if (!vdev->vm_running) { 1580 return false; 1581 } 1582 1583 if (nc->queue_index >= n->curr_queue_pairs) { 1584 return false; 1585 } 1586 1587 if (!virtio_queue_ready(q->rx_vq) || 1588 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1589 return false; 1590 } 1591 1592 return true; 1593 } 1594 1595 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1596 { 1597 VirtIONet *n = q->n; 1598 if (virtio_queue_empty(q->rx_vq) || 1599 (n->mergeable_rx_bufs && 1600 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1601 virtio_queue_set_notification(q->rx_vq, 1); 1602 1603 /* To avoid a race condition where the guest has made some buffers 1604 * available after the above check but before notification was 1605 * enabled, check for available buffers again. 1606 */ 1607 if (virtio_queue_empty(q->rx_vq) || 1608 (n->mergeable_rx_bufs && 1609 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1610 return 0; 1611 } 1612 } 1613 1614 virtio_queue_set_notification(q->rx_vq, 0); 1615 return 1; 1616 } 1617 1618 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1619 { 1620 virtio_tswap16s(vdev, &hdr->hdr_len); 1621 virtio_tswap16s(vdev, &hdr->gso_size); 1622 virtio_tswap16s(vdev, &hdr->csum_start); 1623 virtio_tswap16s(vdev, &hdr->csum_offset); 1624 } 1625 1626 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1627 * it never finds out that the packets don't have valid checksums. This 1628 * causes dhclient to get upset. Fedora's carried a patch for ages to 1629 * fix this with Xen but it hasn't appeared in an upstream release of 1630 * dhclient yet. 1631 * 1632 * To avoid breaking existing guests, we catch udp packets and add 1633 * checksums. This is terrible but it's better than hacking the guest 1634 * kernels. 1635 * 1636 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1637 * we should provide a mechanism to disable it to avoid polluting the host 1638 * cache. 1639 */ 1640 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1641 uint8_t *buf, size_t size) 1642 { 1643 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1644 (size > 27 && size < 1500) && /* normal sized MTU */ 1645 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1646 (buf[23] == 17) && /* ip.protocol == UDP */ 1647 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1648 net_checksum_calculate(buf, size, CSUM_UDP); 1649 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1650 } 1651 } 1652 1653 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1654 const void *buf, size_t size) 1655 { 1656 if (n->has_vnet_hdr) { 1657 /* FIXME this cast is evil */ 1658 void *wbuf = (void *)buf; 1659 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1660 size - n->host_hdr_len); 1661 1662 if (n->needs_vnet_hdr_swap) { 1663 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1664 } 1665 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1666 } else { 1667 struct virtio_net_hdr hdr = { 1668 .flags = 0, 1669 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1670 }; 1671 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1672 } 1673 } 1674 1675 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1676 { 1677 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1678 static const uint8_t vlan[] = {0x81, 0x00}; 1679 uint8_t *ptr = (uint8_t *)buf; 1680 int i; 1681 1682 if (n->promisc) 1683 return 1; 1684 1685 ptr += n->host_hdr_len; 1686 1687 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1688 int vid = lduw_be_p(ptr + 14) & 0xfff; 1689 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1690 return 0; 1691 } 1692 1693 if (ptr[0] & 1) { // multicast 1694 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1695 return !n->nobcast; 1696 } else if (n->nomulti) { 1697 return 0; 1698 } else if (n->allmulti || n->mac_table.multi_overflow) { 1699 return 1; 1700 } 1701 1702 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1703 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1704 return 1; 1705 } 1706 } 1707 } else { // unicast 1708 if (n->nouni) { 1709 return 0; 1710 } else if (n->alluni || n->mac_table.uni_overflow) { 1711 return 1; 1712 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1713 return 1; 1714 } 1715 1716 for (i = 0; i < n->mac_table.first_multi; i++) { 1717 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1718 return 1; 1719 } 1720 } 1721 } 1722 1723 return 0; 1724 } 1725 1726 static uint8_t virtio_net_get_hash_type(bool isip4, 1727 bool isip6, 1728 bool isudp, 1729 bool istcp, 1730 uint32_t types) 1731 { 1732 if (isip4) { 1733 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { 1734 return NetPktRssIpV4Tcp; 1735 } 1736 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { 1737 return NetPktRssIpV4Udp; 1738 } 1739 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1740 return NetPktRssIpV4; 1741 } 1742 } else if (isip6) { 1743 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | 1744 VIRTIO_NET_RSS_HASH_TYPE_TCPv6; 1745 1746 if (istcp && (types & mask)) { 1747 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? 1748 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; 1749 } 1750 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; 1751 if (isudp && (types & mask)) { 1752 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? 1753 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; 1754 } 1755 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; 1756 if (types & mask) { 1757 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? 1758 NetPktRssIpV6Ex : NetPktRssIpV6; 1759 } 1760 } 1761 return 0xff; 1762 } 1763 1764 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1765 uint32_t hash) 1766 { 1767 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1768 hdr->hash_value = hash; 1769 hdr->hash_report = report; 1770 } 1771 1772 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1773 size_t size) 1774 { 1775 VirtIONet *n = qemu_get_nic_opaque(nc); 1776 unsigned int index = nc->queue_index, new_index = index; 1777 struct NetRxPkt *pkt = n->rx_pkt; 1778 uint8_t net_hash_type; 1779 uint32_t hash; 1780 bool isip4, isip6, isudp, istcp; 1781 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1782 VIRTIO_NET_HASH_REPORT_IPv4, 1783 VIRTIO_NET_HASH_REPORT_TCPv4, 1784 VIRTIO_NET_HASH_REPORT_TCPv6, 1785 VIRTIO_NET_HASH_REPORT_IPv6, 1786 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1787 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1788 VIRTIO_NET_HASH_REPORT_UDPv4, 1789 VIRTIO_NET_HASH_REPORT_UDPv6, 1790 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1791 }; 1792 1793 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, 1794 size - n->host_hdr_len); 1795 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); 1796 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { 1797 istcp = isudp = false; 1798 } 1799 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { 1800 istcp = isudp = false; 1801 } 1802 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, 1803 n->rss_data.hash_types); 1804 if (net_hash_type > NetPktRssIpV6UdpEx) { 1805 if (n->rss_data.populate_hash) { 1806 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1807 } 1808 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1809 } 1810 1811 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1812 1813 if (n->rss_data.populate_hash) { 1814 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1815 } 1816 1817 if (n->rss_data.redirect) { 1818 new_index = hash & (n->rss_data.indirections_len - 1); 1819 new_index = n->rss_data.indirections_table[new_index]; 1820 } 1821 1822 return (index == new_index) ? -1 : new_index; 1823 } 1824 1825 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1826 size_t size, bool no_rss) 1827 { 1828 VirtIONet *n = qemu_get_nic_opaque(nc); 1829 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1830 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1831 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1832 size_t lens[VIRTQUEUE_MAX_SIZE]; 1833 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1834 struct virtio_net_hdr_mrg_rxbuf mhdr; 1835 unsigned mhdr_cnt = 0; 1836 size_t offset, i, guest_offset, j; 1837 ssize_t err; 1838 1839 if (!virtio_net_can_receive(nc)) { 1840 return -1; 1841 } 1842 1843 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1844 int index = virtio_net_process_rss(nc, buf, size); 1845 if (index >= 0) { 1846 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1847 return virtio_net_receive_rcu(nc2, buf, size, true); 1848 } 1849 } 1850 1851 /* hdr_len refers to the header we supply to the guest */ 1852 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1853 return 0; 1854 } 1855 1856 if (!receive_filter(n, buf, size)) 1857 return size; 1858 1859 offset = i = 0; 1860 1861 while (offset < size) { 1862 VirtQueueElement *elem; 1863 int len, total; 1864 const struct iovec *sg; 1865 1866 total = 0; 1867 1868 if (i == VIRTQUEUE_MAX_SIZE) { 1869 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1870 err = size; 1871 goto err; 1872 } 1873 1874 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1875 if (!elem) { 1876 if (i) { 1877 virtio_error(vdev, "virtio-net unexpected empty queue: " 1878 "i %zd mergeable %d offset %zd, size %zd, " 1879 "guest hdr len %zd, host hdr len %zd " 1880 "guest features 0x%" PRIx64, 1881 i, n->mergeable_rx_bufs, offset, size, 1882 n->guest_hdr_len, n->host_hdr_len, 1883 vdev->guest_features); 1884 } 1885 err = -1; 1886 goto err; 1887 } 1888 1889 if (elem->in_num < 1) { 1890 virtio_error(vdev, 1891 "virtio-net receive queue contains no in buffers"); 1892 virtqueue_detach_element(q->rx_vq, elem, 0); 1893 g_free(elem); 1894 err = -1; 1895 goto err; 1896 } 1897 1898 sg = elem->in_sg; 1899 if (i == 0) { 1900 assert(offset == 0); 1901 if (n->mergeable_rx_bufs) { 1902 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1903 sg, elem->in_num, 1904 offsetof(typeof(mhdr), num_buffers), 1905 sizeof(mhdr.num_buffers)); 1906 } 1907 1908 receive_header(n, sg, elem->in_num, buf, size); 1909 if (n->rss_data.populate_hash) { 1910 offset = sizeof(mhdr); 1911 iov_from_buf(sg, elem->in_num, offset, 1912 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1913 } 1914 offset = n->host_hdr_len; 1915 total += n->guest_hdr_len; 1916 guest_offset = n->guest_hdr_len; 1917 } else { 1918 guest_offset = 0; 1919 } 1920 1921 /* copy in packet. ugh */ 1922 len = iov_from_buf(sg, elem->in_num, guest_offset, 1923 buf + offset, size - offset); 1924 total += len; 1925 offset += len; 1926 /* If buffers can't be merged, at this point we 1927 * must have consumed the complete packet. 1928 * Otherwise, drop it. */ 1929 if (!n->mergeable_rx_bufs && offset < size) { 1930 virtqueue_unpop(q->rx_vq, elem, total); 1931 g_free(elem); 1932 err = size; 1933 goto err; 1934 } 1935 1936 elems[i] = elem; 1937 lens[i] = total; 1938 i++; 1939 } 1940 1941 if (mhdr_cnt) { 1942 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1943 iov_from_buf(mhdr_sg, mhdr_cnt, 1944 0, 1945 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1946 } 1947 1948 for (j = 0; j < i; j++) { 1949 /* signal other side */ 1950 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1951 g_free(elems[j]); 1952 } 1953 1954 virtqueue_flush(q->rx_vq, i); 1955 virtio_notify(vdev, q->rx_vq); 1956 1957 return size; 1958 1959 err: 1960 for (j = 0; j < i; j++) { 1961 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1962 g_free(elems[j]); 1963 } 1964 1965 return err; 1966 } 1967 1968 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1969 size_t size) 1970 { 1971 RCU_READ_LOCK_GUARD(); 1972 1973 return virtio_net_receive_rcu(nc, buf, size, false); 1974 } 1975 1976 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 1977 const uint8_t *buf, 1978 VirtioNetRscUnit *unit) 1979 { 1980 uint16_t ip_hdrlen; 1981 struct ip_header *ip; 1982 1983 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 1984 + sizeof(struct eth_header)); 1985 unit->ip = (void *)ip; 1986 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 1987 unit->ip_plen = &ip->ip_len; 1988 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 1989 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 1990 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 1991 } 1992 1993 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 1994 const uint8_t *buf, 1995 VirtioNetRscUnit *unit) 1996 { 1997 struct ip6_header *ip6; 1998 1999 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2000 + sizeof(struct eth_header)); 2001 unit->ip = ip6; 2002 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2003 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2004 + sizeof(struct ip6_header)); 2005 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2006 2007 /* There is a difference between payload lenght in ipv4 and v6, 2008 ip header is excluded in ipv6 */ 2009 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2010 } 2011 2012 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2013 VirtioNetRscSeg *seg) 2014 { 2015 int ret; 2016 struct virtio_net_hdr_v1 *h; 2017 2018 h = (struct virtio_net_hdr_v1 *)seg->buf; 2019 h->flags = 0; 2020 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2021 2022 if (seg->is_coalesced) { 2023 h->rsc.segments = seg->packets; 2024 h->rsc.dup_acks = seg->dup_ack; 2025 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2026 if (chain->proto == ETH_P_IP) { 2027 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2028 } else { 2029 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2030 } 2031 } 2032 2033 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2034 QTAILQ_REMOVE(&chain->buffers, seg, next); 2035 g_free(seg->buf); 2036 g_free(seg); 2037 2038 return ret; 2039 } 2040 2041 static void virtio_net_rsc_purge(void *opq) 2042 { 2043 VirtioNetRscSeg *seg, *rn; 2044 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2045 2046 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2047 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2048 chain->stat.purge_failed++; 2049 continue; 2050 } 2051 } 2052 2053 chain->stat.timer++; 2054 if (!QTAILQ_EMPTY(&chain->buffers)) { 2055 timer_mod(chain->drain_timer, 2056 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2057 } 2058 } 2059 2060 static void virtio_net_rsc_cleanup(VirtIONet *n) 2061 { 2062 VirtioNetRscChain *chain, *rn_chain; 2063 VirtioNetRscSeg *seg, *rn_seg; 2064 2065 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2066 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2067 QTAILQ_REMOVE(&chain->buffers, seg, next); 2068 g_free(seg->buf); 2069 g_free(seg); 2070 } 2071 2072 timer_free(chain->drain_timer); 2073 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2074 g_free(chain); 2075 } 2076 } 2077 2078 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2079 NetClientState *nc, 2080 const uint8_t *buf, size_t size) 2081 { 2082 uint16_t hdr_len; 2083 VirtioNetRscSeg *seg; 2084 2085 hdr_len = chain->n->guest_hdr_len; 2086 seg = g_new(VirtioNetRscSeg, 1); 2087 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2088 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2089 memcpy(seg->buf, buf, size); 2090 seg->size = size; 2091 seg->packets = 1; 2092 seg->dup_ack = 0; 2093 seg->is_coalesced = 0; 2094 seg->nc = nc; 2095 2096 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2097 chain->stat.cache++; 2098 2099 switch (chain->proto) { 2100 case ETH_P_IP: 2101 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2102 break; 2103 case ETH_P_IPV6: 2104 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2105 break; 2106 default: 2107 g_assert_not_reached(); 2108 } 2109 } 2110 2111 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2112 VirtioNetRscSeg *seg, 2113 const uint8_t *buf, 2114 struct tcp_header *n_tcp, 2115 struct tcp_header *o_tcp) 2116 { 2117 uint32_t nack, oack; 2118 uint16_t nwin, owin; 2119 2120 nack = htonl(n_tcp->th_ack); 2121 nwin = htons(n_tcp->th_win); 2122 oack = htonl(o_tcp->th_ack); 2123 owin = htons(o_tcp->th_win); 2124 2125 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2126 chain->stat.ack_out_of_win++; 2127 return RSC_FINAL; 2128 } else if (nack == oack) { 2129 /* duplicated ack or window probe */ 2130 if (nwin == owin) { 2131 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2132 chain->stat.dup_ack++; 2133 return RSC_FINAL; 2134 } else { 2135 /* Coalesce window update */ 2136 o_tcp->th_win = n_tcp->th_win; 2137 chain->stat.win_update++; 2138 return RSC_COALESCE; 2139 } 2140 } else { 2141 /* pure ack, go to 'C', finalize*/ 2142 chain->stat.pure_ack++; 2143 return RSC_FINAL; 2144 } 2145 } 2146 2147 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2148 VirtioNetRscSeg *seg, 2149 const uint8_t *buf, 2150 VirtioNetRscUnit *n_unit) 2151 { 2152 void *data; 2153 uint16_t o_ip_len; 2154 uint32_t nseq, oseq; 2155 VirtioNetRscUnit *o_unit; 2156 2157 o_unit = &seg->unit; 2158 o_ip_len = htons(*o_unit->ip_plen); 2159 nseq = htonl(n_unit->tcp->th_seq); 2160 oseq = htonl(o_unit->tcp->th_seq); 2161 2162 /* out of order or retransmitted. */ 2163 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2164 chain->stat.data_out_of_win++; 2165 return RSC_FINAL; 2166 } 2167 2168 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2169 if (nseq == oseq) { 2170 if ((o_unit->payload == 0) && n_unit->payload) { 2171 /* From no payload to payload, normal case, not a dup ack or etc */ 2172 chain->stat.data_after_pure_ack++; 2173 goto coalesce; 2174 } else { 2175 return virtio_net_rsc_handle_ack(chain, seg, buf, 2176 n_unit->tcp, o_unit->tcp); 2177 } 2178 } else if ((nseq - oseq) != o_unit->payload) { 2179 /* Not a consistent packet, out of order */ 2180 chain->stat.data_out_of_order++; 2181 return RSC_FINAL; 2182 } else { 2183 coalesce: 2184 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2185 chain->stat.over_size++; 2186 return RSC_FINAL; 2187 } 2188 2189 /* Here comes the right data, the payload length in v4/v6 is different, 2190 so use the field value to update and record the new data len */ 2191 o_unit->payload += n_unit->payload; /* update new data len */ 2192 2193 /* update field in ip header */ 2194 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2195 2196 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2197 for windows guest, while this may change the behavior for linux 2198 guest (only if it uses RSC feature). */ 2199 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2200 2201 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2202 o_unit->tcp->th_win = n_unit->tcp->th_win; 2203 2204 memmove(seg->buf + seg->size, data, n_unit->payload); 2205 seg->size += n_unit->payload; 2206 seg->packets++; 2207 chain->stat.coalesced++; 2208 return RSC_COALESCE; 2209 } 2210 } 2211 2212 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2213 VirtioNetRscSeg *seg, 2214 const uint8_t *buf, size_t size, 2215 VirtioNetRscUnit *unit) 2216 { 2217 struct ip_header *ip1, *ip2; 2218 2219 ip1 = (struct ip_header *)(unit->ip); 2220 ip2 = (struct ip_header *)(seg->unit.ip); 2221 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2222 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2223 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2224 chain->stat.no_match++; 2225 return RSC_NO_MATCH; 2226 } 2227 2228 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2229 } 2230 2231 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2232 VirtioNetRscSeg *seg, 2233 const uint8_t *buf, size_t size, 2234 VirtioNetRscUnit *unit) 2235 { 2236 struct ip6_header *ip1, *ip2; 2237 2238 ip1 = (struct ip6_header *)(unit->ip); 2239 ip2 = (struct ip6_header *)(seg->unit.ip); 2240 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2241 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2242 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2243 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2244 chain->stat.no_match++; 2245 return RSC_NO_MATCH; 2246 } 2247 2248 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2249 } 2250 2251 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2252 * to prevent out of order */ 2253 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2254 struct tcp_header *tcp) 2255 { 2256 uint16_t tcp_hdr; 2257 uint16_t tcp_flag; 2258 2259 tcp_flag = htons(tcp->th_offset_flags); 2260 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2261 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2262 if (tcp_flag & TH_SYN) { 2263 chain->stat.tcp_syn++; 2264 return RSC_BYPASS; 2265 } 2266 2267 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2268 chain->stat.tcp_ctrl_drain++; 2269 return RSC_FINAL; 2270 } 2271 2272 if (tcp_hdr > sizeof(struct tcp_header)) { 2273 chain->stat.tcp_all_opt++; 2274 return RSC_FINAL; 2275 } 2276 2277 return RSC_CANDIDATE; 2278 } 2279 2280 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2281 NetClientState *nc, 2282 const uint8_t *buf, size_t size, 2283 VirtioNetRscUnit *unit) 2284 { 2285 int ret; 2286 VirtioNetRscSeg *seg, *nseg; 2287 2288 if (QTAILQ_EMPTY(&chain->buffers)) { 2289 chain->stat.empty_cache++; 2290 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2291 timer_mod(chain->drain_timer, 2292 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2293 return size; 2294 } 2295 2296 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2297 if (chain->proto == ETH_P_IP) { 2298 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2299 } else { 2300 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2301 } 2302 2303 if (ret == RSC_FINAL) { 2304 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2305 /* Send failed */ 2306 chain->stat.final_failed++; 2307 return 0; 2308 } 2309 2310 /* Send current packet */ 2311 return virtio_net_do_receive(nc, buf, size); 2312 } else if (ret == RSC_NO_MATCH) { 2313 continue; 2314 } else { 2315 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2316 seg->is_coalesced = 1; 2317 return size; 2318 } 2319 } 2320 2321 chain->stat.no_match_cache++; 2322 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2323 return size; 2324 } 2325 2326 /* Drain a connection data, this is to avoid out of order segments */ 2327 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2328 NetClientState *nc, 2329 const uint8_t *buf, size_t size, 2330 uint16_t ip_start, uint16_t ip_size, 2331 uint16_t tcp_port) 2332 { 2333 VirtioNetRscSeg *seg, *nseg; 2334 uint32_t ppair1, ppair2; 2335 2336 ppair1 = *(uint32_t *)(buf + tcp_port); 2337 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2338 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2339 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2340 || (ppair1 != ppair2)) { 2341 continue; 2342 } 2343 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2344 chain->stat.drain_failed++; 2345 } 2346 2347 break; 2348 } 2349 2350 return virtio_net_do_receive(nc, buf, size); 2351 } 2352 2353 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2354 struct ip_header *ip, 2355 const uint8_t *buf, size_t size) 2356 { 2357 uint16_t ip_len; 2358 2359 /* Not an ipv4 packet */ 2360 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2361 chain->stat.ip_option++; 2362 return RSC_BYPASS; 2363 } 2364 2365 /* Don't handle packets with ip option */ 2366 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2367 chain->stat.ip_option++; 2368 return RSC_BYPASS; 2369 } 2370 2371 if (ip->ip_p != IPPROTO_TCP) { 2372 chain->stat.bypass_not_tcp++; 2373 return RSC_BYPASS; 2374 } 2375 2376 /* Don't handle packets with ip fragment */ 2377 if (!(htons(ip->ip_off) & IP_DF)) { 2378 chain->stat.ip_frag++; 2379 return RSC_BYPASS; 2380 } 2381 2382 /* Don't handle packets with ecn flag */ 2383 if (IPTOS_ECN(ip->ip_tos)) { 2384 chain->stat.ip_ecn++; 2385 return RSC_BYPASS; 2386 } 2387 2388 ip_len = htons(ip->ip_len); 2389 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2390 || ip_len > (size - chain->n->guest_hdr_len - 2391 sizeof(struct eth_header))) { 2392 chain->stat.ip_hacked++; 2393 return RSC_BYPASS; 2394 } 2395 2396 return RSC_CANDIDATE; 2397 } 2398 2399 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2400 NetClientState *nc, 2401 const uint8_t *buf, size_t size) 2402 { 2403 int32_t ret; 2404 uint16_t hdr_len; 2405 VirtioNetRscUnit unit; 2406 2407 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2408 2409 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2410 + sizeof(struct tcp_header))) { 2411 chain->stat.bypass_not_tcp++; 2412 return virtio_net_do_receive(nc, buf, size); 2413 } 2414 2415 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2416 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2417 != RSC_CANDIDATE) { 2418 return virtio_net_do_receive(nc, buf, size); 2419 } 2420 2421 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2422 if (ret == RSC_BYPASS) { 2423 return virtio_net_do_receive(nc, buf, size); 2424 } else if (ret == RSC_FINAL) { 2425 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2426 ((hdr_len + sizeof(struct eth_header)) + 12), 2427 VIRTIO_NET_IP4_ADDR_SIZE, 2428 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2429 } 2430 2431 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2432 } 2433 2434 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2435 struct ip6_header *ip6, 2436 const uint8_t *buf, size_t size) 2437 { 2438 uint16_t ip_len; 2439 2440 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2441 != IP_HEADER_VERSION_6) { 2442 return RSC_BYPASS; 2443 } 2444 2445 /* Both option and protocol is checked in this */ 2446 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2447 chain->stat.bypass_not_tcp++; 2448 return RSC_BYPASS; 2449 } 2450 2451 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2452 if (ip_len < sizeof(struct tcp_header) || 2453 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2454 - sizeof(struct ip6_header))) { 2455 chain->stat.ip_hacked++; 2456 return RSC_BYPASS; 2457 } 2458 2459 /* Don't handle packets with ecn flag */ 2460 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2461 chain->stat.ip_ecn++; 2462 return RSC_BYPASS; 2463 } 2464 2465 return RSC_CANDIDATE; 2466 } 2467 2468 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2469 const uint8_t *buf, size_t size) 2470 { 2471 int32_t ret; 2472 uint16_t hdr_len; 2473 VirtioNetRscChain *chain; 2474 VirtioNetRscUnit unit; 2475 2476 chain = opq; 2477 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2478 2479 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2480 + sizeof(tcp_header))) { 2481 return virtio_net_do_receive(nc, buf, size); 2482 } 2483 2484 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2485 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2486 unit.ip, buf, size)) { 2487 return virtio_net_do_receive(nc, buf, size); 2488 } 2489 2490 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2491 if (ret == RSC_BYPASS) { 2492 return virtio_net_do_receive(nc, buf, size); 2493 } else if (ret == RSC_FINAL) { 2494 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2495 ((hdr_len + sizeof(struct eth_header)) + 8), 2496 VIRTIO_NET_IP6_ADDR_SIZE, 2497 hdr_len + sizeof(struct eth_header) 2498 + sizeof(struct ip6_header)); 2499 } 2500 2501 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2502 } 2503 2504 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2505 NetClientState *nc, 2506 uint16_t proto) 2507 { 2508 VirtioNetRscChain *chain; 2509 2510 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2511 return NULL; 2512 } 2513 2514 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2515 if (chain->proto == proto) { 2516 return chain; 2517 } 2518 } 2519 2520 chain = g_malloc(sizeof(*chain)); 2521 chain->n = n; 2522 chain->proto = proto; 2523 if (proto == (uint16_t)ETH_P_IP) { 2524 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2525 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2526 } else { 2527 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2528 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2529 } 2530 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2531 virtio_net_rsc_purge, chain); 2532 memset(&chain->stat, 0, sizeof(chain->stat)); 2533 2534 QTAILQ_INIT(&chain->buffers); 2535 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2536 2537 return chain; 2538 } 2539 2540 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2541 const uint8_t *buf, 2542 size_t size) 2543 { 2544 uint16_t proto; 2545 VirtioNetRscChain *chain; 2546 struct eth_header *eth; 2547 VirtIONet *n; 2548 2549 n = qemu_get_nic_opaque(nc); 2550 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2551 return virtio_net_do_receive(nc, buf, size); 2552 } 2553 2554 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2555 proto = htons(eth->h_proto); 2556 2557 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2558 if (chain) { 2559 chain->stat.received++; 2560 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2561 return virtio_net_rsc_receive4(chain, nc, buf, size); 2562 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2563 return virtio_net_rsc_receive6(chain, nc, buf, size); 2564 } 2565 } 2566 return virtio_net_do_receive(nc, buf, size); 2567 } 2568 2569 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2570 size_t size) 2571 { 2572 VirtIONet *n = qemu_get_nic_opaque(nc); 2573 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2574 return virtio_net_rsc_receive(nc, buf, size); 2575 } else { 2576 return virtio_net_do_receive(nc, buf, size); 2577 } 2578 } 2579 2580 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2581 2582 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2583 { 2584 VirtIONet *n = qemu_get_nic_opaque(nc); 2585 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2586 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2587 int ret; 2588 2589 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2590 virtio_notify(vdev, q->tx_vq); 2591 2592 g_free(q->async_tx.elem); 2593 q->async_tx.elem = NULL; 2594 2595 virtio_queue_set_notification(q->tx_vq, 1); 2596 ret = virtio_net_flush_tx(q); 2597 if (ret >= n->tx_burst) { 2598 /* 2599 * the flush has been stopped by tx_burst 2600 * we will not receive notification for the 2601 * remainining part, so re-schedule 2602 */ 2603 virtio_queue_set_notification(q->tx_vq, 0); 2604 if (q->tx_bh) { 2605 qemu_bh_schedule(q->tx_bh); 2606 } else { 2607 timer_mod(q->tx_timer, 2608 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2609 } 2610 q->tx_waiting = 1; 2611 } 2612 } 2613 2614 /* TX */ 2615 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2616 { 2617 VirtIONet *n = q->n; 2618 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2619 VirtQueueElement *elem; 2620 int32_t num_packets = 0; 2621 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2622 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2623 return num_packets; 2624 } 2625 2626 if (q->async_tx.elem) { 2627 virtio_queue_set_notification(q->tx_vq, 0); 2628 return num_packets; 2629 } 2630 2631 for (;;) { 2632 ssize_t ret; 2633 unsigned int out_num; 2634 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2635 struct virtio_net_hdr_mrg_rxbuf mhdr; 2636 2637 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2638 if (!elem) { 2639 break; 2640 } 2641 2642 out_num = elem->out_num; 2643 out_sg = elem->out_sg; 2644 if (out_num < 1) { 2645 virtio_error(vdev, "virtio-net header not in first element"); 2646 virtqueue_detach_element(q->tx_vq, elem, 0); 2647 g_free(elem); 2648 return -EINVAL; 2649 } 2650 2651 if (n->has_vnet_hdr) { 2652 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2653 n->guest_hdr_len) { 2654 virtio_error(vdev, "virtio-net header incorrect"); 2655 virtqueue_detach_element(q->tx_vq, elem, 0); 2656 g_free(elem); 2657 return -EINVAL; 2658 } 2659 if (n->needs_vnet_hdr_swap) { 2660 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2661 sg2[0].iov_base = &mhdr; 2662 sg2[0].iov_len = n->guest_hdr_len; 2663 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2664 out_sg, out_num, 2665 n->guest_hdr_len, -1); 2666 if (out_num == VIRTQUEUE_MAX_SIZE) { 2667 goto drop; 2668 } 2669 out_num += 1; 2670 out_sg = sg2; 2671 } 2672 } 2673 /* 2674 * If host wants to see the guest header as is, we can 2675 * pass it on unchanged. Otherwise, copy just the parts 2676 * that host is interested in. 2677 */ 2678 assert(n->host_hdr_len <= n->guest_hdr_len); 2679 if (n->host_hdr_len != n->guest_hdr_len) { 2680 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2681 out_sg, out_num, 2682 0, n->host_hdr_len); 2683 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2684 out_sg, out_num, 2685 n->guest_hdr_len, -1); 2686 out_num = sg_num; 2687 out_sg = sg; 2688 } 2689 2690 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2691 out_sg, out_num, virtio_net_tx_complete); 2692 if (ret == 0) { 2693 virtio_queue_set_notification(q->tx_vq, 0); 2694 q->async_tx.elem = elem; 2695 return -EBUSY; 2696 } 2697 2698 drop: 2699 virtqueue_push(q->tx_vq, elem, 0); 2700 virtio_notify(vdev, q->tx_vq); 2701 g_free(elem); 2702 2703 if (++num_packets >= n->tx_burst) { 2704 break; 2705 } 2706 } 2707 return num_packets; 2708 } 2709 2710 static void virtio_net_tx_timer(void *opaque); 2711 2712 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2713 { 2714 VirtIONet *n = VIRTIO_NET(vdev); 2715 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2716 2717 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2718 virtio_net_drop_tx_queue_data(vdev, vq); 2719 return; 2720 } 2721 2722 /* This happens when device was stopped but VCPU wasn't. */ 2723 if (!vdev->vm_running) { 2724 q->tx_waiting = 1; 2725 return; 2726 } 2727 2728 if (q->tx_waiting) { 2729 /* We already have queued packets, immediately flush */ 2730 timer_del(q->tx_timer); 2731 virtio_net_tx_timer(q); 2732 } else { 2733 /* re-arm timer to flush it (and more) on next tick */ 2734 timer_mod(q->tx_timer, 2735 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2736 q->tx_waiting = 1; 2737 virtio_queue_set_notification(vq, 0); 2738 } 2739 } 2740 2741 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2742 { 2743 VirtIONet *n = VIRTIO_NET(vdev); 2744 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2745 2746 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2747 virtio_net_drop_tx_queue_data(vdev, vq); 2748 return; 2749 } 2750 2751 if (unlikely(q->tx_waiting)) { 2752 return; 2753 } 2754 q->tx_waiting = 1; 2755 /* This happens when device was stopped but VCPU wasn't. */ 2756 if (!vdev->vm_running) { 2757 return; 2758 } 2759 virtio_queue_set_notification(vq, 0); 2760 qemu_bh_schedule(q->tx_bh); 2761 } 2762 2763 static void virtio_net_tx_timer(void *opaque) 2764 { 2765 VirtIONetQueue *q = opaque; 2766 VirtIONet *n = q->n; 2767 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2768 int ret; 2769 2770 /* This happens when device was stopped but BH wasn't. */ 2771 if (!vdev->vm_running) { 2772 /* Make sure tx waiting is set, so we'll run when restarted. */ 2773 assert(q->tx_waiting); 2774 return; 2775 } 2776 2777 q->tx_waiting = 0; 2778 2779 /* Just in case the driver is not ready on more */ 2780 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2781 return; 2782 } 2783 2784 ret = virtio_net_flush_tx(q); 2785 if (ret == -EBUSY || ret == -EINVAL) { 2786 return; 2787 } 2788 /* 2789 * If we flush a full burst of packets, assume there are 2790 * more coming and immediately rearm 2791 */ 2792 if (ret >= n->tx_burst) { 2793 q->tx_waiting = 1; 2794 timer_mod(q->tx_timer, 2795 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2796 return; 2797 } 2798 /* 2799 * If less than a full burst, re-enable notification and flush 2800 * anything that may have come in while we weren't looking. If 2801 * we find something, assume the guest is still active and rearm 2802 */ 2803 virtio_queue_set_notification(q->tx_vq, 1); 2804 ret = virtio_net_flush_tx(q); 2805 if (ret > 0) { 2806 virtio_queue_set_notification(q->tx_vq, 0); 2807 q->tx_waiting = 1; 2808 timer_mod(q->tx_timer, 2809 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2810 } 2811 } 2812 2813 static void virtio_net_tx_bh(void *opaque) 2814 { 2815 VirtIONetQueue *q = opaque; 2816 VirtIONet *n = q->n; 2817 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2818 int32_t ret; 2819 2820 /* This happens when device was stopped but BH wasn't. */ 2821 if (!vdev->vm_running) { 2822 /* Make sure tx waiting is set, so we'll run when restarted. */ 2823 assert(q->tx_waiting); 2824 return; 2825 } 2826 2827 q->tx_waiting = 0; 2828 2829 /* Just in case the driver is not ready on more */ 2830 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2831 return; 2832 } 2833 2834 ret = virtio_net_flush_tx(q); 2835 if (ret == -EBUSY || ret == -EINVAL) { 2836 return; /* Notification re-enable handled by tx_complete or device 2837 * broken */ 2838 } 2839 2840 /* If we flush a full burst of packets, assume there are 2841 * more coming and immediately reschedule */ 2842 if (ret >= n->tx_burst) { 2843 qemu_bh_schedule(q->tx_bh); 2844 q->tx_waiting = 1; 2845 return; 2846 } 2847 2848 /* If less than a full burst, re-enable notification and flush 2849 * anything that may have come in while we weren't looking. If 2850 * we find something, assume the guest is still active and reschedule */ 2851 virtio_queue_set_notification(q->tx_vq, 1); 2852 ret = virtio_net_flush_tx(q); 2853 if (ret == -EINVAL) { 2854 return; 2855 } else if (ret > 0) { 2856 virtio_queue_set_notification(q->tx_vq, 0); 2857 qemu_bh_schedule(q->tx_bh); 2858 q->tx_waiting = 1; 2859 } 2860 } 2861 2862 static void virtio_net_add_queue(VirtIONet *n, int index) 2863 { 2864 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2865 2866 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2867 virtio_net_handle_rx); 2868 2869 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2870 n->vqs[index].tx_vq = 2871 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2872 virtio_net_handle_tx_timer); 2873 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2874 virtio_net_tx_timer, 2875 &n->vqs[index]); 2876 } else { 2877 n->vqs[index].tx_vq = 2878 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2879 virtio_net_handle_tx_bh); 2880 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2881 } 2882 2883 n->vqs[index].tx_waiting = 0; 2884 n->vqs[index].n = n; 2885 } 2886 2887 static void virtio_net_del_queue(VirtIONet *n, int index) 2888 { 2889 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2890 VirtIONetQueue *q = &n->vqs[index]; 2891 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2892 2893 qemu_purge_queued_packets(nc); 2894 2895 virtio_del_queue(vdev, index * 2); 2896 if (q->tx_timer) { 2897 timer_free(q->tx_timer); 2898 q->tx_timer = NULL; 2899 } else { 2900 qemu_bh_delete(q->tx_bh); 2901 q->tx_bh = NULL; 2902 } 2903 q->tx_waiting = 0; 2904 virtio_del_queue(vdev, index * 2 + 1); 2905 } 2906 2907 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2908 { 2909 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2910 int old_num_queues = virtio_get_num_queues(vdev); 2911 int new_num_queues = new_max_queue_pairs * 2 + 1; 2912 int i; 2913 2914 assert(old_num_queues >= 3); 2915 assert(old_num_queues % 2 == 1); 2916 2917 if (old_num_queues == new_num_queues) { 2918 return; 2919 } 2920 2921 /* 2922 * We always need to remove and add ctrl vq if 2923 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2924 * and then we only enter one of the following two loops. 2925 */ 2926 virtio_del_queue(vdev, old_num_queues - 1); 2927 2928 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2929 /* new_num_queues < old_num_queues */ 2930 virtio_net_del_queue(n, i / 2); 2931 } 2932 2933 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2934 /* new_num_queues > old_num_queues */ 2935 virtio_net_add_queue(n, i / 2); 2936 } 2937 2938 /* add ctrl_vq last */ 2939 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2940 } 2941 2942 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2943 { 2944 int max = multiqueue ? n->max_queue_pairs : 1; 2945 2946 n->multiqueue = multiqueue; 2947 virtio_net_change_num_queue_pairs(n, max); 2948 2949 virtio_net_set_queue_pairs(n); 2950 } 2951 2952 static int virtio_net_post_load_device(void *opaque, int version_id) 2953 { 2954 VirtIONet *n = opaque; 2955 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2956 int i, link_down; 2957 2958 trace_virtio_net_post_load_device(); 2959 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2960 virtio_vdev_has_feature(vdev, 2961 VIRTIO_F_VERSION_1), 2962 virtio_vdev_has_feature(vdev, 2963 VIRTIO_NET_F_HASH_REPORT)); 2964 2965 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2966 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2967 n->mac_table.in_use = 0; 2968 } 2969 2970 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2971 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2972 } 2973 2974 /* 2975 * curr_guest_offloads will be later overwritten by the 2976 * virtio_set_features_nocheck call done from the virtio_load. 2977 * Here we make sure it is preserved and restored accordingly 2978 * in the virtio_net_post_load_virtio callback. 2979 */ 2980 n->saved_guest_offloads = n->curr_guest_offloads; 2981 2982 virtio_net_set_queue_pairs(n); 2983 2984 /* Find the first multicast entry in the saved MAC filter */ 2985 for (i = 0; i < n->mac_table.in_use; i++) { 2986 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 2987 break; 2988 } 2989 } 2990 n->mac_table.first_multi = i; 2991 2992 /* nc.link_down can't be migrated, so infer link_down according 2993 * to link status bit in n->status */ 2994 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 2995 for (i = 0; i < n->max_queue_pairs; i++) { 2996 qemu_get_subqueue(n->nic, i)->link_down = link_down; 2997 } 2998 2999 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3000 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3001 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3002 QEMU_CLOCK_VIRTUAL, 3003 virtio_net_announce_timer, n); 3004 if (n->announce_timer.round) { 3005 timer_mod(n->announce_timer.tm, 3006 qemu_clock_get_ms(n->announce_timer.type)); 3007 } else { 3008 qemu_announce_timer_del(&n->announce_timer, false); 3009 } 3010 } 3011 3012 if (n->rss_data.enabled) { 3013 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 3014 if (!n->rss_data.populate_hash) { 3015 if (!virtio_net_attach_epbf_rss(n)) { 3016 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 3017 warn_report("Can't post-load eBPF RSS for vhost"); 3018 } else { 3019 warn_report("Can't post-load eBPF RSS - " 3020 "fallback to software RSS"); 3021 n->rss_data.enabled_software_rss = true; 3022 } 3023 } 3024 } 3025 3026 trace_virtio_net_rss_enable(n->rss_data.hash_types, 3027 n->rss_data.indirections_len, 3028 sizeof(n->rss_data.key)); 3029 } else { 3030 trace_virtio_net_rss_disable(); 3031 } 3032 return 0; 3033 } 3034 3035 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3036 { 3037 VirtIONet *n = VIRTIO_NET(vdev); 3038 /* 3039 * The actual needed state is now in saved_guest_offloads, 3040 * see virtio_net_post_load_device for detail. 3041 * Restore it back and apply the desired offloads. 3042 */ 3043 n->curr_guest_offloads = n->saved_guest_offloads; 3044 if (peer_has_vnet_hdr(n)) { 3045 virtio_net_apply_guest_offloads(n); 3046 } 3047 3048 return 0; 3049 } 3050 3051 /* tx_waiting field of a VirtIONetQueue */ 3052 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3053 .name = "virtio-net-queue-tx_waiting", 3054 .fields = (VMStateField[]) { 3055 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3056 VMSTATE_END_OF_LIST() 3057 }, 3058 }; 3059 3060 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3061 { 3062 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3063 } 3064 3065 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3066 { 3067 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3068 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3069 } 3070 3071 static bool mac_table_fits(void *opaque, int version_id) 3072 { 3073 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3074 } 3075 3076 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3077 { 3078 return !mac_table_fits(opaque, version_id); 3079 } 3080 3081 /* This temporary type is shared by all the WITH_TMP methods 3082 * although only some fields are used by each. 3083 */ 3084 struct VirtIONetMigTmp { 3085 VirtIONet *parent; 3086 VirtIONetQueue *vqs_1; 3087 uint16_t curr_queue_pairs_1; 3088 uint8_t has_ufo; 3089 uint32_t has_vnet_hdr; 3090 }; 3091 3092 /* The 2nd and subsequent tx_waiting flags are loaded later than 3093 * the 1st entry in the queue_pairs and only if there's more than one 3094 * entry. We use the tmp mechanism to calculate a temporary 3095 * pointer and count and also validate the count. 3096 */ 3097 3098 static int virtio_net_tx_waiting_pre_save(void *opaque) 3099 { 3100 struct VirtIONetMigTmp *tmp = opaque; 3101 3102 tmp->vqs_1 = tmp->parent->vqs + 1; 3103 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3104 if (tmp->parent->curr_queue_pairs == 0) { 3105 tmp->curr_queue_pairs_1 = 0; 3106 } 3107 3108 return 0; 3109 } 3110 3111 static int virtio_net_tx_waiting_pre_load(void *opaque) 3112 { 3113 struct VirtIONetMigTmp *tmp = opaque; 3114 3115 /* Reuse the pointer setup from save */ 3116 virtio_net_tx_waiting_pre_save(opaque); 3117 3118 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3119 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3120 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3121 3122 return -EINVAL; 3123 } 3124 3125 return 0; /* all good */ 3126 } 3127 3128 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3129 .name = "virtio-net-tx_waiting", 3130 .pre_load = virtio_net_tx_waiting_pre_load, 3131 .pre_save = virtio_net_tx_waiting_pre_save, 3132 .fields = (VMStateField[]) { 3133 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3134 curr_queue_pairs_1, 3135 vmstate_virtio_net_queue_tx_waiting, 3136 struct VirtIONetQueue), 3137 VMSTATE_END_OF_LIST() 3138 }, 3139 }; 3140 3141 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3142 * flag set we need to check that we have it 3143 */ 3144 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3145 { 3146 struct VirtIONetMigTmp *tmp = opaque; 3147 3148 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3149 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3150 return -EINVAL; 3151 } 3152 3153 return 0; 3154 } 3155 3156 static int virtio_net_ufo_pre_save(void *opaque) 3157 { 3158 struct VirtIONetMigTmp *tmp = opaque; 3159 3160 tmp->has_ufo = tmp->parent->has_ufo; 3161 3162 return 0; 3163 } 3164 3165 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3166 .name = "virtio-net-ufo", 3167 .post_load = virtio_net_ufo_post_load, 3168 .pre_save = virtio_net_ufo_pre_save, 3169 .fields = (VMStateField[]) { 3170 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3171 VMSTATE_END_OF_LIST() 3172 }, 3173 }; 3174 3175 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3176 * flag set we need to check that we have it 3177 */ 3178 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3179 { 3180 struct VirtIONetMigTmp *tmp = opaque; 3181 3182 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3183 error_report("virtio-net: saved image requires vnet_hdr=on"); 3184 return -EINVAL; 3185 } 3186 3187 return 0; 3188 } 3189 3190 static int virtio_net_vnet_pre_save(void *opaque) 3191 { 3192 struct VirtIONetMigTmp *tmp = opaque; 3193 3194 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3195 3196 return 0; 3197 } 3198 3199 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3200 .name = "virtio-net-vnet", 3201 .post_load = virtio_net_vnet_post_load, 3202 .pre_save = virtio_net_vnet_pre_save, 3203 .fields = (VMStateField[]) { 3204 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3205 VMSTATE_END_OF_LIST() 3206 }, 3207 }; 3208 3209 static bool virtio_net_rss_needed(void *opaque) 3210 { 3211 return VIRTIO_NET(opaque)->rss_data.enabled; 3212 } 3213 3214 static const VMStateDescription vmstate_virtio_net_rss = { 3215 .name = "virtio-net-device/rss", 3216 .version_id = 1, 3217 .minimum_version_id = 1, 3218 .needed = virtio_net_rss_needed, 3219 .fields = (VMStateField[]) { 3220 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3221 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3222 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3223 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3224 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3225 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3226 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3227 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3228 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3229 rss_data.indirections_len, 0, 3230 vmstate_info_uint16, uint16_t), 3231 VMSTATE_END_OF_LIST() 3232 }, 3233 }; 3234 3235 static const VMStateDescription vmstate_virtio_net_device = { 3236 .name = "virtio-net-device", 3237 .version_id = VIRTIO_NET_VM_VERSION, 3238 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3239 .post_load = virtio_net_post_load_device, 3240 .fields = (VMStateField[]) { 3241 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3242 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3243 vmstate_virtio_net_queue_tx_waiting, 3244 VirtIONetQueue), 3245 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3246 VMSTATE_UINT16(status, VirtIONet), 3247 VMSTATE_UINT8(promisc, VirtIONet), 3248 VMSTATE_UINT8(allmulti, VirtIONet), 3249 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3250 3251 /* Guarded pair: If it fits we load it, else we throw it away 3252 * - can happen if source has a larger MAC table.; post-load 3253 * sets flags in this case. 3254 */ 3255 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3256 0, mac_table_fits, mac_table.in_use, 3257 ETH_ALEN), 3258 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3259 mac_table.in_use, ETH_ALEN), 3260 3261 /* Note: This is an array of uint32's that's always been saved as a 3262 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3263 * but based on the uint. 3264 */ 3265 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3266 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3267 vmstate_virtio_net_has_vnet), 3268 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3269 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3270 VMSTATE_UINT8(alluni, VirtIONet), 3271 VMSTATE_UINT8(nomulti, VirtIONet), 3272 VMSTATE_UINT8(nouni, VirtIONet), 3273 VMSTATE_UINT8(nobcast, VirtIONet), 3274 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3275 vmstate_virtio_net_has_ufo), 3276 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3277 vmstate_info_uint16_equal, uint16_t), 3278 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3279 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3280 vmstate_virtio_net_tx_waiting), 3281 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3282 has_ctrl_guest_offloads), 3283 VMSTATE_END_OF_LIST() 3284 }, 3285 .subsections = (const VMStateDescription * []) { 3286 &vmstate_virtio_net_rss, 3287 NULL 3288 } 3289 }; 3290 3291 static NetClientInfo net_virtio_info = { 3292 .type = NET_CLIENT_DRIVER_NIC, 3293 .size = sizeof(NICState), 3294 .can_receive = virtio_net_can_receive, 3295 .receive = virtio_net_receive, 3296 .link_status_changed = virtio_net_set_link_status, 3297 .query_rx_filter = virtio_net_query_rxfilter, 3298 .announce = virtio_net_announce, 3299 }; 3300 3301 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3302 { 3303 VirtIONet *n = VIRTIO_NET(vdev); 3304 NetClientState *nc; 3305 assert(n->vhost_started); 3306 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3307 /* Must guard against invalid features and bogus queue index 3308 * from being set by malicious guest, or penetrated through 3309 * buggy migration stream. 3310 */ 3311 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3312 qemu_log_mask(LOG_GUEST_ERROR, 3313 "%s: bogus vq index ignored\n", __func__); 3314 return false; 3315 } 3316 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3317 } else { 3318 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3319 } 3320 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3321 } 3322 3323 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3324 bool mask) 3325 { 3326 VirtIONet *n = VIRTIO_NET(vdev); 3327 NetClientState *nc; 3328 assert(n->vhost_started); 3329 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3330 /* Must guard against invalid features and bogus queue index 3331 * from being set by malicious guest, or penetrated through 3332 * buggy migration stream. 3333 */ 3334 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3335 qemu_log_mask(LOG_GUEST_ERROR, 3336 "%s: bogus vq index ignored\n", __func__); 3337 return; 3338 } 3339 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3340 } else { 3341 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3342 } 3343 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), 3344 vdev, idx, mask); 3345 } 3346 3347 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3348 { 3349 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3350 3351 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3352 } 3353 3354 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3355 const char *type) 3356 { 3357 /* 3358 * The name can be NULL, the netclient name will be type.x. 3359 */ 3360 assert(type != NULL); 3361 3362 g_free(n->netclient_name); 3363 g_free(n->netclient_type); 3364 n->netclient_name = g_strdup(name); 3365 n->netclient_type = g_strdup(type); 3366 } 3367 3368 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3369 { 3370 HotplugHandler *hotplug_ctrl; 3371 PCIDevice *pci_dev; 3372 Error *err = NULL; 3373 3374 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3375 if (hotplug_ctrl) { 3376 pci_dev = PCI_DEVICE(dev); 3377 pci_dev->partially_hotplugged = true; 3378 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3379 if (err) { 3380 error_report_err(err); 3381 return false; 3382 } 3383 } else { 3384 return false; 3385 } 3386 return true; 3387 } 3388 3389 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3390 Error **errp) 3391 { 3392 Error *err = NULL; 3393 HotplugHandler *hotplug_ctrl; 3394 PCIDevice *pdev = PCI_DEVICE(dev); 3395 BusState *primary_bus; 3396 3397 if (!pdev->partially_hotplugged) { 3398 return true; 3399 } 3400 primary_bus = dev->parent_bus; 3401 if (!primary_bus) { 3402 error_setg(errp, "virtio_net: couldn't find primary bus"); 3403 return false; 3404 } 3405 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3406 qatomic_set(&n->failover_primary_hidden, false); 3407 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3408 if (hotplug_ctrl) { 3409 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3410 if (err) { 3411 goto out; 3412 } 3413 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3414 } 3415 pdev->partially_hotplugged = false; 3416 3417 out: 3418 error_propagate(errp, err); 3419 return !err; 3420 } 3421 3422 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3423 { 3424 bool should_be_hidden; 3425 Error *err = NULL; 3426 DeviceState *dev = failover_find_primary_device(n); 3427 3428 if (!dev) { 3429 return; 3430 } 3431 3432 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3433 3434 if (migration_in_setup(s) && !should_be_hidden) { 3435 if (failover_unplug_primary(n, dev)) { 3436 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3437 qapi_event_send_unplug_primary(dev->id); 3438 qatomic_set(&n->failover_primary_hidden, true); 3439 } else { 3440 warn_report("couldn't unplug primary device"); 3441 } 3442 } else if (migration_has_failed(s)) { 3443 /* We already unplugged the device let's plug it back */ 3444 if (!failover_replug_primary(n, dev, &err)) { 3445 if (err) { 3446 error_report_err(err); 3447 } 3448 } 3449 } 3450 } 3451 3452 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3453 { 3454 MigrationState *s = data; 3455 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3456 virtio_net_handle_migration_primary(n, s); 3457 } 3458 3459 static bool failover_hide_primary_device(DeviceListener *listener, 3460 const QDict *device_opts, 3461 bool from_json, 3462 Error **errp) 3463 { 3464 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3465 const char *standby_id; 3466 3467 if (!device_opts) { 3468 return false; 3469 } 3470 3471 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3472 return false; 3473 } 3474 3475 if (!qdict_haskey(device_opts, "id")) { 3476 error_setg(errp, "Device with failover_pair_id needs to have id"); 3477 return false; 3478 } 3479 3480 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3481 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3482 return false; 3483 } 3484 3485 /* 3486 * The hide helper can be called several times for a given device. 3487 * Check there is only one primary for a virtio-net device but 3488 * don't duplicate the qdict several times if it's called for the same 3489 * device. 3490 */ 3491 if (n->primary_opts) { 3492 const char *old, *new; 3493 /* devices with failover_pair_id always have an id */ 3494 old = qdict_get_str(n->primary_opts, "id"); 3495 new = qdict_get_str(device_opts, "id"); 3496 if (strcmp(old, new) != 0) { 3497 error_setg(errp, "Cannot attach more than one primary device to " 3498 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3499 return false; 3500 } 3501 } else { 3502 n->primary_opts = qdict_clone_shallow(device_opts); 3503 n->primary_opts_from_json = from_json; 3504 } 3505 3506 /* failover_primary_hidden is set during feature negotiation */ 3507 return qatomic_read(&n->failover_primary_hidden); 3508 } 3509 3510 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3511 { 3512 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3513 VirtIONet *n = VIRTIO_NET(dev); 3514 NetClientState *nc; 3515 int i; 3516 3517 if (n->net_conf.mtu) { 3518 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3519 } 3520 3521 if (n->net_conf.duplex_str) { 3522 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3523 n->net_conf.duplex = DUPLEX_HALF; 3524 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3525 n->net_conf.duplex = DUPLEX_FULL; 3526 } else { 3527 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3528 return; 3529 } 3530 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3531 } else { 3532 n->net_conf.duplex = DUPLEX_UNKNOWN; 3533 } 3534 3535 if (n->net_conf.speed < SPEED_UNKNOWN) { 3536 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3537 return; 3538 } 3539 if (n->net_conf.speed >= 0) { 3540 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3541 } 3542 3543 if (n->failover) { 3544 n->primary_listener.hide_device = failover_hide_primary_device; 3545 qatomic_set(&n->failover_primary_hidden, true); 3546 device_listener_register(&n->primary_listener); 3547 n->migration_state.notify = virtio_net_migration_state_notifier; 3548 add_migration_state_change_notifier(&n->migration_state); 3549 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3550 } 3551 3552 virtio_net_set_config_size(n, n->host_features); 3553 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3554 3555 /* 3556 * We set a lower limit on RX queue size to what it always was. 3557 * Guests that want a smaller ring can always resize it without 3558 * help from us (using virtio 1 and up). 3559 */ 3560 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3561 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3562 !is_power_of_2(n->net_conf.rx_queue_size)) { 3563 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3564 "must be a power of 2 between %d and %d.", 3565 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3566 VIRTQUEUE_MAX_SIZE); 3567 virtio_cleanup(vdev); 3568 return; 3569 } 3570 3571 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3572 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3573 !is_power_of_2(n->net_conf.tx_queue_size)) { 3574 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3575 "must be a power of 2 between %d and %d", 3576 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3577 VIRTQUEUE_MAX_SIZE); 3578 virtio_cleanup(vdev); 3579 return; 3580 } 3581 3582 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3583 3584 /* 3585 * Figure out the datapath queue pairs since the backend could 3586 * provide control queue via peers as well. 3587 */ 3588 if (n->nic_conf.peers.queues) { 3589 for (i = 0; i < n->max_ncs; i++) { 3590 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3591 ++n->max_queue_pairs; 3592 } 3593 } 3594 } 3595 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3596 3597 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3598 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3599 "must be a positive integer less than %d.", 3600 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3601 virtio_cleanup(vdev); 3602 return; 3603 } 3604 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3605 n->curr_queue_pairs = 1; 3606 n->tx_timeout = n->net_conf.txtimer; 3607 3608 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3609 && strcmp(n->net_conf.tx, "bh")) { 3610 warn_report("virtio-net: " 3611 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3612 n->net_conf.tx); 3613 error_printf("Defaulting to \"bh\""); 3614 } 3615 3616 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3617 n->net_conf.tx_queue_size); 3618 3619 for (i = 0; i < n->max_queue_pairs; i++) { 3620 virtio_net_add_queue(n, i); 3621 } 3622 3623 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3624 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3625 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3626 n->status = VIRTIO_NET_S_LINK_UP; 3627 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3628 QEMU_CLOCK_VIRTUAL, 3629 virtio_net_announce_timer, n); 3630 n->announce_timer.round = 0; 3631 3632 if (n->netclient_type) { 3633 /* 3634 * Happen when virtio_net_set_netclient_name has been called. 3635 */ 3636 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3637 n->netclient_type, n->netclient_name, n); 3638 } else { 3639 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3640 object_get_typename(OBJECT(dev)), dev->id, n); 3641 } 3642 3643 for (i = 0; i < n->max_queue_pairs; i++) { 3644 n->nic->ncs[i].do_not_pad = true; 3645 } 3646 3647 peer_test_vnet_hdr(n); 3648 if (peer_has_vnet_hdr(n)) { 3649 for (i = 0; i < n->max_queue_pairs; i++) { 3650 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3651 } 3652 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3653 } else { 3654 n->host_hdr_len = 0; 3655 } 3656 3657 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3658 3659 n->vqs[0].tx_waiting = 0; 3660 n->tx_burst = n->net_conf.txburst; 3661 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3662 n->promisc = 1; /* for compatibility */ 3663 3664 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3665 3666 n->vlans = g_malloc0(MAX_VLAN >> 3); 3667 3668 nc = qemu_get_queue(n->nic); 3669 nc->rxfilter_notify_enabled = 1; 3670 3671 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3672 struct virtio_net_config netcfg = {}; 3673 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3674 vhost_net_set_config(get_vhost_net(nc->peer), 3675 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3676 } 3677 QTAILQ_INIT(&n->rsc_chains); 3678 n->qdev = dev; 3679 3680 net_rx_pkt_init(&n->rx_pkt, false); 3681 3682 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3683 virtio_net_load_ebpf(n); 3684 } 3685 } 3686 3687 static void virtio_net_device_unrealize(DeviceState *dev) 3688 { 3689 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3690 VirtIONet *n = VIRTIO_NET(dev); 3691 int i, max_queue_pairs; 3692 3693 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3694 virtio_net_unload_ebpf(n); 3695 } 3696 3697 /* This will stop vhost backend if appropriate. */ 3698 virtio_net_set_status(vdev, 0); 3699 3700 g_free(n->netclient_name); 3701 n->netclient_name = NULL; 3702 g_free(n->netclient_type); 3703 n->netclient_type = NULL; 3704 3705 g_free(n->mac_table.macs); 3706 g_free(n->vlans); 3707 3708 if (n->failover) { 3709 qobject_unref(n->primary_opts); 3710 device_listener_unregister(&n->primary_listener); 3711 remove_migration_state_change_notifier(&n->migration_state); 3712 } else { 3713 assert(n->primary_opts == NULL); 3714 } 3715 3716 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3717 for (i = 0; i < max_queue_pairs; i++) { 3718 virtio_net_del_queue(n, i); 3719 } 3720 /* delete also control vq */ 3721 virtio_del_queue(vdev, max_queue_pairs * 2); 3722 qemu_announce_timer_del(&n->announce_timer, false); 3723 g_free(n->vqs); 3724 qemu_del_nic(n->nic); 3725 virtio_net_rsc_cleanup(n); 3726 g_free(n->rss_data.indirections_table); 3727 net_rx_pkt_uninit(n->rx_pkt); 3728 virtio_cleanup(vdev); 3729 } 3730 3731 static void virtio_net_instance_init(Object *obj) 3732 { 3733 VirtIONet *n = VIRTIO_NET(obj); 3734 3735 /* 3736 * The default config_size is sizeof(struct virtio_net_config). 3737 * Can be overriden with virtio_net_set_config_size. 3738 */ 3739 n->config_size = sizeof(struct virtio_net_config); 3740 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3741 "bootindex", "/ethernet-phy@0", 3742 DEVICE(n)); 3743 3744 ebpf_rss_init(&n->ebpf_rss); 3745 } 3746 3747 static int virtio_net_pre_save(void *opaque) 3748 { 3749 VirtIONet *n = opaque; 3750 3751 /* At this point, backend must be stopped, otherwise 3752 * it might keep writing to memory. */ 3753 assert(!n->vhost_started); 3754 3755 return 0; 3756 } 3757 3758 static bool primary_unplug_pending(void *opaque) 3759 { 3760 DeviceState *dev = opaque; 3761 DeviceState *primary; 3762 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3763 VirtIONet *n = VIRTIO_NET(vdev); 3764 3765 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3766 return false; 3767 } 3768 primary = failover_find_primary_device(n); 3769 return primary ? primary->pending_deleted_event : false; 3770 } 3771 3772 static bool dev_unplug_pending(void *opaque) 3773 { 3774 DeviceState *dev = opaque; 3775 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3776 3777 return vdc->primary_unplug_pending(dev); 3778 } 3779 3780 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3781 { 3782 VirtIONet *n = VIRTIO_NET(vdev); 3783 NetClientState *nc = qemu_get_queue(n->nic); 3784 struct vhost_net *net = get_vhost_net(nc->peer); 3785 return &net->dev; 3786 } 3787 3788 static const VMStateDescription vmstate_virtio_net = { 3789 .name = "virtio-net", 3790 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3791 .version_id = VIRTIO_NET_VM_VERSION, 3792 .fields = (VMStateField[]) { 3793 VMSTATE_VIRTIO_DEVICE, 3794 VMSTATE_END_OF_LIST() 3795 }, 3796 .pre_save = virtio_net_pre_save, 3797 .dev_unplug_pending = dev_unplug_pending, 3798 }; 3799 3800 static Property virtio_net_properties[] = { 3801 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3802 VIRTIO_NET_F_CSUM, true), 3803 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3804 VIRTIO_NET_F_GUEST_CSUM, true), 3805 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3806 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3807 VIRTIO_NET_F_GUEST_TSO4, true), 3808 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3809 VIRTIO_NET_F_GUEST_TSO6, true), 3810 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3811 VIRTIO_NET_F_GUEST_ECN, true), 3812 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3813 VIRTIO_NET_F_GUEST_UFO, true), 3814 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3815 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3816 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3817 VIRTIO_NET_F_HOST_TSO4, true), 3818 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3819 VIRTIO_NET_F_HOST_TSO6, true), 3820 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3821 VIRTIO_NET_F_HOST_ECN, true), 3822 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3823 VIRTIO_NET_F_HOST_UFO, true), 3824 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3825 VIRTIO_NET_F_MRG_RXBUF, true), 3826 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3827 VIRTIO_NET_F_STATUS, true), 3828 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3829 VIRTIO_NET_F_CTRL_VQ, true), 3830 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3831 VIRTIO_NET_F_CTRL_RX, true), 3832 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3833 VIRTIO_NET_F_CTRL_VLAN, true), 3834 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3835 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3836 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3837 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3838 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3839 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3840 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3841 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3842 VIRTIO_NET_F_RSS, false), 3843 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3844 VIRTIO_NET_F_HASH_REPORT, false), 3845 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3846 VIRTIO_NET_F_RSC_EXT, false), 3847 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3848 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3849 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3850 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3851 TX_TIMER_INTERVAL), 3852 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3853 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3854 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3855 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3856 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3857 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3858 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3859 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3860 true), 3861 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3862 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3863 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3864 DEFINE_PROP_END_OF_LIST(), 3865 }; 3866 3867 static void virtio_net_class_init(ObjectClass *klass, void *data) 3868 { 3869 DeviceClass *dc = DEVICE_CLASS(klass); 3870 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3871 3872 device_class_set_props(dc, virtio_net_properties); 3873 dc->vmsd = &vmstate_virtio_net; 3874 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3875 vdc->realize = virtio_net_device_realize; 3876 vdc->unrealize = virtio_net_device_unrealize; 3877 vdc->get_config = virtio_net_get_config; 3878 vdc->set_config = virtio_net_set_config; 3879 vdc->get_features = virtio_net_get_features; 3880 vdc->set_features = virtio_net_set_features; 3881 vdc->bad_features = virtio_net_bad_features; 3882 vdc->reset = virtio_net_reset; 3883 vdc->queue_reset = virtio_net_queue_reset; 3884 vdc->queue_enable = virtio_net_queue_enable; 3885 vdc->set_status = virtio_net_set_status; 3886 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3887 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3888 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3889 vdc->post_load = virtio_net_post_load_virtio; 3890 vdc->vmsd = &vmstate_virtio_net_device; 3891 vdc->primary_unplug_pending = primary_unplug_pending; 3892 vdc->get_vhost = virtio_net_get_vhost; 3893 } 3894 3895 static const TypeInfo virtio_net_info = { 3896 .name = TYPE_VIRTIO_NET, 3897 .parent = TYPE_VIRTIO_DEVICE, 3898 .instance_size = sizeof(VirtIONet), 3899 .instance_init = virtio_net_instance_init, 3900 .class_init = virtio_net_class_init, 3901 }; 3902 3903 static void virtio_register_types(void) 3904 { 3905 type_register_static(&virtio_net_info); 3906 } 3907 3908 type_init(virtio_register_types) 3909