/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qobject/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "system/system.h"
#include "system/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "system/qtest.h"

#define VIRTIO_NET_VM_VERSION 11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE 8   /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG        0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH  0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE 32  /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 n->rss_data.supported_hash_types);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers.  If the backend
         * can't do it, we fall back on fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
    return 0;
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

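    /* The multicast policy summary mirrors the unicast logic above. */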
    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
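        /* Legacy (pre-VIRTIO 1.0) layout: the header size depends only on
         * whether mergeable RX buffers were negotiated. */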
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];
    struct vhost_net *net;

    if (!peer) {
        goto default_value;
    }

    net = get_vhost_net(peer);

    if (!net || !net->max_tx_queue_size) {
        goto default_value;
    }

    return net->max_tx_queue_size;

default_value:
    return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN) |
        (1ULL << VIRTIO_NET_F_GUEST_UFO) |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @opaque: FailoverId to setup
 * @opts: opts for device we are handling
 * @errp: returns an error if this function fails
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 * @errp: returns an error if this function fails
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
Make " 865 "sure primary device has parameter" 866 " failover_pair_id=%s\n", n->netclient_name); 867 return; 868 } 869 870 dev = qdev_device_add_from_qdict(n->primary_opts, 871 n->primary_opts_from_json, 872 &err); 873 if (err) { 874 qobject_unref(n->primary_opts); 875 n->primary_opts = NULL; 876 } else { 877 object_unref(OBJECT(dev)); 878 } 879 error_propagate(errp, err); 880 } 881 882 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 883 { 884 VirtIONet *n = VIRTIO_NET(vdev); 885 Error *err = NULL; 886 int i; 887 888 if (n->mtu_bypass_backend && 889 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 890 features &= ~(1ULL << VIRTIO_NET_F_MTU); 891 } 892 893 virtio_net_set_multiqueue(n, 894 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 895 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 896 897 virtio_net_set_mrg_rx_bufs(n, 898 virtio_has_feature(features, 899 VIRTIO_NET_F_MRG_RXBUF), 900 virtio_has_feature(features, 901 VIRTIO_F_VERSION_1), 902 virtio_has_feature(features, 903 VIRTIO_NET_F_HASH_REPORT)); 904 905 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 906 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 907 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 908 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 909 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 910 911 if (n->has_vnet_hdr) { 912 n->curr_guest_offloads = 913 virtio_net_guest_offloads_by_features(features); 914 virtio_net_apply_guest_offloads(n); 915 } 916 917 for (i = 0; i < n->max_queue_pairs; i++) { 918 NetClientState *nc = qemu_get_subqueue(n->nic, i); 919 920 if (!get_vhost_net(nc->peer)) { 921 continue; 922 } 923 vhost_net_ack_features(get_vhost_net(nc->peer), features); 924 925 /* 926 * keep acked_features in NetVhostUserState up-to-date so it 927 * can't miss any features configured by guest virtio driver. 
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
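    /*
     * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two back-to-back
     * virtio_net_ctrl_mac tables: the unicast list first, then the
     * multicast list.  Parse both into a scratch buffer and only
     * commit the result once every length check has passed.
     */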
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}

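/* Translate the device's RSS state into the layout consumed by the eBPF
 * steering program. */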
static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->runtime_hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.peer_hash_available) {
        return;
    }

    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.runtime_hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    int ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}

static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        return true;
    }

    trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);

    /*
     * If the user explicitly gave QEMU RSS FDs to use, then
     * failing to use them must be considered a fatal
     * error.  If no RSS FDs were provided, QEMU is trying
     * eBPF on a "best effort" basis only, so report a
     * warning and allow fallback to software RSS.
     */
    if (n->ebpf_rss_fds) {
        return virtio_net_load_ebpf_fds(n, errp);
    }

    ebpf_rss_load(&n->ebpf_rss, &error_warn);
    return true;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
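    /* For VIRTIO_NET_CTRL_MQ_HASH_CONFIG (do_rss == false) the command does
     * not carry a queue count, so the current value is left untouched. */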
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.runtime_hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.runtime_hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: we're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    int opaque;
    unsigned int in_bytes;
    VirtIONet *n = q->n;

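    /* With mergeable RX buffers a non-empty ring is not enough: we also
     * need sufficient total space, so keep checking the byte counts. */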
    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
                                           bufsize, 0);
        /* Buffers are sufficient, disable notification */
        if (bufsize <= in_bytes) {
            break;
        }

        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
            /* Guest has added some buffers, try again */
            continue;
        } else {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);

    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
                       sizeof(struct udp_header);

    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size >= csum_size && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

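        /* Fall back to an exact match against the multicast table entries. */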
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    return 0xff;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size,
                                  struct virtio_net_hdr_v1_hash *hdr)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.runtime_hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
            hdr->hash_report = 0;
        }
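        /* No usable hash for this packet: steer it to the default queue if
         * redirection is on, otherwise leave it on the current queue. */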
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        hdr->hash_value = hash;
        hdr->hash_report = reports[net_hash_type];
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE];
    QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_v1_hash extra_hdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    memset(&extra_hdr, 0, sizeof(extra_hdr));

    if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
        if (index >= 0) {
            nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
        }
    }

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    q = virtio_net_get_subqueue(nc);

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(extra_hdr), hdr.num_buffers),
                                    sizeof(extra_hdr.hdr.num_buffers));
            } else {
                extra_hdr.hdr.num_buffers = cpu_to_le16(1);
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = offsetof(typeof(extra_hdr), hash_value);
                iov_from_buf(sg, elem->in_num, offset,
                             (char *)&extra_hdr + offset,
                             sizeof(extra_hdr.hash_value) +
                             sizeof(extra_hdr.hash_report));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &extra_hdr.hdr.num_buffers,
                     sizeof extra_hdr.hdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size);
}

/*
 * Accessors to read and write the IP packet data length field. This
 * is a potentially unaligned network-byte-order 16 bit unsigned integer
 * pointed to by unit->ip_len.
 */
static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
{
    return lduw_be_p(unit->ip_plen);
}

static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
{
    stw_be_p(unit->ip_plen, l);
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
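            /* Tag the coalesced frame with the matching GSO type so the
             * guest driver accounts it as an RSC aggregate. */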
2068 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2069 } else { 2070 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2071 } 2072 } 2073 2074 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2075 QTAILQ_REMOVE(&chain->buffers, seg, next); 2076 g_free(seg->buf); 2077 g_free(seg); 2078 2079 return ret; 2080 } 2081 2082 static void virtio_net_rsc_purge(void *opq) 2083 { 2084 VirtioNetRscSeg *seg, *rn; 2085 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2086 2087 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2088 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2089 chain->stat.purge_failed++; 2090 continue; 2091 } 2092 } 2093 2094 chain->stat.timer++; 2095 if (!QTAILQ_EMPTY(&chain->buffers)) { 2096 timer_mod(chain->drain_timer, 2097 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); 2098 } 2099 } 2100 2101 static void virtio_net_rsc_cleanup(VirtIONet *n) 2102 { 2103 VirtioNetRscChain *chain, *rn_chain; 2104 VirtioNetRscSeg *seg, *rn_seg; 2105 2106 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2107 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2108 QTAILQ_REMOVE(&chain->buffers, seg, next); 2109 g_free(seg->buf); 2110 g_free(seg); 2111 } 2112 2113 timer_free(chain->drain_timer); 2114 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2115 g_free(chain); 2116 } 2117 } 2118 2119 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2120 NetClientState *nc, 2121 const uint8_t *buf, size_t size) 2122 { 2123 uint16_t hdr_len; 2124 VirtioNetRscSeg *seg; 2125 2126 hdr_len = chain->n->guest_hdr_len; 2127 seg = g_new(VirtioNetRscSeg, 1); 2128 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2129 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2130 memcpy(seg->buf, buf, size); 2131 seg->size = size; 2132 seg->packets = 1; 2133 seg->dup_ack = 0; 2134 seg->is_coalesced = 0; 2135 seg->nc = nc; 2136 2137 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2138 chain->stat.cache++; 2139 2140 switch (chain->proto) { 2141 case ETH_P_IP: 2142 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2143 break; 2144 case ETH_P_IPV6: 2145 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2146 break; 2147 default: 2148 g_assert_not_reached(); 2149 } 2150 } 2151 2152 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2153 VirtioNetRscSeg *seg, 2154 const uint8_t *buf, 2155 struct tcp_header *n_tcp, 2156 struct tcp_header *o_tcp) 2157 { 2158 uint32_t nack, oack; 2159 uint16_t nwin, owin; 2160 2161 nack = htonl(n_tcp->th_ack); 2162 nwin = htons(n_tcp->th_win); 2163 oack = htonl(o_tcp->th_ack); 2164 owin = htons(o_tcp->th_win); 2165 2166 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2167 chain->stat.ack_out_of_win++; 2168 return RSC_FINAL; 2169 } else if (nack == oack) { 2170 /* duplicated ack or window probe */ 2171 if (nwin == owin) { 2172 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2173 chain->stat.dup_ack++; 2174 return RSC_FINAL; 2175 } else { 2176 /* Coalesce window update */ 2177 o_tcp->th_win = n_tcp->th_win; 2178 chain->stat.win_update++; 2179 return RSC_COALESCE; 2180 } 2181 } else { 2182 /* pure ack, go to 'C', finalize*/ 2183 chain->stat.pure_ack++; 2184 return RSC_FINAL; 2185 } 2186 } 2187 2188 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2189 VirtioNetRscSeg *seg, 2190 const uint8_t *buf, 2191 VirtioNetRscUnit *n_unit) 2192 { 2193 void *data; 2194 uint16_t o_ip_len; 2195 uint32_t nseq, oseq; 2196 VirtioNetRscUnit *o_unit; 2197 2198 o_unit = &seg->unit; 2199 
    o_ip_len = read_unit_ip_len(o_unit);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);

        /* Carry the 'PUSH' flag over: the WHQL test guide says 'PUSH' can be
           coalesced for a Windows guest, while this may change the behavior
           for a Linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
/* Packets with the 'SYN' flag bypass coalescing; packets with any other
 * control flag are sent only after the chain is drained, to prevent
 * out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain the buffered data of a connection; this is done to avoid
 * out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol are checked in this one field */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret >= n->tx_burst) {
        /*
         * the flush has been stopped by tx_burst;
         * we will not receive a notification for the
         * remaining part, so re-schedule
         */
        virtio_queue_set_notification(q->tx_vq, 0);
        if (q->tx_bh) {
            replay_bh_schedule_event(q->tx_bh);
        } else {
            timer_mod(q->tx_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        }
        q->tx_waiting = 1;
    }
}
/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr vhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            goto detach;
        }

        if (n->needs_vnet_hdr_swap) {
            if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
                sizeof(vhdr)) {
                virtio_error(vdev, "virtio-net header incorrect");
                goto detach;
            }
            virtio_net_hdr_swap(vdev, &vhdr);
            sg2[0].iov_base = &vhdr;
            sg2[0].iov_len = sizeof(vhdr);
            out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
                               sizeof(vhdr), -1);
            if (out_num == VIRTQUEUE_MAX_SIZE) {
                goto drop;
            }
            out_num += 1;
            out_sg = sg2;
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header is invalid");
                goto detach;
            }
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;

            if (out_num < 1) {
                virtio_error(vdev, "virtio-net nothing to send");
                goto detach;
            }
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;

detach:
    virtqueue_detach_element(q->tx_vq, elem, 0);
    g_free(elem);
    return -EINVAL;
}

static void virtio_net_tx_timer(void *opaque);
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(n->vhost_started)) {
        return;
    }

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    replay_bh_schedule_event(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        replay_bh_schedule_event(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        replay_bh_schedule_event(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
                                                  &DEVICE(vdev)->mem_reentrancy_guard);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queue_pairs : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max * 2 + 1);

    virtio_net_set_queue_pairs(n);
}

static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n)
{
    virtio_net_change_num_queues(VIRTIO_NET(vdev), n);

    return 0;
}
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    uint32_t supported_hash_types = n->rss_data.supported_hash_types;
    uint32_t peer_hash_types = n->rss_data.peer_hash_types;
    bool use_own_hash =
        (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) ==
        supported_hash_types;
    bool use_peer_hash =
        n->rss_data.peer_hash_available &&
        (supported_hash_types & peer_hash_types) == supported_hash_types;

    /* First, sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        if (!use_own_hash) {
            virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
            virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
        } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) {
            virtio_net_load_ebpf(n, errp);
        }

        return features;
    }

    if (!use_peer_hash) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);

        if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
            if (!virtio_net_load_ebpf(n, errp)) {
                return features;
            }

            virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
        }
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set even
     * when the backend cannot enable it. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * refuse to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to notify its new location even with vDPA devices
     * that do not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    virtio_net_commit_rss_config(n);
    return 0;
}

static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
3179 */ 3180 n->curr_guest_offloads = n->saved_guest_offloads; 3181 if (peer_has_vnet_hdr(n)) { 3182 virtio_net_apply_guest_offloads(n); 3183 } 3184 3185 return 0; 3186 } 3187 3188 /* tx_waiting field of a VirtIONetQueue */ 3189 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3190 .name = "virtio-net-queue-tx_waiting", 3191 .fields = (const VMStateField[]) { 3192 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3193 VMSTATE_END_OF_LIST() 3194 }, 3195 }; 3196 3197 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3198 { 3199 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3200 } 3201 3202 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3203 { 3204 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3205 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3206 } 3207 3208 static bool mac_table_fits(void *opaque, int version_id) 3209 { 3210 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3211 } 3212 3213 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3214 { 3215 return !mac_table_fits(opaque, version_id); 3216 } 3217 3218 /* This temporary type is shared by all the WITH_TMP methods 3219 * although only some fields are used by each. 3220 */ 3221 struct VirtIONetMigTmp { 3222 VirtIONet *parent; 3223 VirtIONetQueue *vqs_1; 3224 uint16_t curr_queue_pairs_1; 3225 uint8_t has_ufo; 3226 uint32_t has_vnet_hdr; 3227 }; 3228 3229 /* The 2nd and subsequent tx_waiting flags are loaded later than 3230 * the 1st entry in the queue_pairs and only if there's more than one 3231 * entry. We use the tmp mechanism to calculate a temporary 3232 * pointer and count and also validate the count. 3233 */ 3234 3235 static int virtio_net_tx_waiting_pre_save(void *opaque) 3236 { 3237 struct VirtIONetMigTmp *tmp = opaque; 3238 3239 tmp->vqs_1 = tmp->parent->vqs + 1; 3240 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3241 if (tmp->parent->curr_queue_pairs == 0) { 3242 tmp->curr_queue_pairs_1 = 0; 3243 } 3244 3245 return 0; 3246 } 3247 3248 static int virtio_net_tx_waiting_pre_load(void *opaque) 3249 { 3250 struct VirtIONetMigTmp *tmp = opaque; 3251 3252 /* Reuse the pointer setup from save */ 3253 virtio_net_tx_waiting_pre_save(opaque); 3254 3255 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3256 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3257 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3258 3259 return -EINVAL; 3260 } 3261 3262 return 0; /* all good */ 3263 } 3264 3265 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3266 .name = "virtio-net-tx_waiting", 3267 .pre_load = virtio_net_tx_waiting_pre_load, 3268 .pre_save = virtio_net_tx_waiting_pre_save, 3269 .fields = (const VMStateField[]) { 3270 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3271 curr_queue_pairs_1, 3272 vmstate_virtio_net_queue_tx_waiting, 3273 struct VirtIONetQueue), 3274 VMSTATE_END_OF_LIST() 3275 }, 3276 }; 3277 3278 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3279 * flag set we need to check that we have it 3280 */ 3281 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3282 { 3283 struct VirtIONetMigTmp *tmp = opaque; 3284 3285 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3286 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3287 return -EINVAL; 3288 } 3289 3290 return 0; 3291 } 3292 3293 static int virtio_net_ufo_pre_save(void *opaque) 3294 { 3295 struct VirtIONetMigTmp *tmp = opaque; 3296 3297 
tmp->has_ufo = tmp->parent->has_ufo; 3298 3299 return 0; 3300 } 3301 3302 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3303 .name = "virtio-net-ufo", 3304 .post_load = virtio_net_ufo_post_load, 3305 .pre_save = virtio_net_ufo_pre_save, 3306 .fields = (const VMStateField[]) { 3307 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3308 VMSTATE_END_OF_LIST() 3309 }, 3310 }; 3311 3312 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3313 * flag set we need to check that we have it 3314 */ 3315 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3316 { 3317 struct VirtIONetMigTmp *tmp = opaque; 3318 3319 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3320 error_report("virtio-net: saved image requires vnet_hdr=on"); 3321 return -EINVAL; 3322 } 3323 3324 return 0; 3325 } 3326 3327 static int virtio_net_vnet_pre_save(void *opaque) 3328 { 3329 struct VirtIONetMigTmp *tmp = opaque; 3330 3331 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3332 3333 return 0; 3334 } 3335 3336 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3337 .name = "virtio-net-vnet", 3338 .post_load = virtio_net_vnet_post_load, 3339 .pre_save = virtio_net_vnet_pre_save, 3340 .fields = (const VMStateField[]) { 3341 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3342 VMSTATE_END_OF_LIST() 3343 }, 3344 }; 3345 3346 static int virtio_net_rss_post_load(void *opaque, int version_id) 3347 { 3348 VirtIONet *n = VIRTIO_NET(opaque); 3349 3350 if (version_id == 1) { 3351 n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES; 3352 } 3353 3354 return 0; 3355 } 3356 3357 static bool virtio_net_rss_needed(void *opaque) 3358 { 3359 return VIRTIO_NET(opaque)->rss_data.enabled; 3360 } 3361 3362 static const VMStateDescription vmstate_virtio_net_rss = { 3363 .name = "virtio-net-device/rss", 3364 .version_id = 2, 3365 .minimum_version_id = 1, 3366 .post_load = virtio_net_rss_post_load, 3367 .needed = virtio_net_rss_needed, 3368 .fields = (const VMStateField[]) { 3369 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3370 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3371 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3372 VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet), 3373 VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2), 3374 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3375 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3376 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3377 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3378 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3379 rss_data.indirections_len, 0, 3380 vmstate_info_uint16, uint16_t), 3381 VMSTATE_END_OF_LIST() 3382 }, 3383 }; 3384 3385 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3386 { 3387 VirtIONet *n = VIRTIO_NET(vdev); 3388 NetClientState *nc; 3389 struct vhost_net *net; 3390 3391 if (!n->nic) { 3392 return NULL; 3393 } 3394 3395 nc = qemu_get_queue(n->nic); 3396 if (!nc) { 3397 return NULL; 3398 } 3399 3400 net = get_vhost_net(nc->peer); 3401 if (!net) { 3402 return NULL; 3403 } 3404 3405 return &net->dev; 3406 } 3407 3408 static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size, 3409 const VMStateField *field, 3410 JSONWriter *vmdesc) 3411 { 3412 VirtIONet *n = pv; 3413 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3414 struct vhost_dev *vhdev; 3415 Error *local_error = NULL; 3416 int ret; 3417 3418 vhdev = virtio_net_get_vhost(vdev); 3419 if (vhdev == NULL) { 3420 error_reportf_err(local_error, 3421 "Error getting 
vhost back-end of %s device %s: ", 3422 vdev->name, vdev->parent_obj.canonical_path); 3423 return -1; 3424 } 3425 3426 ret = vhost_save_backend_state(vhdev, f, &local_error); 3427 if (ret < 0) { 3428 error_reportf_err(local_error, 3429 "Error saving back-end state of %s device %s: ", 3430 vdev->name, vdev->parent_obj.canonical_path); 3431 return ret; 3432 } 3433 3434 return 0; 3435 } 3436 3437 static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size, 3438 const VMStateField *field) 3439 { 3440 VirtIONet *n = pv; 3441 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3442 struct vhost_dev *vhdev; 3443 Error *local_error = NULL; 3444 int ret; 3445 3446 vhdev = virtio_net_get_vhost(vdev); 3447 if (vhdev == NULL) { 3448 error_reportf_err(local_error, 3449 "Error getting vhost back-end of %s device %s: ", 3450 vdev->name, vdev->parent_obj.canonical_path); 3451 return -1; 3452 } 3453 3454 ret = vhost_load_backend_state(vhdev, f, &local_error); 3455 if (ret < 0) { 3456 error_reportf_err(local_error, 3457 "Error loading back-end state of %s device %s: ", 3458 vdev->name, vdev->parent_obj.canonical_path); 3459 return ret; 3460 } 3461 3462 return 0; 3463 } 3464 3465 static bool vhost_user_net_is_internal_migration(void *opaque) 3466 { 3467 VirtIONet *n = opaque; 3468 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3469 struct vhost_dev *vhdev; 3470 3471 vhdev = virtio_net_get_vhost(vdev); 3472 if (vhdev == NULL) { 3473 return false; 3474 } 3475 3476 return vhost_supports_device_state(vhdev); 3477 } 3478 3479 static const VMStateDescription vhost_user_net_backend_state = { 3480 .name = "virtio-net-device/backend", 3481 .version_id = 0, 3482 .needed = vhost_user_net_is_internal_migration, 3483 .fields = (const VMStateField[]) { 3484 { 3485 .name = "backend", 3486 .info = &(const VMStateInfo) { 3487 .name = "virtio-net vhost-user backend state", 3488 .get = vhost_user_net_load_state, 3489 .put = vhost_user_net_save_state, 3490 }, 3491 }, 3492 VMSTATE_END_OF_LIST() 3493 } 3494 }; 3495 3496 static const VMStateDescription vmstate_virtio_net_device = { 3497 .name = "virtio-net-device", 3498 .version_id = VIRTIO_NET_VM_VERSION, 3499 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3500 .post_load = virtio_net_post_load_device, 3501 .fields = (const VMStateField[]) { 3502 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3503 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3504 vmstate_virtio_net_queue_tx_waiting, 3505 VirtIONetQueue), 3506 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3507 VMSTATE_UINT16(status, VirtIONet), 3508 VMSTATE_UINT8(promisc, VirtIONet), 3509 VMSTATE_UINT8(allmulti, VirtIONet), 3510 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3511 3512 /* Guarded pair: If it fits we load it, else we throw it away 3513 * - can happen if source has a larger MAC table.; post-load 3514 * sets flags in this case. 3515 */ 3516 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3517 0, mac_table_fits, mac_table.in_use, 3518 ETH_ALEN), 3519 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3520 mac_table.in_use, ETH_ALEN), 3521 3522 /* Note: This is an array of uint32's that's always been saved as a 3523 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3524 * but based on the uint. 
 */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_net_rss,
        &vhost_user_net_backend_state,
        NULL
    }
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!n->multiqueue && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * being set by a malicious guest, or coming in through a
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
     * as the index of the configure interrupt. If this is not supported by
     * the backend, the function returns false.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return vhost_net_config_pending(get_vhost_net(nc->peer));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!n->multiqueue && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * being set by a malicious guest, or coming in through a
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
     * as the index of the configure interrupt. If this is not supported by
     * the backend, the function simply returns.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
        return;
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}

static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
        /* We already unplugged the device, let's plug it back
*/ 3724 if (!failover_replug_primary(n, dev, &err)) { 3725 if (err) { 3726 error_report_err(err); 3727 } 3728 } 3729 } 3730 } 3731 3732 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier, 3733 MigrationEvent *e, Error **errp) 3734 { 3735 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3736 virtio_net_handle_migration_primary(n, e); 3737 return 0; 3738 } 3739 3740 static bool failover_hide_primary_device(DeviceListener *listener, 3741 const QDict *device_opts, 3742 bool from_json, 3743 Error **errp) 3744 { 3745 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3746 const char *standby_id; 3747 3748 if (!device_opts) { 3749 return false; 3750 } 3751 3752 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3753 return false; 3754 } 3755 3756 if (!qdict_haskey(device_opts, "id")) { 3757 error_setg(errp, "Device with failover_pair_id needs to have id"); 3758 return false; 3759 } 3760 3761 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3762 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3763 return false; 3764 } 3765 3766 /* 3767 * The hide helper can be called several times for a given device. 3768 * Check there is only one primary for a virtio-net device but 3769 * don't duplicate the qdict several times if it's called for the same 3770 * device. 3771 */ 3772 if (n->primary_opts) { 3773 const char *old, *new; 3774 /* devices with failover_pair_id always have an id */ 3775 old = qdict_get_str(n->primary_opts, "id"); 3776 new = qdict_get_str(device_opts, "id"); 3777 if (strcmp(old, new) != 0) { 3778 error_setg(errp, "Cannot attach more than one primary device to " 3779 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3780 return false; 3781 } 3782 } else { 3783 n->primary_opts = qdict_clone_shallow(device_opts); 3784 n->primary_opts_from_json = from_json; 3785 } 3786 3787 /* failover_primary_hidden is set during feature negotiation */ 3788 return qatomic_read(&n->failover_primary_hidden); 3789 } 3790 3791 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3792 { 3793 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3794 VirtIONet *n = VIRTIO_NET(dev); 3795 NetClientState *nc; 3796 int i; 3797 3798 if (n->net_conf.mtu) { 3799 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3800 } 3801 3802 if (n->net_conf.duplex_str) { 3803 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3804 n->net_conf.duplex = DUPLEX_HALF; 3805 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3806 n->net_conf.duplex = DUPLEX_FULL; 3807 } else { 3808 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3809 return; 3810 } 3811 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3812 } else { 3813 n->net_conf.duplex = DUPLEX_UNKNOWN; 3814 } 3815 3816 if (n->net_conf.speed < SPEED_UNKNOWN) { 3817 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3818 return; 3819 } 3820 if (n->net_conf.speed >= 0) { 3821 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3822 } 3823 3824 if (n->failover) { 3825 n->primary_listener.hide_device = failover_hide_primary_device; 3826 qatomic_set(&n->failover_primary_hidden, true); 3827 device_listener_register(&n->primary_listener); 3828 migration_add_notifier(&n->migration_state, 3829 virtio_net_migration_state_notifier); 3830 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3831 } 3832 3833 virtio_net_set_config_size(n, n->host_features); 3834 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3835 3836 /* 3837 * We set a lower limit on RX queue size to what 
it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
        && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    virtio_net_add_queue(n, 0);

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
3914 */ 3915 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3916 n->netclient_type, n->netclient_name, 3917 &dev->mem_reentrancy_guard, n); 3918 } else { 3919 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3920 object_get_typename(OBJECT(dev)), dev->id, 3921 &dev->mem_reentrancy_guard, n); 3922 } 3923 3924 for (i = 0; i < n->max_queue_pairs; i++) { 3925 n->nic->ncs[i].do_not_pad = true; 3926 } 3927 3928 peer_test_vnet_hdr(n); 3929 if (peer_has_vnet_hdr(n)) { 3930 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3931 } else { 3932 n->host_hdr_len = 0; 3933 } 3934 3935 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3936 3937 n->vqs[0].tx_waiting = 0; 3938 n->tx_burst = n->net_conf.txburst; 3939 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3940 n->promisc = 1; /* for compatibility */ 3941 3942 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3943 3944 n->vlans = g_malloc0(MAX_VLAN >> 3); 3945 3946 nc = qemu_get_queue(n->nic); 3947 nc->rxfilter_notify_enabled = 1; 3948 3949 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3950 struct virtio_net_config netcfg = {}; 3951 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3952 vhost_net_set_config(get_vhost_net(nc->peer), 3953 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); 3954 } 3955 QTAILQ_INIT(&n->rsc_chains); 3956 n->qdev = dev; 3957 3958 net_rx_pkt_init(&n->rx_pkt); 3959 3960 if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer, 3961 &n->rss_data.peer_hash_types)) { 3962 n->rss_data.peer_hash_available = true; 3963 n->rss_data.supported_hash_types = 3964 n->rss_data.specified_hash_types.on_bits | 3965 (n->rss_data.specified_hash_types.auto_bits & 3966 n->rss_data.peer_hash_types); 3967 } else { 3968 n->rss_data.supported_hash_types = 3969 n->rss_data.specified_hash_types.on_bits | 3970 n->rss_data.specified_hash_types.auto_bits; 3971 } 3972 } 3973 3974 static void virtio_net_device_unrealize(DeviceState *dev) 3975 { 3976 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3977 VirtIONet *n = VIRTIO_NET(dev); 3978 int i, max_queue_pairs; 3979 3980 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3981 virtio_net_unload_ebpf(n); 3982 } 3983 3984 /* This will stop vhost backend if appropriate. */ 3985 virtio_net_set_status(vdev, 0); 3986 3987 g_free(n->netclient_name); 3988 n->netclient_name = NULL; 3989 g_free(n->netclient_type); 3990 n->netclient_type = NULL; 3991 3992 g_free(n->mac_table.macs); 3993 g_free(n->vlans); 3994 3995 if (n->failover) { 3996 qobject_unref(n->primary_opts); 3997 device_listener_unregister(&n->primary_listener); 3998 migration_remove_notifier(&n->migration_state); 3999 } else { 4000 assert(n->primary_opts == NULL); 4001 } 4002 4003 max_queue_pairs = n->multiqueue ? 
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* Also delete the control vq. */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }

    virtio_net_disable_rss(n);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /*
     * At this point, the backend must be stopped, otherwise it might
     * keep writing to memory.
     */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    DeviceState *primary;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    primary = failover_find_primary_device(n);
    return primary ? primary->pending_deleted_event : false;
}
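
/*
 * With failover (VIRTIO_NET_F_STANDBY), migration has to wait until the
 * primary (typically a passthrough) device has really been unplugged from
 * the guest; the hook above and its wrapper below let the migration code
 * poll for that condition via the pending_deleted_event flag.
 */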

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
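
/*
 * The properties below are what -device virtio-net-pci (and the other
 * virtio-net transports) parses. An illustrative invocation, assuming a
 * multiqueue tap netdev "nd0" has been set up, might look like:
 *
 *   -netdev tap,id=nd0,queues=4,vhost=on
 *   -device virtio-net-pci,netdev=nd0,mq=on,rx_queue_size=1024
 */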

static const Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
                      ebpf_rss_fds, qdev_prop_string, char*),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO4, true),
    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO6, true),
    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_USO, true),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_IPv4 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_TCPv4 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_UDPv4 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_IPv6 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_TCPv6 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_UDPv6 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_IPv6_EX - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1,
                                  ON_OFF_AUTO_AUTO),
};
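
/*
 * Each DEFINE_PROP_BIT64 entry above exposes a single bit of host_features
 * as an on/off property (e.g. guest_tso4=off clears VIRTIO_NET_F_GUEST_TSO4
 * before feature negotiation), while the ON_OFF_AUTO hash-* properties are
 * tri-state, so that "auto" can defer to the hash types the peer supports.
 */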

static void virtio_net_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->pre_load_queues = virtio_net_pre_load_queues;
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
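
/*
 * Once the type is registered, the device can also be instantiated at run
 * time; an illustrative QMP exchange (assuming a netdev "nd0" exists) is:
 *
 *   { "execute": "device_add",
 *     "arguments": { "driver": "virtio-net-pci",
 *                    "netdev": "nd0", "id": "net1" } }
 */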