/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "sysemu/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE    8  /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32  /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: 300000 (300us) is the recommended
 * value to pass the WHQL test, while 50000 can gain 2x netperf throughput
 * with tso/gso/gro disabled.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
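/*
 * Note on the virtqueue layout assumed by vq2q(): queue pair N uses
 * virtqueue 2N for RX and 2N + 1 for TX, with the control virtqueue
 * last.  For example, vq index 3 is the TX queue of pair 1, and both
 * vq 2 and vq 3 map to queue pair 3 / 2 == 1.
 */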
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
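/*
 * Start or stop the vhost backend so that its state tracks the device
 * model.  The comparison at the top of the function checks the desired
 * state (device started and peer link up) against the current one
 * (!!n->vhost_started) and returns early when they already agree, so
 * vhost is only started on a false -> true transition and stopped on
 * the opposite one.
 */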
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers.  If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notifications disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
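/*
 * The VLAN filter is a bitmap of MAX_VLAN bits stored in 32-bit words:
 * VLAN id 'vid' lives in bit (vid & 0x1f) of word vid >> 5.  For example,
 * vid 100 is bit 4 of n->vlans[3].  get_vlan_table() below walks that
 * bitmap and rebuilds the list of configured VLAN ids for QMP.
 */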
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* nobcast means broadcast is filtered out, so report the negation */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue %d "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
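/*
 * For reference, the guest header sizes selected above are:
 * struct virtio_net_hdr            10 bytes (legacy, no mergeable buffers)
 * struct virtio_net_hdr_mrg_rxbuf  12 bytes (adds le16 num_buffers)
 * struct virtio_net_hdr_v1_hash    20 bytes (adds hash value/report fields)
 */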
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
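/*
 * Feature negotiation, sketched: the device offers host_features, then
 * strips anything the peer cannot back (no vnet headers means no checksum
 * or segmentation offloads, no UFO/USO without peer support), and finally
 * lets the vhost backend mask the set further.  Whatever remains is what
 * the guest is allowed to accept.
 */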
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possibly supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set even
     * when the backend does not support it.  This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * refuse to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest announce its new location even with vDPA devices
     * that do not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device currently being walked
 * @opaque: the FailoverDevice to fill in
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so that it
         * does not miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
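/*
 * Wire format consumed below (per the virtio spec): the command carries
 * two struct virtio_net_ctrl_mac tables back to back, unicast first and
 * then multicast, each consisting of a little-endian 32-bit entry count
 * followed by that many 6-byte MAC addresses.
 */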
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}
static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    bool ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}
static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    bool ret = false;

    if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
        if (n->ebpf_rss_fds) {
            ret = virtio_net_load_ebpf_fds(n, errp);
        } else {
            ret = ebpf_rss_load(&n->ebpf_rss, errp);
        }
    }

    return ret;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
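/*
 * Layout of the command payload parsed below, matching the spec's
 * struct virtio_net_rss_config: le32 hash_types, le16
 * indirection_table_mask, le16 unclassified_queue, then the le16
 * indirection table itself, followed by le16 max_tx_vq, u8
 * hash_key_length and the key bytes.  The mask is the table length
 * minus one, which is why the code below adds 1 and insists on a
 * power of two.
 */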
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /*
     * Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue.
     */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
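/*
 * Control-queue framing, for reference: each request starts with a
 * struct virtio_net_ctrl_hdr (u8 class, u8 cmd) in the driver-readable
 * buffers, followed by command-specific data, and the device returns a
 * single virtio_net_ctrl_ack status byte in the device-writable buffer.
 */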
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}
/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    int opaque;
    unsigned int in_bytes;
    VirtIONet *n = q->n;

    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
                                           bufsize, 0);
        /* Buffer is big enough, disable notification */
        if (bufsize <= in_bytes) {
            break;
        }

        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
            /* Guest has added some buffers, try again */
            continue;
        } else {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);

    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
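/*
 * The byte offsets tested in the workaround below assume an untagged
 * Ethernet frame followed by an IPv4 header with no options: the
 * ethertype at bytes 12-13, the IP protocol at byte 23 (14 + 9), and
 * the UDP source port at bytes 34-35 (14 + 20), which is 67 (bootps)
 * for DHCP server replies.
 */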
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
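/*
 * Hash type selection below prefers the most specific negotiated type:
 * an L4 match (TCP/UDP, or their IPv6 extension-header variants) wins
 * over a bare IP hash, and 0xff is returned when nothing negotiated
 * applies, so the caller falls back to the default queue.
 */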
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    return 0xff;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size,
                                  struct virtio_net_hdr_v1_hash *hdr)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
            hdr->hash_report = 0;
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        hdr->hash_value = hash;
        hdr->hash_report = reports[net_hash_type];
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_v1_hash extra_hdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
        if (index >= 0) {
            NetClientState *nc2 =
                qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(extra_hdr), hdr.num_buffers),
                                    sizeof(extra_hdr.hdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = offsetof(typeof(extra_hdr), hash_value);
                iov_from_buf(sg, elem->in_num, offset,
                             (char *)&extra_hdr + offset,
                             sizeof(extra_hdr.hash_value) +
                             sizeof(extra_hdr.hash_report));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it.
         */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &extra_hdr.hdr.num_buffers,
                     sizeof extra_hdr.hdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
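/*
 * A note on the TCP header length math used by the extract helpers
 * below: th_offset_flags keeps the data offset (in 32-bit words) in its
 * top four bits, so masking with 0xF000 and shifting right by 10 is the
 * same as (x >> 12) * 4 and yields the header length in bytes.  For
 * example, an offset of 5 words gives 0x5000 >> 10 == 20 bytes.
 */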
*/ 2004 if (!n->mergeable_rx_bufs && offset < size) { 2005 virtqueue_unpop(q->rx_vq, elem, total); 2006 g_free(elem); 2007 err = size; 2008 goto err; 2009 } 2010 2011 elems[i] = elem; 2012 lens[i] = total; 2013 i++; 2014 } 2015 2016 if (mhdr_cnt) { 2017 virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i); 2018 iov_from_buf(mhdr_sg, mhdr_cnt, 2019 0, 2020 &extra_hdr.hdr.num_buffers, 2021 sizeof extra_hdr.hdr.num_buffers); 2022 } 2023 2024 for (j = 0; j < i; j++) { 2025 /* signal other side */ 2026 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 2027 g_free(elems[j]); 2028 } 2029 2030 virtqueue_flush(q->rx_vq, i); 2031 virtio_notify(vdev, q->rx_vq); 2032 2033 return size; 2034 2035 err: 2036 for (j = 0; j < i; j++) { 2037 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 2038 g_free(elems[j]); 2039 } 2040 2041 return err; 2042 } 2043 2044 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 2045 size_t size) 2046 { 2047 RCU_READ_LOCK_GUARD(); 2048 2049 return virtio_net_receive_rcu(nc, buf, size, false); 2050 } 2051 2052 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 2053 const uint8_t *buf, 2054 VirtioNetRscUnit *unit) 2055 { 2056 uint16_t ip_hdrlen; 2057 struct ip_header *ip; 2058 2059 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 2060 + sizeof(struct eth_header)); 2061 unit->ip = (void *)ip; 2062 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 2063 unit->ip_plen = &ip->ip_len; 2064 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 2065 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2066 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 2067 } 2068 2069 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 2070 const uint8_t *buf, 2071 VirtioNetRscUnit *unit) 2072 { 2073 struct ip6_header *ip6; 2074 2075 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2076 + sizeof(struct eth_header)); 2077 unit->ip = ip6; 2078 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2079 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2080 + sizeof(struct ip6_header)); 2081 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2082 2083 /* There is a difference between payload length in ipv4 and v6, 2084 ip header is excluded in ipv6 */ 2085 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2086 } 2087 2088 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2089 VirtioNetRscSeg *seg) 2090 { 2091 int ret; 2092 struct virtio_net_hdr_v1 *h; 2093 2094 h = (struct virtio_net_hdr_v1 *)seg->buf; 2095 h->flags = 0; 2096 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2097 2098 if (seg->is_coalesced) { 2099 h->rsc.segments = seg->packets; 2100 h->rsc.dup_acks = seg->dup_ack; 2101 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2102 if (chain->proto == ETH_P_IP) { 2103 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2104 } else { 2105 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2106 } 2107 } 2108 2109 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2110 QTAILQ_REMOVE(&chain->buffers, seg, next); 2111 g_free(seg->buf); 2112 g_free(seg); 2113 2114 return ret; 2115 } 2116 2117 static void virtio_net_rsc_purge(void *opq) 2118 { 2119 VirtioNetRscSeg *seg, *rn; 2120 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2121 2122 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2123 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2124 chain->stat.purge_failed++; 2125 continue; 2126 } 2127 } 2128 2129 chain->stat.timer++; 2130 if 
(!QTAILQ_EMPTY(&chain->buffers)) { 2131 timer_mod(chain->drain_timer, 2132 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); 2133 } 2134 } 2135 2136 static void virtio_net_rsc_cleanup(VirtIONet *n) 2137 { 2138 VirtioNetRscChain *chain, *rn_chain; 2139 VirtioNetRscSeg *seg, *rn_seg; 2140 2141 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2142 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2143 QTAILQ_REMOVE(&chain->buffers, seg, next); 2144 g_free(seg->buf); 2145 g_free(seg); 2146 } 2147 2148 timer_free(chain->drain_timer); 2149 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2150 g_free(chain); 2151 } 2152 } 2153 2154 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2155 NetClientState *nc, 2156 const uint8_t *buf, size_t size) 2157 { 2158 uint16_t hdr_len; 2159 VirtioNetRscSeg *seg; 2160 2161 hdr_len = chain->n->guest_hdr_len; 2162 seg = g_new(VirtioNetRscSeg, 1); 2163 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2164 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2165 memcpy(seg->buf, buf, size); 2166 seg->size = size; 2167 seg->packets = 1; 2168 seg->dup_ack = 0; 2169 seg->is_coalesced = 0; 2170 seg->nc = nc; 2171 2172 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2173 chain->stat.cache++; 2174 2175 switch (chain->proto) { 2176 case ETH_P_IP: 2177 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2178 break; 2179 case ETH_P_IPV6: 2180 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2181 break; 2182 default: 2183 g_assert_not_reached(); 2184 } 2185 } 2186 2187 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2188 VirtioNetRscSeg *seg, 2189 const uint8_t *buf, 2190 struct tcp_header *n_tcp, 2191 struct tcp_header *o_tcp) 2192 { 2193 uint32_t nack, oack; 2194 uint16_t nwin, owin; 2195 2196 nack = htonl(n_tcp->th_ack); 2197 nwin = htons(n_tcp->th_win); 2198 oack = htonl(o_tcp->th_ack); 2199 owin = htons(o_tcp->th_win); 2200 2201 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2202 chain->stat.ack_out_of_win++; 2203 return RSC_FINAL; 2204 } else if (nack == oack) { 2205 /* duplicated ack or window probe */ 2206 if (nwin == owin) { 2207 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2208 chain->stat.dup_ack++; 2209 return RSC_FINAL; 2210 } else { 2211 /* Coalesce window update */ 2212 o_tcp->th_win = n_tcp->th_win; 2213 chain->stat.win_update++; 2214 return RSC_COALESCE; 2215 } 2216 } else { 2217 /* pure ack, go to 'C', finalize*/ 2218 chain->stat.pure_ack++; 2219 return RSC_FINAL; 2220 } 2221 } 2222 2223 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2224 VirtioNetRscSeg *seg, 2225 const uint8_t *buf, 2226 VirtioNetRscUnit *n_unit) 2227 { 2228 void *data; 2229 uint16_t o_ip_len; 2230 uint32_t nseq, oseq; 2231 VirtioNetRscUnit *o_unit; 2232 2233 o_unit = &seg->unit; 2234 o_ip_len = htons(*o_unit->ip_plen); 2235 nseq = htonl(n_unit->tcp->th_seq); 2236 oseq = htonl(o_unit->tcp->th_seq); 2237 2238 /* out of order or retransmitted. 
*/
2239     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2240         chain->stat.data_out_of_win++;
2241         return RSC_FINAL;
2242     }
2243
2244     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2245     if (nseq == oseq) {
2246         if ((o_unit->payload == 0) && n_unit->payload) {
2247             /* From no payload to payload: the normal case, not a dup ack etc. */
2248             chain->stat.data_after_pure_ack++;
2249             goto coalesce;
2250         } else {
2251             return virtio_net_rsc_handle_ack(chain, seg, buf,
2252                                              n_unit->tcp, o_unit->tcp);
2253         }
2254     } else if ((nseq - oseq) != o_unit->payload) {
2255         /* Not a contiguous packet: out of order */
2256         chain->stat.data_out_of_order++;
2257         return RSC_FINAL;
2258     } else {
2259 coalesce:
2260         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2261             chain->stat.over_size++;
2262             return RSC_FINAL;
2263         }
2264
2265         /* The expected in-order data; the payload length field differs
2266            between v4/v6, so use the field value to record the new data length */
2267         o_unit->payload += n_unit->payload; /* update new data len */
2268
2269         /* update field in ip header */
2270         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2271
2272         /* Bring the 'PUSH' flag along; the WHQL test guide says 'PUSH' can
2273            be coalesced for Windows guests, while this may change behavior
2274            for a Linux guest (only if it uses the RSC feature). */
2275         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2276
2277         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2278         o_unit->tcp->th_win = n_unit->tcp->th_win;
2279
2280         memmove(seg->buf + seg->size, data, n_unit->payload);
2281         seg->size += n_unit->payload;
2282         seg->packets++;
2283         chain->stat.coalesced++;
2284         return RSC_COALESCE;
2285     }
2286 }
2287
2288 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2289                                         VirtioNetRscSeg *seg,
2290                                         const uint8_t *buf, size_t size,
2291                                         VirtioNetRscUnit *unit)
2292 {
2293     struct ip_header *ip1, *ip2;
2294
2295     ip1 = (struct ip_header *)(unit->ip);
2296     ip2 = (struct ip_header *)(seg->unit.ip);
2297     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2298         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2299         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2300         chain->stat.no_match++;
2301         return RSC_NO_MATCH;
2302     }
2303
2304     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2305 }
2306
2307 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2308                                         VirtioNetRscSeg *seg,
2309                                         const uint8_t *buf, size_t size,
2310                                         VirtioNetRscUnit *unit)
2311 {
2312     struct ip6_header *ip1, *ip2;
2313
2314     ip1 = (struct ip6_header *)(unit->ip);
2315     ip2 = (struct ip6_header *)(seg->unit.ip);
2316     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2317         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2318         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2319         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2320         chain->stat.no_match++;
2321         return RSC_NO_MATCH;
2322     }
2323
2324     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2325 }
2326
2327 /* Packets with 'SYN' bypass coalescing; any other control flag is sent
2328  * only after the chain is drained, to prevent out-of-order delivery */
2329 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2330                                          struct tcp_header *tcp)
2331 {
2332     uint16_t tcp_hdr;
2333     uint16_t tcp_flag;
2334
2335     tcp_flag = htons(tcp->th_offset_flags);
2336     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2337     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2338     if (tcp_flag & TH_SYN) {
2339         chain->stat.tcp_syn++;
2340         return RSC_BYPASS;
2341     }
2342
2343     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2344         chain->stat.tcp_ctrl_drain++;
2345         return RSC_FINAL;
2346     }
2347
2348     if (tcp_hdr > sizeof(struct tcp_header)) {
2349         chain->stat.tcp_all_opt++;
2350         return RSC_FINAL;
2351     }
2352
2353     return RSC_CANDIDATE;
2354 }
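/*
 * Editor's note: the coalescing helpers above and below communicate
 * through a small set of verdicts:
 *   RSC_BYPASS    - hand the packet to the guest untouched (SYN, non-TCP,
 *                   IP options/fragments, ECN, ...);
 *   RSC_FINAL     - flush the matching cached segment first, then deliver
 *                   the current packet on its own;
 *   RSC_NO_MATCH  - different flow; try the next cached segment;
 *   RSC_COALESCE  - payload absorbed into a cached segment, nothing sent;
 *   RSC_CANDIDATE - control checks passed, proceed to data coalescing.
 */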
2355
2356 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2357                                          NetClientState *nc,
2358                                          const uint8_t *buf, size_t size,
2359                                          VirtioNetRscUnit *unit)
2360 {
2361     int ret;
2362     VirtioNetRscSeg *seg, *nseg;
2363
2364     if (QTAILQ_EMPTY(&chain->buffers)) {
2365         chain->stat.empty_cache++;
2366         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2367         timer_mod(chain->drain_timer,
2368                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2369         return size;
2370     }
2371
2372     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2373         if (chain->proto == ETH_P_IP) {
2374             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2375         } else {
2376             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2377         }
2378
2379         if (ret == RSC_FINAL) {
2380             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2381                 /* Send failed */
2382                 chain->stat.final_failed++;
2383                 return 0;
2384             }
2385
2386             /* Send current packet */
2387             return virtio_net_do_receive(nc, buf, size);
2388         } else if (ret == RSC_NO_MATCH) {
2389             continue;
2390         } else {
2391             /* Coalesced; set is_coalesced so the IPv4 checksum is recalculated */
2392             seg->is_coalesced = 1;
2393             return size;
2394         }
2395     }
2396
2397     chain->stat.no_match_cache++;
2398     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2399     return size;
2400 }
2401
2402 /* Drain a connection's cached data; this avoids out-of-order segments */
2403 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2404                                         NetClientState *nc,
2405                                         const uint8_t *buf, size_t size,
2406                                         uint16_t ip_start, uint16_t ip_size,
2407                                         uint16_t tcp_port)
2408 {
2409     VirtioNetRscSeg *seg, *nseg;
2410     uint32_t ppair1, ppair2;
2411
2412     ppair1 = *(uint32_t *)(buf + tcp_port);
2413     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2414         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2415         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2416             || (ppair1 != ppair2)) {
2417             continue;
2418         }
2419         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2420             chain->stat.drain_failed++;
2421         }
2422
2423         break;
2424     }
2425
2426     return virtio_net_do_receive(nc, buf, size);
2427 }
2428
2429 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2430                                             struct ip_header *ip,
2431                                             const uint8_t *buf, size_t size)
2432 {
2433     uint16_t ip_len;
2434
2435     /* Not an ipv4 packet */
2436     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2437         chain->stat.ip_option++;
2438         return RSC_BYPASS;
2439     }
2440
2441     /* Don't handle packets with ip option */
2442     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2443         chain->stat.ip_option++;
2444         return RSC_BYPASS;
2445     }
2446
2447     if (ip->ip_p != IPPROTO_TCP) {
2448         chain->stat.bypass_not_tcp++;
2449         return RSC_BYPASS;
2450     }
2451
2452     /* Don't handle packets with ip fragment */
2453     if (!(htons(ip->ip_off) & IP_DF)) {
2454         chain->stat.ip_frag++;
2455         return RSC_BYPASS;
2456     }
2457
2458     /* Don't handle packets with ecn flag */
2459     if (IPTOS_ECN(ip->ip_tos)) {
2460         chain->stat.ip_ecn++;
2461         return RSC_BYPASS;
2462     }
2463
2464     ip_len = htons(ip->ip_len);
2465     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2466         || ip_len > (size - chain->n->guest_hdr_len -
2467                      sizeof(struct eth_header))) {
2468         chain->stat.ip_hacked++;
2469         return
RSC_BYPASS; 2470 } 2471 2472 return RSC_CANDIDATE; 2473 } 2474 2475 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2476 NetClientState *nc, 2477 const uint8_t *buf, size_t size) 2478 { 2479 int32_t ret; 2480 uint16_t hdr_len; 2481 VirtioNetRscUnit unit; 2482 2483 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2484 2485 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2486 + sizeof(struct tcp_header))) { 2487 chain->stat.bypass_not_tcp++; 2488 return virtio_net_do_receive(nc, buf, size); 2489 } 2490 2491 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2492 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2493 != RSC_CANDIDATE) { 2494 return virtio_net_do_receive(nc, buf, size); 2495 } 2496 2497 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2498 if (ret == RSC_BYPASS) { 2499 return virtio_net_do_receive(nc, buf, size); 2500 } else if (ret == RSC_FINAL) { 2501 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2502 ((hdr_len + sizeof(struct eth_header)) + 12), 2503 VIRTIO_NET_IP4_ADDR_SIZE, 2504 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2505 } 2506 2507 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2508 } 2509 2510 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2511 struct ip6_header *ip6, 2512 const uint8_t *buf, size_t size) 2513 { 2514 uint16_t ip_len; 2515 2516 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2517 != IP_HEADER_VERSION_6) { 2518 return RSC_BYPASS; 2519 } 2520 2521 /* Both option and protocol is checked in this */ 2522 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2523 chain->stat.bypass_not_tcp++; 2524 return RSC_BYPASS; 2525 } 2526 2527 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2528 if (ip_len < sizeof(struct tcp_header) || 2529 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2530 - sizeof(struct ip6_header))) { 2531 chain->stat.ip_hacked++; 2532 return RSC_BYPASS; 2533 } 2534 2535 /* Don't handle packets with ecn flag */ 2536 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2537 chain->stat.ip_ecn++; 2538 return RSC_BYPASS; 2539 } 2540 2541 return RSC_CANDIDATE; 2542 } 2543 2544 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2545 const uint8_t *buf, size_t size) 2546 { 2547 int32_t ret; 2548 uint16_t hdr_len; 2549 VirtioNetRscChain *chain; 2550 VirtioNetRscUnit unit; 2551 2552 chain = opq; 2553 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2554 2555 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2556 + sizeof(tcp_header))) { 2557 return virtio_net_do_receive(nc, buf, size); 2558 } 2559 2560 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2561 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2562 unit.ip, buf, size)) { 2563 return virtio_net_do_receive(nc, buf, size); 2564 } 2565 2566 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2567 if (ret == RSC_BYPASS) { 2568 return virtio_net_do_receive(nc, buf, size); 2569 } else if (ret == RSC_FINAL) { 2570 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2571 ((hdr_len + sizeof(struct eth_header)) + 8), 2572 VIRTIO_NET_IP6_ADDR_SIZE, 2573 hdr_len + sizeof(struct eth_header) 2574 + sizeof(struct ip6_header)); 2575 } 2576 2577 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2578 } 2579 2580 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2581 NetClientState *nc, 2582 uint16_t proto) 2583 { 2584 VirtioNetRscChain *chain; 2585 2586 if 
((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2587         return NULL;
2588     }
2589
2590     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2591         if (chain->proto == proto) {
2592             return chain;
2593         }
2594     }
2595
2596     chain = g_malloc(sizeof(*chain));
2597     chain->n = n;
2598     chain->proto = proto;
2599     if (proto == (uint16_t)ETH_P_IP) {
2600         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2601         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2602     } else {
2603         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2604         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2605     }
2606     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2607                                       virtio_net_rsc_purge, chain);
2608     memset(&chain->stat, 0, sizeof(chain->stat));
2609
2610     QTAILQ_INIT(&chain->buffers);
2611     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2612
2613     return chain;
2614 }
2615
2616 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2617                                       const uint8_t *buf,
2618                                       size_t size)
2619 {
2620     uint16_t proto;
2621     VirtioNetRscChain *chain;
2622     struct eth_header *eth;
2623     VirtIONet *n;
2624
2625     n = qemu_get_nic_opaque(nc);
2626     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2627         return virtio_net_do_receive(nc, buf, size);
2628     }
2629
2630     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2631     proto = htons(eth->h_proto);
2632
2633     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2634     if (chain) {
2635         chain->stat.received++;
2636         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2637             return virtio_net_rsc_receive4(chain, nc, buf, size);
2638         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2639             return virtio_net_rsc_receive6(chain, nc, buf, size);
2640         }
2641     }
2642     return virtio_net_do_receive(nc, buf, size);
2643 }
2644
2645 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2646                                   size_t size)
2647 {
2648     VirtIONet *n = qemu_get_nic_opaque(nc);
2649     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2650         return virtio_net_rsc_receive(nc, buf, size);
2651     } else {
2652         return virtio_net_do_receive(nc, buf, size);
2653     }
2654 }
2655
2656 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2657
2658 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2659 {
2660     VirtIONet *n = qemu_get_nic_opaque(nc);
2661     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2662     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2663     int ret;
2664
2665     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2666     virtio_notify(vdev, q->tx_vq);
2667
2668     g_free(q->async_tx.elem);
2669     q->async_tx.elem = NULL;
2670
2671     virtio_queue_set_notification(q->tx_vq, 1);
2672     ret = virtio_net_flush_tx(q);
2673     if (ret >= n->tx_burst) {
2674         /*
2675          * the flush has been stopped by tx_burst;
2676          * we will not receive a notification for the
2677          * remaining part, so re-schedule
2678          */
2679         virtio_queue_set_notification(q->tx_vq, 0);
2680         if (q->tx_bh) {
2681             replay_bh_schedule_event(q->tx_bh);
2682         } else {
2683             timer_mod(q->tx_timer,
2684                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2685         }
2686         q->tx_waiting = 1;
2687     }
2688 }
2689
2690 /* TX */
2691 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2692 {
2693     VirtIONet *n = q->n;
2694     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2695     VirtQueueElement *elem;
2696     int32_t num_packets = 0;
2697     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2698     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2699         return num_packets;
2700     }
2701
2702     if (q->async_tx.elem) {
2703         virtio_queue_set_notification(q->tx_vq, 0);
2704         return num_packets;
2705     }
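    /*
     * Editor's note on the loop below: each iteration pops one element
     * from the TX virtqueue.  If the guest and host disagree on vnet
     * header endianness (n->needs_vnet_hdr_swap), the header is
     * byteswapped into a bounce iovec (sg2) first; if the guest header
     * is longer than what the backend consumes, only the host-visible
     * prefix plus the payload is forwarded (sg).  A return of 0 from
     * qemu_sendv_packet_async() parks the element in q->async_tx and
     * suspends the loop until virtio_net_tx_complete() restarts it;
     * otherwise at most n->tx_burst packets are sent per call.
     */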
2706 2707 for (;;) { 2708 ssize_t ret; 2709 unsigned int out_num; 2710 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2711 struct virtio_net_hdr vhdr; 2712 2713 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2714 if (!elem) { 2715 break; 2716 } 2717 2718 out_num = elem->out_num; 2719 out_sg = elem->out_sg; 2720 if (out_num < 1) { 2721 virtio_error(vdev, "virtio-net header not in first element"); 2722 goto detach; 2723 } 2724 2725 if (n->needs_vnet_hdr_swap) { 2726 if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) < 2727 sizeof(vhdr)) { 2728 virtio_error(vdev, "virtio-net header incorrect"); 2729 goto detach; 2730 } 2731 virtio_net_hdr_swap(vdev, &vhdr); 2732 sg2[0].iov_base = &vhdr; 2733 sg2[0].iov_len = sizeof(vhdr); 2734 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num, 2735 sizeof(vhdr), -1); 2736 if (out_num == VIRTQUEUE_MAX_SIZE) { 2737 goto drop; 2738 } 2739 out_num += 1; 2740 out_sg = sg2; 2741 } 2742 /* 2743 * If host wants to see the guest header as is, we can 2744 * pass it on unchanged. Otherwise, copy just the parts 2745 * that host is interested in. 2746 */ 2747 assert(n->host_hdr_len <= n->guest_hdr_len); 2748 if (n->host_hdr_len != n->guest_hdr_len) { 2749 if (iov_size(out_sg, out_num) < n->guest_hdr_len) { 2750 virtio_error(vdev, "virtio-net header is invalid"); 2751 goto detach; 2752 } 2753 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2754 out_sg, out_num, 2755 0, n->host_hdr_len); 2756 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2757 out_sg, out_num, 2758 n->guest_hdr_len, -1); 2759 out_num = sg_num; 2760 out_sg = sg; 2761 2762 if (out_num < 1) { 2763 virtio_error(vdev, "virtio-net nothing to send"); 2764 goto detach; 2765 } 2766 } 2767 2768 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2769 out_sg, out_num, virtio_net_tx_complete); 2770 if (ret == 0) { 2771 virtio_queue_set_notification(q->tx_vq, 0); 2772 q->async_tx.elem = elem; 2773 return -EBUSY; 2774 } 2775 2776 drop: 2777 virtqueue_push(q->tx_vq, elem, 0); 2778 virtio_notify(vdev, q->tx_vq); 2779 g_free(elem); 2780 2781 if (++num_packets >= n->tx_burst) { 2782 break; 2783 } 2784 } 2785 return num_packets; 2786 2787 detach: 2788 virtqueue_detach_element(q->tx_vq, elem, 0); 2789 g_free(elem); 2790 return -EINVAL; 2791 } 2792 2793 static void virtio_net_tx_timer(void *opaque); 2794 2795 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2796 { 2797 VirtIONet *n = VIRTIO_NET(vdev); 2798 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2799 2800 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2801 virtio_net_drop_tx_queue_data(vdev, vq); 2802 return; 2803 } 2804 2805 /* This happens when device was stopped but VCPU wasn't. 
*/
2806     if (!vdev->vm_running) {
2807         q->tx_waiting = 1;
2808         return;
2809     }
2810
2811     if (q->tx_waiting) {
2812         /* We already have queued packets, immediately flush */
2813         timer_del(q->tx_timer);
2814         virtio_net_tx_timer(q);
2815     } else {
2816         /* re-arm timer to flush it (and more) on next tick */
2817         timer_mod(q->tx_timer,
2818                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2819         q->tx_waiting = 1;
2820         virtio_queue_set_notification(vq, 0);
2821     }
2822 }
2823
2824 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2825 {
2826     VirtIONet *n = VIRTIO_NET(vdev);
2827     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2828
2829     if (unlikely(n->vhost_started)) {
2830         return;
2831     }
2832
2833     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2834         virtio_net_drop_tx_queue_data(vdev, vq);
2835         return;
2836     }
2837
2838     if (unlikely(q->tx_waiting)) {
2839         return;
2840     }
2841     q->tx_waiting = 1;
2842     /* This happens when device was stopped but VCPU wasn't. */
2843     if (!vdev->vm_running) {
2844         return;
2845     }
2846     virtio_queue_set_notification(vq, 0);
2847     replay_bh_schedule_event(q->tx_bh);
2848 }
2849
2850 static void virtio_net_tx_timer(void *opaque)
2851 {
2852     VirtIONetQueue *q = opaque;
2853     VirtIONet *n = q->n;
2854     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2855     int ret;
2856
2857     /* This happens when device was stopped but BH wasn't. */
2858     if (!vdev->vm_running) {
2859         /* Make sure tx waiting is set, so we'll run when restarted. */
2860         assert(q->tx_waiting);
2861         return;
2862     }
2863
2864     q->tx_waiting = 0;
2865
2866     /* Just in case the driver is not ready any more */
2867     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2868         return;
2869     }
2870
2871     ret = virtio_net_flush_tx(q);
2872     if (ret == -EBUSY || ret == -EINVAL) {
2873         return;
2874     }
2875     /*
2876      * If we flush a full burst of packets, assume there are
2877      * more coming and immediately rearm
2878      */
2879     if (ret >= n->tx_burst) {
2880         q->tx_waiting = 1;
2881         timer_mod(q->tx_timer,
2882                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2883         return;
2884     }
2885     /*
2886      * If less than a full burst, re-enable notification and flush
2887      * anything that may have come in while we weren't looking. If
2888      * we find something, assume the guest is still active and rearm
2889      */
2890     virtio_queue_set_notification(q->tx_vq, 1);
2891     ret = virtio_net_flush_tx(q);
2892     if (ret > 0) {
2893         virtio_queue_set_notification(q->tx_vq, 0);
2894         q->tx_waiting = 1;
2895         timer_mod(q->tx_timer,
2896                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2897     }
2898 }
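/*
 * Editor's note: virtio_net_tx_timer() above and virtio_net_tx_bh() below
 * are the two alternative TX flush strategies, selected per device by the
 * "tx" property ("timer" vs. the default "bh") in virtio_net_add_queue().
 * The timer variant batches guest kicks for up to n->tx_timeout ns
 * (x-txtimer); the bottom-half variant flushes as soon as the main loop
 * runs again, rescheduling itself while full bursts of n->tx_burst
 * packets (x-txburst) keep coming.
 */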
2899
2900 static void virtio_net_tx_bh(void *opaque)
2901 {
2902     VirtIONetQueue *q = opaque;
2903     VirtIONet *n = q->n;
2904     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2905     int32_t ret;
2906
2907     /* This happens when device was stopped but BH wasn't. */
2908     if (!vdev->vm_running) {
2909         /* Make sure tx waiting is set, so we'll run when restarted. */
2910         assert(q->tx_waiting);
2911         return;
2912     }
2913
2914     q->tx_waiting = 0;
2915
2916     /* Just in case the driver is not ready any more */
2917     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2918         return;
2919     }
2920
2921     ret = virtio_net_flush_tx(q);
2922     if (ret == -EBUSY || ret == -EINVAL) {
2923         return; /* Notification re-enable handled by tx_complete or device
2924                  * broken */
2925     }
2926
2927     /* If we flush a full burst of packets, assume there are
2928      * more coming and immediately reschedule */
2929     if (ret >= n->tx_burst) {
2930         replay_bh_schedule_event(q->tx_bh);
2931         q->tx_waiting = 1;
2932         return;
2933     }
2934
2935     /* If less than a full burst, re-enable notification and flush
2936      * anything that may have come in while we weren't looking. If
2937      * we find something, assume the guest is still active and reschedule */
2938     virtio_queue_set_notification(q->tx_vq, 1);
2939     ret = virtio_net_flush_tx(q);
2940     if (ret == -EINVAL) {
2941         return;
2942     } else if (ret > 0) {
2943         virtio_queue_set_notification(q->tx_vq, 0);
2944         replay_bh_schedule_event(q->tx_bh);
2945         q->tx_waiting = 1;
2946     }
2947 }
2948
2949 static void virtio_net_add_queue(VirtIONet *n, int index)
2950 {
2951     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2952
2953     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2954                                            virtio_net_handle_rx);
2955
2956     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2957         n->vqs[index].tx_vq =
2958             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2959                              virtio_net_handle_tx_timer);
2960         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2961                                               virtio_net_tx_timer,
2962                                               &n->vqs[index]);
2963     } else {
2964         n->vqs[index].tx_vq =
2965             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2966                              virtio_net_handle_tx_bh);
2967         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2968                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2969     }
2970
2971     n->vqs[index].tx_waiting = 0;
2972     n->vqs[index].n = n;
2973 }
2974
2975 static void virtio_net_del_queue(VirtIONet *n, int index)
2976 {
2977     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2978     VirtIONetQueue *q = &n->vqs[index];
2979     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2980
2981     qemu_purge_queued_packets(nc);
2982
2983     virtio_del_queue(vdev, index * 2);
2984     if (q->tx_timer) {
2985         timer_free(q->tx_timer);
2986         q->tx_timer = NULL;
2987     } else {
2988         qemu_bh_delete(q->tx_bh);
2989         q->tx_bh = NULL;
2990     }
2991     q->tx_waiting = 0;
2992     virtio_del_queue(vdev, index * 2 + 1);
2993 }
2994
2995 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2996 {
2997     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2998     int old_num_queues = virtio_get_num_queues(vdev);
2999     int new_num_queues = new_max_queue_pairs * 2 + 1;
3000     int i;
3001
3002     assert(old_num_queues >= 3);
3003     assert(old_num_queues % 2 == 1);
3004
3005     if (old_num_queues == new_num_queues) {
3006         return;
3007     }
3008
3009     /*
3010      * We always need to remove and add ctrl vq if
3011      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3012      * and then we only enter one of the following two loops.
3013 */ 3014 virtio_del_queue(vdev, old_num_queues - 1); 3015 3016 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 3017 /* new_num_queues < old_num_queues */ 3018 virtio_net_del_queue(n, i / 2); 3019 } 3020 3021 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 3022 /* new_num_queues > old_num_queues */ 3023 virtio_net_add_queue(n, i / 2); 3024 } 3025 3026 /* add ctrl_vq last */ 3027 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3028 } 3029 3030 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 3031 { 3032 int max = multiqueue ? n->max_queue_pairs : 1; 3033 3034 n->multiqueue = multiqueue; 3035 virtio_net_change_num_queue_pairs(n, max); 3036 3037 virtio_net_set_queue_pairs(n); 3038 } 3039 3040 static int virtio_net_post_load_device(void *opaque, int version_id) 3041 { 3042 VirtIONet *n = opaque; 3043 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3044 int i, link_down; 3045 3046 trace_virtio_net_post_load_device(); 3047 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 3048 virtio_vdev_has_feature(vdev, 3049 VIRTIO_F_VERSION_1), 3050 virtio_vdev_has_feature(vdev, 3051 VIRTIO_NET_F_HASH_REPORT)); 3052 3053 /* MAC_TABLE_ENTRIES may be different from the saved image */ 3054 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 3055 n->mac_table.in_use = 0; 3056 } 3057 3058 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 3059 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 3060 } 3061 3062 /* 3063 * curr_guest_offloads will be later overwritten by the 3064 * virtio_set_features_nocheck call done from the virtio_load. 3065 * Here we make sure it is preserved and restored accordingly 3066 * in the virtio_net_post_load_virtio callback. 3067 */ 3068 n->saved_guest_offloads = n->curr_guest_offloads; 3069 3070 virtio_net_set_queue_pairs(n); 3071 3072 /* Find the first multicast entry in the saved MAC filter */ 3073 for (i = 0; i < n->mac_table.in_use; i++) { 3074 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 3075 break; 3076 } 3077 } 3078 n->mac_table.first_multi = i; 3079 3080 /* nc.link_down can't be migrated, so infer link_down according 3081 * to link status bit in n->status */ 3082 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 3083 for (i = 0; i < n->max_queue_pairs; i++) { 3084 qemu_get_subqueue(n->nic, i)->link_down = link_down; 3085 } 3086 3087 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3088 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3089 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3090 QEMU_CLOCK_VIRTUAL, 3091 virtio_net_announce_timer, n); 3092 if (n->announce_timer.round) { 3093 timer_mod(n->announce_timer.tm, 3094 qemu_clock_get_ms(n->announce_timer.type)); 3095 } else { 3096 qemu_announce_timer_del(&n->announce_timer, false); 3097 } 3098 } 3099 3100 virtio_net_commit_rss_config(n); 3101 return 0; 3102 } 3103 3104 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3105 { 3106 VirtIONet *n = VIRTIO_NET(vdev); 3107 /* 3108 * The actual needed state is now in saved_guest_offloads, 3109 * see virtio_net_post_load_device for detail. 3110 * Restore it back and apply the desired offloads. 
3111 */ 3112 n->curr_guest_offloads = n->saved_guest_offloads; 3113 if (peer_has_vnet_hdr(n)) { 3114 virtio_net_apply_guest_offloads(n); 3115 } 3116 3117 return 0; 3118 } 3119 3120 /* tx_waiting field of a VirtIONetQueue */ 3121 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3122 .name = "virtio-net-queue-tx_waiting", 3123 .fields = (const VMStateField[]) { 3124 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3125 VMSTATE_END_OF_LIST() 3126 }, 3127 }; 3128 3129 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3130 { 3131 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3132 } 3133 3134 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3135 { 3136 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3137 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3138 } 3139 3140 static bool mac_table_fits(void *opaque, int version_id) 3141 { 3142 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3143 } 3144 3145 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3146 { 3147 return !mac_table_fits(opaque, version_id); 3148 } 3149 3150 /* This temporary type is shared by all the WITH_TMP methods 3151 * although only some fields are used by each. 3152 */ 3153 struct VirtIONetMigTmp { 3154 VirtIONet *parent; 3155 VirtIONetQueue *vqs_1; 3156 uint16_t curr_queue_pairs_1; 3157 uint8_t has_ufo; 3158 uint32_t has_vnet_hdr; 3159 }; 3160 3161 /* The 2nd and subsequent tx_waiting flags are loaded later than 3162 * the 1st entry in the queue_pairs and only if there's more than one 3163 * entry. We use the tmp mechanism to calculate a temporary 3164 * pointer and count and also validate the count. 3165 */ 3166 3167 static int virtio_net_tx_waiting_pre_save(void *opaque) 3168 { 3169 struct VirtIONetMigTmp *tmp = opaque; 3170 3171 tmp->vqs_1 = tmp->parent->vqs + 1; 3172 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3173 if (tmp->parent->curr_queue_pairs == 0) { 3174 tmp->curr_queue_pairs_1 = 0; 3175 } 3176 3177 return 0; 3178 } 3179 3180 static int virtio_net_tx_waiting_pre_load(void *opaque) 3181 { 3182 struct VirtIONetMigTmp *tmp = opaque; 3183 3184 /* Reuse the pointer setup from save */ 3185 virtio_net_tx_waiting_pre_save(opaque); 3186 3187 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3188 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3189 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3190 3191 return -EINVAL; 3192 } 3193 3194 return 0; /* all good */ 3195 } 3196 3197 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3198 .name = "virtio-net-tx_waiting", 3199 .pre_load = virtio_net_tx_waiting_pre_load, 3200 .pre_save = virtio_net_tx_waiting_pre_save, 3201 .fields = (const VMStateField[]) { 3202 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3203 curr_queue_pairs_1, 3204 vmstate_virtio_net_queue_tx_waiting, 3205 struct VirtIONetQueue), 3206 VMSTATE_END_OF_LIST() 3207 }, 3208 }; 3209 3210 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3211 * flag set we need to check that we have it 3212 */ 3213 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3214 { 3215 struct VirtIONetMigTmp *tmp = opaque; 3216 3217 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3218 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3219 return -EINVAL; 3220 } 3221 3222 return 0; 3223 } 3224 3225 static int virtio_net_ufo_pre_save(void *opaque) 3226 { 3227 struct VirtIONetMigTmp *tmp = opaque; 3228 3229 
tmp->has_ufo = tmp->parent->has_ufo; 3230 3231 return 0; 3232 } 3233 3234 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3235 .name = "virtio-net-ufo", 3236 .post_load = virtio_net_ufo_post_load, 3237 .pre_save = virtio_net_ufo_pre_save, 3238 .fields = (const VMStateField[]) { 3239 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3240 VMSTATE_END_OF_LIST() 3241 }, 3242 }; 3243 3244 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3245 * flag set we need to check that we have it 3246 */ 3247 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3248 { 3249 struct VirtIONetMigTmp *tmp = opaque; 3250 3251 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3252 error_report("virtio-net: saved image requires vnet_hdr=on"); 3253 return -EINVAL; 3254 } 3255 3256 return 0; 3257 } 3258 3259 static int virtio_net_vnet_pre_save(void *opaque) 3260 { 3261 struct VirtIONetMigTmp *tmp = opaque; 3262 3263 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3264 3265 return 0; 3266 } 3267 3268 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3269 .name = "virtio-net-vnet", 3270 .post_load = virtio_net_vnet_post_load, 3271 .pre_save = virtio_net_vnet_pre_save, 3272 .fields = (const VMStateField[]) { 3273 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3274 VMSTATE_END_OF_LIST() 3275 }, 3276 }; 3277 3278 static bool virtio_net_rss_needed(void *opaque) 3279 { 3280 return VIRTIO_NET(opaque)->rss_data.enabled; 3281 } 3282 3283 static const VMStateDescription vmstate_virtio_net_rss = { 3284 .name = "virtio-net-device/rss", 3285 .version_id = 1, 3286 .minimum_version_id = 1, 3287 .needed = virtio_net_rss_needed, 3288 .fields = (const VMStateField[]) { 3289 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3290 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3291 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3292 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3293 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3294 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3295 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3296 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3297 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3298 rss_data.indirections_len, 0, 3299 vmstate_info_uint16, uint16_t), 3300 VMSTATE_END_OF_LIST() 3301 }, 3302 }; 3303 3304 static const VMStateDescription vmstate_virtio_net_device = { 3305 .name = "virtio-net-device", 3306 .version_id = VIRTIO_NET_VM_VERSION, 3307 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3308 .post_load = virtio_net_post_load_device, 3309 .fields = (const VMStateField[]) { 3310 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3311 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3312 vmstate_virtio_net_queue_tx_waiting, 3313 VirtIONetQueue), 3314 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3315 VMSTATE_UINT16(status, VirtIONet), 3316 VMSTATE_UINT8(promisc, VirtIONet), 3317 VMSTATE_UINT8(allmulti, VirtIONet), 3318 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3319 3320 /* Guarded pair: If it fits we load it, else we throw it away 3321 * - can happen if source has a larger MAC table.; post-load 3322 * sets flags in this case. 
3323 */ 3324 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3325 0, mac_table_fits, mac_table.in_use, 3326 ETH_ALEN), 3327 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3328 mac_table.in_use, ETH_ALEN), 3329 3330 /* Note: This is an array of uint32's that's always been saved as a 3331 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3332 * but based on the uint. 3333 */ 3334 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3335 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3336 vmstate_virtio_net_has_vnet), 3337 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3338 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3339 VMSTATE_UINT8(alluni, VirtIONet), 3340 VMSTATE_UINT8(nomulti, VirtIONet), 3341 VMSTATE_UINT8(nouni, VirtIONet), 3342 VMSTATE_UINT8(nobcast, VirtIONet), 3343 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3344 vmstate_virtio_net_has_ufo), 3345 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3346 vmstate_info_uint16_equal, uint16_t), 3347 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3348 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3349 vmstate_virtio_net_tx_waiting), 3350 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3351 has_ctrl_guest_offloads), 3352 VMSTATE_END_OF_LIST() 3353 }, 3354 .subsections = (const VMStateDescription * const []) { 3355 &vmstate_virtio_net_rss, 3356 NULL 3357 } 3358 }; 3359 3360 static NetClientInfo net_virtio_info = { 3361 .type = NET_CLIENT_DRIVER_NIC, 3362 .size = sizeof(NICState), 3363 .can_receive = virtio_net_can_receive, 3364 .receive = virtio_net_receive, 3365 .link_status_changed = virtio_net_set_link_status, 3366 .query_rx_filter = virtio_net_query_rxfilter, 3367 .announce = virtio_net_announce, 3368 }; 3369 3370 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3371 { 3372 VirtIONet *n = VIRTIO_NET(vdev); 3373 NetClientState *nc; 3374 assert(n->vhost_started); 3375 if (!n->multiqueue && idx == 2) { 3376 /* Must guard against invalid features and bogus queue index 3377 * from being set by malicious guest, or penetrated through 3378 * buggy migration stream. 3379 */ 3380 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3381 qemu_log_mask(LOG_GUEST_ERROR, 3382 "%s: bogus vq index ignored\n", __func__); 3383 return false; 3384 } 3385 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3386 } else { 3387 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3388 } 3389 /* 3390 * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3391 * as the macro of configure interrupt's IDX, If this driver does not 3392 * support, the function will return false 3393 */ 3394 3395 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3396 return vhost_net_config_pending(get_vhost_net(nc->peer)); 3397 } 3398 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3399 } 3400 3401 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3402 bool mask) 3403 { 3404 VirtIONet *n = VIRTIO_NET(vdev); 3405 NetClientState *nc; 3406 assert(n->vhost_started); 3407 if (!n->multiqueue && idx == 2) { 3408 /* Must guard against invalid features and bogus queue index 3409 * from being set by malicious guest, or penetrated through 3410 * buggy migration stream. 
3411 */ 3412 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3413 qemu_log_mask(LOG_GUEST_ERROR, 3414 "%s: bogus vq index ignored\n", __func__); 3415 return; 3416 } 3417 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3418 } else { 3419 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3420 } 3421 /* 3422 *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3423 * as the macro of configure interrupt's IDX, If this driver does not 3424 * support, the function will return 3425 */ 3426 3427 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3428 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); 3429 return; 3430 } 3431 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); 3432 } 3433 3434 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3435 { 3436 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3437 3438 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3439 } 3440 3441 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3442 const char *type) 3443 { 3444 /* 3445 * The name can be NULL, the netclient name will be type.x. 3446 */ 3447 assert(type != NULL); 3448 3449 g_free(n->netclient_name); 3450 g_free(n->netclient_type); 3451 n->netclient_name = g_strdup(name); 3452 n->netclient_type = g_strdup(type); 3453 } 3454 3455 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3456 { 3457 HotplugHandler *hotplug_ctrl; 3458 PCIDevice *pci_dev; 3459 Error *err = NULL; 3460 3461 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3462 if (hotplug_ctrl) { 3463 pci_dev = PCI_DEVICE(dev); 3464 pci_dev->partially_hotplugged = true; 3465 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3466 if (err) { 3467 error_report_err(err); 3468 return false; 3469 } 3470 } else { 3471 return false; 3472 } 3473 return true; 3474 } 3475 3476 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3477 Error **errp) 3478 { 3479 Error *err = NULL; 3480 HotplugHandler *hotplug_ctrl; 3481 PCIDevice *pdev = PCI_DEVICE(dev); 3482 BusState *primary_bus; 3483 3484 if (!pdev->partially_hotplugged) { 3485 return true; 3486 } 3487 primary_bus = dev->parent_bus; 3488 if (!primary_bus) { 3489 error_setg(errp, "virtio_net: couldn't find primary bus"); 3490 return false; 3491 } 3492 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3493 qatomic_set(&n->failover_primary_hidden, false); 3494 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3495 if (hotplug_ctrl) { 3496 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3497 if (err) { 3498 goto out; 3499 } 3500 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3501 } 3502 pdev->partially_hotplugged = false; 3503 3504 out: 3505 error_propagate(errp, err); 3506 return !err; 3507 } 3508 3509 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e) 3510 { 3511 bool should_be_hidden; 3512 Error *err = NULL; 3513 DeviceState *dev = failover_find_primary_device(n); 3514 3515 if (!dev) { 3516 return; 3517 } 3518 3519 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3520 3521 if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) { 3522 if (failover_unplug_primary(n, dev)) { 3523 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3524 qapi_event_send_unplug_primary(dev->id); 3525 qatomic_set(&n->failover_primary_hidden, true); 3526 } else { 3527 warn_report("couldn't unplug primary device"); 3528 } 3529 } else if (e->type == MIG_EVENT_PRECOPY_FAILED) { 3530 /* We already unplugged the device let's plug it back 
*/ 3531 if (!failover_replug_primary(n, dev, &err)) { 3532 if (err) { 3533 error_report_err(err); 3534 } 3535 } 3536 } 3537 } 3538 3539 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier, 3540 MigrationEvent *e, Error **errp) 3541 { 3542 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3543 virtio_net_handle_migration_primary(n, e); 3544 return 0; 3545 } 3546 3547 static bool failover_hide_primary_device(DeviceListener *listener, 3548 const QDict *device_opts, 3549 bool from_json, 3550 Error **errp) 3551 { 3552 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3553 const char *standby_id; 3554 3555 if (!device_opts) { 3556 return false; 3557 } 3558 3559 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3560 return false; 3561 } 3562 3563 if (!qdict_haskey(device_opts, "id")) { 3564 error_setg(errp, "Device with failover_pair_id needs to have id"); 3565 return false; 3566 } 3567 3568 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3569 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3570 return false; 3571 } 3572 3573 /* 3574 * The hide helper can be called several times for a given device. 3575 * Check there is only one primary for a virtio-net device but 3576 * don't duplicate the qdict several times if it's called for the same 3577 * device. 3578 */ 3579 if (n->primary_opts) { 3580 const char *old, *new; 3581 /* devices with failover_pair_id always have an id */ 3582 old = qdict_get_str(n->primary_opts, "id"); 3583 new = qdict_get_str(device_opts, "id"); 3584 if (strcmp(old, new) != 0) { 3585 error_setg(errp, "Cannot attach more than one primary device to " 3586 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3587 return false; 3588 } 3589 } else { 3590 n->primary_opts = qdict_clone_shallow(device_opts); 3591 n->primary_opts_from_json = from_json; 3592 } 3593 3594 /* failover_primary_hidden is set during feature negotiation */ 3595 return qatomic_read(&n->failover_primary_hidden); 3596 } 3597 3598 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3599 { 3600 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3601 VirtIONet *n = VIRTIO_NET(dev); 3602 NetClientState *nc; 3603 int i; 3604 3605 if (n->net_conf.mtu) { 3606 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3607 } 3608 3609 if (n->net_conf.duplex_str) { 3610 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3611 n->net_conf.duplex = DUPLEX_HALF; 3612 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3613 n->net_conf.duplex = DUPLEX_FULL; 3614 } else { 3615 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3616 return; 3617 } 3618 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3619 } else { 3620 n->net_conf.duplex = DUPLEX_UNKNOWN; 3621 } 3622 3623 if (n->net_conf.speed < SPEED_UNKNOWN) { 3624 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3625 return; 3626 } 3627 if (n->net_conf.speed >= 0) { 3628 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3629 } 3630 3631 if (n->failover) { 3632 n->primary_listener.hide_device = failover_hide_primary_device; 3633 qatomic_set(&n->failover_primary_hidden, true); 3634 device_listener_register(&n->primary_listener); 3635 migration_add_notifier(&n->migration_state, 3636 virtio_net_migration_state_notifier); 3637 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3638 } 3639 3640 virtio_net_set_config_size(n, n->host_features); 3641 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3642 3643 /* 3644 * We set a lower limit on RX queue size to what 
it always was. 3645 * Guests that want a smaller ring can always resize it without 3646 * help from us (using virtio 1 and up). 3647 */ 3648 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3649 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3650 !is_power_of_2(n->net_conf.rx_queue_size)) { 3651 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3652 "must be a power of 2 between %d and %d.", 3653 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3654 VIRTQUEUE_MAX_SIZE); 3655 virtio_cleanup(vdev); 3656 return; 3657 } 3658 3659 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3660 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || 3661 !is_power_of_2(n->net_conf.tx_queue_size)) { 3662 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3663 "must be a power of 2 between %d and %d", 3664 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3665 virtio_net_max_tx_queue_size(n)); 3666 virtio_cleanup(vdev); 3667 return; 3668 } 3669 3670 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3671 3672 /* 3673 * Figure out the datapath queue pairs since the backend could 3674 * provide control queue via peers as well. 3675 */ 3676 if (n->nic_conf.peers.queues) { 3677 for (i = 0; i < n->max_ncs; i++) { 3678 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3679 ++n->max_queue_pairs; 3680 } 3681 } 3682 } 3683 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3684 3685 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3686 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3687 "must be a positive integer less than %d.", 3688 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3689 virtio_cleanup(vdev); 3690 return; 3691 } 3692 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3693 n->curr_queue_pairs = 1; 3694 n->tx_timeout = n->net_conf.txtimer; 3695 3696 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3697 && strcmp(n->net_conf.tx, "bh")) { 3698 warn_report("virtio-net: " 3699 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3700 n->net_conf.tx); 3701 error_printf("Defaulting to \"bh\""); 3702 } 3703 3704 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3705 n->net_conf.tx_queue_size); 3706 3707 virtio_net_add_queue(n, 0); 3708 3709 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3710 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3711 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3712 n->status = VIRTIO_NET_S_LINK_UP; 3713 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3714 QEMU_CLOCK_VIRTUAL, 3715 virtio_net_announce_timer, n); 3716 n->announce_timer.round = 0; 3717 3718 if (n->netclient_type) { 3719 /* 3720 * Happen when virtio_net_set_netclient_name has been called. 
3721 */ 3722 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3723 n->netclient_type, n->netclient_name, 3724 &dev->mem_reentrancy_guard, n); 3725 } else { 3726 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3727 object_get_typename(OBJECT(dev)), dev->id, 3728 &dev->mem_reentrancy_guard, n); 3729 } 3730 3731 for (i = 0; i < n->max_queue_pairs; i++) { 3732 n->nic->ncs[i].do_not_pad = true; 3733 } 3734 3735 peer_test_vnet_hdr(n); 3736 if (peer_has_vnet_hdr(n)) { 3737 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3738 } else { 3739 n->host_hdr_len = 0; 3740 } 3741 3742 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3743 3744 n->vqs[0].tx_waiting = 0; 3745 n->tx_burst = n->net_conf.txburst; 3746 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3747 n->promisc = 1; /* for compatibility */ 3748 3749 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3750 3751 n->vlans = g_malloc0(MAX_VLAN >> 3); 3752 3753 nc = qemu_get_queue(n->nic); 3754 nc->rxfilter_notify_enabled = 1; 3755 3756 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3757 struct virtio_net_config netcfg = {}; 3758 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3759 vhost_net_set_config(get_vhost_net(nc->peer), 3760 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); 3761 } 3762 QTAILQ_INIT(&n->rsc_chains); 3763 n->qdev = dev; 3764 3765 net_rx_pkt_init(&n->rx_pkt); 3766 3767 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3768 Error *err = NULL; 3769 if (!virtio_net_load_ebpf(n, &err)) { 3770 /* 3771 * If user explicitly gave QEMU RSS FDs to use, then 3772 * failing to use them must be considered a fatal 3773 * error. If no RSS FDs were provided, QEMU is trying 3774 * eBPF on a "best effort" basis only, so report a 3775 * warning and allow fallback to software RSS. 3776 */ 3777 if (n->ebpf_rss_fds) { 3778 error_propagate(errp, err); 3779 } else { 3780 warn_report("unable to load eBPF RSS: %s", 3781 error_get_pretty(err)); 3782 error_free(err); 3783 } 3784 } 3785 } 3786 } 3787 3788 static void virtio_net_device_unrealize(DeviceState *dev) 3789 { 3790 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3791 VirtIONet *n = VIRTIO_NET(dev); 3792 int i, max_queue_pairs; 3793 3794 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3795 virtio_net_unload_ebpf(n); 3796 } 3797 3798 /* This will stop vhost backend if appropriate. */ 3799 virtio_net_set_status(vdev, 0); 3800 3801 g_free(n->netclient_name); 3802 n->netclient_name = NULL; 3803 g_free(n->netclient_type); 3804 n->netclient_type = NULL; 3805 3806 g_free(n->mac_table.macs); 3807 g_free(n->vlans); 3808 3809 if (n->failover) { 3810 qobject_unref(n->primary_opts); 3811 device_listener_unregister(&n->primary_listener); 3812 migration_remove_notifier(&n->migration_state); 3813 } else { 3814 assert(n->primary_opts == NULL); 3815 } 3816 3817 max_queue_pairs = n->multiqueue ? 
n->max_queue_pairs : 1; 3818 for (i = 0; i < max_queue_pairs; i++) { 3819 virtio_net_del_queue(n, i); 3820 } 3821 /* delete also control vq */ 3822 virtio_del_queue(vdev, max_queue_pairs * 2); 3823 qemu_announce_timer_del(&n->announce_timer, false); 3824 g_free(n->vqs); 3825 qemu_del_nic(n->nic); 3826 virtio_net_rsc_cleanup(n); 3827 g_free(n->rss_data.indirections_table); 3828 net_rx_pkt_uninit(n->rx_pkt); 3829 virtio_cleanup(vdev); 3830 } 3831 3832 static void virtio_net_reset(VirtIODevice *vdev) 3833 { 3834 VirtIONet *n = VIRTIO_NET(vdev); 3835 int i; 3836 3837 /* Reset back to compatibility mode */ 3838 n->promisc = 1; 3839 n->allmulti = 0; 3840 n->alluni = 0; 3841 n->nomulti = 0; 3842 n->nouni = 0; 3843 n->nobcast = 0; 3844 /* multiqueue is disabled by default */ 3845 n->curr_queue_pairs = 1; 3846 timer_del(n->announce_timer.tm); 3847 n->announce_timer.round = 0; 3848 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 3849 3850 /* Flush any MAC and VLAN filter table state */ 3851 n->mac_table.in_use = 0; 3852 n->mac_table.first_multi = 0; 3853 n->mac_table.multi_overflow = 0; 3854 n->mac_table.uni_overflow = 0; 3855 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 3856 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 3857 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 3858 memset(n->vlans, 0, MAX_VLAN >> 3); 3859 3860 /* Flush any async TX */ 3861 for (i = 0; i < n->max_queue_pairs; i++) { 3862 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 3863 } 3864 3865 virtio_net_disable_rss(n); 3866 } 3867 3868 static void virtio_net_instance_init(Object *obj) 3869 { 3870 VirtIONet *n = VIRTIO_NET(obj); 3871 3872 /* 3873 * The default config_size is sizeof(struct virtio_net_config). 3874 * Can be overridden with virtio_net_set_config_size. 3875 */ 3876 n->config_size = sizeof(struct virtio_net_config); 3877 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3878 "bootindex", "/ethernet-phy@0", 3879 DEVICE(n)); 3880 3881 ebpf_rss_init(&n->ebpf_rss); 3882 } 3883 3884 static int virtio_net_pre_save(void *opaque) 3885 { 3886 VirtIONet *n = opaque; 3887 3888 /* At this point, backend must be stopped, otherwise 3889 * it might keep writing to memory. */ 3890 assert(!n->vhost_started); 3891 3892 return 0; 3893 } 3894 3895 static bool primary_unplug_pending(void *opaque) 3896 { 3897 DeviceState *dev = opaque; 3898 DeviceState *primary; 3899 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3900 VirtIONet *n = VIRTIO_NET(vdev); 3901 3902 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3903 return false; 3904 } 3905 primary = failover_find_primary_device(n); 3906 return primary ? 
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }

    virtio_net_disable_rss(n);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /*
     * At this point, the backend must be stopped; otherwise it might
     * keep writing to memory.
     */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    DeviceState *primary;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    primary = failover_find_primary_device(n);
    return primary ? primary->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    struct vhost_net *net;

    if (!n->nic) {
        return NULL;
    }

    nc = qemu_get_queue(n->nic);
    if (!nc) {
        return NULL;
    }

    net = get_vhost_net(nc->peer);
    if (!net) {
        return NULL;
    }

    return &net->dev;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
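
/*
 * Illustrative usage, with assumed command-line syntax rather than
 * anything defined in this file: the failover machinery wired up
 * above. With "failover=on" the device offers VIRTIO_NET_F_STANDBY,
 * and dev_unplug_pending() keeps an outgoing migration waiting until
 * the paired primary device has been hot-unplugged, e.g.:
 *
 *   -device virtio-net-pci,id=net1,netdev=hostnet1,failover=on
 *   -device vfio-pci,host=02:00.0,id=hostdev0,failover_pair_id=net1
 */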
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
                      ebpf_rss_fds, qdev_prop_string, char*),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO4, true),
    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO6, true),
    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_USO, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
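
/*
 * Illustrative usage, with assumed command-line syntax: the qdev
 * properties registered above map directly onto -device options. For
 * example, a multiqueue device with software RSS and a larger receive
 * ring (subject to backend limits) might be configured as:
 *
 *   -netdev tap,id=nd0,queues=4,vhost=on
 *   -device virtio-net-pci,netdev=nd0,mq=on,rss=on,rx_queue_size=1024
 */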