/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval. This value affects performance
 * significantly and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x
 * netperf throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}

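/*
 * Worked example of the virtqueue layout this mapping assumes: with N queue
 * pairs, virtqueue 2*i is the RX queue and 2*i + 1 is the TX queue of pair i,
 * so vq2q(0) == vq2q(1) == 0, vq2q(2) == vq2q(3) == 1, and so on. The control
 * virtqueue, when present, sits after all the data queues and must not be
 * passed to vq2q(); callers below filter it out first.
 */
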
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

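/*
 * Rough shape of the announce flow as wired up here: the device raises
 * VIRTIO_NET_S_ANNOUNCE in the config status; a guest that negotiated
 * VIRTIO_NET_F_GUEST_ANNOUNCE sends gratuitous packets and acknowledges
 * with VIRTIO_NET_CTRL_ANNOUNCE_ACK on the control queue (see
 * virtio_net_handle_announce() below), which steps the announce timer
 * toward the next round.
 */
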
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        virtio_add_feature(&features, VIRTIO_F_RING_RESET);
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
     * being enabled. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to notify its new location with vDPA devices that do
     * not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

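/*
 * Illustrative sketch of what the helpers above expand to: the feature
 * words are 64-bit masks indexed by bit number, so
 *
 *     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 *
 * is equivalent to features |= 1ULL << VIRTIO_NET_F_MAC, and
 * virtio_clear_feature() is the matching &= ~(1ULL << bit). The open-coded
 * "n->host_features & 1ULL << VIRTIO_NET_F_MTU" test above is the same
 * check without the helper.
 */
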
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: device candidate from the bus walk
 * @opaque: FailoverDevice to fill in when the primary is found
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so it
         * doesn't miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

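/*
 * The two memsets above pick the VLAN filter's initial policy: when
 * VIRTIO_NET_F_CTRL_VLAN is negotiated the bitmap starts all-zero (drop
 * every tagged frame until the guest adds ids), otherwise it starts
 * all-ones (0xff bytes, every one of the 4096 ids passes), since a guest
 * without the feature has no way to program the table.
 */
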
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

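/*
 * RSS here has two implementations with the same observable behaviour:
 * an eBPF steering program attached to the backend (so queue selection
 * happens in the kernel before packets reach QEMU), and the software
 * path in virtio_net_process_rss() below. The helpers that follow attach
 * or detach the eBPF program; when it can't be used - hash reporting
 * requested, or the program failed to load - the device falls back to
 * enabled_software_rss.
 */
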
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /*
     * Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue.
     */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

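/*
 * Shape of a control-queue request, per the virtio spec (shown for
 * orientation): the driver supplies a 2-byte header { u8 class; u8 cmd; }
 * followed by a class-specific payload in the out sg-list, plus a single
 * writable ack byte in the in sg-list that the device fills with
 * VIRTIO_NET_OK or VIRTIO_NET_ERR. The dispatcher below peels off the
 * header and routes the remaining iov to the per-class handlers above.
 */
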
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

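/*
 * The magic offsets above assume an untagged Ethernet frame carrying IPv4
 * with no IP options: bytes 12-13 are the ethertype (0x0800), byte 23 is
 * the IP protocol field (14-byte MAC header + 9), and bytes 34-35 are the
 * UDP source port (14 + 20-byte IP header), matched against 67/bootps,
 * i.e. DHCP server-to-client replies.
 */
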
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it.
         */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}

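/*
 * Mergeable RX buffers in one example: a 5000-byte packet arriving while
 * the guest posts 2048-byte buffers consumes three descriptor chains; the
 * loop above fills them in order and then writes num_buffers = 3 back into
 * the virtio_net_hdr_mrg_rxbuf of the first chain (via the iovec saved in
 * mhdr_sg), so the guest knows how many chains make up the frame. Without
 * VIRTIO_NET_F_MRG_RXBUF the whole packet must fit in the first chain or
 * it is dropped.
 */
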
*/ 1952 if (!n->mergeable_rx_bufs && offset < size) { 1953 virtqueue_unpop(q->rx_vq, elem, total); 1954 g_free(elem); 1955 err = size; 1956 goto err; 1957 } 1958 1959 elems[i] = elem; 1960 lens[i] = total; 1961 i++; 1962 } 1963 1964 if (mhdr_cnt) { 1965 virtio_stw_p(vdev, &mhdr.num_buffers, i); 1966 iov_from_buf(mhdr_sg, mhdr_cnt, 1967 0, 1968 &mhdr.num_buffers, sizeof mhdr.num_buffers); 1969 } 1970 1971 for (j = 0; j < i; j++) { 1972 /* signal other side */ 1973 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 1974 g_free(elems[j]); 1975 } 1976 1977 virtqueue_flush(q->rx_vq, i); 1978 virtio_notify(vdev, q->rx_vq); 1979 1980 return size; 1981 1982 err: 1983 for (j = 0; j < i; j++) { 1984 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 1985 g_free(elems[j]); 1986 } 1987 1988 return err; 1989 } 1990 1991 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 1992 size_t size) 1993 { 1994 RCU_READ_LOCK_GUARD(); 1995 1996 return virtio_net_receive_rcu(nc, buf, size, false); 1997 } 1998 1999 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 2000 const uint8_t *buf, 2001 VirtioNetRscUnit *unit) 2002 { 2003 uint16_t ip_hdrlen; 2004 struct ip_header *ip; 2005 2006 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 2007 + sizeof(struct eth_header)); 2008 unit->ip = (void *)ip; 2009 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 2010 unit->ip_plen = &ip->ip_len; 2011 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 2012 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2013 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 2014 } 2015 2016 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 2017 const uint8_t *buf, 2018 VirtioNetRscUnit *unit) 2019 { 2020 struct ip6_header *ip6; 2021 2022 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2023 + sizeof(struct eth_header)); 2024 unit->ip = ip6; 2025 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2026 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2027 + sizeof(struct ip6_header)); 2028 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2029 2030 /* There is a difference between the payload length in ipv4 and v6: 2031 the ip header is excluded in ipv6 */ 2032 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2033 } 2034 2035 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2036 VirtioNetRscSeg *seg) 2037 { 2038 int ret; 2039 struct virtio_net_hdr_v1 *h; 2040 2041 h = (struct virtio_net_hdr_v1 *)seg->buf; 2042 h->flags = 0; 2043 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2044 2045 if (seg->is_coalesced) { 2046 h->rsc.segments = seg->packets; 2047 h->rsc.dup_acks = seg->dup_ack; 2048 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2049 if (chain->proto == ETH_P_IP) { 2050 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2051 } else { 2052 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2053 } 2054 } 2055 2056 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2057 QTAILQ_REMOVE(&chain->buffers, seg, next); 2058 g_free(seg->buf); 2059 g_free(seg); 2060 2061 return ret; 2062 } 2063 2064 static void virtio_net_rsc_purge(void *opq) 2065 { 2066 VirtioNetRscSeg *seg, *rn; 2067 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2068 2069 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2070 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2071 chain->stat.purge_failed++; 2072 continue; 2073 } 2074 } 2075 2076 chain->stat.timer++; 2077 if (!QTAILQ_EMPTY(&chain->buffers)) { 2078
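/*
 * A drain fails when virtio_net_do_receive() returns 0, typically
 * because the guest has no rx buffers available; keep the timer armed
 * while segments remain queued so they are retried on the next tick
 * instead of being stranded in the chain.
 */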
timer_mod(chain->drain_timer, 2079 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2080 } 2081 } 2082 2083 static void virtio_net_rsc_cleanup(VirtIONet *n) 2084 { 2085 VirtioNetRscChain *chain, *rn_chain; 2086 VirtioNetRscSeg *seg, *rn_seg; 2087 2088 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2089 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2090 QTAILQ_REMOVE(&chain->buffers, seg, next); 2091 g_free(seg->buf); 2092 g_free(seg); 2093 } 2094 2095 timer_free(chain->drain_timer); 2096 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2097 g_free(chain); 2098 } 2099 } 2100 2101 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2102 NetClientState *nc, 2103 const uint8_t *buf, size_t size) 2104 { 2105 uint16_t hdr_len; 2106 VirtioNetRscSeg *seg; 2107 2108 hdr_len = chain->n->guest_hdr_len; 2109 seg = g_new(VirtioNetRscSeg, 1); 2110 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2111 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2112 memcpy(seg->buf, buf, size); 2113 seg->size = size; 2114 seg->packets = 1; 2115 seg->dup_ack = 0; 2116 seg->is_coalesced = 0; 2117 seg->nc = nc; 2118 2119 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2120 chain->stat.cache++; 2121 2122 switch (chain->proto) { 2123 case ETH_P_IP: 2124 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2125 break; 2126 case ETH_P_IPV6: 2127 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2128 break; 2129 default: 2130 g_assert_not_reached(); 2131 } 2132 } 2133 2134 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2135 VirtioNetRscSeg *seg, 2136 const uint8_t *buf, 2137 struct tcp_header *n_tcp, 2138 struct tcp_header *o_tcp) 2139 { 2140 uint32_t nack, oack; 2141 uint16_t nwin, owin; 2142 2143 nack = htonl(n_tcp->th_ack); 2144 nwin = htons(n_tcp->th_win); 2145 oack = htonl(o_tcp->th_ack); 2146 owin = htons(o_tcp->th_win); 2147 2148 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2149 chain->stat.ack_out_of_win++; 2150 return RSC_FINAL; 2151 } else if (nack == oack) { 2152 /* duplicated ack or window probe */ 2153 if (nwin == owin) { 2154 /* duplicated ack, bump the dup ack count (the whql test allows up to 1) */ 2155 chain->stat.dup_ack++; 2156 return RSC_FINAL; 2157 } else { 2158 /* Coalesce window update */ 2159 o_tcp->th_win = n_tcp->th_win; 2160 chain->stat.win_update++; 2161 return RSC_COALESCE; 2162 } 2163 } else { 2164 /* pure ack, go to 'C', finalize */ 2165 chain->stat.pure_ack++; 2166 return RSC_FINAL; 2167 } 2168 } 2169 2170 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2171 VirtioNetRscSeg *seg, 2172 const uint8_t *buf, 2173 VirtioNetRscUnit *n_unit) 2174 { 2175 void *data; 2176 uint16_t o_ip_len; 2177 uint32_t nseq, oseq; 2178 VirtioNetRscUnit *o_unit; 2179 2180 o_unit = &seg->unit; 2181 o_ip_len = htons(*o_unit->ip_plen); 2182 nseq = htonl(n_unit->tcp->th_seq); 2183 oseq = htonl(o_unit->tcp->th_seq); 2184 2185 /* out of order or retransmitted.
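 * Note: (nseq - oseq) is computed in unsigned 32-bit arithmetic, so a
 * retransmitted segment (nseq older than oseq) wraps around to a huge
 * value and is caught by the same window comparison below.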
*/ 2186 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2187 chain->stat.data_out_of_win++; 2188 return RSC_FINAL; 2189 } 2190 2191 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2192 if (nseq == oseq) { 2193 if ((o_unit->payload == 0) && n_unit->payload) { 2194 /* From no payload to payload, normal case, not a dup ack etc. */ 2195 chain->stat.data_after_pure_ack++; 2196 goto coalesce; 2197 } else { 2198 return virtio_net_rsc_handle_ack(chain, seg, buf, 2199 n_unit->tcp, o_unit->tcp); 2200 } 2201 } else if ((nseq - oseq) != o_unit->payload) { 2202 /* Not a consistent packet, out of order */ 2203 chain->stat.data_out_of_order++; 2204 return RSC_FINAL; 2205 } else { 2206 coalesce: 2207 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2208 chain->stat.over_size++; 2209 return RSC_FINAL; 2210 } 2211 2212 /* Here comes the right data: the payload length in v4/v6 is different, 2213 so use the field value to update it and record the new data length */ 2214 o_unit->payload += n_unit->payload; /* update new data len */ 2215 2216 /* update field in ip header */ 2217 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2218 2219 /* Bring the 'PUSH' flag along; the whql test guide says 'PUSH' can be coalesced 2220 for windows guests, while this may change the behavior for linux 2221 guests (only if they use the RSC feature). */ 2222 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2223 2224 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2225 o_unit->tcp->th_win = n_unit->tcp->th_win; 2226 2227 memmove(seg->buf + seg->size, data, n_unit->payload); 2228 seg->size += n_unit->payload; 2229 seg->packets++; 2230 chain->stat.coalesced++; 2231 return RSC_COALESCE; 2232 } 2233 } 2234 2235 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2236 VirtioNetRscSeg *seg, 2237 const uint8_t *buf, size_t size, 2238 VirtioNetRscUnit *unit) 2239 { 2240 struct ip_header *ip1, *ip2; 2241 2242 ip1 = (struct ip_header *)(unit->ip); 2243 ip2 = (struct ip_header *)(seg->unit.ip); 2244 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2245 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2246 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2247 chain->stat.no_match++; 2248 return RSC_NO_MATCH; 2249 } 2250 2251 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2252 } 2253 2254 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2255 VirtioNetRscSeg *seg, 2256 const uint8_t *buf, size_t size, 2257 VirtioNetRscUnit *unit) 2258 { 2259 struct ip6_header *ip1, *ip2; 2260 2261 ip1 = (struct ip6_header *)(unit->ip); 2262 ip2 = (struct ip6_header *)(seg->unit.ip); 2263 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2264 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2265 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2266 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2267 chain->stat.no_match++; 2268 return RSC_NO_MATCH; 2269 } 2270 2271 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2272 } 2273 2274 /* Packets with 'SYN' set should bypass; packets with other flags set should 2275 * be sent only after a drain, to prevent reordering */ 2276 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2277 struct tcp_header *tcp) 2278 { 2279 uint16_t tcp_hdr; 2280 uint16_t tcp_flag; 2281 2282 tcp_flag = htons(tcp->th_offset_flags); 2283 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2284 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2285 if (tcp_flag & TH_SYN) { 2286 chain->stat.tcp_syn++; 2287 return RSC_BYPASS; 2288 } 2289 2290 if
(tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2291 chain->stat.tcp_ctrl_drain++; 2292 return RSC_FINAL; 2293 } 2294 2295 if (tcp_hdr > sizeof(struct tcp_header)) { 2296 chain->stat.tcp_all_opt++; 2297 return RSC_FINAL; 2298 } 2299 2300 return RSC_CANDIDATE; 2301 } 2302 2303 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2304 NetClientState *nc, 2305 const uint8_t *buf, size_t size, 2306 VirtioNetRscUnit *unit) 2307 { 2308 int ret; 2309 VirtioNetRscSeg *seg, *nseg; 2310 2311 if (QTAILQ_EMPTY(&chain->buffers)) { 2312 chain->stat.empty_cache++; 2313 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2314 timer_mod(chain->drain_timer, 2315 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2316 return size; 2317 } 2318 2319 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2320 if (chain->proto == ETH_P_IP) { 2321 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2322 } else { 2323 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2324 } 2325 2326 if (ret == RSC_FINAL) { 2327 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2328 /* Send failed */ 2329 chain->stat.final_failed++; 2330 return 0; 2331 } 2332 2333 /* Send current packet */ 2334 return virtio_net_do_receive(nc, buf, size); 2335 } else if (ret == RSC_NO_MATCH) { 2336 continue; 2337 } else { 2338 /* Coalesced: set the coalesced flag so the cksum gets recalculated for ipv4 */ 2339 seg->is_coalesced = 1; 2340 return size; 2341 } 2342 } 2343 2344 chain->stat.no_match_cache++; 2345 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2346 return size; 2347 } 2348 2349 /* Drain a connection's data; this avoids out-of-order segments */ 2350 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2351 NetClientState *nc, 2352 const uint8_t *buf, size_t size, 2353 uint16_t ip_start, uint16_t ip_size, 2354 uint16_t tcp_port) 2355 { 2356 VirtioNetRscSeg *seg, *nseg; 2357 uint32_t ppair1, ppair2; 2358 2359 ppair1 = *(uint32_t *)(buf + tcp_port); 2360 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2361 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2362 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2363 || (ppair1 != ppair2)) { 2364 continue; 2365 } 2366 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2367 chain->stat.drain_failed++; 2368 } 2369 2370 break; 2371 } 2372 2373 return virtio_net_do_receive(nc, buf, size); 2374 } 2375 2376 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2377 struct ip_header *ip, 2378 const uint8_t *buf, size_t size) 2379 { 2380 uint16_t ip_len; 2381 2382 /* Not an ipv4 packet */ 2383 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2384 chain->stat.ip_option++; 2385 return RSC_BYPASS; 2386 } 2387 2388 /* Don't handle packets with ip options */ 2389 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2390 chain->stat.ip_option++; 2391 return RSC_BYPASS; 2392 } 2393 2394 if (ip->ip_p != IPPROTO_TCP) { 2395 chain->stat.bypass_not_tcp++; 2396 return RSC_BYPASS; 2397 } 2398 2399 /* Don't handle packets with ip fragments */ 2400 if (!(htons(ip->ip_off) & IP_DF)) { 2401 chain->stat.ip_frag++; 2402 return RSC_BYPASS; 2403 } 2404 2405 /* Don't handle packets with ecn flag */ 2406 if (IPTOS_ECN(ip->ip_tos)) { 2407 chain->stat.ip_ecn++; 2408 return RSC_BYPASS; 2409 } 2410 2411 ip_len = htons(ip->ip_len); 2412 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2413 || ip_len > (size - chain->n->guest_hdr_len - 2414 sizeof(struct eth_header))) { 2415 chain->stat.ip_hacked++; 2416 return
RSC_BYPASS; 2417 } 2418 2419 return RSC_CANDIDATE; 2420 } 2421 2422 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2423 NetClientState *nc, 2424 const uint8_t *buf, size_t size) 2425 { 2426 int32_t ret; 2427 uint16_t hdr_len; 2428 VirtioNetRscUnit unit; 2429 2430 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2431 2432 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2433 + sizeof(struct tcp_header))) { 2434 chain->stat.bypass_not_tcp++; 2435 return virtio_net_do_receive(nc, buf, size); 2436 } 2437 2438 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2439 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2440 != RSC_CANDIDATE) { 2441 return virtio_net_do_receive(nc, buf, size); 2442 } 2443 2444 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2445 if (ret == RSC_BYPASS) { 2446 return virtio_net_do_receive(nc, buf, size); 2447 } else if (ret == RSC_FINAL) { 2448 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2449 ((hdr_len + sizeof(struct eth_header)) + 12), 2450 VIRTIO_NET_IP4_ADDR_SIZE, 2451 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2452 } 2453 2454 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2455 } 2456 2457 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2458 struct ip6_header *ip6, 2459 const uint8_t *buf, size_t size) 2460 { 2461 uint16_t ip_len; 2462 2463 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2464 != IP_HEADER_VERSION_6) { 2465 return RSC_BYPASS; 2466 } 2467 2468 /* Both options and the protocol are checked by this */ 2469 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2470 chain->stat.bypass_not_tcp++; 2471 return RSC_BYPASS; 2472 } 2473 2474 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2475 if (ip_len < sizeof(struct tcp_header) || 2476 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2477 - sizeof(struct ip6_header))) { 2478 chain->stat.ip_hacked++; 2479 return RSC_BYPASS; 2480 } 2481 2482 /* Don't handle packets with ecn flag */ 2483 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2484 chain->stat.ip_ecn++; 2485 return RSC_BYPASS; 2486 } 2487 2488 return RSC_CANDIDATE; 2489 } 2490 2491 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2492 const uint8_t *buf, size_t size) 2493 { 2494 int32_t ret; 2495 uint16_t hdr_len; 2496 VirtioNetRscChain *chain; 2497 VirtioNetRscUnit unit; 2498 2499 chain = opq; 2500 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2501 2502 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2503 + sizeof(tcp_header))) { 2504 return virtio_net_do_receive(nc, buf, size); 2505 } 2506 2507 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2508 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2509 unit.ip, buf, size)) { 2510 return virtio_net_do_receive(nc, buf, size); 2511 } 2512 2513 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2514 if (ret == RSC_BYPASS) { 2515 return virtio_net_do_receive(nc, buf, size); 2516 } else if (ret == RSC_FINAL) { 2517 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2518 ((hdr_len + sizeof(struct eth_header)) + 8), 2519 VIRTIO_NET_IP6_ADDR_SIZE, 2520 hdr_len + sizeof(struct eth_header) 2521 + sizeof(struct ip6_header)); 2522 } 2523 2524 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2525 } 2526 2527 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2528 NetClientState *nc, 2529 uint16_t proto) 2530 { 2531 VirtioNetRscChain *chain; 2532 2533 if
((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2534 return NULL; 2535 } 2536 2537 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2538 if (chain->proto == proto) { 2539 return chain; 2540 } 2541 } 2542 2543 chain = g_malloc(sizeof(*chain)); 2544 chain->n = n; 2545 chain->proto = proto; 2546 if (proto == (uint16_t)ETH_P_IP) { 2547 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2548 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2549 } else { 2550 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2551 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2552 } 2553 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2554 virtio_net_rsc_purge, chain); 2555 memset(&chain->stat, 0, sizeof(chain->stat)); 2556 2557 QTAILQ_INIT(&chain->buffers); 2558 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2559 2560 return chain; 2561 } 2562 2563 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2564 const uint8_t *buf, 2565 size_t size) 2566 { 2567 uint16_t proto; 2568 VirtioNetRscChain *chain; 2569 struct eth_header *eth; 2570 VirtIONet *n; 2571 2572 n = qemu_get_nic_opaque(nc); 2573 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2574 return virtio_net_do_receive(nc, buf, size); 2575 } 2576 2577 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2578 proto = htons(eth->h_proto); 2579 2580 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2581 if (chain) { 2582 chain->stat.received++; 2583 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2584 return virtio_net_rsc_receive4(chain, nc, buf, size); 2585 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2586 return virtio_net_rsc_receive6(chain, nc, buf, size); 2587 } 2588 } 2589 return virtio_net_do_receive(nc, buf, size); 2590 } 2591 2592 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2593 size_t size) 2594 { 2595 VirtIONet *n = qemu_get_nic_opaque(nc); 2596 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2597 return virtio_net_rsc_receive(nc, buf, size); 2598 } else { 2599 return virtio_net_do_receive(nc, buf, size); 2600 } 2601 } 2602 2603 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2604 2605 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2606 { 2607 VirtIONet *n = qemu_get_nic_opaque(nc); 2608 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2609 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2610 int ret; 2611 2612 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2613 virtio_notify(vdev, q->tx_vq); 2614 2615 g_free(q->async_tx.elem); 2616 q->async_tx.elem = NULL; 2617 2618 virtio_queue_set_notification(q->tx_vq, 1); 2619 ret = virtio_net_flush_tx(q); 2620 if (ret >= n->tx_burst) { 2621 /* 2622 * the flush has been stopped by tx_burst 2623 * we will not receive notification for the 2624 * remaining part, so re-schedule 2625 */ 2626 virtio_queue_set_notification(q->tx_vq, 0); 2627 if (q->tx_bh) { 2628 qemu_bh_schedule(q->tx_bh); 2629 } else { 2630 timer_mod(q->tx_timer, 2631 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2632 } 2633 q->tx_waiting = 1; 2634 } 2635 } 2636 2637 /* TX */ 2638 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2639 { 2640 VirtIONet *n = q->n; 2641 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2642 VirtQueueElement *elem; 2643 int32_t num_packets = 0; 2644 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2645 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2646 return num_packets; 2647 } 2648 2649 if (q->async_tx.elem) { 2650 virtio_queue_set_notification(q->tx_vq, 0); 2651 return num_packets; 2652 } 2653 2654 for
(;;) { 2655 ssize_t ret; 2656 unsigned int out_num; 2657 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2658 struct virtio_net_hdr_mrg_rxbuf mhdr; 2659 2660 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2661 if (!elem) { 2662 break; 2663 } 2664 2665 out_num = elem->out_num; 2666 out_sg = elem->out_sg; 2667 if (out_num < 1) { 2668 virtio_error(vdev, "virtio-net header not in first element"); 2669 virtqueue_detach_element(q->tx_vq, elem, 0); 2670 g_free(elem); 2671 return -EINVAL; 2672 } 2673 2674 if (n->has_vnet_hdr) { 2675 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < 2676 n->guest_hdr_len) { 2677 virtio_error(vdev, "virtio-net header incorrect"); 2678 virtqueue_detach_element(q->tx_vq, elem, 0); 2679 g_free(elem); 2680 return -EINVAL; 2681 } 2682 if (n->needs_vnet_hdr_swap) { 2683 virtio_net_hdr_swap(vdev, (void *) &mhdr); 2684 sg2[0].iov_base = &mhdr; 2685 sg2[0].iov_len = n->guest_hdr_len; 2686 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2687 out_sg, out_num, 2688 n->guest_hdr_len, -1); 2689 if (out_num == VIRTQUEUE_MAX_SIZE) { 2690 goto drop; 2691 } 2692 out_num += 1; 2693 out_sg = sg2; 2694 } 2695 } 2696 /* 2697 * If host wants to see the guest header as is, we can 2698 * pass it on unchanged. Otherwise, copy just the parts 2699 * that host is interested in. 2700 */ 2701 assert(n->host_hdr_len <= n->guest_hdr_len); 2702 if (n->host_hdr_len != n->guest_hdr_len) { 2703 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2704 out_sg, out_num, 2705 0, n->host_hdr_len); 2706 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2707 out_sg, out_num, 2708 n->guest_hdr_len, -1); 2709 out_num = sg_num; 2710 out_sg = sg; 2711 } 2712 2713 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2714 out_sg, out_num, virtio_net_tx_complete); 2715 if (ret == 0) { 2716 virtio_queue_set_notification(q->tx_vq, 0); 2717 q->async_tx.elem = elem; 2718 return -EBUSY; 2719 } 2720 2721 drop: 2722 virtqueue_push(q->tx_vq, elem, 0); 2723 virtio_notify(vdev, q->tx_vq); 2724 g_free(elem); 2725 2726 if (++num_packets >= n->tx_burst) { 2727 break; 2728 } 2729 } 2730 return num_packets; 2731 } 2732 2733 static void virtio_net_tx_timer(void *opaque); 2734 2735 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2736 { 2737 VirtIONet *n = VIRTIO_NET(vdev); 2738 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2739 2740 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2741 virtio_net_drop_tx_queue_data(vdev, vq); 2742 return; 2743 } 2744 2745 /* This happens when device was stopped but VCPU wasn't. 
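 * Setting tx_waiting makes sure the pending flush is picked up again
 * once the VM resumes and the timer or bottom half is restarted.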
*/ 2746 if (!vdev->vm_running) { 2747 q->tx_waiting = 1; 2748 return; 2749 } 2750 2751 if (q->tx_waiting) { 2752 /* We already have queued packets, immediately flush */ 2753 timer_del(q->tx_timer); 2754 virtio_net_tx_timer(q); 2755 } else { 2756 /* re-arm timer to flush it (and more) on next tick */ 2757 timer_mod(q->tx_timer, 2758 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2759 q->tx_waiting = 1; 2760 virtio_queue_set_notification(vq, 0); 2761 } 2762 } 2763 2764 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2765 { 2766 VirtIONet *n = VIRTIO_NET(vdev); 2767 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2768 2769 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2770 virtio_net_drop_tx_queue_data(vdev, vq); 2771 return; 2772 } 2773 2774 if (unlikely(q->tx_waiting)) { 2775 return; 2776 } 2777 q->tx_waiting = 1; 2778 /* This happens when device was stopped but VCPU wasn't. */ 2779 if (!vdev->vm_running) { 2780 return; 2781 } 2782 virtio_queue_set_notification(vq, 0); 2783 qemu_bh_schedule(q->tx_bh); 2784 } 2785 2786 static void virtio_net_tx_timer(void *opaque) 2787 { 2788 VirtIONetQueue *q = opaque; 2789 VirtIONet *n = q->n; 2790 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2791 int ret; 2792 2793 /* This happens when device was stopped but BH wasn't. */ 2794 if (!vdev->vm_running) { 2795 /* Make sure tx waiting is set, so we'll run when restarted. */ 2796 assert(q->tx_waiting); 2797 return; 2798 } 2799 2800 q->tx_waiting = 0; 2801 2802 /* Just in case the driver is not ready anymore */ 2803 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2804 return; 2805 } 2806 2807 ret = virtio_net_flush_tx(q); 2808 if (ret == -EBUSY || ret == -EINVAL) { 2809 return; 2810 } 2811 /* 2812 * If we flush a full burst of packets, assume there are 2813 * more coming and immediately rearm 2814 */ 2815 if (ret >= n->tx_burst) { 2816 q->tx_waiting = 1; 2817 timer_mod(q->tx_timer, 2818 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2819 return; 2820 } 2821 /* 2822 * If less than a full burst, re-enable notification and flush 2823 * anything that may have come in while we weren't looking. If 2824 * we find something, assume the guest is still active and rearm 2825 */ 2826 virtio_queue_set_notification(q->tx_vq, 1); 2827 ret = virtio_net_flush_tx(q); 2828 if (ret > 0) { 2829 virtio_queue_set_notification(q->tx_vq, 0); 2830 q->tx_waiting = 1; 2831 timer_mod(q->tx_timer, 2832 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2833 } 2834 } 2835 2836 static void virtio_net_tx_bh(void *opaque) 2837 { 2838 VirtIONetQueue *q = opaque; 2839 VirtIONet *n = q->n; 2840 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2841 int32_t ret; 2842 2843 /* This happens when device was stopped but BH wasn't. */ 2844 if (!vdev->vm_running) { 2845 /* Make sure tx waiting is set, so we'll run when restarted.
*/ 2846 assert(q->tx_waiting); 2847 return; 2848 } 2849 2850 q->tx_waiting = 0; 2851 2852 /* Just in case the driver is not ready anymore */ 2853 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2854 return; 2855 } 2856 2857 ret = virtio_net_flush_tx(q); 2858 if (ret == -EBUSY || ret == -EINVAL) { 2859 return; /* Notification re-enable handled by tx_complete or device 2860 * broken */ 2861 } 2862 2863 /* If we flush a full burst of packets, assume there are 2864 * more coming and immediately reschedule */ 2865 if (ret >= n->tx_burst) { 2866 qemu_bh_schedule(q->tx_bh); 2867 q->tx_waiting = 1; 2868 return; 2869 } 2870 2871 /* If less than a full burst, re-enable notification and flush 2872 * anything that may have come in while we weren't looking. If 2873 * we find something, assume the guest is still active and reschedule */ 2874 virtio_queue_set_notification(q->tx_vq, 1); 2875 ret = virtio_net_flush_tx(q); 2876 if (ret == -EINVAL) { 2877 return; 2878 } else if (ret > 0) { 2879 virtio_queue_set_notification(q->tx_vq, 0); 2880 qemu_bh_schedule(q->tx_bh); 2881 q->tx_waiting = 1; 2882 } 2883 } 2884 2885 static void virtio_net_add_queue(VirtIONet *n, int index) 2886 { 2887 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2888 2889 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2890 virtio_net_handle_rx); 2891 2892 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2893 n->vqs[index].tx_vq = 2894 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2895 virtio_net_handle_tx_timer); 2896 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2897 virtio_net_tx_timer, 2898 &n->vqs[index]); 2899 } else { 2900 n->vqs[index].tx_vq = 2901 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2902 virtio_net_handle_tx_bh); 2903 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); 2904 } 2905 2906 n->vqs[index].tx_waiting = 0; 2907 n->vqs[index].n = n; 2908 } 2909 2910 static void virtio_net_del_queue(VirtIONet *n, int index) 2911 { 2912 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2913 VirtIONetQueue *q = &n->vqs[index]; 2914 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2915 2916 qemu_purge_queued_packets(nc); 2917 2918 virtio_del_queue(vdev, index * 2); 2919 if (q->tx_timer) { 2920 timer_free(q->tx_timer); 2921 q->tx_timer = NULL; 2922 } else { 2923 qemu_bh_delete(q->tx_bh); 2924 q->tx_bh = NULL; 2925 } 2926 q->tx_waiting = 0; 2927 virtio_del_queue(vdev, index * 2 + 1); 2928 } 2929 2930 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2931 { 2932 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2933 int old_num_queues = virtio_get_num_queues(vdev); 2934 int new_num_queues = new_max_queue_pairs * 2 + 1; 2935 int i; 2936 2937 assert(old_num_queues >= 3); 2938 assert(old_num_queues % 2 == 1); 2939 2940 if (old_num_queues == new_num_queues) { 2941 return; 2942 } 2943 2944 /* 2945 * We always need to remove and add ctrl vq if 2946 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2947 * and then we only enter one of the following two loops.
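 *
 * Worked example (illustration only): growing from 2 to 4 queue pairs,
 * old_num_queues = 5 and new_num_queues = 9. The ctrl vq at index 4 is
 * deleted, the second loop adds the data queues for pairs 2 and 3
 * (vq indices 4..7), and the ctrl vq is re-added last at index 8.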
2948 */ 2949 virtio_del_queue(vdev, old_num_queues - 1); 2950 2951 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2952 /* new_num_queues < old_num_queues */ 2953 virtio_net_del_queue(n, i / 2); 2954 } 2955 2956 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 2957 /* new_num_queues > old_num_queues */ 2958 virtio_net_add_queue(n, i / 2); 2959 } 2960 2961 /* add ctrl_vq last */ 2962 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 2963 } 2964 2965 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 2966 { 2967 int max = multiqueue ? n->max_queue_pairs : 1; 2968 2969 n->multiqueue = multiqueue; 2970 virtio_net_change_num_queue_pairs(n, max); 2971 2972 virtio_net_set_queue_pairs(n); 2973 } 2974 2975 static int virtio_net_post_load_device(void *opaque, int version_id) 2976 { 2977 VirtIONet *n = opaque; 2978 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2979 int i, link_down; 2980 2981 trace_virtio_net_post_load_device(); 2982 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 2983 virtio_vdev_has_feature(vdev, 2984 VIRTIO_F_VERSION_1), 2985 virtio_vdev_has_feature(vdev, 2986 VIRTIO_NET_F_HASH_REPORT)); 2987 2988 /* MAC_TABLE_ENTRIES may be different from the saved image */ 2989 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 2990 n->mac_table.in_use = 0; 2991 } 2992 2993 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 2994 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 2995 } 2996 2997 /* 2998 * curr_guest_offloads will be later overwritten by the 2999 * virtio_set_features_nocheck call done from the virtio_load. 3000 * Here we make sure it is preserved and restored accordingly 3001 * in the virtio_net_post_load_virtio callback. 3002 */ 3003 n->saved_guest_offloads = n->curr_guest_offloads; 3004 3005 virtio_net_set_queue_pairs(n); 3006 3007 /* Find the first multicast entry in the saved MAC filter */ 3008 for (i = 0; i < n->mac_table.in_use; i++) { 3009 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 3010 break; 3011 } 3012 } 3013 n->mac_table.first_multi = i; 3014 3015 /* nc.link_down can't be migrated, so infer link_down according 3016 * to link status bit in n->status */ 3017 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 3018 for (i = 0; i < n->max_queue_pairs; i++) { 3019 qemu_get_subqueue(n->nic, i)->link_down = link_down; 3020 } 3021 3022 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3023 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3024 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3025 QEMU_CLOCK_VIRTUAL, 3026 virtio_net_announce_timer, n); 3027 if (n->announce_timer.round) { 3028 timer_mod(n->announce_timer.tm, 3029 qemu_clock_get_ms(n->announce_timer.type)); 3030 } else { 3031 qemu_announce_timer_del(&n->announce_timer, false); 3032 } 3033 } 3034 3035 if (n->rss_data.enabled) { 3036 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 3037 if (!n->rss_data.populate_hash) { 3038 if (!virtio_net_attach_epbf_rss(n)) { 3039 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 3040 warn_report("Can't post-load eBPF RSS for vhost"); 3041 } else { 3042 warn_report("Can't post-load eBPF RSS - " 3043 "fallback to software RSS"); 3044 n->rss_data.enabled_software_rss = true; 3045 } 3046 } 3047 } 3048 3049 trace_virtio_net_rss_enable(n->rss_data.hash_types, 3050 n->rss_data.indirections_len, 3051 sizeof(n->rss_data.key)); 3052 } else { 3053 trace_virtio_net_rss_disable(); 3054 } 3055 return 0; 3056 } 3057 3058 static int 
virtio_net_post_load_virtio(VirtIODevice *vdev) 3059 { 3060 VirtIONet *n = VIRTIO_NET(vdev); 3061 /* 3062 * The actual needed state is now in saved_guest_offloads, 3063 * see virtio_net_post_load_device for detail. 3064 * Restore it back and apply the desired offloads. 3065 */ 3066 n->curr_guest_offloads = n->saved_guest_offloads; 3067 if (peer_has_vnet_hdr(n)) { 3068 virtio_net_apply_guest_offloads(n); 3069 } 3070 3071 return 0; 3072 } 3073 3074 /* tx_waiting field of a VirtIONetQueue */ 3075 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3076 .name = "virtio-net-queue-tx_waiting", 3077 .fields = (VMStateField[]) { 3078 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3079 VMSTATE_END_OF_LIST() 3080 }, 3081 }; 3082 3083 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3084 { 3085 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3086 } 3087 3088 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3089 { 3090 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3091 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3092 } 3093 3094 static bool mac_table_fits(void *opaque, int version_id) 3095 { 3096 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3097 } 3098 3099 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3100 { 3101 return !mac_table_fits(opaque, version_id); 3102 } 3103 3104 /* This temporary type is shared by all the WITH_TMP methods 3105 * although only some fields are used by each. 3106 */ 3107 struct VirtIONetMigTmp { 3108 VirtIONet *parent; 3109 VirtIONetQueue *vqs_1; 3110 uint16_t curr_queue_pairs_1; 3111 uint8_t has_ufo; 3112 uint32_t has_vnet_hdr; 3113 }; 3114 3115 /* The 2nd and subsequent tx_waiting flags are loaded later than 3116 * the 1st entry in the queue_pairs and only if there's more than one 3117 * entry. We use the tmp mechanism to calculate a temporary 3118 * pointer and count and also validate the count. 
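 *
 * For example (illustration only): with curr_queue_pairs == 3, the
 * pre_save hook points vqs_1 at vqs[1] and sets curr_queue_pairs_1 to 2,
 * so only the tx_waiting flags of queues 1 and 2 travel through this
 * subsection.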
3119 */ 3120 3121 static int virtio_net_tx_waiting_pre_save(void *opaque) 3122 { 3123 struct VirtIONetMigTmp *tmp = opaque; 3124 3125 tmp->vqs_1 = tmp->parent->vqs + 1; 3126 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3127 if (tmp->parent->curr_queue_pairs == 0) { 3128 tmp->curr_queue_pairs_1 = 0; 3129 } 3130 3131 return 0; 3132 } 3133 3134 static int virtio_net_tx_waiting_pre_load(void *opaque) 3135 { 3136 struct VirtIONetMigTmp *tmp = opaque; 3137 3138 /* Reuse the pointer setup from save */ 3139 virtio_net_tx_waiting_pre_save(opaque); 3140 3141 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3142 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3143 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3144 3145 return -EINVAL; 3146 } 3147 3148 return 0; /* all good */ 3149 } 3150 3151 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3152 .name = "virtio-net-tx_waiting", 3153 .pre_load = virtio_net_tx_waiting_pre_load, 3154 .pre_save = virtio_net_tx_waiting_pre_save, 3155 .fields = (VMStateField[]) { 3156 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3157 curr_queue_pairs_1, 3158 vmstate_virtio_net_queue_tx_waiting, 3159 struct VirtIONetQueue), 3160 VMSTATE_END_OF_LIST() 3161 }, 3162 }; 3163 3164 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3165 * flag set we need to check that we have it 3166 */ 3167 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3168 { 3169 struct VirtIONetMigTmp *tmp = opaque; 3170 3171 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3172 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3173 return -EINVAL; 3174 } 3175 3176 return 0; 3177 } 3178 3179 static int virtio_net_ufo_pre_save(void *opaque) 3180 { 3181 struct VirtIONetMigTmp *tmp = opaque; 3182 3183 tmp->has_ufo = tmp->parent->has_ufo; 3184 3185 return 0; 3186 } 3187 3188 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3189 .name = "virtio-net-ufo", 3190 .post_load = virtio_net_ufo_post_load, 3191 .pre_save = virtio_net_ufo_pre_save, 3192 .fields = (VMStateField[]) { 3193 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3194 VMSTATE_END_OF_LIST() 3195 }, 3196 }; 3197 3198 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3199 * flag set we need to check that we have it 3200 */ 3201 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3202 { 3203 struct VirtIONetMigTmp *tmp = opaque; 3204 3205 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3206 error_report("virtio-net: saved image requires vnet_hdr=on"); 3207 return -EINVAL; 3208 } 3209 3210 return 0; 3211 } 3212 3213 static int virtio_net_vnet_pre_save(void *opaque) 3214 { 3215 struct VirtIONetMigTmp *tmp = opaque; 3216 3217 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3218 3219 return 0; 3220 } 3221 3222 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3223 .name = "virtio-net-vnet", 3224 .post_load = virtio_net_vnet_post_load, 3225 .pre_save = virtio_net_vnet_pre_save, 3226 .fields = (VMStateField[]) { 3227 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3228 VMSTATE_END_OF_LIST() 3229 }, 3230 }; 3231 3232 static bool virtio_net_rss_needed(void *opaque) 3233 { 3234 return VIRTIO_NET(opaque)->rss_data.enabled; 3235 } 3236 3237 static const VMStateDescription vmstate_virtio_net_rss = { 3238 .name = "virtio-net-device/rss", 3239 .version_id = 1, 3240 .minimum_version_id = 1, 3241 .needed = 
virtio_net_rss_needed, 3242 .fields = (VMStateField[]) { 3243 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3244 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3245 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3246 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3247 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3248 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3249 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3250 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3251 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3252 rss_data.indirections_len, 0, 3253 vmstate_info_uint16, uint16_t), 3254 VMSTATE_END_OF_LIST() 3255 }, 3256 }; 3257 3258 static const VMStateDescription vmstate_virtio_net_device = { 3259 .name = "virtio-net-device", 3260 .version_id = VIRTIO_NET_VM_VERSION, 3261 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3262 .post_load = virtio_net_post_load_device, 3263 .fields = (VMStateField[]) { 3264 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3265 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3266 vmstate_virtio_net_queue_tx_waiting, 3267 VirtIONetQueue), 3268 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3269 VMSTATE_UINT16(status, VirtIONet), 3270 VMSTATE_UINT8(promisc, VirtIONet), 3271 VMSTATE_UINT8(allmulti, VirtIONet), 3272 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3273 3274 /* Guarded pair: If it fits we load it, else we throw it away 3275 * - can happen if source has a larger MAC table; post-load 3276 * sets flags in this case. 3277 */ 3278 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3279 0, mac_table_fits, mac_table.in_use, 3280 ETH_ALEN), 3281 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3282 mac_table.in_use, ETH_ALEN), 3283 3284 /* Note: This is an array of uint32's that's always been saved as a 3285 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3286 * but based on the uint.
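 *
 * For reference, a sketch of how the bitmap is consulted by the
 * receive filter: VLAN id 'vid' corresponds to bit
 * (vlans[vid >> 5] >> (vid & 0x1f)) & 1 within this MAX_VLAN >> 3
 * byte buffer.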
3287 */ 3288 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3289 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3290 vmstate_virtio_net_has_vnet), 3291 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3292 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3293 VMSTATE_UINT8(alluni, VirtIONet), 3294 VMSTATE_UINT8(nomulti, VirtIONet), 3295 VMSTATE_UINT8(nouni, VirtIONet), 3296 VMSTATE_UINT8(nobcast, VirtIONet), 3297 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3298 vmstate_virtio_net_has_ufo), 3299 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3300 vmstate_info_uint16_equal, uint16_t), 3301 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3302 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3303 vmstate_virtio_net_tx_waiting), 3304 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3305 has_ctrl_guest_offloads), 3306 VMSTATE_END_OF_LIST() 3307 }, 3308 .subsections = (const VMStateDescription * []) { 3309 &vmstate_virtio_net_rss, 3310 NULL 3311 } 3312 }; 3313 3314 static NetClientInfo net_virtio_info = { 3315 .type = NET_CLIENT_DRIVER_NIC, 3316 .size = sizeof(NICState), 3317 .can_receive = virtio_net_can_receive, 3318 .receive = virtio_net_receive, 3319 .link_status_changed = virtio_net_set_link_status, 3320 .query_rx_filter = virtio_net_query_rxfilter, 3321 .announce = virtio_net_announce, 3322 }; 3323 3324 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3325 { 3326 VirtIONet *n = VIRTIO_NET(vdev); 3327 NetClientState *nc; 3328 assert(n->vhost_started); 3329 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3330 /* Must guard against invalid features and bogus queue index 3331 * from being set by a malicious guest, or coming in through a 3332 * buggy migration stream. 3333 */ 3334 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3335 qemu_log_mask(LOG_GUEST_ERROR, 3336 "%s: bogus vq index ignored\n", __func__); 3337 return false; 3338 } 3339 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3340 } else { 3341 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3342 } 3343 /* 3344 * Check for the configuration interrupt. VIRTIO_CONFIG_IRQ_IDX (-1) 3345 * is used as the macro for the configuration interrupt's index. If 3346 * the driver does not support it, the function returns false. 3347 */ 3348 3349 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3350 return vhost_net_config_pending(get_vhost_net(nc->peer)); 3351 } 3352 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3353 } 3354 3355 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3356 bool mask) 3357 { 3358 VirtIONet *n = VIRTIO_NET(vdev); 3359 NetClientState *nc; 3360 assert(n->vhost_started); 3361 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3362 /* Must guard against invalid features and bogus queue index 3363 * from being set by a malicious guest, or coming in through a 3364 * buggy migration stream.
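 * Without VIRTIO_NET_F_MQ there is a single queue pair (vqs 0 and 1),
 * so index 2 can only legitimately refer to the control virtqueue
 * here as well.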
3365 */ 3366 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3367 qemu_log_mask(LOG_GUEST_ERROR, 3368 "%s: bogus vq index ignored\n", __func__); 3369 return; 3370 } 3371 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3372 } else { 3373 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3374 } 3375 /* 3376 * Check for the configuration interrupt. VIRTIO_CONFIG_IRQ_IDX (-1) 3377 * is used as the macro for the configuration interrupt's index. If 3378 * the driver does not support it, the function simply returns. 3379 */ 3380 3381 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3382 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); 3383 return; 3384 } 3385 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); 3386 } 3387 3388 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3389 { 3390 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3391 3392 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3393 } 3394 3395 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3396 const char *type) 3397 { 3398 /* 3399 * The name can be NULL; in that case the netclient name will be type.x. 3400 */ 3401 assert(type != NULL); 3402 3403 g_free(n->netclient_name); 3404 g_free(n->netclient_type); 3405 n->netclient_name = g_strdup(name); 3406 n->netclient_type = g_strdup(type); 3407 } 3408 3409 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3410 { 3411 HotplugHandler *hotplug_ctrl; 3412 PCIDevice *pci_dev; 3413 Error *err = NULL; 3414 3415 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3416 if (hotplug_ctrl) { 3417 pci_dev = PCI_DEVICE(dev); 3418 pci_dev->partially_hotplugged = true; 3419 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3420 if (err) { 3421 error_report_err(err); 3422 return false; 3423 } 3424 } else { 3425 return false; 3426 } 3427 return true; 3428 } 3429 3430 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3431 Error **errp) 3432 { 3433 Error *err = NULL; 3434 HotplugHandler *hotplug_ctrl; 3435 PCIDevice *pdev = PCI_DEVICE(dev); 3436 BusState *primary_bus; 3437 3438 if (!pdev->partially_hotplugged) { 3439 return true; 3440 } 3441 primary_bus = dev->parent_bus; 3442 if (!primary_bus) { 3443 error_setg(errp, "virtio_net: couldn't find primary bus"); 3444 return false; 3445 } 3446 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3447 qatomic_set(&n->failover_primary_hidden, false); 3448 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3449 if (hotplug_ctrl) { 3450 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3451 if (err) { 3452 goto out; 3453 } 3454 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3455 } 3456 pdev->partially_hotplugged = false; 3457 3458 out: 3459 error_propagate(errp, err); 3460 return !err; 3461 } 3462 3463 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3464 { 3465 bool should_be_hidden; 3466 Error *err = NULL; 3467 DeviceState *dev = failover_find_primary_device(n); 3468 3469 if (!dev) { 3470 return; 3471 } 3472 3473 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3474 3475 if (migration_in_setup(s) && !should_be_hidden) { 3476 if (failover_unplug_primary(n, dev)) { 3477 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3478 qapi_event_send_unplug_primary(dev->id); 3479 qatomic_set(&n->failover_primary_hidden, true); 3480 } else { 3481 warn_report("couldn't unplug primary device"); 3482 } 3483 } else if (migration_has_failed(s)) { 3484 /* We already unplugged the device, let's plug it back */ 3485 if
(!failover_replug_primary(n, dev, &err)) { 3486 if (err) { 3487 error_report_err(err); 3488 } 3489 } 3490 } 3491 } 3492 3493 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3494 { 3495 MigrationState *s = data; 3496 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3497 virtio_net_handle_migration_primary(n, s); 3498 } 3499 3500 static bool failover_hide_primary_device(DeviceListener *listener, 3501 const QDict *device_opts, 3502 bool from_json, 3503 Error **errp) 3504 { 3505 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3506 const char *standby_id; 3507 3508 if (!device_opts) { 3509 return false; 3510 } 3511 3512 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3513 return false; 3514 } 3515 3516 if (!qdict_haskey(device_opts, "id")) { 3517 error_setg(errp, "Device with failover_pair_id needs to have id"); 3518 return false; 3519 } 3520 3521 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3522 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3523 return false; 3524 } 3525 3526 /* 3527 * The hide helper can be called several times for a given device. 3528 * Check there is only one primary for a virtio-net device but 3529 * don't duplicate the qdict several times if it's called for the same 3530 * device. 3531 */ 3532 if (n->primary_opts) { 3533 const char *old, *new; 3534 /* devices with failover_pair_id always have an id */ 3535 old = qdict_get_str(n->primary_opts, "id"); 3536 new = qdict_get_str(device_opts, "id"); 3537 if (strcmp(old, new) != 0) { 3538 error_setg(errp, "Cannot attach more than one primary device to " 3539 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3540 return false; 3541 } 3542 } else { 3543 n->primary_opts = qdict_clone_shallow(device_opts); 3544 n->primary_opts_from_json = from_json; 3545 } 3546 3547 /* failover_primary_hidden is set during feature negotiation */ 3548 return qatomic_read(&n->failover_primary_hidden); 3549 } 3550 3551 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3552 { 3553 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3554 VirtIONet *n = VIRTIO_NET(dev); 3555 NetClientState *nc; 3556 int i; 3557 3558 if (n->net_conf.mtu) { 3559 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3560 } 3561 3562 if (n->net_conf.duplex_str) { 3563 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3564 n->net_conf.duplex = DUPLEX_HALF; 3565 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3566 n->net_conf.duplex = DUPLEX_FULL; 3567 } else { 3568 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3569 return; 3570 } 3571 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3572 } else { 3573 n->net_conf.duplex = DUPLEX_UNKNOWN; 3574 } 3575 3576 if (n->net_conf.speed < SPEED_UNKNOWN) { 3577 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3578 return; 3579 } 3580 if (n->net_conf.speed >= 0) { 3581 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3582 } 3583 3584 if (n->failover) { 3585 n->primary_listener.hide_device = failover_hide_primary_device; 3586 qatomic_set(&n->failover_primary_hidden, true); 3587 device_listener_register(&n->primary_listener); 3588 n->migration_state.notify = virtio_net_migration_state_notifier; 3589 add_migration_state_change_notifier(&n->migration_state); 3590 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3591 } 3592 3593 virtio_net_set_config_size(n, n->host_features); 3594 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3595 3596 /* 3597 * We set a lower limit on RX queue 
size to what it always was. 3598 * Guests that want a smaller ring can always resize it without 3599 * help from us (using virtio 1 and up). 3600 */ 3601 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3602 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3603 !is_power_of_2(n->net_conf.rx_queue_size)) { 3604 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3605 "must be a power of 2 between %d and %d.", 3606 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3607 VIRTQUEUE_MAX_SIZE); 3608 virtio_cleanup(vdev); 3609 return; 3610 } 3611 3612 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3613 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || 3614 !is_power_of_2(n->net_conf.tx_queue_size)) { 3615 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3616 "must be a power of 2 between %d and %d", 3617 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3618 VIRTQUEUE_MAX_SIZE); 3619 virtio_cleanup(vdev); 3620 return; 3621 } 3622 3623 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3624 3625 /* 3626 * Figure out the datapath queue pairs since the backend could 3627 * provide control queue via peers as well. 3628 */ 3629 if (n->nic_conf.peers.queues) { 3630 for (i = 0; i < n->max_ncs; i++) { 3631 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3632 ++n->max_queue_pairs; 3633 } 3634 } 3635 } 3636 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3637 3638 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3639 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3640 "must be a positive integer less than %d.", 3641 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3642 virtio_cleanup(vdev); 3643 return; 3644 } 3645 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3646 n->curr_queue_pairs = 1; 3647 n->tx_timeout = n->net_conf.txtimer; 3648 3649 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3650 && strcmp(n->net_conf.tx, "bh")) { 3651 warn_report("virtio-net: " 3652 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3653 n->net_conf.tx); 3654 error_printf("Defaulting to \"bh\""); 3655 } 3656 3657 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3658 n->net_conf.tx_queue_size); 3659 3660 for (i = 0; i < n->max_queue_pairs; i++) { 3661 virtio_net_add_queue(n, i); 3662 } 3663 3664 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3665 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3666 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3667 n->status = VIRTIO_NET_S_LINK_UP; 3668 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3669 QEMU_CLOCK_VIRTUAL, 3670 virtio_net_announce_timer, n); 3671 n->announce_timer.round = 0; 3672 3673 if (n->netclient_type) { 3674 /* 3675 * Happens when virtio_net_set_netclient_name has been called.
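 * In that case the NIC keeps the caller-chosen netclient type and
 * name instead of the defaults derived from the QOM type name and
 * dev->id below.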
3676 */ 3677 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3678 n->netclient_type, n->netclient_name, n); 3679 } else { 3680 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3681 object_get_typename(OBJECT(dev)), dev->id, n); 3682 } 3683 3684 for (i = 0; i < n->max_queue_pairs; i++) { 3685 n->nic->ncs[i].do_not_pad = true; 3686 } 3687 3688 peer_test_vnet_hdr(n); 3689 if (peer_has_vnet_hdr(n)) { 3690 for (i = 0; i < n->max_queue_pairs; i++) { 3691 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3692 } 3693 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3694 } else { 3695 n->host_hdr_len = 0; 3696 } 3697 3698 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3699 3700 n->vqs[0].tx_waiting = 0; 3701 n->tx_burst = n->net_conf.txburst; 3702 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3703 n->promisc = 1; /* for compatibility */ 3704 3705 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3706 3707 n->vlans = g_malloc0(MAX_VLAN >> 3); 3708 3709 nc = qemu_get_queue(n->nic); 3710 nc->rxfilter_notify_enabled = 1; 3711 3712 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3713 struct virtio_net_config netcfg = {}; 3714 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3715 vhost_net_set_config(get_vhost_net(nc->peer), 3716 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); 3717 } 3718 QTAILQ_INIT(&n->rsc_chains); 3719 n->qdev = dev; 3720 3721 net_rx_pkt_init(&n->rx_pkt, false); 3722 3723 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3724 virtio_net_load_ebpf(n); 3725 } 3726 } 3727 3728 static void virtio_net_device_unrealize(DeviceState *dev) 3729 { 3730 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3731 VirtIONet *n = VIRTIO_NET(dev); 3732 int i, max_queue_pairs; 3733 3734 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3735 virtio_net_unload_ebpf(n); 3736 } 3737 3738 /* This will stop vhost backend if appropriate. */ 3739 virtio_net_set_status(vdev, 0); 3740 3741 g_free(n->netclient_name); 3742 n->netclient_name = NULL; 3743 g_free(n->netclient_type); 3744 n->netclient_type = NULL; 3745 3746 g_free(n->mac_table.macs); 3747 g_free(n->vlans); 3748 3749 if (n->failover) { 3750 qobject_unref(n->primary_opts); 3751 device_listener_unregister(&n->primary_listener); 3752 remove_migration_state_change_notifier(&n->migration_state); 3753 } else { 3754 assert(n->primary_opts == NULL); 3755 } 3756 3757 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3758 for (i = 0; i < max_queue_pairs; i++) { 3759 virtio_net_del_queue(n, i); 3760 } 3761 /* also delete the control vq */ 3762 virtio_del_queue(vdev, max_queue_pairs * 2); 3763 qemu_announce_timer_del(&n->announce_timer, false); 3764 g_free(n->vqs); 3765 qemu_del_nic(n->nic); 3766 virtio_net_rsc_cleanup(n); 3767 g_free(n->rss_data.indirections_table); 3768 net_rx_pkt_uninit(n->rx_pkt); 3769 virtio_cleanup(vdev); 3770 } 3771 3772 static void virtio_net_instance_init(Object *obj) 3773 { 3774 VirtIONet *n = VIRTIO_NET(obj); 3775 3776 /* 3777 * The default config_size is sizeof(struct virtio_net_config). 3778 * Can be overridden with virtio_net_set_config_size.
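 * (virtio_net_device_realize recomputes it from host_features via
 * virtio_net_set_config_size, so this is only the pre-realize value.)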
3779 */ 3780 n->config_size = sizeof(struct virtio_net_config); 3781 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3782 "bootindex", "/ethernet-phy@0", 3783 DEVICE(n)); 3784 3785 ebpf_rss_init(&n->ebpf_rss); 3786 } 3787 3788 static int virtio_net_pre_save(void *opaque) 3789 { 3790 VirtIONet *n = opaque; 3791 3792 /* At this point, backend must be stopped, otherwise 3793 * it might keep writing to memory. */ 3794 assert(!n->vhost_started); 3795 3796 return 0; 3797 } 3798 3799 static bool primary_unplug_pending(void *opaque) 3800 { 3801 DeviceState *dev = opaque; 3802 DeviceState *primary; 3803 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3804 VirtIONet *n = VIRTIO_NET(vdev); 3805 3806 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3807 return false; 3808 } 3809 primary = failover_find_primary_device(n); 3810 return primary ? primary->pending_deleted_event : false; 3811 } 3812 3813 static bool dev_unplug_pending(void *opaque) 3814 { 3815 DeviceState *dev = opaque; 3816 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3817 3818 return vdc->primary_unplug_pending(dev); 3819 } 3820 3821 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3822 { 3823 VirtIONet *n = VIRTIO_NET(vdev); 3824 NetClientState *nc = qemu_get_queue(n->nic); 3825 struct vhost_net *net = get_vhost_net(nc->peer); 3826 return &net->dev; 3827 } 3828 3829 static const VMStateDescription vmstate_virtio_net = { 3830 .name = "virtio-net", 3831 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3832 .version_id = VIRTIO_NET_VM_VERSION, 3833 .fields = (VMStateField[]) { 3834 VMSTATE_VIRTIO_DEVICE, 3835 VMSTATE_END_OF_LIST() 3836 }, 3837 .pre_save = virtio_net_pre_save, 3838 .dev_unplug_pending = dev_unplug_pending, 3839 }; 3840 3841 static Property virtio_net_properties[] = { 3842 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3843 VIRTIO_NET_F_CSUM, true), 3844 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3845 VIRTIO_NET_F_GUEST_CSUM, true), 3846 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3847 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3848 VIRTIO_NET_F_GUEST_TSO4, true), 3849 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3850 VIRTIO_NET_F_GUEST_TSO6, true), 3851 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3852 VIRTIO_NET_F_GUEST_ECN, true), 3853 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3854 VIRTIO_NET_F_GUEST_UFO, true), 3855 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3856 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3857 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3858 VIRTIO_NET_F_HOST_TSO4, true), 3859 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3860 VIRTIO_NET_F_HOST_TSO6, true), 3861 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3862 VIRTIO_NET_F_HOST_ECN, true), 3863 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3864 VIRTIO_NET_F_HOST_UFO, true), 3865 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3866 VIRTIO_NET_F_MRG_RXBUF, true), 3867 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3868 VIRTIO_NET_F_STATUS, true), 3869 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3870 VIRTIO_NET_F_CTRL_VQ, true), 3871 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3872 VIRTIO_NET_F_CTRL_RX, true), 3873 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3874 VIRTIO_NET_F_CTRL_VLAN, true), 3875 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3876 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3877 
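/*
 * Note: each DEFINE_PROP_BIT64 entry in this table exposes one bit of
 * host_features as an on/off device property, e.g. (illustrative
 * command line only) -device virtio-net-pci,mq=on,rss=on.
 */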
DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3878 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3879 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3880 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3881 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3882 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3883 VIRTIO_NET_F_RSS, false), 3884 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3885 VIRTIO_NET_F_HASH_REPORT, false), 3886 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3887 VIRTIO_NET_F_RSC_EXT, false), 3888 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3889 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3890 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3891 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3892 TX_TIMER_INTERVAL), 3893 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3894 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3895 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3896 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3897 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3898 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3899 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3900 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3901 true), 3902 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3903 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3904 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3905 DEFINE_PROP_END_OF_LIST(), 3906 }; 3907 3908 static void virtio_net_class_init(ObjectClass *klass, void *data) 3909 { 3910 DeviceClass *dc = DEVICE_CLASS(klass); 3911 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3912 3913 device_class_set_props(dc, virtio_net_properties); 3914 dc->vmsd = &vmstate_virtio_net; 3915 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3916 vdc->realize = virtio_net_device_realize; 3917 vdc->unrealize = virtio_net_device_unrealize; 3918 vdc->get_config = virtio_net_get_config; 3919 vdc->set_config = virtio_net_set_config; 3920 vdc->get_features = virtio_net_get_features; 3921 vdc->set_features = virtio_net_set_features; 3922 vdc->bad_features = virtio_net_bad_features; 3923 vdc->reset = virtio_net_reset; 3924 vdc->queue_reset = virtio_net_queue_reset; 3925 vdc->queue_enable = virtio_net_queue_enable; 3926 vdc->set_status = virtio_net_set_status; 3927 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3928 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 3929 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 3930 vdc->post_load = virtio_net_post_load_virtio; 3931 vdc->vmsd = &vmstate_virtio_net_device; 3932 vdc->primary_unplug_pending = primary_unplug_pending; 3933 vdc->get_vhost = virtio_net_get_vhost; 3934 } 3935 3936 static const TypeInfo virtio_net_info = { 3937 .name = TYPE_VIRTIO_NET, 3938 .parent = TYPE_VIRTIO_DEVICE, 3939 .instance_size = sizeof(VirtIONet), 3940 .instance_init = virtio_net_instance_init, 3941 .class_init = virtio_net_class_init, 3942 }; 3943 3944 static void virtio_register_types(void) 3945 { 3946 type_register_static(&virtio_net_info); 3947 } 3948 3949 type_init(virtio_register_types) 3950