1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "hw/pci/pci_device.h" 46 #include "net_rx_pkt.h" 47 #include "hw/virtio/vhost.h" 48 #include "sysemu/qtest.h" 49 50 #define VIRTIO_NET_VM_VERSION 11 51 52 /* previously fixed value */ 53 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 54 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 55 56 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 57 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 58 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 59 60 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 61 62 #define VIRTIO_NET_TCP_FLAG 0x3F 63 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 64 65 /* IPv4 max payload, 16 bits in the header */ 66 #define 
VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 67 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 68 69 /* header length value in ip header without option */ 70 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 71 72 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 73 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 74 75 /* Purge coalesced packets timer interval, This value affects the performance 76 a lot, and should be tuned carefully, '300000'(300us) is the recommended 77 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 78 tso/gso/gro 'off'. */ 79 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 80 81 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 82 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 83 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 88 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 89 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 90 91 static const VirtIOFeature feature_sizes[] = { 92 {.flags = 1ULL << VIRTIO_NET_F_MAC, 93 .end = endof(struct virtio_net_config, mac)}, 94 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 95 .end = endof(struct virtio_net_config, status)}, 96 {.flags = 1ULL << VIRTIO_NET_F_MQ, 97 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MTU, 99 .end = endof(struct virtio_net_config, mtu)}, 100 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 101 .end = endof(struct virtio_net_config, duplex)}, 102 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 103 .end = endof(struct virtio_net_config, supported_hash_types)}, 104 {} 105 }; 106 107 static const VirtIOConfigSizeParams cfg_size_params = { 108 .min_size = endof(struct virtio_net_config, mac), 109 .max_size = sizeof(struct virtio_net_config), 110 .feature_sizes = feature_sizes 111 }; 112 113 static VirtIONetQueue 
*virtio_net_get_subqueue(NetClientState *nc) 114 { 115 VirtIONet *n = qemu_get_nic_opaque(nc); 116 117 return &n->vqs[nc->queue_index]; 118 } 119 120 static int vq2q(int queue_index) 121 { 122 return queue_index / 2; 123 } 124 125 static void flush_or_purge_queued_packets(NetClientState *nc) 126 { 127 if (!nc->peer) { 128 return; 129 } 130 131 qemu_flush_or_purge_queued_packets(nc->peer, true); 132 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 133 } 134 135 /* TODO 136 * - we could suppress RX interrupt if we were so inclined. 137 */ 138 139 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 140 { 141 VirtIONet *n = VIRTIO_NET(vdev); 142 struct virtio_net_config netcfg; 143 NetClientState *nc = qemu_get_queue(n->nic); 144 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 145 146 int ret = 0; 147 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 148 virtio_stw_p(vdev, &netcfg.status, n->status); 149 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 150 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 151 memcpy(netcfg.mac, n->mac, ETH_ALEN); 152 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 153 netcfg.duplex = n->net_conf.duplex; 154 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 155 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 156 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 157 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 158 virtio_stl_p(vdev, &netcfg.supported_hash_types, 159 VIRTIO_NET_RSS_SUPPORTED_HASHES); 160 memcpy(config, &netcfg, n->config_size); 161 162 /* 163 * Is this VDPA? No peer means not VDPA: there's no way to 164 * disconnect/reconnect a VDPA peer. 165 */ 166 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 167 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 168 n->config_size); 169 if (ret == -1) { 170 return; 171 } 172 173 /* 174 * Some NIC/kernel combinations present 0 as the mac address. 
As that 175 * is not a legal address, try to proceed with the address from the 176 * QEMU command line in the hope that the address has been configured 177 * correctly elsewhere - just not reported by the device. 178 */ 179 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 180 info_report("Zero hardware mac address detected. Ignoring."); 181 memcpy(netcfg.mac, n->mac, ETH_ALEN); 182 } 183 184 netcfg.status |= virtio_tswap16(vdev, 185 n->status & VIRTIO_NET_S_ANNOUNCE); 186 memcpy(config, &netcfg, n->config_size); 187 } 188 } 189 190 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 191 { 192 VirtIONet *n = VIRTIO_NET(vdev); 193 struct virtio_net_config netcfg = {}; 194 NetClientState *nc = qemu_get_queue(n->nic); 195 196 memcpy(&netcfg, config, n->config_size); 197 198 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 199 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 200 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 201 memcpy(n->mac, netcfg.mac, ETH_ALEN); 202 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 203 } 204 205 /* 206 * Is this VDPA? No peer means not VDPA: there's no way to 207 * disconnect/reconnect a VDPA peer. 
208 */ 209 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 210 vhost_net_set_config(get_vhost_net(nc->peer), 211 (uint8_t *)&netcfg, 0, n->config_size, 212 VHOST_SET_CONFIG_TYPE_FRONTEND); 213 } 214 } 215 216 static bool virtio_net_started(VirtIONet *n, uint8_t status) 217 { 218 VirtIODevice *vdev = VIRTIO_DEVICE(n); 219 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 220 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 221 } 222 223 static void virtio_net_announce_notify(VirtIONet *net) 224 { 225 VirtIODevice *vdev = VIRTIO_DEVICE(net); 226 trace_virtio_net_announce_notify(); 227 228 net->status |= VIRTIO_NET_S_ANNOUNCE; 229 virtio_notify_config(vdev); 230 } 231 232 static void virtio_net_announce_timer(void *opaque) 233 { 234 VirtIONet *n = opaque; 235 trace_virtio_net_announce_timer(n->announce_timer.round); 236 237 n->announce_timer.round--; 238 virtio_net_announce_notify(n); 239 } 240 241 static void virtio_net_announce(NetClientState *nc) 242 { 243 VirtIONet *n = qemu_get_nic_opaque(nc); 244 VirtIODevice *vdev = VIRTIO_DEVICE(n); 245 246 /* 247 * Make sure the virtio migration announcement timer isn't running 248 * If it is, let it trigger announcement so that we do not cause 249 * confusion. 250 */ 251 if (n->announce_timer.round) { 252 return; 253 } 254 255 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 256 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 257 virtio_net_announce_notify(n); 258 } 259 } 260 261 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 262 { 263 VirtIODevice *vdev = VIRTIO_DEVICE(n); 264 NetClientState *nc = qemu_get_queue(n->nic); 265 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 266 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 
267 n->max_ncs - n->max_queue_pairs : 0; 268 269 if (!get_vhost_net(nc->peer)) { 270 return; 271 } 272 273 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 274 !!n->vhost_started) { 275 return; 276 } 277 if (!n->vhost_started) { 278 int r, i; 279 280 if (n->needs_vnet_hdr_swap) { 281 error_report("backend does not support %s vnet headers; " 282 "falling back on userspace virtio", 283 virtio_is_big_endian(vdev) ? "BE" : "LE"); 284 return; 285 } 286 287 /* Any packets outstanding? Purge them to avoid touching rings 288 * when vhost is running. 289 */ 290 for (i = 0; i < queue_pairs; i++) { 291 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 292 293 /* Purge both directions: TX and RX. */ 294 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 295 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 296 } 297 298 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 299 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 300 if (r < 0) { 301 error_report("%uBytes MTU not supported by the backend", 302 n->net_conf.mtu); 303 304 return; 305 } 306 } 307 308 n->vhost_started = 1; 309 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 310 if (r < 0) { 311 error_report("unable to start vhost net: %d: " 312 "falling back on userspace virtio", -r); 313 n->vhost_started = 0; 314 } 315 } else { 316 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 317 n->vhost_started = 0; 318 } 319 } 320 321 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 322 NetClientState *peer, 323 bool enable) 324 { 325 if (virtio_is_big_endian(vdev)) { 326 return qemu_set_vnet_be(peer, enable); 327 } else { 328 return qemu_set_vnet_le(peer, enable); 329 } 330 } 331 332 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 333 int queue_pairs, bool enable) 334 { 335 int i; 336 337 for (i = 0; i < queue_pairs; i++) { 338 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 339 enable) { 340 while 
(--i >= 0) { 341 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 342 } 343 344 return true; 345 } 346 } 347 348 return false; 349 } 350 351 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 352 { 353 VirtIODevice *vdev = VIRTIO_DEVICE(n); 354 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 355 356 if (virtio_net_started(n, status)) { 357 /* Before using the device, we tell the network backend about the 358 * endianness to use when parsing vnet headers. If the backend 359 * can't do it, we fallback onto fixing the headers in the core 360 * virtio-net code. 361 */ 362 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 363 queue_pairs, true); 364 } else if (virtio_net_started(n, vdev->status)) { 365 /* After using the device, we need to reset the network backend to 366 * the default (guest native endianness), otherwise the guest may 367 * lose network connectivity if it is rebooted into a different 368 * endianness. 369 */ 370 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 371 } 372 } 373 374 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 375 { 376 unsigned int dropped = virtqueue_drop_all(vq); 377 if (dropped) { 378 virtio_notify(vdev, vq); 379 } 380 } 381 382 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 383 { 384 VirtIONet *n = VIRTIO_NET(vdev); 385 VirtIONetQueue *q; 386 int i; 387 uint8_t queue_status; 388 389 virtio_net_vnet_endian_status(n, status); 390 virtio_net_vhost_status(n, status); 391 392 for (i = 0; i < n->max_queue_pairs; i++) { 393 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 394 bool queue_started; 395 q = &n->vqs[i]; 396 397 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 398 queue_status = 0; 399 } else { 400 queue_status = status; 401 } 402 queue_started = 403 virtio_net_started(n, queue_status) && !n->vhost_started; 404 405 if (queue_started) { 406 qemu_flush_queued_packets(ncs); 407 
} 408 409 if (!q->tx_waiting) { 410 continue; 411 } 412 413 if (queue_started) { 414 if (q->tx_timer) { 415 timer_mod(q->tx_timer, 416 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 417 } else { 418 qemu_bh_schedule(q->tx_bh); 419 } 420 } else { 421 if (q->tx_timer) { 422 timer_del(q->tx_timer); 423 } else { 424 qemu_bh_cancel(q->tx_bh); 425 } 426 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 427 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 428 vdev->vm_running) { 429 /* if tx is waiting we are likely have some packets in tx queue 430 * and disabled notification */ 431 q->tx_waiting = 0; 432 virtio_queue_set_notification(q->tx_vq, 1); 433 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 434 } 435 } 436 } 437 } 438 439 static void virtio_net_set_link_status(NetClientState *nc) 440 { 441 VirtIONet *n = qemu_get_nic_opaque(nc); 442 VirtIODevice *vdev = VIRTIO_DEVICE(n); 443 uint16_t old_status = n->status; 444 445 if (nc->link_down) 446 n->status &= ~VIRTIO_NET_S_LINK_UP; 447 else 448 n->status |= VIRTIO_NET_S_LINK_UP; 449 450 if (n->status != old_status) 451 virtio_notify_config(vdev); 452 453 virtio_net_set_status(vdev, vdev->status); 454 } 455 456 static void rxfilter_notify(NetClientState *nc) 457 { 458 VirtIONet *n = qemu_get_nic_opaque(nc); 459 460 if (nc->rxfilter_notify_enabled) { 461 char *path = object_get_canonical_path(OBJECT(n->qdev)); 462 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path); 463 g_free(path); 464 465 /* disable event notification to avoid events flooding */ 466 nc->rxfilter_notify_enabled = 0; 467 } 468 } 469 470 static intList *get_vlan_table(VirtIONet *n) 471 { 472 intList *list; 473 int i, j; 474 475 list = NULL; 476 for (i = 0; i < MAX_VLAN >> 5; i++) { 477 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 478 if (n->vlans[i] & (1U << j)) { 479 QAPI_LIST_PREPEND(list, (i << 5) + j); 480 } 481 } 482 } 483 484 return list; 485 } 486 487 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 488 { 489 VirtIONet 
*n = qemu_get_nic_opaque(nc); 490 VirtIODevice *vdev = VIRTIO_DEVICE(n); 491 RxFilterInfo *info; 492 strList *str_list; 493 int i; 494 495 info = g_malloc0(sizeof(*info)); 496 info->name = g_strdup(nc->name); 497 info->promiscuous = n->promisc; 498 499 if (n->nouni) { 500 info->unicast = RX_STATE_NONE; 501 } else if (n->alluni) { 502 info->unicast = RX_STATE_ALL; 503 } else { 504 info->unicast = RX_STATE_NORMAL; 505 } 506 507 if (n->nomulti) { 508 info->multicast = RX_STATE_NONE; 509 } else if (n->allmulti) { 510 info->multicast = RX_STATE_ALL; 511 } else { 512 info->multicast = RX_STATE_NORMAL; 513 } 514 515 info->broadcast_allowed = n->nobcast; 516 info->multicast_overflow = n->mac_table.multi_overflow; 517 info->unicast_overflow = n->mac_table.uni_overflow; 518 519 info->main_mac = qemu_mac_strdup_printf(n->mac); 520 521 str_list = NULL; 522 for (i = 0; i < n->mac_table.first_multi; i++) { 523 QAPI_LIST_PREPEND(str_list, 524 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 525 } 526 info->unicast_table = str_list; 527 528 str_list = NULL; 529 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 530 QAPI_LIST_PREPEND(str_list, 531 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 532 } 533 info->multicast_table = str_list; 534 info->vlan_table = get_vlan_table(n); 535 536 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 537 info->vlan = RX_STATE_ALL; 538 } else if (!info->vlan_table) { 539 info->vlan = RX_STATE_NONE; 540 } else { 541 info->vlan = RX_STATE_NORMAL; 542 } 543 544 /* enable event notification after query */ 545 nc->rxfilter_notify_enabled = 1; 546 547 return info; 548 } 549 550 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index) 551 { 552 VirtIONet *n = VIRTIO_NET(vdev); 553 NetClientState *nc; 554 555 /* validate queue_index and skip for cvq */ 556 if (queue_index >= n->max_queue_pairs * 2) { 557 return; 558 } 559 560 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 561 562 if 
(!nc->peer) { 563 return; 564 } 565 566 if (get_vhost_net(nc->peer) && 567 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 568 vhost_net_virtqueue_reset(vdev, nc, queue_index); 569 } 570 571 flush_or_purge_queued_packets(nc); 572 } 573 574 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index) 575 { 576 VirtIONet *n = VIRTIO_NET(vdev); 577 NetClientState *nc; 578 int r; 579 580 /* validate queue_index and skip for cvq */ 581 if (queue_index >= n->max_queue_pairs * 2) { 582 return; 583 } 584 585 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 586 587 if (!nc->peer || !vdev->vhost_started) { 588 return; 589 } 590 591 if (get_vhost_net(nc->peer) && 592 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 593 r = vhost_net_virtqueue_restart(vdev, nc, queue_index); 594 if (r < 0) { 595 error_report("unable to restart vhost net virtqueue: %d, " 596 "when resetting the queue", queue_index); 597 } 598 } 599 } 600 601 static void virtio_net_reset(VirtIODevice *vdev) 602 { 603 VirtIONet *n = VIRTIO_NET(vdev); 604 int i; 605 606 /* Reset back to compatibility mode */ 607 n->promisc = 1; 608 n->allmulti = 0; 609 n->alluni = 0; 610 n->nomulti = 0; 611 n->nouni = 0; 612 n->nobcast = 0; 613 /* multiqueue is disabled by default */ 614 n->curr_queue_pairs = 1; 615 timer_del(n->announce_timer.tm); 616 n->announce_timer.round = 0; 617 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 618 619 /* Flush any MAC and VLAN filter table state */ 620 n->mac_table.in_use = 0; 621 n->mac_table.first_multi = 0; 622 n->mac_table.multi_overflow = 0; 623 n->mac_table.uni_overflow = 0; 624 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 625 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 626 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 627 memset(n->vlans, 0, MAX_VLAN >> 3); 628 629 /* Flush any async TX */ 630 for (i = 0; i < n->max_queue_pairs; i++) { 631 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 632 } 633 } 634 635 static void 
peer_test_vnet_hdr(VirtIONet *n) 636 { 637 NetClientState *nc = qemu_get_queue(n->nic); 638 if (!nc->peer) { 639 return; 640 } 641 642 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 643 } 644 645 static int peer_has_vnet_hdr(VirtIONet *n) 646 { 647 return n->has_vnet_hdr; 648 } 649 650 static int peer_has_ufo(VirtIONet *n) 651 { 652 if (!peer_has_vnet_hdr(n)) 653 return 0; 654 655 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 656 657 return n->has_ufo; 658 } 659 660 static int peer_has_uso(VirtIONet *n) 661 { 662 if (!peer_has_vnet_hdr(n)) { 663 return 0; 664 } 665 666 return qemu_has_uso(qemu_get_queue(n->nic)->peer); 667 } 668 669 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 670 int version_1, int hash_report) 671 { 672 int i; 673 NetClientState *nc; 674 675 n->mergeable_rx_bufs = mergeable_rx_bufs; 676 677 /* 678 * Note: when extending the vnet header, please make sure to 679 * change the vnet header copying logic in virtio_net_flush_tx() 680 * as well. 681 */ 682 if (version_1) { 683 n->guest_hdr_len = hash_report ? 684 sizeof(struct virtio_net_hdr_v1_hash) : 685 sizeof(struct virtio_net_hdr_mrg_rxbuf); 686 n->rss_data.populate_hash = !!hash_report; 687 } else { 688 n->guest_hdr_len = n->mergeable_rx_bufs ? 689 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 690 sizeof(struct virtio_net_hdr); 691 } 692 693 for (i = 0; i < n->max_queue_pairs; i++) { 694 nc = qemu_get_subqueue(n->nic, i); 695 696 if (peer_has_vnet_hdr(n) && 697 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 698 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 699 n->host_hdr_len = n->guest_hdr_len; 700 } 701 } 702 } 703 704 static int virtio_net_max_tx_queue_size(VirtIONet *n) 705 { 706 NetClientState *peer = n->nic_conf.peers.ncs[0]; 707 708 /* 709 * Backends other than vhost-user or vhost-vdpa don't support max queue 710 * size. 
711 */ 712 if (!peer) { 713 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 714 } 715 716 switch(peer->info->type) { 717 case NET_CLIENT_DRIVER_VHOST_USER: 718 case NET_CLIENT_DRIVER_VHOST_VDPA: 719 return VIRTQUEUE_MAX_SIZE; 720 default: 721 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 722 }; 723 } 724 725 static int peer_attach(VirtIONet *n, int index) 726 { 727 NetClientState *nc = qemu_get_subqueue(n->nic, index); 728 729 if (!nc->peer) { 730 return 0; 731 } 732 733 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 734 vhost_set_vring_enable(nc->peer, 1); 735 } 736 737 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 738 return 0; 739 } 740 741 if (n->max_queue_pairs == 1) { 742 return 0; 743 } 744 745 return tap_enable(nc->peer); 746 } 747 748 static int peer_detach(VirtIONet *n, int index) 749 { 750 NetClientState *nc = qemu_get_subqueue(n->nic, index); 751 752 if (!nc->peer) { 753 return 0; 754 } 755 756 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 757 vhost_set_vring_enable(nc->peer, 0); 758 } 759 760 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 761 return 0; 762 } 763 764 return tap_disable(nc->peer); 765 } 766 767 static void virtio_net_set_queue_pairs(VirtIONet *n) 768 { 769 int i; 770 int r; 771 772 if (n->nic->peer_deleted) { 773 return; 774 } 775 776 for (i = 0; i < n->max_queue_pairs; i++) { 777 if (i < n->curr_queue_pairs) { 778 r = peer_attach(n, i); 779 assert(!r); 780 } else { 781 r = peer_detach(n, i); 782 assert(!r); 783 } 784 } 785 } 786 787 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 788 789 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 790 Error **errp) 791 { 792 VirtIONet *n = VIRTIO_NET(vdev); 793 NetClientState *nc = qemu_get_queue(n->nic); 794 795 /* Firstly sync all virtio-net possible supported features */ 796 features |= n->host_features; 797 798 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 799 800 if (!peer_has_vnet_hdr(n)) { 801 
virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 802 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 803 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 804 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 805 806 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 807 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 808 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 809 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 810 811 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); 812 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); 813 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); 814 815 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 816 } 817 818 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 819 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 820 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 821 } 822 823 if (!peer_has_uso(n)) { 824 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); 825 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); 826 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); 827 } 828 829 if (!get_vhost_net(nc->peer)) { 830 return features; 831 } 832 833 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 834 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 835 } 836 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 837 vdev->backend_features = features; 838 839 if (n->mtu_bypass_backend && 840 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 841 features |= (1ULL << VIRTIO_NET_F_MTU); 842 } 843 844 /* 845 * Since GUEST_ANNOUNCE is emulated the feature bit could be set without 846 * enabled. This happens in the vDPA case. 847 * 848 * Make sure the feature set is not incoherent, as the driver could refuse 849 * to start. 850 * 851 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, 852 * helping guest to notify the new location with vDPA devices that does not 853 * support it. 
854 */ 855 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { 856 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); 857 } 858 859 return features; 860 } 861 862 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 863 { 864 uint64_t features = 0; 865 866 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 867 * but also these: */ 868 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 869 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 870 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 871 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 872 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 873 874 return features; 875 } 876 877 static void virtio_net_apply_guest_offloads(VirtIONet *n) 878 { 879 qemu_set_offload(qemu_get_queue(n->nic)->peer, 880 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 881 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 882 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 883 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 884 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), 885 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), 886 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6))); 887 } 888 889 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) 890 { 891 static const uint64_t guest_offloads_mask = 892 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 893 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 894 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 895 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 896 (1ULL << VIRTIO_NET_F_GUEST_UFO) | 897 (1ULL << VIRTIO_NET_F_GUEST_USO4) | 898 (1ULL << VIRTIO_NET_F_GUEST_USO6); 899 900 return guest_offloads_mask & features; 901 } 902 903 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) 904 { 905 VirtIODevice *vdev = VIRTIO_DEVICE(n); 906 return virtio_net_guest_offloads_by_features(vdev->guest_features); 907 } 908 909 typedef struct { 910 
VirtIONet *n; 911 DeviceState *dev; 912 } FailoverDevice; 913 914 /** 915 * Set the failover primary device 916 * 917 * @opaque: FailoverId to setup 918 * @opts: opts for device we are handling 919 * @errp: returns an error if this function fails 920 */ 921 static int failover_set_primary(DeviceState *dev, void *opaque) 922 { 923 FailoverDevice *fdev = opaque; 924 PCIDevice *pci_dev = (PCIDevice *) 925 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 926 927 if (!pci_dev) { 928 return 0; 929 } 930 931 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 932 fdev->dev = dev; 933 return 1; 934 } 935 936 return 0; 937 } 938 939 /** 940 * Find the primary device for this failover virtio-net 941 * 942 * @n: VirtIONet device 943 * @errp: returns an error if this function fails 944 */ 945 static DeviceState *failover_find_primary_device(VirtIONet *n) 946 { 947 FailoverDevice fdev = { 948 .n = n, 949 }; 950 951 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 952 NULL, NULL, &fdev); 953 return fdev.dev; 954 } 955 956 static void failover_add_primary(VirtIONet *n, Error **errp) 957 { 958 Error *err = NULL; 959 DeviceState *dev = failover_find_primary_device(n); 960 961 if (dev) { 962 return; 963 } 964 965 if (!n->primary_opts) { 966 error_setg(errp, "Primary device not found"); 967 error_append_hint(errp, "Virtio-net failover will not work. 
Make " 968 "sure primary device has parameter" 969 " failover_pair_id=%s\n", n->netclient_name); 970 return; 971 } 972 973 dev = qdev_device_add_from_qdict(n->primary_opts, 974 n->primary_opts_from_json, 975 &err); 976 if (err) { 977 qobject_unref(n->primary_opts); 978 n->primary_opts = NULL; 979 } else { 980 object_unref(OBJECT(dev)); 981 } 982 error_propagate(errp, err); 983 } 984 985 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 986 { 987 VirtIONet *n = VIRTIO_NET(vdev); 988 Error *err = NULL; 989 int i; 990 991 if (n->mtu_bypass_backend && 992 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 993 features &= ~(1ULL << VIRTIO_NET_F_MTU); 994 } 995 996 virtio_net_set_multiqueue(n, 997 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 998 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 999 1000 virtio_net_set_mrg_rx_bufs(n, 1001 virtio_has_feature(features, 1002 VIRTIO_NET_F_MRG_RXBUF), 1003 virtio_has_feature(features, 1004 VIRTIO_F_VERSION_1), 1005 virtio_has_feature(features, 1006 VIRTIO_NET_F_HASH_REPORT)); 1007 1008 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 1009 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 1010 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 1011 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 1012 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 1013 1014 if (n->has_vnet_hdr) { 1015 n->curr_guest_offloads = 1016 virtio_net_guest_offloads_by_features(features); 1017 virtio_net_apply_guest_offloads(n); 1018 } 1019 1020 for (i = 0; i < n->max_queue_pairs; i++) { 1021 NetClientState *nc = qemu_get_subqueue(n->nic, i); 1022 1023 if (!get_vhost_net(nc->peer)) { 1024 continue; 1025 } 1026 vhost_net_ack_features(get_vhost_net(nc->peer), features); 1027 1028 /* 1029 * keep acked_features in NetVhostUserState up-to-date so it 1030 * can't miss any features configured by guest virtio driver. 
1031 */ 1032 vhost_net_save_acked_features(nc->peer); 1033 } 1034 1035 if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 1036 memset(n->vlans, 0xff, MAX_VLAN >> 3); 1037 } 1038 1039 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 1040 qapi_event_send_failover_negotiated(n->netclient_name); 1041 qatomic_set(&n->failover_primary_hidden, false); 1042 failover_add_primary(n, &err); 1043 if (err) { 1044 if (!qtest_enabled()) { 1045 warn_report_err(err); 1046 } else { 1047 error_free(err); 1048 } 1049 } 1050 } 1051 } 1052 1053 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 1054 struct iovec *iov, unsigned int iov_cnt) 1055 { 1056 uint8_t on; 1057 size_t s; 1058 NetClientState *nc = qemu_get_queue(n->nic); 1059 1060 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 1061 if (s != sizeof(on)) { 1062 return VIRTIO_NET_ERR; 1063 } 1064 1065 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 1066 n->promisc = on; 1067 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 1068 n->allmulti = on; 1069 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 1070 n->alluni = on; 1071 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 1072 n->nomulti = on; 1073 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 1074 n->nouni = on; 1075 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 1076 n->nobcast = on; 1077 } else { 1078 return VIRTIO_NET_ERR; 1079 } 1080 1081 rxfilter_notify(nc); 1082 1083 return VIRTIO_NET_OK; 1084 } 1085 1086 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 1087 struct iovec *iov, unsigned int iov_cnt) 1088 { 1089 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1090 uint64_t offloads; 1091 size_t s; 1092 1093 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 1094 return VIRTIO_NET_ERR; 1095 } 1096 1097 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 1098 if (s != sizeof(offloads)) { 1099 return VIRTIO_NET_ERR; 1100 } 1101 1102 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 1103 uint64_t supported_offloads; 1104 
1105 offloads = virtio_ldq_p(vdev, &offloads); 1106 1107 if (!n->has_vnet_hdr) { 1108 return VIRTIO_NET_ERR; 1109 } 1110 1111 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1112 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1113 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1114 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1115 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1116 1117 supported_offloads = virtio_net_supported_guest_offloads(n); 1118 if (offloads & ~supported_offloads) { 1119 return VIRTIO_NET_ERR; 1120 } 1121 1122 n->curr_guest_offloads = offloads; 1123 virtio_net_apply_guest_offloads(n); 1124 1125 return VIRTIO_NET_OK; 1126 } else { 1127 return VIRTIO_NET_ERR; 1128 } 1129 } 1130 1131 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1132 struct iovec *iov, unsigned int iov_cnt) 1133 { 1134 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1135 struct virtio_net_ctrl_mac mac_data; 1136 size_t s; 1137 NetClientState *nc = qemu_get_queue(n->nic); 1138 1139 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1140 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1141 return VIRTIO_NET_ERR; 1142 } 1143 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1144 assert(s == sizeof(n->mac)); 1145 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1146 rxfilter_notify(nc); 1147 1148 return VIRTIO_NET_OK; 1149 } 1150 1151 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1152 return VIRTIO_NET_ERR; 1153 } 1154 1155 int in_use = 0; 1156 int first_multi = 0; 1157 uint8_t uni_overflow = 0; 1158 uint8_t multi_overflow = 0; 1159 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1160 1161 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1162 sizeof(mac_data.entries)); 1163 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1164 if (s != sizeof(mac_data.entries)) { 1165 goto error; 1166 } 1167 iov_discard_front(&iov, &iov_cnt, s); 1168 1169 if (mac_data.entries * ETH_ALEN > 
iov_size(iov, iov_cnt)) { 1170 goto error; 1171 } 1172 1173 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1174 s = iov_to_buf(iov, iov_cnt, 0, macs, 1175 mac_data.entries * ETH_ALEN); 1176 if (s != mac_data.entries * ETH_ALEN) { 1177 goto error; 1178 } 1179 in_use += mac_data.entries; 1180 } else { 1181 uni_overflow = 1; 1182 } 1183 1184 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1185 1186 first_multi = in_use; 1187 1188 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1189 sizeof(mac_data.entries)); 1190 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1191 if (s != sizeof(mac_data.entries)) { 1192 goto error; 1193 } 1194 1195 iov_discard_front(&iov, &iov_cnt, s); 1196 1197 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1198 goto error; 1199 } 1200 1201 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1202 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1203 mac_data.entries * ETH_ALEN); 1204 if (s != mac_data.entries * ETH_ALEN) { 1205 goto error; 1206 } 1207 in_use += mac_data.entries; 1208 } else { 1209 multi_overflow = 1; 1210 } 1211 1212 n->mac_table.in_use = in_use; 1213 n->mac_table.first_multi = first_multi; 1214 n->mac_table.uni_overflow = uni_overflow; 1215 n->mac_table.multi_overflow = multi_overflow; 1216 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1217 g_free(macs); 1218 rxfilter_notify(nc); 1219 1220 return VIRTIO_NET_OK; 1221 1222 error: 1223 g_free(macs); 1224 return VIRTIO_NET_ERR; 1225 } 1226 1227 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1228 struct iovec *iov, unsigned int iov_cnt) 1229 { 1230 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1231 uint16_t vid; 1232 size_t s; 1233 NetClientState *nc = qemu_get_queue(n->nic); 1234 1235 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1236 vid = virtio_lduw_p(vdev, &vid); 1237 if (s != sizeof(vid)) { 1238 return VIRTIO_NET_ERR; 1239 } 1240 1241 if (vid >= MAX_VLAN) 1242 return VIRTIO_NET_ERR; 1243 1244 
if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1245 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1246 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1247 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1248 else 1249 return VIRTIO_NET_ERR; 1250 1251 rxfilter_notify(nc); 1252 1253 return VIRTIO_NET_OK; 1254 } 1255 1256 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1257 struct iovec *iov, unsigned int iov_cnt) 1258 { 1259 trace_virtio_net_handle_announce(n->announce_timer.round); 1260 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1261 n->status & VIRTIO_NET_S_ANNOUNCE) { 1262 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1263 if (n->announce_timer.round) { 1264 qemu_announce_timer_step(&n->announce_timer); 1265 } 1266 return VIRTIO_NET_OK; 1267 } else { 1268 return VIRTIO_NET_ERR; 1269 } 1270 } 1271 1272 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1273 1274 static void virtio_net_disable_rss(VirtIONet *n) 1275 { 1276 if (n->rss_data.enabled) { 1277 trace_virtio_net_rss_disable(); 1278 } 1279 n->rss_data.enabled = false; 1280 1281 virtio_net_detach_epbf_rss(n); 1282 } 1283 1284 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1285 { 1286 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1287 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1288 return false; 1289 } 1290 1291 return nc->info->set_steering_ebpf(nc, prog_fd); 1292 } 1293 1294 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1295 struct EBPFRSSConfig *config) 1296 { 1297 config->redirect = data->redirect; 1298 config->populate_hash = data->populate_hash; 1299 config->hash_types = data->hash_types; 1300 config->indirections_len = data->indirections_len; 1301 config->default_queue = data->default_queue; 1302 } 1303 1304 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1305 { 1306 struct EBPFRSSConfig config = {}; 1307 1308 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1309 return false; 1310 } 1311 1312 rss_data_to_rss_config(&n->rss_data, &config); 
1313 1314 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1315 n->rss_data.indirections_table, n->rss_data.key)) { 1316 return false; 1317 } 1318 1319 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1320 return false; 1321 } 1322 1323 return true; 1324 } 1325 1326 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1327 { 1328 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1329 } 1330 1331 static bool virtio_net_load_ebpf(VirtIONet *n) 1332 { 1333 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1334 /* backend doesn't support steering ebpf */ 1335 return false; 1336 } 1337 1338 return ebpf_rss_load(&n->ebpf_rss); 1339 } 1340 1341 static void virtio_net_unload_ebpf(VirtIONet *n) 1342 { 1343 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1344 ebpf_rss_unload(&n->ebpf_rss); 1345 } 1346 1347 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1348 struct iovec *iov, 1349 unsigned int iov_cnt, 1350 bool do_rss) 1351 { 1352 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1353 struct virtio_net_rss_config cfg; 1354 size_t s, offset = 0, size_get; 1355 uint16_t queue_pairs, i; 1356 struct { 1357 uint16_t us; 1358 uint8_t b; 1359 } QEMU_PACKED temp; 1360 const char *err_msg = ""; 1361 uint32_t err_value = 0; 1362 1363 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1364 err_msg = "RSS is not negotiated"; 1365 goto error; 1366 } 1367 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1368 err_msg = "Hash report is not negotiated"; 1369 goto error; 1370 } 1371 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1372 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1373 if (s != size_get) { 1374 err_msg = "Short command buffer"; 1375 err_value = (uint32_t)s; 1376 goto error; 1377 } 1378 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1379 n->rss_data.indirections_len = 1380 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1381 n->rss_data.indirections_len++; 1382 if (!do_rss) { 
1383 n->rss_data.indirections_len = 1; 1384 } 1385 if (!is_power_of_2(n->rss_data.indirections_len)) { 1386 err_msg = "Invalid size of indirection table"; 1387 err_value = n->rss_data.indirections_len; 1388 goto error; 1389 } 1390 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1391 err_msg = "Too large indirection table"; 1392 err_value = n->rss_data.indirections_len; 1393 goto error; 1394 } 1395 n->rss_data.default_queue = do_rss ? 1396 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1397 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1398 err_msg = "Invalid default queue"; 1399 err_value = n->rss_data.default_queue; 1400 goto error; 1401 } 1402 offset += size_get; 1403 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1404 g_free(n->rss_data.indirections_table); 1405 n->rss_data.indirections_table = g_malloc(size_get); 1406 if (!n->rss_data.indirections_table) { 1407 err_msg = "Can't allocate indirections table"; 1408 err_value = n->rss_data.indirections_len; 1409 goto error; 1410 } 1411 s = iov_to_buf(iov, iov_cnt, offset, 1412 n->rss_data.indirections_table, size_get); 1413 if (s != size_get) { 1414 err_msg = "Short indirection table buffer"; 1415 err_value = (uint32_t)s; 1416 goto error; 1417 } 1418 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1419 uint16_t val = n->rss_data.indirections_table[i]; 1420 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1421 } 1422 offset += size_get; 1423 size_get = sizeof(temp); 1424 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1425 if (s != size_get) { 1426 err_msg = "Can't get queue_pairs"; 1427 err_value = (uint32_t)s; 1428 goto error; 1429 } 1430 queue_pairs = do_rss ? 
virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1431 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1432 err_msg = "Invalid number of queue_pairs"; 1433 err_value = queue_pairs; 1434 goto error; 1435 } 1436 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1437 err_msg = "Invalid key size"; 1438 err_value = temp.b; 1439 goto error; 1440 } 1441 if (!temp.b && n->rss_data.hash_types) { 1442 err_msg = "No key provided"; 1443 err_value = 0; 1444 goto error; 1445 } 1446 if (!temp.b && !n->rss_data.hash_types) { 1447 virtio_net_disable_rss(n); 1448 return queue_pairs; 1449 } 1450 offset += size_get; 1451 size_get = temp.b; 1452 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1453 if (s != size_get) { 1454 err_msg = "Can get key buffer"; 1455 err_value = (uint32_t)s; 1456 goto error; 1457 } 1458 n->rss_data.enabled = true; 1459 1460 if (!n->rss_data.populate_hash) { 1461 if (!virtio_net_attach_epbf_rss(n)) { 1462 /* EBPF must be loaded for vhost */ 1463 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1464 warn_report("Can't load eBPF RSS for vhost"); 1465 goto error; 1466 } 1467 /* fallback to software RSS */ 1468 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1469 n->rss_data.enabled_software_rss = true; 1470 } 1471 } else { 1472 /* use software RSS for hash populating */ 1473 /* and detach eBPF if was loaded before */ 1474 virtio_net_detach_epbf_rss(n); 1475 n->rss_data.enabled_software_rss = true; 1476 } 1477 1478 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1479 n->rss_data.indirections_len, 1480 temp.b); 1481 return queue_pairs; 1482 error: 1483 trace_virtio_net_rss_error(err_msg, err_value); 1484 virtio_net_disable_rss(n); 1485 return 0; 1486 } 1487 1488 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1489 struct iovec *iov, unsigned int iov_cnt) 1490 { 1491 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1492 uint16_t queue_pairs; 1493 NetClientState *nc = qemu_get_queue(n->nic); 1494 1495 
virtio_net_disable_rss(n); 1496 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1497 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1498 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1499 } 1500 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1501 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1502 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1503 struct virtio_net_ctrl_mq mq; 1504 size_t s; 1505 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1506 return VIRTIO_NET_ERR; 1507 } 1508 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1509 if (s != sizeof(mq)) { 1510 return VIRTIO_NET_ERR; 1511 } 1512 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1513 1514 } else { 1515 return VIRTIO_NET_ERR; 1516 } 1517 1518 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1519 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1520 queue_pairs > n->max_queue_pairs || 1521 !n->multiqueue) { 1522 return VIRTIO_NET_ERR; 1523 } 1524 1525 n->curr_queue_pairs = queue_pairs; 1526 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1527 /* 1528 * Avoid updating the backend for a vdpa device: We're only interested 1529 * in updating the device model queues. 
1530 */ 1531 return VIRTIO_NET_OK; 1532 } 1533 /* stop the backend before changing the number of queue_pairs to avoid handling a 1534 * disabled queue */ 1535 virtio_net_set_status(vdev, vdev->status); 1536 virtio_net_set_queue_pairs(n); 1537 1538 return VIRTIO_NET_OK; 1539 } 1540 1541 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1542 const struct iovec *in_sg, unsigned in_num, 1543 const struct iovec *out_sg, 1544 unsigned out_num) 1545 { 1546 VirtIONet *n = VIRTIO_NET(vdev); 1547 struct virtio_net_ctrl_hdr ctrl; 1548 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1549 size_t s; 1550 struct iovec *iov, *iov2; 1551 1552 if (iov_size(in_sg, in_num) < sizeof(status) || 1553 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1554 virtio_error(vdev, "virtio-net ctrl missing headers"); 1555 return 0; 1556 } 1557 1558 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1559 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1560 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1561 if (s != sizeof(ctrl)) { 1562 status = VIRTIO_NET_ERR; 1563 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1564 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1565 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1566 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1567 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1568 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1569 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1570 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1571 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1572 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1573 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1574 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1575 } 1576 1577 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1578 assert(s == sizeof(status)); 1579 1580 g_free(iov2); 1581 return sizeof(status); 1582 } 1583 1584 static void 
virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1585 { 1586 VirtQueueElement *elem; 1587 1588 for (;;) { 1589 size_t written; 1590 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1591 if (!elem) { 1592 break; 1593 } 1594 1595 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1596 elem->out_sg, elem->out_num); 1597 if (written > 0) { 1598 virtqueue_push(vq, elem, written); 1599 virtio_notify(vdev, vq); 1600 g_free(elem); 1601 } else { 1602 virtqueue_detach_element(vq, elem, 0); 1603 g_free(elem); 1604 break; 1605 } 1606 } 1607 } 1608 1609 /* RX */ 1610 1611 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1612 { 1613 VirtIONet *n = VIRTIO_NET(vdev); 1614 int queue_index = vq2q(virtio_get_queue_index(vq)); 1615 1616 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1617 } 1618 1619 static bool virtio_net_can_receive(NetClientState *nc) 1620 { 1621 VirtIONet *n = qemu_get_nic_opaque(nc); 1622 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1623 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1624 1625 if (!vdev->vm_running) { 1626 return false; 1627 } 1628 1629 if (nc->queue_index >= n->curr_queue_pairs) { 1630 return false; 1631 } 1632 1633 if (!virtio_queue_ready(q->rx_vq) || 1634 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1635 return false; 1636 } 1637 1638 return true; 1639 } 1640 1641 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1642 { 1643 VirtIONet *n = q->n; 1644 if (virtio_queue_empty(q->rx_vq) || 1645 (n->mergeable_rx_bufs && 1646 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1647 virtio_queue_set_notification(q->rx_vq, 1); 1648 1649 /* To avoid a race condition where the guest has made some buffers 1650 * available after the above check but before notification was 1651 * enabled, check for available buffers again. 
1652 */ 1653 if (virtio_queue_empty(q->rx_vq) || 1654 (n->mergeable_rx_bufs && 1655 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1656 return 0; 1657 } 1658 } 1659 1660 virtio_queue_set_notification(q->rx_vq, 0); 1661 return 1; 1662 } 1663 1664 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1665 { 1666 virtio_tswap16s(vdev, &hdr->hdr_len); 1667 virtio_tswap16s(vdev, &hdr->gso_size); 1668 virtio_tswap16s(vdev, &hdr->csum_start); 1669 virtio_tswap16s(vdev, &hdr->csum_offset); 1670 } 1671 1672 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1673 * it never finds out that the packets don't have valid checksums. This 1674 * causes dhclient to get upset. Fedora's carried a patch for ages to 1675 * fix this with Xen but it hasn't appeared in an upstream release of 1676 * dhclient yet. 1677 * 1678 * To avoid breaking existing guests, we catch udp packets and add 1679 * checksums. This is terrible but it's better than hacking the guest 1680 * kernels. 1681 * 1682 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1683 * we should provide a mechanism to disable it to avoid polluting the host 1684 * cache. 
1685 */ 1686 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1687 uint8_t *buf, size_t size) 1688 { 1689 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1690 (size > 27 && size < 1500) && /* normal sized MTU */ 1691 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1692 (buf[23] == 17) && /* ip.protocol == UDP */ 1693 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1694 net_checksum_calculate(buf, size, CSUM_UDP); 1695 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1696 } 1697 } 1698 1699 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1700 const void *buf, size_t size) 1701 { 1702 if (n->has_vnet_hdr) { 1703 /* FIXME this cast is evil */ 1704 void *wbuf = (void *)buf; 1705 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1706 size - n->host_hdr_len); 1707 1708 if (n->needs_vnet_hdr_swap) { 1709 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1710 } 1711 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1712 } else { 1713 struct virtio_net_hdr hdr = { 1714 .flags = 0, 1715 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1716 }; 1717 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1718 } 1719 } 1720 1721 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1722 { 1723 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1724 static const uint8_t vlan[] = {0x81, 0x00}; 1725 uint8_t *ptr = (uint8_t *)buf; 1726 int i; 1727 1728 if (n->promisc) 1729 return 1; 1730 1731 ptr += n->host_hdr_len; 1732 1733 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1734 int vid = lduw_be_p(ptr + 14) & 0xfff; 1735 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1736 return 0; 1737 } 1738 1739 if (ptr[0] & 1) { // multicast 1740 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1741 return !n->nobcast; 1742 } else if (n->nomulti) { 1743 return 0; 1744 } else if (n->allmulti || n->mac_table.multi_overflow) { 1745 return 1; 1746 } 1747 1748 for (i = 
n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1749 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1750 return 1; 1751 } 1752 } 1753 } else { // unicast 1754 if (n->nouni) { 1755 return 0; 1756 } else if (n->alluni || n->mac_table.uni_overflow) { 1757 return 1; 1758 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1759 return 1; 1760 } 1761 1762 for (i = 0; i < n->mac_table.first_multi; i++) { 1763 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1764 return 1; 1765 } 1766 } 1767 } 1768 1769 return 0; 1770 } 1771 1772 static uint8_t virtio_net_get_hash_type(bool hasip4, 1773 bool hasip6, 1774 EthL4HdrProto l4hdr_proto, 1775 uint32_t types) 1776 { 1777 if (hasip4) { 1778 switch (l4hdr_proto) { 1779 case ETH_L4_HDR_PROTO_TCP: 1780 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 1781 return NetPktRssIpV4Tcp; 1782 } 1783 break; 1784 1785 case ETH_L4_HDR_PROTO_UDP: 1786 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 1787 return NetPktRssIpV4Udp; 1788 } 1789 break; 1790 1791 default: 1792 break; 1793 } 1794 1795 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1796 return NetPktRssIpV4; 1797 } 1798 } else if (hasip6) { 1799 switch (l4hdr_proto) { 1800 case ETH_L4_HDR_PROTO_TCP: 1801 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) { 1802 return NetPktRssIpV6TcpEx; 1803 } 1804 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 1805 return NetPktRssIpV6Tcp; 1806 } 1807 break; 1808 1809 case ETH_L4_HDR_PROTO_UDP: 1810 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) { 1811 return NetPktRssIpV6UdpEx; 1812 } 1813 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 1814 return NetPktRssIpV6Udp; 1815 } 1816 break; 1817 1818 default: 1819 break; 1820 } 1821 1822 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) { 1823 return NetPktRssIpV6Ex; 1824 } 1825 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 1826 return NetPktRssIpV6; 1827 } 1828 } 1829 return 0xff; 1830 } 1831 1832 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1833 uint32_t hash) 1834 { 
1835 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1836 hdr->hash_value = hash; 1837 hdr->hash_report = report; 1838 } 1839 1840 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1841 size_t size) 1842 { 1843 VirtIONet *n = qemu_get_nic_opaque(nc); 1844 unsigned int index = nc->queue_index, new_index = index; 1845 struct NetRxPkt *pkt = n->rx_pkt; 1846 uint8_t net_hash_type; 1847 uint32_t hash; 1848 bool hasip4, hasip6; 1849 EthL4HdrProto l4hdr_proto; 1850 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1851 VIRTIO_NET_HASH_REPORT_IPv4, 1852 VIRTIO_NET_HASH_REPORT_TCPv4, 1853 VIRTIO_NET_HASH_REPORT_TCPv6, 1854 VIRTIO_NET_HASH_REPORT_IPv6, 1855 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1856 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1857 VIRTIO_NET_HASH_REPORT_UDPv4, 1858 VIRTIO_NET_HASH_REPORT_UDPv6, 1859 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1860 }; 1861 struct iovec iov = { 1862 .iov_base = (void *)buf, 1863 .iov_len = size 1864 }; 1865 1866 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); 1867 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); 1868 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, 1869 n->rss_data.hash_types); 1870 if (net_hash_type > NetPktRssIpV6UdpEx) { 1871 if (n->rss_data.populate_hash) { 1872 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1873 } 1874 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1875 } 1876 1877 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1878 1879 if (n->rss_data.populate_hash) { 1880 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1881 } 1882 1883 if (n->rss_data.redirect) { 1884 new_index = hash & (n->rss_data.indirections_len - 1); 1885 new_index = n->rss_data.indirections_table[new_index]; 1886 } 1887 1888 return (index == new_index) ? 
-1 : new_index; 1889 } 1890 1891 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1892 size_t size, bool no_rss) 1893 { 1894 VirtIONet *n = qemu_get_nic_opaque(nc); 1895 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1896 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1897 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1898 size_t lens[VIRTQUEUE_MAX_SIZE]; 1899 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1900 struct virtio_net_hdr_mrg_rxbuf mhdr; 1901 unsigned mhdr_cnt = 0; 1902 size_t offset, i, guest_offset, j; 1903 ssize_t err; 1904 1905 if (!virtio_net_can_receive(nc)) { 1906 return -1; 1907 } 1908 1909 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1910 int index = virtio_net_process_rss(nc, buf, size); 1911 if (index >= 0) { 1912 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1913 return virtio_net_receive_rcu(nc2, buf, size, true); 1914 } 1915 } 1916 1917 /* hdr_len refers to the header we supply to the guest */ 1918 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1919 return 0; 1920 } 1921 1922 if (!receive_filter(n, buf, size)) 1923 return size; 1924 1925 offset = i = 0; 1926 1927 while (offset < size) { 1928 VirtQueueElement *elem; 1929 int len, total; 1930 const struct iovec *sg; 1931 1932 total = 0; 1933 1934 if (i == VIRTQUEUE_MAX_SIZE) { 1935 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1936 err = size; 1937 goto err; 1938 } 1939 1940 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1941 if (!elem) { 1942 if (i) { 1943 virtio_error(vdev, "virtio-net unexpected empty queue: " 1944 "i %zd mergeable %d offset %zd, size %zd, " 1945 "guest hdr len %zd, host hdr len %zd " 1946 "guest features 0x%" PRIx64, 1947 i, n->mergeable_rx_bufs, offset, size, 1948 n->guest_hdr_len, n->host_hdr_len, 1949 vdev->guest_features); 1950 } 1951 err = -1; 1952 goto err; 1953 } 1954 1955 if (elem->in_num < 1) { 1956 virtio_error(vdev, 1957 "virtio-net receive queue 
contains no in buffers"); 1958 virtqueue_detach_element(q->rx_vq, elem, 0); 1959 g_free(elem); 1960 err = -1; 1961 goto err; 1962 } 1963 1964 sg = elem->in_sg; 1965 if (i == 0) { 1966 assert(offset == 0); 1967 if (n->mergeable_rx_bufs) { 1968 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1969 sg, elem->in_num, 1970 offsetof(typeof(mhdr), num_buffers), 1971 sizeof(mhdr.num_buffers)); 1972 } 1973 1974 receive_header(n, sg, elem->in_num, buf, size); 1975 if (n->rss_data.populate_hash) { 1976 offset = sizeof(mhdr); 1977 iov_from_buf(sg, elem->in_num, offset, 1978 buf + offset, n->host_hdr_len - sizeof(mhdr)); 1979 } 1980 offset = n->host_hdr_len; 1981 total += n->guest_hdr_len; 1982 guest_offset = n->guest_hdr_len; 1983 } else { 1984 guest_offset = 0; 1985 } 1986 1987 /* copy in packet. ugh */ 1988 len = iov_from_buf(sg, elem->in_num, guest_offset, 1989 buf + offset, size - offset); 1990 total += len; 1991 offset += len; 1992 /* If buffers can't be merged, at this point we 1993 * must have consumed the complete packet. 1994 * Otherwise, drop it. 
*/ 1995 if (!n->mergeable_rx_bufs && offset < size) { 1996 virtqueue_unpop(q->rx_vq, elem, total); 1997 g_free(elem); 1998 err = size; 1999 goto err; 2000 } 2001 2002 elems[i] = elem; 2003 lens[i] = total; 2004 i++; 2005 } 2006 2007 if (mhdr_cnt) { 2008 virtio_stw_p(vdev, &mhdr.num_buffers, i); 2009 iov_from_buf(mhdr_sg, mhdr_cnt, 2010 0, 2011 &mhdr.num_buffers, sizeof mhdr.num_buffers); 2012 } 2013 2014 for (j = 0; j < i; j++) { 2015 /* signal other side */ 2016 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 2017 g_free(elems[j]); 2018 } 2019 2020 virtqueue_flush(q->rx_vq, i); 2021 virtio_notify(vdev, q->rx_vq); 2022 2023 return size; 2024 2025 err: 2026 for (j = 0; j < i; j++) { 2027 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 2028 g_free(elems[j]); 2029 } 2030 2031 return err; 2032 } 2033 2034 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 2035 size_t size) 2036 { 2037 RCU_READ_LOCK_GUARD(); 2038 2039 return virtio_net_receive_rcu(nc, buf, size, false); 2040 } 2041 2042 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 2043 const uint8_t *buf, 2044 VirtioNetRscUnit *unit) 2045 { 2046 uint16_t ip_hdrlen; 2047 struct ip_header *ip; 2048 2049 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 2050 + sizeof(struct eth_header)); 2051 unit->ip = (void *)ip; 2052 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 2053 unit->ip_plen = &ip->ip_len; 2054 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 2055 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2056 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 2057 } 2058 2059 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 2060 const uint8_t *buf, 2061 VirtioNetRscUnit *unit) 2062 { 2063 struct ip6_header *ip6; 2064 2065 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2066 + sizeof(struct eth_header)); 2067 unit->ip = ip6; 2068 unit->ip_plen = 
&(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2069 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2070 + sizeof(struct ip6_header)); 2071 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2072 2073 /* There is a difference between payload length in ipv4 and v6, 2074 ip header is excluded in ipv6 */ 2075 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2076 } 2077 2078 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2079 VirtioNetRscSeg *seg) 2080 { 2081 int ret; 2082 struct virtio_net_hdr_v1 *h; 2083 2084 h = (struct virtio_net_hdr_v1 *)seg->buf; 2085 h->flags = 0; 2086 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2087 2088 if (seg->is_coalesced) { 2089 h->rsc.segments = seg->packets; 2090 h->rsc.dup_acks = seg->dup_ack; 2091 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2092 if (chain->proto == ETH_P_IP) { 2093 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2094 } else { 2095 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2096 } 2097 } 2098 2099 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2100 QTAILQ_REMOVE(&chain->buffers, seg, next); 2101 g_free(seg->buf); 2102 g_free(seg); 2103 2104 return ret; 2105 } 2106 2107 static void virtio_net_rsc_purge(void *opq) 2108 { 2109 VirtioNetRscSeg *seg, *rn; 2110 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2111 2112 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2113 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2114 chain->stat.purge_failed++; 2115 continue; 2116 } 2117 } 2118 2119 chain->stat.timer++; 2120 if (!QTAILQ_EMPTY(&chain->buffers)) { 2121 timer_mod(chain->drain_timer, 2122 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2123 } 2124 } 2125 2126 static void virtio_net_rsc_cleanup(VirtIONet *n) 2127 { 2128 VirtioNetRscChain *chain, *rn_chain; 2129 VirtioNetRscSeg *seg, *rn_seg; 2130 2131 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2132 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2133 QTAILQ_REMOVE(&chain->buffers, seg, 
next); 2134 g_free(seg->buf); 2135 g_free(seg); 2136 } 2137 2138 timer_free(chain->drain_timer); 2139 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2140 g_free(chain); 2141 } 2142 } 2143 2144 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2145 NetClientState *nc, 2146 const uint8_t *buf, size_t size) 2147 { 2148 uint16_t hdr_len; 2149 VirtioNetRscSeg *seg; 2150 2151 hdr_len = chain->n->guest_hdr_len; 2152 seg = g_new(VirtioNetRscSeg, 1); 2153 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2154 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2155 memcpy(seg->buf, buf, size); 2156 seg->size = size; 2157 seg->packets = 1; 2158 seg->dup_ack = 0; 2159 seg->is_coalesced = 0; 2160 seg->nc = nc; 2161 2162 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2163 chain->stat.cache++; 2164 2165 switch (chain->proto) { 2166 case ETH_P_IP: 2167 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2168 break; 2169 case ETH_P_IPV6: 2170 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2171 break; 2172 default: 2173 g_assert_not_reached(); 2174 } 2175 } 2176 2177 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2178 VirtioNetRscSeg *seg, 2179 const uint8_t *buf, 2180 struct tcp_header *n_tcp, 2181 struct tcp_header *o_tcp) 2182 { 2183 uint32_t nack, oack; 2184 uint16_t nwin, owin; 2185 2186 nack = htonl(n_tcp->th_ack); 2187 nwin = htons(n_tcp->th_win); 2188 oack = htonl(o_tcp->th_ack); 2189 owin = htons(o_tcp->th_win); 2190 2191 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2192 chain->stat.ack_out_of_win++; 2193 return RSC_FINAL; 2194 } else if (nack == oack) { 2195 /* duplicated ack or window probe */ 2196 if (nwin == owin) { 2197 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2198 chain->stat.dup_ack++; 2199 return RSC_FINAL; 2200 } else { 2201 /* Coalesce window update */ 2202 o_tcp->th_win = n_tcp->th_win; 2203 chain->stat.win_update++; 2204 return RSC_COALESCE; 2205 } 2206 } else { 2207 /* 
pure ack, go to 'C', finalize*/ 2208 chain->stat.pure_ack++; 2209 return RSC_FINAL; 2210 } 2211 } 2212 2213 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2214 VirtioNetRscSeg *seg, 2215 const uint8_t *buf, 2216 VirtioNetRscUnit *n_unit) 2217 { 2218 void *data; 2219 uint16_t o_ip_len; 2220 uint32_t nseq, oseq; 2221 VirtioNetRscUnit *o_unit; 2222 2223 o_unit = &seg->unit; 2224 o_ip_len = htons(*o_unit->ip_plen); 2225 nseq = htonl(n_unit->tcp->th_seq); 2226 oseq = htonl(o_unit->tcp->th_seq); 2227 2228 /* out of order or retransmitted. */ 2229 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2230 chain->stat.data_out_of_win++; 2231 return RSC_FINAL; 2232 } 2233 2234 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2235 if (nseq == oseq) { 2236 if ((o_unit->payload == 0) && n_unit->payload) { 2237 /* From no payload to payload, normal case, not a dup ack or etc */ 2238 chain->stat.data_after_pure_ack++; 2239 goto coalesce; 2240 } else { 2241 return virtio_net_rsc_handle_ack(chain, seg, buf, 2242 n_unit->tcp, o_unit->tcp); 2243 } 2244 } else if ((nseq - oseq) != o_unit->payload) { 2245 /* Not a consistent packet, out of order */ 2246 chain->stat.data_out_of_order++; 2247 return RSC_FINAL; 2248 } else { 2249 coalesce: 2250 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2251 chain->stat.over_size++; 2252 return RSC_FINAL; 2253 } 2254 2255 /* Here comes the right data, the payload length in v4/v6 is different, 2256 so use the field value to update and record the new data len */ 2257 o_unit->payload += n_unit->payload; /* update new data len */ 2258 2259 /* update field in ip header */ 2260 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2261 2262 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2263 for windows guest, while this may change the behavior for linux 2264 guest (only if it uses RSC feature). 
*/ 2265 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2266 2267 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2268 o_unit->tcp->th_win = n_unit->tcp->th_win; 2269 2270 memmove(seg->buf + seg->size, data, n_unit->payload); 2271 seg->size += n_unit->payload; 2272 seg->packets++; 2273 chain->stat.coalesced++; 2274 return RSC_COALESCE; 2275 } 2276 } 2277 2278 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2279 VirtioNetRscSeg *seg, 2280 const uint8_t *buf, size_t size, 2281 VirtioNetRscUnit *unit) 2282 { 2283 struct ip_header *ip1, *ip2; 2284 2285 ip1 = (struct ip_header *)(unit->ip); 2286 ip2 = (struct ip_header *)(seg->unit.ip); 2287 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2288 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2289 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2290 chain->stat.no_match++; 2291 return RSC_NO_MATCH; 2292 } 2293 2294 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2295 } 2296 2297 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2298 VirtioNetRscSeg *seg, 2299 const uint8_t *buf, size_t size, 2300 VirtioNetRscUnit *unit) 2301 { 2302 struct ip6_header *ip1, *ip2; 2303 2304 ip1 = (struct ip6_header *)(unit->ip); 2305 ip2 = (struct ip6_header *)(seg->unit.ip); 2306 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2307 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2308 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2309 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2310 chain->stat.no_match++; 2311 return RSC_NO_MATCH; 2312 } 2313 2314 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2315 } 2316 2317 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2318 * to prevent out of order */ 2319 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2320 struct tcp_header *tcp) 2321 { 2322 uint16_t tcp_hdr; 2323 uint16_t tcp_flag; 2324 2325 tcp_flag = 
htons(tcp->th_offset_flags); 2326 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2327 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2328 if (tcp_flag & TH_SYN) { 2329 chain->stat.tcp_syn++; 2330 return RSC_BYPASS; 2331 } 2332 2333 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2334 chain->stat.tcp_ctrl_drain++; 2335 return RSC_FINAL; 2336 } 2337 2338 if (tcp_hdr > sizeof(struct tcp_header)) { 2339 chain->stat.tcp_all_opt++; 2340 return RSC_FINAL; 2341 } 2342 2343 return RSC_CANDIDATE; 2344 } 2345 2346 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2347 NetClientState *nc, 2348 const uint8_t *buf, size_t size, 2349 VirtioNetRscUnit *unit) 2350 { 2351 int ret; 2352 VirtioNetRscSeg *seg, *nseg; 2353 2354 if (QTAILQ_EMPTY(&chain->buffers)) { 2355 chain->stat.empty_cache++; 2356 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2357 timer_mod(chain->drain_timer, 2358 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2359 return size; 2360 } 2361 2362 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2363 if (chain->proto == ETH_P_IP) { 2364 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2365 } else { 2366 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2367 } 2368 2369 if (ret == RSC_FINAL) { 2370 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2371 /* Send failed */ 2372 chain->stat.final_failed++; 2373 return 0; 2374 } 2375 2376 /* Send current packet */ 2377 return virtio_net_do_receive(nc, buf, size); 2378 } else if (ret == RSC_NO_MATCH) { 2379 continue; 2380 } else { 2381 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2382 seg->is_coalesced = 1; 2383 return size; 2384 } 2385 } 2386 2387 chain->stat.no_match_cache++; 2388 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2389 return size; 2390 } 2391 2392 /* Drain a connection data, this is to avoid out of order segments */ 2393 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2394 NetClientState *nc, 2395 
const uint8_t *buf, size_t size, 2396 uint16_t ip_start, uint16_t ip_size, 2397 uint16_t tcp_port) 2398 { 2399 VirtioNetRscSeg *seg, *nseg; 2400 uint32_t ppair1, ppair2; 2401 2402 ppair1 = *(uint32_t *)(buf + tcp_port); 2403 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2404 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2405 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2406 || (ppair1 != ppair2)) { 2407 continue; 2408 } 2409 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2410 chain->stat.drain_failed++; 2411 } 2412 2413 break; 2414 } 2415 2416 return virtio_net_do_receive(nc, buf, size); 2417 } 2418 2419 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2420 struct ip_header *ip, 2421 const uint8_t *buf, size_t size) 2422 { 2423 uint16_t ip_len; 2424 2425 /* Not an ipv4 packet */ 2426 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2427 chain->stat.ip_option++; 2428 return RSC_BYPASS; 2429 } 2430 2431 /* Don't handle packets with ip option */ 2432 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2433 chain->stat.ip_option++; 2434 return RSC_BYPASS; 2435 } 2436 2437 if (ip->ip_p != IPPROTO_TCP) { 2438 chain->stat.bypass_not_tcp++; 2439 return RSC_BYPASS; 2440 } 2441 2442 /* Don't handle packets with ip fragment */ 2443 if (!(htons(ip->ip_off) & IP_DF)) { 2444 chain->stat.ip_frag++; 2445 return RSC_BYPASS; 2446 } 2447 2448 /* Don't handle packets with ecn flag */ 2449 if (IPTOS_ECN(ip->ip_tos)) { 2450 chain->stat.ip_ecn++; 2451 return RSC_BYPASS; 2452 } 2453 2454 ip_len = htons(ip->ip_len); 2455 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2456 || ip_len > (size - chain->n->guest_hdr_len - 2457 sizeof(struct eth_header))) { 2458 chain->stat.ip_hacked++; 2459 return RSC_BYPASS; 2460 } 2461 2462 return RSC_CANDIDATE; 2463 } 2464 2465 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2466 NetClientState *nc, 2467 const uint8_t *buf, size_t size) 2468 { 2469 int32_t 
ret; 2470 uint16_t hdr_len; 2471 VirtioNetRscUnit unit; 2472 2473 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2474 2475 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2476 + sizeof(struct tcp_header))) { 2477 chain->stat.bypass_not_tcp++; 2478 return virtio_net_do_receive(nc, buf, size); 2479 } 2480 2481 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2482 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2483 != RSC_CANDIDATE) { 2484 return virtio_net_do_receive(nc, buf, size); 2485 } 2486 2487 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2488 if (ret == RSC_BYPASS) { 2489 return virtio_net_do_receive(nc, buf, size); 2490 } else if (ret == RSC_FINAL) { 2491 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2492 ((hdr_len + sizeof(struct eth_header)) + 12), 2493 VIRTIO_NET_IP4_ADDR_SIZE, 2494 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2495 } 2496 2497 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2498 } 2499 2500 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2501 struct ip6_header *ip6, 2502 const uint8_t *buf, size_t size) 2503 { 2504 uint16_t ip_len; 2505 2506 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2507 != IP_HEADER_VERSION_6) { 2508 return RSC_BYPASS; 2509 } 2510 2511 /* Both option and protocol is checked in this */ 2512 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2513 chain->stat.bypass_not_tcp++; 2514 return RSC_BYPASS; 2515 } 2516 2517 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2518 if (ip_len < sizeof(struct tcp_header) || 2519 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2520 - sizeof(struct ip6_header))) { 2521 chain->stat.ip_hacked++; 2522 return RSC_BYPASS; 2523 } 2524 2525 /* Don't handle packets with ecn flag */ 2526 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2527 chain->stat.ip_ecn++; 2528 return RSC_BYPASS; 2529 } 2530 2531 return RSC_CANDIDATE; 2532 } 
2533 2534 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2535 const uint8_t *buf, size_t size) 2536 { 2537 int32_t ret; 2538 uint16_t hdr_len; 2539 VirtioNetRscChain *chain; 2540 VirtioNetRscUnit unit; 2541 2542 chain = opq; 2543 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2544 2545 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2546 + sizeof(tcp_header))) { 2547 return virtio_net_do_receive(nc, buf, size); 2548 } 2549 2550 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2551 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2552 unit.ip, buf, size)) { 2553 return virtio_net_do_receive(nc, buf, size); 2554 } 2555 2556 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2557 if (ret == RSC_BYPASS) { 2558 return virtio_net_do_receive(nc, buf, size); 2559 } else if (ret == RSC_FINAL) { 2560 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2561 ((hdr_len + sizeof(struct eth_header)) + 8), 2562 VIRTIO_NET_IP6_ADDR_SIZE, 2563 hdr_len + sizeof(struct eth_header) 2564 + sizeof(struct ip6_header)); 2565 } 2566 2567 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2568 } 2569 2570 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2571 NetClientState *nc, 2572 uint16_t proto) 2573 { 2574 VirtioNetRscChain *chain; 2575 2576 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2577 return NULL; 2578 } 2579 2580 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2581 if (chain->proto == proto) { 2582 return chain; 2583 } 2584 } 2585 2586 chain = g_malloc(sizeof(*chain)); 2587 chain->n = n; 2588 chain->proto = proto; 2589 if (proto == (uint16_t)ETH_P_IP) { 2590 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2591 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2592 } else { 2593 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2594 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2595 } 2596 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2597 virtio_net_rsc_purge, 
chain); 2598 memset(&chain->stat, 0, sizeof(chain->stat)); 2599 2600 QTAILQ_INIT(&chain->buffers); 2601 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2602 2603 return chain; 2604 } 2605 2606 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2607 const uint8_t *buf, 2608 size_t size) 2609 { 2610 uint16_t proto; 2611 VirtioNetRscChain *chain; 2612 struct eth_header *eth; 2613 VirtIONet *n; 2614 2615 n = qemu_get_nic_opaque(nc); 2616 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2617 return virtio_net_do_receive(nc, buf, size); 2618 } 2619 2620 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2621 proto = htons(eth->h_proto); 2622 2623 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2624 if (chain) { 2625 chain->stat.received++; 2626 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2627 return virtio_net_rsc_receive4(chain, nc, buf, size); 2628 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2629 return virtio_net_rsc_receive6(chain, nc, buf, size); 2630 } 2631 } 2632 return virtio_net_do_receive(nc, buf, size); 2633 } 2634 2635 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2636 size_t size) 2637 { 2638 VirtIONet *n = qemu_get_nic_opaque(nc); 2639 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2640 return virtio_net_rsc_receive(nc, buf, size); 2641 } else { 2642 return virtio_net_do_receive(nc, buf, size); 2643 } 2644 } 2645 2646 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2647 2648 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2649 { 2650 VirtIONet *n = qemu_get_nic_opaque(nc); 2651 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2652 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2653 int ret; 2654 2655 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2656 virtio_notify(vdev, q->tx_vq); 2657 2658 g_free(q->async_tx.elem); 2659 q->async_tx.elem = NULL; 2660 2661 virtio_queue_set_notification(q->tx_vq, 1); 2662 ret = virtio_net_flush_tx(q); 2663 if (ret >= n->tx_burst) 
{ 2664 /* 2665 * the flush has been stopped by tx_burst 2666 * we will not receive notification for the 2667 * remainining part, so re-schedule 2668 */ 2669 virtio_queue_set_notification(q->tx_vq, 0); 2670 if (q->tx_bh) { 2671 qemu_bh_schedule(q->tx_bh); 2672 } else { 2673 timer_mod(q->tx_timer, 2674 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2675 } 2676 q->tx_waiting = 1; 2677 } 2678 } 2679 2680 /* TX */ 2681 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2682 { 2683 VirtIONet *n = q->n; 2684 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2685 VirtQueueElement *elem; 2686 int32_t num_packets = 0; 2687 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2688 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2689 return num_packets; 2690 } 2691 2692 if (q->async_tx.elem) { 2693 virtio_queue_set_notification(q->tx_vq, 0); 2694 return num_packets; 2695 } 2696 2697 for (;;) { 2698 ssize_t ret; 2699 unsigned int out_num; 2700 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2701 struct virtio_net_hdr_v1_hash vhdr; 2702 2703 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2704 if (!elem) { 2705 break; 2706 } 2707 2708 out_num = elem->out_num; 2709 out_sg = elem->out_sg; 2710 if (out_num < 1) { 2711 virtio_error(vdev, "virtio-net header not in first element"); 2712 virtqueue_detach_element(q->tx_vq, elem, 0); 2713 g_free(elem); 2714 return -EINVAL; 2715 } 2716 2717 if (n->has_vnet_hdr) { 2718 if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) < 2719 n->guest_hdr_len) { 2720 virtio_error(vdev, "virtio-net header incorrect"); 2721 virtqueue_detach_element(q->tx_vq, elem, 0); 2722 g_free(elem); 2723 return -EINVAL; 2724 } 2725 if (n->needs_vnet_hdr_swap) { 2726 virtio_net_hdr_swap(vdev, (void *) &vhdr); 2727 sg2[0].iov_base = &vhdr; 2728 sg2[0].iov_len = n->guest_hdr_len; 2729 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2730 out_sg, out_num, 2731 n->guest_hdr_len, -1); 2732 if (out_num == 
VIRTQUEUE_MAX_SIZE) { 2733 goto drop; 2734 } 2735 out_num += 1; 2736 out_sg = sg2; 2737 } 2738 } 2739 /* 2740 * If host wants to see the guest header as is, we can 2741 * pass it on unchanged. Otherwise, copy just the parts 2742 * that host is interested in. 2743 */ 2744 assert(n->host_hdr_len <= n->guest_hdr_len); 2745 if (n->host_hdr_len != n->guest_hdr_len) { 2746 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2747 out_sg, out_num, 2748 0, n->host_hdr_len); 2749 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2750 out_sg, out_num, 2751 n->guest_hdr_len, -1); 2752 out_num = sg_num; 2753 out_sg = sg; 2754 } 2755 2756 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2757 out_sg, out_num, virtio_net_tx_complete); 2758 if (ret == 0) { 2759 virtio_queue_set_notification(q->tx_vq, 0); 2760 q->async_tx.elem = elem; 2761 return -EBUSY; 2762 } 2763 2764 drop: 2765 virtqueue_push(q->tx_vq, elem, 0); 2766 virtio_notify(vdev, q->tx_vq); 2767 g_free(elem); 2768 2769 if (++num_packets >= n->tx_burst) { 2770 break; 2771 } 2772 } 2773 return num_packets; 2774 } 2775 2776 static void virtio_net_tx_timer(void *opaque); 2777 2778 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2779 { 2780 VirtIONet *n = VIRTIO_NET(vdev); 2781 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2782 2783 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2784 virtio_net_drop_tx_queue_data(vdev, vq); 2785 return; 2786 } 2787 2788 /* This happens when device was stopped but VCPU wasn't. 
*/ 2789 if (!vdev->vm_running) { 2790 q->tx_waiting = 1; 2791 return; 2792 } 2793 2794 if (q->tx_waiting) { 2795 /* We already have queued packets, immediately flush */ 2796 timer_del(q->tx_timer); 2797 virtio_net_tx_timer(q); 2798 } else { 2799 /* re-arm timer to flush it (and more) on next tick */ 2800 timer_mod(q->tx_timer, 2801 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2802 q->tx_waiting = 1; 2803 virtio_queue_set_notification(vq, 0); 2804 } 2805 } 2806 2807 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2808 { 2809 VirtIONet *n = VIRTIO_NET(vdev); 2810 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2811 2812 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2813 virtio_net_drop_tx_queue_data(vdev, vq); 2814 return; 2815 } 2816 2817 if (unlikely(q->tx_waiting)) { 2818 return; 2819 } 2820 q->tx_waiting = 1; 2821 /* This happens when device was stopped but VCPU wasn't. */ 2822 if (!vdev->vm_running) { 2823 return; 2824 } 2825 virtio_queue_set_notification(vq, 0); 2826 qemu_bh_schedule(q->tx_bh); 2827 } 2828 2829 static void virtio_net_tx_timer(void *opaque) 2830 { 2831 VirtIONetQueue *q = opaque; 2832 VirtIONet *n = q->n; 2833 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2834 int ret; 2835 2836 /* This happens when device was stopped but BH wasn't. */ 2837 if (!vdev->vm_running) { 2838 /* Make sure tx waiting is set, so we'll run when restarted. 
*/ 2839 assert(q->tx_waiting); 2840 return; 2841 } 2842 2843 q->tx_waiting = 0; 2844 2845 /* Just in case the driver is not ready on more */ 2846 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2847 return; 2848 } 2849 2850 ret = virtio_net_flush_tx(q); 2851 if (ret == -EBUSY || ret == -EINVAL) { 2852 return; 2853 } 2854 /* 2855 * If we flush a full burst of packets, assume there are 2856 * more coming and immediately rearm 2857 */ 2858 if (ret >= n->tx_burst) { 2859 q->tx_waiting = 1; 2860 timer_mod(q->tx_timer, 2861 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2862 return; 2863 } 2864 /* 2865 * If less than a full burst, re-enable notification and flush 2866 * anything that may have come in while we weren't looking. If 2867 * we find something, assume the guest is still active and rearm 2868 */ 2869 virtio_queue_set_notification(q->tx_vq, 1); 2870 ret = virtio_net_flush_tx(q); 2871 if (ret > 0) { 2872 virtio_queue_set_notification(q->tx_vq, 0); 2873 q->tx_waiting = 1; 2874 timer_mod(q->tx_timer, 2875 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2876 } 2877 } 2878 2879 static void virtio_net_tx_bh(void *opaque) 2880 { 2881 VirtIONetQueue *q = opaque; 2882 VirtIONet *n = q->n; 2883 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2884 int32_t ret; 2885 2886 /* This happens when device was stopped but BH wasn't. */ 2887 if (!vdev->vm_running) { 2888 /* Make sure tx waiting is set, so we'll run when restarted. 
*/ 2889 assert(q->tx_waiting); 2890 return; 2891 } 2892 2893 q->tx_waiting = 0; 2894 2895 /* Just in case the driver is not ready on more */ 2896 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2897 return; 2898 } 2899 2900 ret = virtio_net_flush_tx(q); 2901 if (ret == -EBUSY || ret == -EINVAL) { 2902 return; /* Notification re-enable handled by tx_complete or device 2903 * broken */ 2904 } 2905 2906 /* If we flush a full burst of packets, assume there are 2907 * more coming and immediately reschedule */ 2908 if (ret >= n->tx_burst) { 2909 qemu_bh_schedule(q->tx_bh); 2910 q->tx_waiting = 1; 2911 return; 2912 } 2913 2914 /* If less than a full burst, re-enable notification and flush 2915 * anything that may have come in while we weren't looking. If 2916 * we find something, assume the guest is still active and reschedule */ 2917 virtio_queue_set_notification(q->tx_vq, 1); 2918 ret = virtio_net_flush_tx(q); 2919 if (ret == -EINVAL) { 2920 return; 2921 } else if (ret > 0) { 2922 virtio_queue_set_notification(q->tx_vq, 0); 2923 qemu_bh_schedule(q->tx_bh); 2924 q->tx_waiting = 1; 2925 } 2926 } 2927 2928 static void virtio_net_add_queue(VirtIONet *n, int index) 2929 { 2930 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2931 2932 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2933 virtio_net_handle_rx); 2934 2935 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2936 n->vqs[index].tx_vq = 2937 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2938 virtio_net_handle_tx_timer); 2939 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2940 virtio_net_tx_timer, 2941 &n->vqs[index]); 2942 } else { 2943 n->vqs[index].tx_vq = 2944 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2945 virtio_net_handle_tx_bh); 2946 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], 2947 &DEVICE(vdev)->mem_reentrancy_guard); 2948 } 2949 2950 n->vqs[index].tx_waiting = 0; 2951 n->vqs[index].n = n; 2952 } 2953 2954 static void 
virtio_net_del_queue(VirtIONet *n, int index) 2955 { 2956 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2957 VirtIONetQueue *q = &n->vqs[index]; 2958 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2959 2960 qemu_purge_queued_packets(nc); 2961 2962 virtio_del_queue(vdev, index * 2); 2963 if (q->tx_timer) { 2964 timer_free(q->tx_timer); 2965 q->tx_timer = NULL; 2966 } else { 2967 qemu_bh_delete(q->tx_bh); 2968 q->tx_bh = NULL; 2969 } 2970 q->tx_waiting = 0; 2971 virtio_del_queue(vdev, index * 2 + 1); 2972 } 2973 2974 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2975 { 2976 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2977 int old_num_queues = virtio_get_num_queues(vdev); 2978 int new_num_queues = new_max_queue_pairs * 2 + 1; 2979 int i; 2980 2981 assert(old_num_queues >= 3); 2982 assert(old_num_queues % 2 == 1); 2983 2984 if (old_num_queues == new_num_queues) { 2985 return; 2986 } 2987 2988 /* 2989 * We always need to remove and add ctrl vq if 2990 * old_num_queues != new_num_queues. Remove ctrl_vq first, 2991 * and then we only enter one of the following two loops. 2992 */ 2993 virtio_del_queue(vdev, old_num_queues - 1); 2994 2995 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 2996 /* new_num_queues < old_num_queues */ 2997 virtio_net_del_queue(n, i / 2); 2998 } 2999 3000 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 3001 /* new_num_queues > old_num_queues */ 3002 virtio_net_add_queue(n, i / 2); 3003 } 3004 3005 /* add ctrl_vq last */ 3006 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3007 } 3008 3009 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 3010 { 3011 int max = multiqueue ? 
n->max_queue_pairs : 1; 3012 3013 n->multiqueue = multiqueue; 3014 virtio_net_change_num_queue_pairs(n, max); 3015 3016 virtio_net_set_queue_pairs(n); 3017 } 3018 3019 static int virtio_net_post_load_device(void *opaque, int version_id) 3020 { 3021 VirtIONet *n = opaque; 3022 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3023 int i, link_down; 3024 3025 trace_virtio_net_post_load_device(); 3026 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 3027 virtio_vdev_has_feature(vdev, 3028 VIRTIO_F_VERSION_1), 3029 virtio_vdev_has_feature(vdev, 3030 VIRTIO_NET_F_HASH_REPORT)); 3031 3032 /* MAC_TABLE_ENTRIES may be different from the saved image */ 3033 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 3034 n->mac_table.in_use = 0; 3035 } 3036 3037 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 3038 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 3039 } 3040 3041 /* 3042 * curr_guest_offloads will be later overwritten by the 3043 * virtio_set_features_nocheck call done from the virtio_load. 3044 * Here we make sure it is preserved and restored accordingly 3045 * in the virtio_net_post_load_virtio callback. 
3046 */ 3047 n->saved_guest_offloads = n->curr_guest_offloads; 3048 3049 virtio_net_set_queue_pairs(n); 3050 3051 /* Find the first multicast entry in the saved MAC filter */ 3052 for (i = 0; i < n->mac_table.in_use; i++) { 3053 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 3054 break; 3055 } 3056 } 3057 n->mac_table.first_multi = i; 3058 3059 /* nc.link_down can't be migrated, so infer link_down according 3060 * to link status bit in n->status */ 3061 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 3062 for (i = 0; i < n->max_queue_pairs; i++) { 3063 qemu_get_subqueue(n->nic, i)->link_down = link_down; 3064 } 3065 3066 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3067 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3068 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3069 QEMU_CLOCK_VIRTUAL, 3070 virtio_net_announce_timer, n); 3071 if (n->announce_timer.round) { 3072 timer_mod(n->announce_timer.tm, 3073 qemu_clock_get_ms(n->announce_timer.type)); 3074 } else { 3075 qemu_announce_timer_del(&n->announce_timer, false); 3076 } 3077 } 3078 3079 if (n->rss_data.enabled) { 3080 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 3081 if (!n->rss_data.populate_hash) { 3082 if (!virtio_net_attach_epbf_rss(n)) { 3083 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 3084 warn_report("Can't post-load eBPF RSS for vhost"); 3085 } else { 3086 warn_report("Can't post-load eBPF RSS - " 3087 "fallback to software RSS"); 3088 n->rss_data.enabled_software_rss = true; 3089 } 3090 } 3091 } 3092 3093 trace_virtio_net_rss_enable(n->rss_data.hash_types, 3094 n->rss_data.indirections_len, 3095 sizeof(n->rss_data.key)); 3096 } else { 3097 trace_virtio_net_rss_disable(); 3098 } 3099 return 0; 3100 } 3101 3102 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3103 { 3104 VirtIONet *n = VIRTIO_NET(vdev); 3105 /* 3106 * The actual needed state is now in saved_guest_offloads, 3107 * see virtio_net_post_load_device for 
detail. 3108 * Restore it back and apply the desired offloads. 3109 */ 3110 n->curr_guest_offloads = n->saved_guest_offloads; 3111 if (peer_has_vnet_hdr(n)) { 3112 virtio_net_apply_guest_offloads(n); 3113 } 3114 3115 return 0; 3116 } 3117 3118 /* tx_waiting field of a VirtIONetQueue */ 3119 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3120 .name = "virtio-net-queue-tx_waiting", 3121 .fields = (const VMStateField[]) { 3122 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3123 VMSTATE_END_OF_LIST() 3124 }, 3125 }; 3126 3127 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3128 { 3129 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3130 } 3131 3132 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3133 { 3134 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3135 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3136 } 3137 3138 static bool mac_table_fits(void *opaque, int version_id) 3139 { 3140 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3141 } 3142 3143 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3144 { 3145 return !mac_table_fits(opaque, version_id); 3146 } 3147 3148 /* This temporary type is shared by all the WITH_TMP methods 3149 * although only some fields are used by each. 3150 */ 3151 struct VirtIONetMigTmp { 3152 VirtIONet *parent; 3153 VirtIONetQueue *vqs_1; 3154 uint16_t curr_queue_pairs_1; 3155 uint8_t has_ufo; 3156 uint32_t has_vnet_hdr; 3157 }; 3158 3159 /* The 2nd and subsequent tx_waiting flags are loaded later than 3160 * the 1st entry in the queue_pairs and only if there's more than one 3161 * entry. We use the tmp mechanism to calculate a temporary 3162 * pointer and count and also validate the count. 
3163 */ 3164 3165 static int virtio_net_tx_waiting_pre_save(void *opaque) 3166 { 3167 struct VirtIONetMigTmp *tmp = opaque; 3168 3169 tmp->vqs_1 = tmp->parent->vqs + 1; 3170 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3171 if (tmp->parent->curr_queue_pairs == 0) { 3172 tmp->curr_queue_pairs_1 = 0; 3173 } 3174 3175 return 0; 3176 } 3177 3178 static int virtio_net_tx_waiting_pre_load(void *opaque) 3179 { 3180 struct VirtIONetMigTmp *tmp = opaque; 3181 3182 /* Reuse the pointer setup from save */ 3183 virtio_net_tx_waiting_pre_save(opaque); 3184 3185 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3186 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3187 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3188 3189 return -EINVAL; 3190 } 3191 3192 return 0; /* all good */ 3193 } 3194 3195 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3196 .name = "virtio-net-tx_waiting", 3197 .pre_load = virtio_net_tx_waiting_pre_load, 3198 .pre_save = virtio_net_tx_waiting_pre_save, 3199 .fields = (const VMStateField[]) { 3200 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3201 curr_queue_pairs_1, 3202 vmstate_virtio_net_queue_tx_waiting, 3203 struct VirtIONetQueue), 3204 VMSTATE_END_OF_LIST() 3205 }, 3206 }; 3207 3208 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3209 * flag set we need to check that we have it 3210 */ 3211 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3212 { 3213 struct VirtIONetMigTmp *tmp = opaque; 3214 3215 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3216 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3217 return -EINVAL; 3218 } 3219 3220 return 0; 3221 } 3222 3223 static int virtio_net_ufo_pre_save(void *opaque) 3224 { 3225 struct VirtIONetMigTmp *tmp = opaque; 3226 3227 tmp->has_ufo = tmp->parent->has_ufo; 3228 3229 return 0; 3230 } 3231 3232 static const VMStateDescription 
vmstate_virtio_net_has_ufo = { 3233 .name = "virtio-net-ufo", 3234 .post_load = virtio_net_ufo_post_load, 3235 .pre_save = virtio_net_ufo_pre_save, 3236 .fields = (const VMStateField[]) { 3237 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3238 VMSTATE_END_OF_LIST() 3239 }, 3240 }; 3241 3242 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3243 * flag set we need to check that we have it 3244 */ 3245 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3246 { 3247 struct VirtIONetMigTmp *tmp = opaque; 3248 3249 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3250 error_report("virtio-net: saved image requires vnet_hdr=on"); 3251 return -EINVAL; 3252 } 3253 3254 return 0; 3255 } 3256 3257 static int virtio_net_vnet_pre_save(void *opaque) 3258 { 3259 struct VirtIONetMigTmp *tmp = opaque; 3260 3261 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3262 3263 return 0; 3264 } 3265 3266 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3267 .name = "virtio-net-vnet", 3268 .post_load = virtio_net_vnet_post_load, 3269 .pre_save = virtio_net_vnet_pre_save, 3270 .fields = (const VMStateField[]) { 3271 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3272 VMSTATE_END_OF_LIST() 3273 }, 3274 }; 3275 3276 static bool virtio_net_rss_needed(void *opaque) 3277 { 3278 return VIRTIO_NET(opaque)->rss_data.enabled; 3279 } 3280 3281 static const VMStateDescription vmstate_virtio_net_rss = { 3282 .name = "virtio-net-device/rss", 3283 .version_id = 1, 3284 .minimum_version_id = 1, 3285 .needed = virtio_net_rss_needed, 3286 .fields = (const VMStateField[]) { 3287 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3288 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3289 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3290 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3291 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3292 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3293 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 
/*
 * Device-specific migration state of virtio-net.
 *
 * The order of the entries below is the order in which they are written
 * to and read from the stream, so entries must not be reordered. Several
 * entries are conditional (the *_TEST variants) or go through a
 * struct VirtIONetMigTmp temporary (VMSTATE_WITH_TMP). The RSS
 * configuration lives in an optional subsection.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting state of the first queue only */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The two queue-pair counts are guarded by max_queue_pairs_gt_1 */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        /* tx_waiting state of the second and subsequent queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3331 */ 3332 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3333 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3334 vmstate_virtio_net_has_vnet), 3335 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3336 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3337 VMSTATE_UINT8(alluni, VirtIONet), 3338 VMSTATE_UINT8(nomulti, VirtIONet), 3339 VMSTATE_UINT8(nouni, VirtIONet), 3340 VMSTATE_UINT8(nobcast, VirtIONet), 3341 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3342 vmstate_virtio_net_has_ufo), 3343 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3344 vmstate_info_uint16_equal, uint16_t), 3345 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3346 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3347 vmstate_virtio_net_tx_waiting), 3348 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3349 has_ctrl_guest_offloads), 3350 VMSTATE_END_OF_LIST() 3351 }, 3352 .subsections = (const VMStateDescription * const []) { 3353 &vmstate_virtio_net_rss, 3354 NULL 3355 } 3356 }; 3357 3358 static NetClientInfo net_virtio_info = { 3359 .type = NET_CLIENT_DRIVER_NIC, 3360 .size = sizeof(NICState), 3361 .can_receive = virtio_net_can_receive, 3362 .receive = virtio_net_receive, 3363 .link_status_changed = virtio_net_set_link_status, 3364 .query_rx_filter = virtio_net_query_rxfilter, 3365 .announce = virtio_net_announce, 3366 }; 3367 3368 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3369 { 3370 VirtIONet *n = VIRTIO_NET(vdev); 3371 NetClientState *nc; 3372 assert(n->vhost_started); 3373 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3374 /* Must guard against invalid features and bogus queue index 3375 * from being set by malicious guest, or penetrated through 3376 * buggy migration stream. 
3377 */ 3378 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3379 qemu_log_mask(LOG_GUEST_ERROR, 3380 "%s: bogus vq index ignored\n", __func__); 3381 return false; 3382 } 3383 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3384 } else { 3385 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3386 } 3387 /* 3388 * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3389 * as the macro of configure interrupt's IDX, If this driver does not 3390 * support, the function will return false 3391 */ 3392 3393 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3394 return vhost_net_config_pending(get_vhost_net(nc->peer)); 3395 } 3396 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3397 } 3398 3399 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3400 bool mask) 3401 { 3402 VirtIONet *n = VIRTIO_NET(vdev); 3403 NetClientState *nc; 3404 assert(n->vhost_started); 3405 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { 3406 /* Must guard against invalid features and bogus queue index 3407 * from being set by malicious guest, or penetrated through 3408 * buggy migration stream. 
3409 */ 3410 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3411 qemu_log_mask(LOG_GUEST_ERROR, 3412 "%s: bogus vq index ignored\n", __func__); 3413 return; 3414 } 3415 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3416 } else { 3417 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3418 } 3419 /* 3420 *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3421 * as the macro of configure interrupt's IDX, If this driver does not 3422 * support, the function will return 3423 */ 3424 3425 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3426 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); 3427 return; 3428 } 3429 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); 3430 } 3431 3432 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3433 { 3434 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3435 3436 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3437 } 3438 3439 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3440 const char *type) 3441 { 3442 /* 3443 * The name can be NULL, the netclient name will be type.x. 
3444 */ 3445 assert(type != NULL); 3446 3447 g_free(n->netclient_name); 3448 g_free(n->netclient_type); 3449 n->netclient_name = g_strdup(name); 3450 n->netclient_type = g_strdup(type); 3451 } 3452 3453 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3454 { 3455 HotplugHandler *hotplug_ctrl; 3456 PCIDevice *pci_dev; 3457 Error *err = NULL; 3458 3459 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3460 if (hotplug_ctrl) { 3461 pci_dev = PCI_DEVICE(dev); 3462 pci_dev->partially_hotplugged = true; 3463 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3464 if (err) { 3465 error_report_err(err); 3466 return false; 3467 } 3468 } else { 3469 return false; 3470 } 3471 return true; 3472 } 3473 3474 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3475 Error **errp) 3476 { 3477 Error *err = NULL; 3478 HotplugHandler *hotplug_ctrl; 3479 PCIDevice *pdev = PCI_DEVICE(dev); 3480 BusState *primary_bus; 3481 3482 if (!pdev->partially_hotplugged) { 3483 return true; 3484 } 3485 primary_bus = dev->parent_bus; 3486 if (!primary_bus) { 3487 error_setg(errp, "virtio_net: couldn't find primary bus"); 3488 return false; 3489 } 3490 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3491 qatomic_set(&n->failover_primary_hidden, false); 3492 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3493 if (hotplug_ctrl) { 3494 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3495 if (err) { 3496 goto out; 3497 } 3498 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3499 } 3500 pdev->partially_hotplugged = false; 3501 3502 out: 3503 error_propagate(errp, err); 3504 return !err; 3505 } 3506 3507 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) 3508 { 3509 bool should_be_hidden; 3510 Error *err = NULL; 3511 DeviceState *dev = failover_find_primary_device(n); 3512 3513 if (!dev) { 3514 return; 3515 } 3516 3517 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3518 3519 if (migration_in_setup(s) && 
!should_be_hidden) { 3520 if (failover_unplug_primary(n, dev)) { 3521 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3522 qapi_event_send_unplug_primary(dev->id); 3523 qatomic_set(&n->failover_primary_hidden, true); 3524 } else { 3525 warn_report("couldn't unplug primary device"); 3526 } 3527 } else if (migration_has_failed(s)) { 3528 /* We already unplugged the device let's plug it back */ 3529 if (!failover_replug_primary(n, dev, &err)) { 3530 if (err) { 3531 error_report_err(err); 3532 } 3533 } 3534 } 3535 } 3536 3537 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) 3538 { 3539 MigrationState *s = data; 3540 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3541 virtio_net_handle_migration_primary(n, s); 3542 } 3543 3544 static bool failover_hide_primary_device(DeviceListener *listener, 3545 const QDict *device_opts, 3546 bool from_json, 3547 Error **errp) 3548 { 3549 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3550 const char *standby_id; 3551 3552 if (!device_opts) { 3553 return false; 3554 } 3555 3556 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3557 return false; 3558 } 3559 3560 if (!qdict_haskey(device_opts, "id")) { 3561 error_setg(errp, "Device with failover_pair_id needs to have id"); 3562 return false; 3563 } 3564 3565 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3566 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3567 return false; 3568 } 3569 3570 /* 3571 * The hide helper can be called several times for a given device. 3572 * Check there is only one primary for a virtio-net device but 3573 * don't duplicate the qdict several times if it's called for the same 3574 * device. 
/*
 * Realize callback of the virtio-net device.
 *
 * In order, this:
 *  - derives host feature bits from the mtu/duplex/speed/failover
 *    properties and validates duplex and speed;
 *  - registers the failover listener and migration notifier if enabled;
 *  - sizes the config space and initializes the virtio device;
 *  - validates rx/tx queue sizes and the number of queue pairs;
 *  - allocates per-queue state and adds the data and control virtqueues;
 *  - creates the NIC, probes the peer for vnet header support and
 *    initializes MAC/VLAN tables, RSC chains and the rx packet helper;
 *  - loads the eBPF RSS program when VIRTIO_NET_F_RSS is offered.
 *
 * Errors are reported through @errp; on error the function returns
 * early.
 *
 * NOTE(review): the early returns after virtio_init() call
 * virtio_cleanup() but do not undo the failover listener/notifier
 * registration done above them - confirm whether that is intended.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        /* n == 5 includes the terminating NUL, so this is an exact match */
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        /* Start out hidden; see failover_hide_primary_device() */
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        migration_add_notifier(&n->migration_state,
                               virtio_net_migration_state_notifier);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    /* Config size depends on the feature bits accumulated above */
    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    /* Two virtqueues per pair plus one control queue must fit */
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* Only warn about an unknown tx= value; realize continues */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The control queue is added after all data queues */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name,
                              &dev->mem_reentrancy_guard, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id,
                              &dev->mem_reentrancy_guard, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* Host header length depends on whether the peer has a vnet header */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    /* For a vhost-vdpa peer, push the MAC address into its config space */
   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
3720 */ 3721 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3722 n->netclient_type, n->netclient_name, 3723 &dev->mem_reentrancy_guard, n); 3724 } else { 3725 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3726 object_get_typename(OBJECT(dev)), dev->id, 3727 &dev->mem_reentrancy_guard, n); 3728 } 3729 3730 for (i = 0; i < n->max_queue_pairs; i++) { 3731 n->nic->ncs[i].do_not_pad = true; 3732 } 3733 3734 peer_test_vnet_hdr(n); 3735 if (peer_has_vnet_hdr(n)) { 3736 for (i = 0; i < n->max_queue_pairs; i++) { 3737 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); 3738 } 3739 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3740 } else { 3741 n->host_hdr_len = 0; 3742 } 3743 3744 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3745 3746 n->vqs[0].tx_waiting = 0; 3747 n->tx_burst = n->net_conf.txburst; 3748 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3749 n->promisc = 1; /* for compatibility */ 3750 3751 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3752 3753 n->vlans = g_malloc0(MAX_VLAN >> 3); 3754 3755 nc = qemu_get_queue(n->nic); 3756 nc->rxfilter_notify_enabled = 1; 3757 3758 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3759 struct virtio_net_config netcfg = {}; 3760 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3761 vhost_net_set_config(get_vhost_net(nc->peer), 3762 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); 3763 } 3764 QTAILQ_INIT(&n->rsc_chains); 3765 n->qdev = dev; 3766 3767 net_rx_pkt_init(&n->rx_pkt); 3768 3769 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3770 virtio_net_load_ebpf(n); 3771 } 3772 } 3773 3774 static void virtio_net_device_unrealize(DeviceState *dev) 3775 { 3776 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3777 VirtIONet *n = VIRTIO_NET(dev); 3778 int i, max_queue_pairs; 3779 3780 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3781 virtio_net_unload_ebpf(n); 3782 } 3783 3784 /* This will stop vhost 
backend if appropriate. */ 3785 virtio_net_set_status(vdev, 0); 3786 3787 g_free(n->netclient_name); 3788 n->netclient_name = NULL; 3789 g_free(n->netclient_type); 3790 n->netclient_type = NULL; 3791 3792 g_free(n->mac_table.macs); 3793 g_free(n->vlans); 3794 3795 if (n->failover) { 3796 qobject_unref(n->primary_opts); 3797 device_listener_unregister(&n->primary_listener); 3798 migration_remove_notifier(&n->migration_state); 3799 } else { 3800 assert(n->primary_opts == NULL); 3801 } 3802 3803 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3804 for (i = 0; i < max_queue_pairs; i++) { 3805 virtio_net_del_queue(n, i); 3806 } 3807 /* delete also control vq */ 3808 virtio_del_queue(vdev, max_queue_pairs * 2); 3809 qemu_announce_timer_del(&n->announce_timer, false); 3810 g_free(n->vqs); 3811 qemu_del_nic(n->nic); 3812 virtio_net_rsc_cleanup(n); 3813 g_free(n->rss_data.indirections_table); 3814 net_rx_pkt_uninit(n->rx_pkt); 3815 virtio_cleanup(vdev); 3816 } 3817 3818 static void virtio_net_instance_init(Object *obj) 3819 { 3820 VirtIONet *n = VIRTIO_NET(obj); 3821 3822 /* 3823 * The default config_size is sizeof(struct virtio_net_config). 3824 * Can be overridden with virtio_net_set_config_size. 3825 */ 3826 n->config_size = sizeof(struct virtio_net_config); 3827 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3828 "bootindex", "/ethernet-phy@0", 3829 DEVICE(n)); 3830 3831 ebpf_rss_init(&n->ebpf_rss); 3832 } 3833 3834 static int virtio_net_pre_save(void *opaque) 3835 { 3836 VirtIONet *n = opaque; 3837 3838 /* At this point, backend must be stopped, otherwise 3839 * it might keep writing to memory. 
*/ 3840 assert(!n->vhost_started); 3841 3842 return 0; 3843 } 3844 3845 static bool primary_unplug_pending(void *opaque) 3846 { 3847 DeviceState *dev = opaque; 3848 DeviceState *primary; 3849 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3850 VirtIONet *n = VIRTIO_NET(vdev); 3851 3852 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3853 return false; 3854 } 3855 primary = failover_find_primary_device(n); 3856 return primary ? primary->pending_deleted_event : false; 3857 } 3858 3859 static bool dev_unplug_pending(void *opaque) 3860 { 3861 DeviceState *dev = opaque; 3862 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3863 3864 return vdc->primary_unplug_pending(dev); 3865 } 3866 3867 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3868 { 3869 VirtIONet *n = VIRTIO_NET(vdev); 3870 NetClientState *nc = qemu_get_queue(n->nic); 3871 struct vhost_net *net = get_vhost_net(nc->peer); 3872 return &net->dev; 3873 } 3874 3875 static const VMStateDescription vmstate_virtio_net = { 3876 .name = "virtio-net", 3877 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3878 .version_id = VIRTIO_NET_VM_VERSION, 3879 .fields = (const VMStateField[]) { 3880 VMSTATE_VIRTIO_DEVICE, 3881 VMSTATE_END_OF_LIST() 3882 }, 3883 .pre_save = virtio_net_pre_save, 3884 .dev_unplug_pending = dev_unplug_pending, 3885 }; 3886 3887 static Property virtio_net_properties[] = { 3888 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3889 VIRTIO_NET_F_CSUM, true), 3890 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3891 VIRTIO_NET_F_GUEST_CSUM, true), 3892 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3893 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3894 VIRTIO_NET_F_GUEST_TSO4, true), 3895 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3896 VIRTIO_NET_F_GUEST_TSO6, true), 3897 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3898 VIRTIO_NET_F_GUEST_ECN, true), 3899 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, 
host_features, 3900 VIRTIO_NET_F_GUEST_UFO, true), 3901 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3902 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3903 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3904 VIRTIO_NET_F_HOST_TSO4, true), 3905 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3906 VIRTIO_NET_F_HOST_TSO6, true), 3907 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3908 VIRTIO_NET_F_HOST_ECN, true), 3909 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3910 VIRTIO_NET_F_HOST_UFO, true), 3911 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3912 VIRTIO_NET_F_MRG_RXBUF, true), 3913 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3914 VIRTIO_NET_F_STATUS, true), 3915 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3916 VIRTIO_NET_F_CTRL_VQ, true), 3917 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3918 VIRTIO_NET_F_CTRL_RX, true), 3919 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3920 VIRTIO_NET_F_CTRL_VLAN, true), 3921 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3922 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3923 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3924 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3925 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3926 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3927 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3928 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3929 VIRTIO_NET_F_RSS, false), 3930 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3931 VIRTIO_NET_F_HASH_REPORT, false), 3932 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3933 VIRTIO_NET_F_RSC_EXT, false), 3934 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3935 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3936 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3937 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3938 TX_TIMER_INTERVAL), 3939 DEFINE_PROP_INT32("x-txburst", 
VirtIONet, net_conf.txburst, TX_BURST), 3940 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3941 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3942 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3943 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3944 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3945 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3946 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3947 true), 3948 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3949 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3950 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3951 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features, 3952 VIRTIO_NET_F_GUEST_USO4, true), 3953 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features, 3954 VIRTIO_NET_F_GUEST_USO6, true), 3955 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, 3956 VIRTIO_NET_F_HOST_USO, true), 3957 DEFINE_PROP_END_OF_LIST(), 3958 }; 3959 3960 static void virtio_net_class_init(ObjectClass *klass, void *data) 3961 { 3962 DeviceClass *dc = DEVICE_CLASS(klass); 3963 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 3964 3965 device_class_set_props(dc, virtio_net_properties); 3966 dc->vmsd = &vmstate_virtio_net; 3967 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 3968 vdc->realize = virtio_net_device_realize; 3969 vdc->unrealize = virtio_net_device_unrealize; 3970 vdc->get_config = virtio_net_get_config; 3971 vdc->set_config = virtio_net_set_config; 3972 vdc->get_features = virtio_net_get_features; 3973 vdc->set_features = virtio_net_set_features; 3974 vdc->bad_features = virtio_net_bad_features; 3975 vdc->reset = virtio_net_reset; 3976 vdc->queue_reset = virtio_net_queue_reset; 3977 vdc->queue_enable = virtio_net_queue_enable; 3978 vdc->set_status = virtio_net_set_status; 3979 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 3980 vdc->guest_notifier_pending = 
/* QOM type description of the virtio-net device. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

/* Register the virtio-net type; hooked up through type_init() below. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)