/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "sysemu/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value affects
 * performance significantly and should be tuned carefully: 300000 (300us)
 * is the recommended value for passing the WHQL test, while 50000 can gain
 * 2x netperf throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

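/*
 * Virtqueues come in RX/TX pairs: index 2*i is the RX queue and 2*i + 1
 * the TX queue of pair i, with the control queue (when negotiated)
 * allocated last.  vq2q() maps a virtqueue index back to its
 * queue-pair index.
 */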
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement instead, so that we do
     * not cause confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%u-byte MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers.  If the backend
         * can't do it, we fall back onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

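/*
 * Device status changes fan out from here: first the vnet header
 * endianness is (re)negotiated with the backend, then vhost is started
 * or stopped, and finally each queue pair is started or drained to
 * match the new status.
 */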
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

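/*
 * n->vlans is a bitmap with one bit per VLAN ID: VID v is stored in
 * 32-bit word v >> 5 at bit v & 0x1f, so (i << 5) + j below rebuilds
 * the VID for the QMP reply.
 */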
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

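/*
 * The guest-visible header size depends on what was negotiated: with
 * VIRTIO_F_VERSION_1 it is virtio_net_hdr_mrg_rxbuf (or the hash-report
 * variant when VIRTIO_NET_F_HASH_REPORT was negotiated), otherwise the
 * legacy virtio_net_hdr unless mergeable RX buffers are in use.
 */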
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set even
     * when the backend has not enabled it.  This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * refuse to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest announce its new location with vDPA devices that do
     * not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN) |
        (1ULL << VIRTIO_NET_F_GUEST_UFO) |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked
 * @opaque: the FailoverDevice to fill in when a match is found
 *
 * Returns 1 to stop the walk once the primary device is found.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

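/*
 * Called when the guest acks its feature set.  Everything that depends
 * on negotiation is resolved here: queue-pair count, header layout,
 * guest offloads, vhost feature acking, the VLAN filter, and failover
 * primary hotplug.
 */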
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so it
         * can't miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

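    /*
     * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two tables, unicast first
     * and then multicast, each encoded as a little-endian 32-bit entry
     * count followed by that many 6-byte MAC addresses.  Entries beyond
     * MAC_TABLE_ENTRIES don't fit and only set the overflow flags.
     */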
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

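/*
 * RSS runs in one of two modes: the preferred one attaches an eBPF
 * steering program to the backend so the kernel picks the RX queue,
 * and the fallback ("software RSS") hashes each packet in QEMU itself;
 * see virtio_net_commit_rss_config() below.
 */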
static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable();
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    int ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}

static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    bool ret = false;

    if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        if (n->ebpf_rss_fds) {
            ret = virtio_net_load_ebpf_fds(n, errp);
        } else {
            ret = ebpf_rss_load(&n->ebpf_rss, errp);
        }
    }

    return ret;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

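/*
 * Parses the RSS/hash configuration from the control queue
 * (VIRTIO_NET_CTRL_MQ_RSS_CONFIG when do_rss, otherwise
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG): the fixed head of
 * struct virtio_net_rss_config, the variable-length indirection table,
 * then a 16-bit queue count and an 8-bit key length followed by the
 * key itself.  Returns the number of queue pairs, or 0 on error.
 */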
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

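/*
 * VIRTIO_NET_CTRL_MQ multiplexes three commands: plain queue-pair
 * selection (VQ_PAIRS_SET), hash-report configuration (HASH_CONFIG)
 * and full RSS configuration (RSS_CONFIG); the latter two share the
 * parser above.
 */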
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue. */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

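/*
 * Control-queue requests have a fixed shape on the wire: a
 * struct virtio_net_ctrl_hdr { u8 class; u8 cmd; } at the start of the
 * out sg, followed by command-specific data, with a single
 * virtio_net_ctrl_ack status byte expected in the in sg.
 */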
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    int opaque;
    unsigned int in_bytes;
    VirtIONet *n = q->n;

    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
                                           bufsize, 0);
        /* Buffers are enough, disable notification */
        if (bufsize <= in_bytes) {
            break;
        }

        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
            /* Guest has added some buffers, try again */
            continue;
        } else {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);

    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
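/*
 * Note: the byte offsets tested below assume an untagged Ethernet frame
 * and a 20-byte IPv4 header with no options; frames with a different
 * layout simply fail the match and are left untouched.
 */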
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

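/*
 * Maps the parsed L3/L4 protocols onto the most specific hash type the
 * guest enabled: an L4 (TCP/UDP) hash is preferred over the bare IP
 * hash, and for IPv6 the _EX variants take precedence when enabled.
 * Returns 0xff when no enabled hash type applies.
 */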
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    return 0xff;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size,
                                  struct virtio_net_hdr_v1_hash *hdr)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
            hdr->hash_report = 0;
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        hdr->hash_value = hash;
        hdr->hash_report = reports[net_hash_type];
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

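/*
 * RX fast path.  With software RSS the packet may first be re-steered
 * to another subqueue.  The packet is then copied into as many guest
 * buffers as needed; with mergeable RX buffers, the location of
 * num_buffers in the first header is remembered (mhdr_sg) so the final
 * buffer count can be patched in once it is known.
 */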
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_v1_hash extra_hdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
        if (index >= 0) {
            NetClientState *nc2 =
                qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(extra_hdr), hdr.num_buffers),
                                    sizeof(extra_hdr.hdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = offsetof(typeof(extra_hdr), hash_value);
                iov_from_buf(sg, elem->in_num, offset,
                             (char *)&extra_hdr + offset,
                             sizeof(extra_hdr.hash_value) +
                             sizeof(extra_hdr.hash_report));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
*/ 2001 if (!n->mergeable_rx_bufs && offset < size) { 2002 virtqueue_unpop(q->rx_vq, elem, total); 2003 g_free(elem); 2004 err = size; 2005 goto err; 2006 } 2007 2008 elems[i] = elem; 2009 lens[i] = total; 2010 i++; 2011 } 2012 2013 if (mhdr_cnt) { 2014 virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i); 2015 iov_from_buf(mhdr_sg, mhdr_cnt, 2016 0, 2017 &extra_hdr.hdr.num_buffers, 2018 sizeof extra_hdr.hdr.num_buffers); 2019 } 2020 2021 for (j = 0; j < i; j++) { 2022 /* signal other side */ 2023 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 2024 g_free(elems[j]); 2025 } 2026 2027 virtqueue_flush(q->rx_vq, i); 2028 virtio_notify(vdev, q->rx_vq); 2029 2030 return size; 2031 2032 err: 2033 for (j = 0; j < i; j++) { 2034 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 2035 g_free(elems[j]); 2036 } 2037 2038 return err; 2039 } 2040 2041 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 2042 size_t size) 2043 { 2044 RCU_READ_LOCK_GUARD(); 2045 2046 return virtio_net_receive_rcu(nc, buf, size, false); 2047 } 2048 2049 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 2050 const uint8_t *buf, 2051 VirtioNetRscUnit *unit) 2052 { 2053 uint16_t ip_hdrlen; 2054 struct ip_header *ip; 2055 2056 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 2057 + sizeof(struct eth_header)); 2058 unit->ip = (void *)ip; 2059 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 2060 unit->ip_plen = &ip->ip_len; 2061 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 2062 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2063 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 2064 } 2065 2066 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 2067 const uint8_t *buf, 2068 VirtioNetRscUnit *unit) 2069 { 2070 struct ip6_header *ip6; 2071 2072 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2073 + sizeof(struct eth_header)); 2074 unit->ip = ip6; 2075 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2076 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2077 + sizeof(struct ip6_header)); 2078 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2079 2080 /* There is a difference between payload length in ipv4 and v6, 2081 ip header is excluded in ipv6 */ 2082 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2083 } 2084 2085 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2086 VirtioNetRscSeg *seg) 2087 { 2088 int ret; 2089 struct virtio_net_hdr_v1 *h; 2090 2091 h = (struct virtio_net_hdr_v1 *)seg->buf; 2092 h->flags = 0; 2093 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2094 2095 if (seg->is_coalesced) { 2096 h->rsc.segments = seg->packets; 2097 h->rsc.dup_acks = seg->dup_ack; 2098 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2099 if (chain->proto == ETH_P_IP) { 2100 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2101 } else { 2102 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2103 } 2104 } 2105 2106 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2107 QTAILQ_REMOVE(&chain->buffers, seg, next); 2108 g_free(seg->buf); 2109 g_free(seg); 2110 2111 return ret; 2112 } 2113 2114 static void virtio_net_rsc_purge(void *opq) 2115 { 2116 VirtioNetRscSeg *seg, *rn; 2117 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2118 2119 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2120 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2121 chain->stat.purge_failed++; 2122 continue; 2123 } 2124 } 2125 2126 chain->stat.timer++; 2127 if 
(!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_new(VirtioNetRscSeg, 1);
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack: count it, the whql test expects the dup ack
               count to go up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload: the normal case, not a
               duplicate ack or the like */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a contiguous packet: out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* The new data is in sequence; the payload length field differs
           between v4 and v6, so use the recorded field value to update and
           track the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Take over 'PUSH': the whql test guide says 'PUSH' can be
           coalesced for windows guests, while this may change the behavior
           for linux guests (only if they use the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; packets with any other flag should be
 * sent on after draining, to prevent reordering */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if
(tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced: set the coalesced flag so the checksum is
               recalculated for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's data; this avoids out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip options */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle fragmented packets */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with the ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return
RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and the protocol are checked by this test */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with the ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if
((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if (n->rsc4_enabled || n->rsc6_enabled) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret >= n->tx_burst) {
        /*
         * The flush has been stopped by tx_burst; we will not receive
         * a notification for the remaining part, so re-schedule.
         */
        virtio_queue_set_notification(q->tx_vq, 0);
        if (q->tx_bh) {
            replay_bh_schedule_event(q->tx_bh);
        } else {
            timer_mod(q->tx_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        }
        q->tx_waiting = 1;
    }
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }
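
    /*
     * Main TX loop (below): pop queued elements and hand them to the
     * backend until either the peer cannot take more, in which case the
     * element is parked in async_tx and virtio_net_tx_complete() resumes
     * the flush later, or tx_burst packets have been sent.  Along the way
     * the vnet header is byte-swapped for cross-endian guests, and any
     * extra guest header the host does not expect is stripped.
     */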
2703 2704 for (;;) { 2705 ssize_t ret; 2706 unsigned int out_num; 2707 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2708 struct virtio_net_hdr vhdr; 2709 2710 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2711 if (!elem) { 2712 break; 2713 } 2714 2715 out_num = elem->out_num; 2716 out_sg = elem->out_sg; 2717 if (out_num < 1) { 2718 virtio_error(vdev, "virtio-net header not in first element"); 2719 goto detach; 2720 } 2721 2722 if (n->needs_vnet_hdr_swap) { 2723 if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) < 2724 sizeof(vhdr)) { 2725 virtio_error(vdev, "virtio-net header incorrect"); 2726 goto detach; 2727 } 2728 virtio_net_hdr_swap(vdev, &vhdr); 2729 sg2[0].iov_base = &vhdr; 2730 sg2[0].iov_len = sizeof(vhdr); 2731 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num, 2732 sizeof(vhdr), -1); 2733 if (out_num == VIRTQUEUE_MAX_SIZE) { 2734 goto drop; 2735 } 2736 out_num += 1; 2737 out_sg = sg2; 2738 } 2739 /* 2740 * If host wants to see the guest header as is, we can 2741 * pass it on unchanged. Otherwise, copy just the parts 2742 * that host is interested in. 2743 */ 2744 assert(n->host_hdr_len <= n->guest_hdr_len); 2745 if (n->host_hdr_len != n->guest_hdr_len) { 2746 if (iov_size(out_sg, out_num) < n->guest_hdr_len) { 2747 virtio_error(vdev, "virtio-net header is invalid"); 2748 goto detach; 2749 } 2750 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2751 out_sg, out_num, 2752 0, n->host_hdr_len); 2753 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2754 out_sg, out_num, 2755 n->guest_hdr_len, -1); 2756 out_num = sg_num; 2757 out_sg = sg; 2758 2759 if (out_num < 1) { 2760 virtio_error(vdev, "virtio-net nothing to send"); 2761 goto detach; 2762 } 2763 } 2764 2765 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2766 out_sg, out_num, virtio_net_tx_complete); 2767 if (ret == 0) { 2768 virtio_queue_set_notification(q->tx_vq, 0); 2769 q->async_tx.elem = elem; 2770 return -EBUSY; 2771 } 2772 2773 drop: 2774 virtqueue_push(q->tx_vq, elem, 0); 2775 virtio_notify(vdev, q->tx_vq); 2776 g_free(elem); 2777 2778 if (++num_packets >= n->tx_burst) { 2779 break; 2780 } 2781 } 2782 return num_packets; 2783 2784 detach: 2785 virtqueue_detach_element(q->tx_vq, elem, 0); 2786 g_free(elem); 2787 return -EINVAL; 2788 } 2789 2790 static void virtio_net_tx_timer(void *opaque); 2791 2792 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2793 { 2794 VirtIONet *n = VIRTIO_NET(vdev); 2795 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2796 2797 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2798 virtio_net_drop_tx_queue_data(vdev, vq); 2799 return; 2800 } 2801 2802 /* This happens when device was stopped but VCPU wasn't. 
 */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(n->vhost_started)) {
        return;
    }

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when the device was stopped but the VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    replay_bh_schedule_event(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when the device was stopped but the BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when the device was stopped but the BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        replay_bh_schedule_event(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        replay_bh_schedule_event(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
                                                  &DEVICE(vdev)->mem_reentrancy_guard);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
3010 */ 3011 virtio_del_queue(vdev, old_num_queues - 1); 3012 3013 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 3014 /* new_num_queues < old_num_queues */ 3015 virtio_net_del_queue(n, i / 2); 3016 } 3017 3018 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 3019 /* new_num_queues > old_num_queues */ 3020 virtio_net_add_queue(n, i / 2); 3021 } 3022 3023 /* add ctrl_vq last */ 3024 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3025 } 3026 3027 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 3028 { 3029 int max = multiqueue ? n->max_queue_pairs : 1; 3030 3031 n->multiqueue = multiqueue; 3032 virtio_net_change_num_queue_pairs(n, max); 3033 3034 virtio_net_set_queue_pairs(n); 3035 } 3036 3037 static int virtio_net_post_load_device(void *opaque, int version_id) 3038 { 3039 VirtIONet *n = opaque; 3040 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3041 int i, link_down; 3042 3043 trace_virtio_net_post_load_device(); 3044 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 3045 virtio_vdev_has_feature(vdev, 3046 VIRTIO_F_VERSION_1), 3047 virtio_vdev_has_feature(vdev, 3048 VIRTIO_NET_F_HASH_REPORT)); 3049 3050 /* MAC_TABLE_ENTRIES may be different from the saved image */ 3051 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 3052 n->mac_table.in_use = 0; 3053 } 3054 3055 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 3056 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 3057 } 3058 3059 /* 3060 * curr_guest_offloads will be later overwritten by the 3061 * virtio_set_features_nocheck call done from the virtio_load. 3062 * Here we make sure it is preserved and restored accordingly 3063 * in the virtio_net_post_load_virtio callback. 3064 */ 3065 n->saved_guest_offloads = n->curr_guest_offloads; 3066 3067 virtio_net_set_queue_pairs(n); 3068 3069 /* Find the first multicast entry in the saved MAC filter */ 3070 for (i = 0; i < n->mac_table.in_use; i++) { 3071 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 3072 break; 3073 } 3074 } 3075 n->mac_table.first_multi = i; 3076 3077 /* nc.link_down can't be migrated, so infer link_down according 3078 * to link status bit in n->status */ 3079 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 3080 for (i = 0; i < n->max_queue_pairs; i++) { 3081 qemu_get_subqueue(n->nic, i)->link_down = link_down; 3082 } 3083 3084 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3085 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3086 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3087 QEMU_CLOCK_VIRTUAL, 3088 virtio_net_announce_timer, n); 3089 if (n->announce_timer.round) { 3090 timer_mod(n->announce_timer.tm, 3091 qemu_clock_get_ms(n->announce_timer.type)); 3092 } else { 3093 qemu_announce_timer_del(&n->announce_timer, false); 3094 } 3095 } 3096 3097 virtio_net_commit_rss_config(n); 3098 return 0; 3099 } 3100 3101 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3102 { 3103 VirtIONet *n = VIRTIO_NET(vdev); 3104 /* 3105 * The actual needed state is now in saved_guest_offloads, 3106 * see virtio_net_post_load_device for detail. 3107 * Restore it back and apply the desired offloads. 
3108 */ 3109 n->curr_guest_offloads = n->saved_guest_offloads; 3110 if (peer_has_vnet_hdr(n)) { 3111 virtio_net_apply_guest_offloads(n); 3112 } 3113 3114 return 0; 3115 } 3116 3117 /* tx_waiting field of a VirtIONetQueue */ 3118 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3119 .name = "virtio-net-queue-tx_waiting", 3120 .fields = (const VMStateField[]) { 3121 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3122 VMSTATE_END_OF_LIST() 3123 }, 3124 }; 3125 3126 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3127 { 3128 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3129 } 3130 3131 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3132 { 3133 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3134 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3135 } 3136 3137 static bool mac_table_fits(void *opaque, int version_id) 3138 { 3139 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3140 } 3141 3142 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3143 { 3144 return !mac_table_fits(opaque, version_id); 3145 } 3146 3147 /* This temporary type is shared by all the WITH_TMP methods 3148 * although only some fields are used by each. 3149 */ 3150 struct VirtIONetMigTmp { 3151 VirtIONet *parent; 3152 VirtIONetQueue *vqs_1; 3153 uint16_t curr_queue_pairs_1; 3154 uint8_t has_ufo; 3155 uint32_t has_vnet_hdr; 3156 }; 3157 3158 /* The 2nd and subsequent tx_waiting flags are loaded later than 3159 * the 1st entry in the queue_pairs and only if there's more than one 3160 * entry. We use the tmp mechanism to calculate a temporary 3161 * pointer and count and also validate the count. 3162 */ 3163 3164 static int virtio_net_tx_waiting_pre_save(void *opaque) 3165 { 3166 struct VirtIONetMigTmp *tmp = opaque; 3167 3168 tmp->vqs_1 = tmp->parent->vqs + 1; 3169 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3170 if (tmp->parent->curr_queue_pairs == 0) { 3171 tmp->curr_queue_pairs_1 = 0; 3172 } 3173 3174 return 0; 3175 } 3176 3177 static int virtio_net_tx_waiting_pre_load(void *opaque) 3178 { 3179 struct VirtIONetMigTmp *tmp = opaque; 3180 3181 /* Reuse the pointer setup from save */ 3182 virtio_net_tx_waiting_pre_save(opaque); 3183 3184 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3185 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3186 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3187 3188 return -EINVAL; 3189 } 3190 3191 return 0; /* all good */ 3192 } 3193 3194 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3195 .name = "virtio-net-tx_waiting", 3196 .pre_load = virtio_net_tx_waiting_pre_load, 3197 .pre_save = virtio_net_tx_waiting_pre_save, 3198 .fields = (const VMStateField[]) { 3199 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3200 curr_queue_pairs_1, 3201 vmstate_virtio_net_queue_tx_waiting, 3202 struct VirtIONetQueue), 3203 VMSTATE_END_OF_LIST() 3204 }, 3205 }; 3206 3207 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3208 * flag set we need to check that we have it 3209 */ 3210 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3211 { 3212 struct VirtIONetMigTmp *tmp = opaque; 3213 3214 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3215 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3216 return -EINVAL; 3217 } 3218 3219 return 0; 3220 } 3221 3222 static int virtio_net_ufo_pre_save(void *opaque) 3223 { 3224 struct VirtIONetMigTmp *tmp = opaque; 3225 3226 
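    /* Record whether our peer offers UFO; virtio_net_ufo_post_load() uses
     * this to verify that the destination's peer offers it as well. */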
tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}

static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if the source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!n->multiqueue && idx == 2) {
        /* Must guard against invalid features and a bogus queue index
         * being set by a malicious guest, or slipping in through a
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
     * used as the index of the configure interrupt. If the backend does
     * not support it, this returns false.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return vhost_net_config_pending(get_vhost_net(nc->peer));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!n->multiqueue && idx == 2) {
        /* Must guard against invalid features and a bogus queue index
         * being set by a malicious guest, or slipping in through a
         * buggy migration stream.
 */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
     * used as the index of the configure interrupt. If the backend does
     * not support it, simply return.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
        return;
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL; in that case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}

static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
        /* We already unplugged the device; let's plug it back
*/ 3528 if (!failover_replug_primary(n, dev, &err)) { 3529 if (err) { 3530 error_report_err(err); 3531 } 3532 } 3533 } 3534 } 3535 3536 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier, 3537 MigrationEvent *e, Error **errp) 3538 { 3539 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3540 virtio_net_handle_migration_primary(n, e); 3541 return 0; 3542 } 3543 3544 static bool failover_hide_primary_device(DeviceListener *listener, 3545 const QDict *device_opts, 3546 bool from_json, 3547 Error **errp) 3548 { 3549 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3550 const char *standby_id; 3551 3552 if (!device_opts) { 3553 return false; 3554 } 3555 3556 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3557 return false; 3558 } 3559 3560 if (!qdict_haskey(device_opts, "id")) { 3561 error_setg(errp, "Device with failover_pair_id needs to have id"); 3562 return false; 3563 } 3564 3565 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3566 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3567 return false; 3568 } 3569 3570 /* 3571 * The hide helper can be called several times for a given device. 3572 * Check there is only one primary for a virtio-net device but 3573 * don't duplicate the qdict several times if it's called for the same 3574 * device. 3575 */ 3576 if (n->primary_opts) { 3577 const char *old, *new; 3578 /* devices with failover_pair_id always have an id */ 3579 old = qdict_get_str(n->primary_opts, "id"); 3580 new = qdict_get_str(device_opts, "id"); 3581 if (strcmp(old, new) != 0) { 3582 error_setg(errp, "Cannot attach more than one primary device to " 3583 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3584 return false; 3585 } 3586 } else { 3587 n->primary_opts = qdict_clone_shallow(device_opts); 3588 n->primary_opts_from_json = from_json; 3589 } 3590 3591 /* failover_primary_hidden is set during feature negotiation */ 3592 return qatomic_read(&n->failover_primary_hidden); 3593 } 3594 3595 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3596 { 3597 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3598 VirtIONet *n = VIRTIO_NET(dev); 3599 NetClientState *nc; 3600 int i; 3601 3602 if (n->net_conf.mtu) { 3603 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3604 } 3605 3606 if (n->net_conf.duplex_str) { 3607 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3608 n->net_conf.duplex = DUPLEX_HALF; 3609 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3610 n->net_conf.duplex = DUPLEX_FULL; 3611 } else { 3612 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3613 return; 3614 } 3615 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3616 } else { 3617 n->net_conf.duplex = DUPLEX_UNKNOWN; 3618 } 3619 3620 if (n->net_conf.speed < SPEED_UNKNOWN) { 3621 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3622 return; 3623 } 3624 if (n->net_conf.speed >= 0) { 3625 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3626 } 3627 3628 if (n->failover) { 3629 n->primary_listener.hide_device = failover_hide_primary_device; 3630 qatomic_set(&n->failover_primary_hidden, true); 3631 device_listener_register(&n->primary_listener); 3632 migration_add_notifier(&n->migration_state, 3633 virtio_net_migration_state_notifier); 3634 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3635 } 3636 3637 virtio_net_set_config_size(n, n->host_features); 3638 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3639 3640 /* 3641 * We set a lower limit on RX queue size to what 
it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    virtio_net_add_queue(n, 0);

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
3718 */ 3719 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3720 n->netclient_type, n->netclient_name, 3721 &dev->mem_reentrancy_guard, n); 3722 } else { 3723 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3724 object_get_typename(OBJECT(dev)), dev->id, 3725 &dev->mem_reentrancy_guard, n); 3726 } 3727 3728 for (i = 0; i < n->max_queue_pairs; i++) { 3729 n->nic->ncs[i].do_not_pad = true; 3730 } 3731 3732 peer_test_vnet_hdr(n); 3733 if (peer_has_vnet_hdr(n)) { 3734 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3735 } else { 3736 n->host_hdr_len = 0; 3737 } 3738 3739 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3740 3741 n->vqs[0].tx_waiting = 0; 3742 n->tx_burst = n->net_conf.txburst; 3743 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3744 n->promisc = 1; /* for compatibility */ 3745 3746 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3747 3748 n->vlans = g_malloc0(MAX_VLAN >> 3); 3749 3750 nc = qemu_get_queue(n->nic); 3751 nc->rxfilter_notify_enabled = 1; 3752 3753 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3754 struct virtio_net_config netcfg = {}; 3755 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3756 vhost_net_set_config(get_vhost_net(nc->peer), 3757 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); 3758 } 3759 QTAILQ_INIT(&n->rsc_chains); 3760 n->qdev = dev; 3761 3762 net_rx_pkt_init(&n->rx_pkt); 3763 3764 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3765 Error *err = NULL; 3766 if (!virtio_net_load_ebpf(n, &err)) { 3767 /* 3768 * If user explicitly gave QEMU RSS FDs to use, then 3769 * failing to use them must be considered a fatal 3770 * error. If no RSS FDs were provided, QEMU is trying 3771 * eBPF on a "best effort" basis only, so report a 3772 * warning and allow fallback to software RSS. 3773 */ 3774 if (n->ebpf_rss_fds) { 3775 error_propagate(errp, err); 3776 } else { 3777 warn_report("unable to load eBPF RSS: %s", 3778 error_get_pretty(err)); 3779 error_free(err); 3780 } 3781 } 3782 } 3783 } 3784 3785 static void virtio_net_device_unrealize(DeviceState *dev) 3786 { 3787 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3788 VirtIONet *n = VIRTIO_NET(dev); 3789 int i, max_queue_pairs; 3790 3791 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3792 virtio_net_unload_ebpf(n); 3793 } 3794 3795 /* This will stop vhost backend if appropriate. */ 3796 virtio_net_set_status(vdev, 0); 3797 3798 g_free(n->netclient_name); 3799 n->netclient_name = NULL; 3800 g_free(n->netclient_type); 3801 n->netclient_type = NULL; 3802 3803 g_free(n->mac_table.macs); 3804 g_free(n->vlans); 3805 3806 if (n->failover) { 3807 qobject_unref(n->primary_opts); 3808 device_listener_unregister(&n->primary_listener); 3809 migration_remove_notifier(&n->migration_state); 3810 } else { 3811 assert(n->primary_opts == NULL); 3812 } 3813 3814 max_queue_pairs = n->multiqueue ? 
n->max_queue_pairs : 1; 3815 for (i = 0; i < max_queue_pairs; i++) { 3816 virtio_net_del_queue(n, i); 3817 } 3818 /* delete also control vq */ 3819 virtio_del_queue(vdev, max_queue_pairs * 2); 3820 qemu_announce_timer_del(&n->announce_timer, false); 3821 g_free(n->vqs); 3822 qemu_del_nic(n->nic); 3823 virtio_net_rsc_cleanup(n); 3824 g_free(n->rss_data.indirections_table); 3825 net_rx_pkt_uninit(n->rx_pkt); 3826 virtio_cleanup(vdev); 3827 } 3828 3829 static void virtio_net_reset(VirtIODevice *vdev) 3830 { 3831 VirtIONet *n = VIRTIO_NET(vdev); 3832 int i; 3833 3834 /* Reset back to compatibility mode */ 3835 n->promisc = 1; 3836 n->allmulti = 0; 3837 n->alluni = 0; 3838 n->nomulti = 0; 3839 n->nouni = 0; 3840 n->nobcast = 0; 3841 /* multiqueue is disabled by default */ 3842 n->curr_queue_pairs = 1; 3843 timer_del(n->announce_timer.tm); 3844 n->announce_timer.round = 0; 3845 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 3846 3847 /* Flush any MAC and VLAN filter table state */ 3848 n->mac_table.in_use = 0; 3849 n->mac_table.first_multi = 0; 3850 n->mac_table.multi_overflow = 0; 3851 n->mac_table.uni_overflow = 0; 3852 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 3853 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 3854 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 3855 memset(n->vlans, 0, MAX_VLAN >> 3); 3856 3857 /* Flush any async TX */ 3858 for (i = 0; i < n->max_queue_pairs; i++) { 3859 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 3860 } 3861 3862 virtio_net_disable_rss(n); 3863 } 3864 3865 static void virtio_net_instance_init(Object *obj) 3866 { 3867 VirtIONet *n = VIRTIO_NET(obj); 3868 3869 /* 3870 * The default config_size is sizeof(struct virtio_net_config). 3871 * Can be overridden with virtio_net_set_config_size. 3872 */ 3873 n->config_size = sizeof(struct virtio_net_config); 3874 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3875 "bootindex", "/ethernet-phy@0", 3876 DEVICE(n)); 3877 3878 ebpf_rss_init(&n->ebpf_rss); 3879 } 3880 3881 static int virtio_net_pre_save(void *opaque) 3882 { 3883 VirtIONet *n = opaque; 3884 3885 /* At this point, backend must be stopped, otherwise 3886 * it might keep writing to memory. */ 3887 assert(!n->vhost_started); 3888 3889 return 0; 3890 } 3891 3892 static bool primary_unplug_pending(void *opaque) 3893 { 3894 DeviceState *dev = opaque; 3895 DeviceState *primary; 3896 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3897 VirtIONet *n = VIRTIO_NET(vdev); 3898 3899 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3900 return false; 3901 } 3902 primary = failover_find_primary_device(n); 3903 return primary ? 
static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    struct vhost_net *net;

    if (!n->nic) {
        return NULL;
    }

    nc = qemu_get_queue(n->nic);
    if (!nc) {
        return NULL;
    }

    net = get_vhost_net(nc->peer);
    if (!net) {
        return NULL;
    }

    return &net->dev;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
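
/*
 * Each DEFINE_PROP_BIT64 entry below exposes one bit of host_features
 * as an on/off device property, so individual features can be masked
 * from the guest on the command line. An illustrative (not exhaustive)
 * invocation:
 *
 *   -device virtio-net-pci,netdev=net0,mrg_rxbuf=off,guest_tso4=off
 */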
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
                      ebpf_rss_fds, qdev_prop_string, char*),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO4, true),
    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO6, true),
    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_USO, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
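
/*
 * Illustrative multiqueue setup, assuming a tap backend: "mq=on" only
 * advertises VIRTIO_NET_F_MQ; the backend must be created with a
 * matching number of queues, and the MSI-X vector count is
 * conventionally sized as 2 * queues + 2 (one RX and one TX vector per
 * queue pair, plus configuration and control):
 *
 *   -netdev tap,id=net0,queues=4,vhost=on
 *   -device virtio-net-pci,netdev=net0,mq=on,vectors=10
 */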