1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "sysemu/replay.h" 44 #include "trace.h" 45 #include "monitor/qdev.h" 46 #include "monitor/monitor.h" 47 #include "hw/pci/pci_device.h" 48 #include "net_rx_pkt.h" 49 #include "hw/virtio/vhost.h" 50 #include "sysemu/qtest.h" 51 52 #define VIRTIO_NET_VM_VERSION 11 53 54 /* previously fixed value */ 55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 57 58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 61 62 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 63 64 #define VIRTIO_NET_TCP_FLAG 0x3F 65 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 66 67 /* IPv4 max payload, 16 bits in the header */ 68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 70 71 /* header length value in ip header without option */ 72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 73 74 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 76 77 /* Purge coalesced packets timer interval, This value affects the performance 78 a lot, and should be tuned carefully, '300000'(300us) is the recommended 79 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 80 tso/gso/gro 'off'. */ 81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 82 83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 89 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 91 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 92 93 static const VirtIOFeature feature_sizes[] = { 94 {.flags = 1ULL << VIRTIO_NET_F_MAC, 95 .end = endof(struct virtio_net_config, mac)}, 96 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 97 .end = endof(struct virtio_net_config, status)}, 98 {.flags = 1ULL << VIRTIO_NET_F_MQ, 99 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 100 {.flags = 1ULL << VIRTIO_NET_F_MTU, 101 .end = endof(struct virtio_net_config, mtu)}, 102 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 103 .end = endof(struct virtio_net_config, duplex)}, 104 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 105 .end = endof(struct virtio_net_config, supported_hash_types)}, 106 {} 107 }; 108 109 static const VirtIOConfigSizeParams cfg_size_params = { 110 .min_size = endof(struct virtio_net_config, mac), 111 .max_size = sizeof(struct virtio_net_config), 112 .feature_sizes = feature_sizes 113 }; 114 115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 116 { 117 VirtIONet *n = qemu_get_nic_opaque(nc); 118 119 return &n->vqs[nc->queue_index]; 120 } 121 122 static int vq2q(int queue_index) 123 { 124 return queue_index / 2; 125 } 126 127 static void flush_or_purge_queued_packets(NetClientState *nc) 128 { 129 if (!nc->peer) { 130 return; 131 } 132 133 qemu_flush_or_purge_queued_packets(nc->peer, true); 134 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 135 } 136 137 /* TODO 138 * - we could suppress RX interrupt if we were so inclined. 139 */ 140 141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 142 { 143 VirtIONet *n = VIRTIO_NET(vdev); 144 struct virtio_net_config netcfg; 145 NetClientState *nc = qemu_get_queue(n->nic); 146 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 147 148 int ret = 0; 149 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 150 virtio_stw_p(vdev, &netcfg.status, n->status); 151 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 152 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 153 memcpy(netcfg.mac, n->mac, ETH_ALEN); 154 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 155 netcfg.duplex = n->net_conf.duplex; 156 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 157 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 158 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 159 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 160 virtio_stl_p(vdev, &netcfg.supported_hash_types, 161 VIRTIO_NET_RSS_SUPPORTED_HASHES); 162 memcpy(config, &netcfg, n->config_size); 163 164 /* 165 * Is this VDPA? No peer means not VDPA: there's no way to 166 * disconnect/reconnect a VDPA peer. 167 */ 168 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 169 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 170 n->config_size); 171 if (ret == -1) { 172 return; 173 } 174 175 /* 176 * Some NIC/kernel combinations present 0 as the mac address. As that 177 * is not a legal address, try to proceed with the address from the 178 * QEMU command line in the hope that the address has been configured 179 * correctly elsewhere - just not reported by the device. 180 */ 181 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 182 info_report("Zero hardware mac address detected. Ignoring."); 183 memcpy(netcfg.mac, n->mac, ETH_ALEN); 184 } 185 186 netcfg.status |= virtio_tswap16(vdev, 187 n->status & VIRTIO_NET_S_ANNOUNCE); 188 memcpy(config, &netcfg, n->config_size); 189 } 190 } 191 192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 193 { 194 VirtIONet *n = VIRTIO_NET(vdev); 195 struct virtio_net_config netcfg = {}; 196 NetClientState *nc = qemu_get_queue(n->nic); 197 198 memcpy(&netcfg, config, n->config_size); 199 200 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 201 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 202 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 203 memcpy(n->mac, netcfg.mac, ETH_ALEN); 204 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 205 } 206 207 /* 208 * Is this VDPA? No peer means not VDPA: there's no way to 209 * disconnect/reconnect a VDPA peer. 210 */ 211 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 212 vhost_net_set_config(get_vhost_net(nc->peer), 213 (uint8_t *)&netcfg, 0, n->config_size, 214 VHOST_SET_CONFIG_TYPE_FRONTEND); 215 } 216 } 217 218 static bool virtio_net_started(VirtIONet *n, uint8_t status) 219 { 220 VirtIODevice *vdev = VIRTIO_DEVICE(n); 221 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 222 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 223 } 224 225 static void virtio_net_announce_notify(VirtIONet *net) 226 { 227 VirtIODevice *vdev = VIRTIO_DEVICE(net); 228 trace_virtio_net_announce_notify(); 229 230 net->status |= VIRTIO_NET_S_ANNOUNCE; 231 virtio_notify_config(vdev); 232 } 233 234 static void virtio_net_announce_timer(void *opaque) 235 { 236 VirtIONet *n = opaque; 237 trace_virtio_net_announce_timer(n->announce_timer.round); 238 239 n->announce_timer.round--; 240 virtio_net_announce_notify(n); 241 } 242 243 static void virtio_net_announce(NetClientState *nc) 244 { 245 VirtIONet *n = qemu_get_nic_opaque(nc); 246 VirtIODevice *vdev = VIRTIO_DEVICE(n); 247 248 /* 249 * Make sure the virtio migration announcement timer isn't running 250 * If it is, let it trigger announcement so that we do not cause 251 * confusion. 252 */ 253 if (n->announce_timer.round) { 254 return; 255 } 256 257 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 258 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 259 virtio_net_announce_notify(n); 260 } 261 } 262 263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 264 { 265 VirtIODevice *vdev = VIRTIO_DEVICE(n); 266 NetClientState *nc = qemu_get_queue(n->nic); 267 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 268 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 269 n->max_ncs - n->max_queue_pairs : 0; 270 271 if (!get_vhost_net(nc->peer)) { 272 return; 273 } 274 275 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 276 !!n->vhost_started) { 277 return; 278 } 279 if (!n->vhost_started) { 280 int r, i; 281 282 if (n->needs_vnet_hdr_swap) { 283 error_report("backend does not support %s vnet headers; " 284 "falling back on userspace virtio", 285 virtio_is_big_endian(vdev) ? "BE" : "LE"); 286 return; 287 } 288 289 /* Any packets outstanding? Purge them to avoid touching rings 290 * when vhost is running. 291 */ 292 for (i = 0; i < queue_pairs; i++) { 293 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 294 295 /* Purge both directions: TX and RX. */ 296 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 297 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 298 } 299 300 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 301 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 302 if (r < 0) { 303 error_report("%uBytes MTU not supported by the backend", 304 n->net_conf.mtu); 305 306 return; 307 } 308 } 309 310 n->vhost_started = 1; 311 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 312 if (r < 0) { 313 error_report("unable to start vhost net: %d: " 314 "falling back on userspace virtio", -r); 315 n->vhost_started = 0; 316 } 317 } else { 318 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 319 n->vhost_started = 0; 320 } 321 } 322 323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 324 NetClientState *peer, 325 bool enable) 326 { 327 if (virtio_is_big_endian(vdev)) { 328 return qemu_set_vnet_be(peer, enable); 329 } else { 330 return qemu_set_vnet_le(peer, enable); 331 } 332 } 333 334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 335 int queue_pairs, bool enable) 336 { 337 int i; 338 339 for (i = 0; i < queue_pairs; i++) { 340 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 341 enable) { 342 while (--i >= 0) { 343 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 344 } 345 346 return true; 347 } 348 } 349 350 return false; 351 } 352 353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 354 { 355 VirtIODevice *vdev = VIRTIO_DEVICE(n); 356 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 357 358 if (virtio_net_started(n, status)) { 359 /* Before using the device, we tell the network backend about the 360 * endianness to use when parsing vnet headers. If the backend 361 * can't do it, we fallback onto fixing the headers in the core 362 * virtio-net code. 363 */ 364 n->needs_vnet_hdr_swap = n->has_vnet_hdr && 365 virtio_net_set_vnet_endian(vdev, n->nic->ncs, 366 queue_pairs, true); 367 } else if (virtio_net_started(n, vdev->status)) { 368 /* After using the device, we need to reset the network backend to 369 * the default (guest native endianness), otherwise the guest may 370 * lose network connectivity if it is rebooted into a different 371 * endianness. 372 */ 373 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 374 } 375 } 376 377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 378 { 379 unsigned int dropped = virtqueue_drop_all(vq); 380 if (dropped) { 381 virtio_notify(vdev, vq); 382 } 383 } 384 385 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 386 { 387 VirtIONet *n = VIRTIO_NET(vdev); 388 VirtIONetQueue *q; 389 int i; 390 uint8_t queue_status; 391 392 virtio_net_vnet_endian_status(n, status); 393 virtio_net_vhost_status(n, status); 394 395 for (i = 0; i < n->max_queue_pairs; i++) { 396 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 397 bool queue_started; 398 q = &n->vqs[i]; 399 400 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 401 queue_status = 0; 402 } else { 403 queue_status = status; 404 } 405 queue_started = 406 virtio_net_started(n, queue_status) && !n->vhost_started; 407 408 if (queue_started) { 409 qemu_flush_queued_packets(ncs); 410 } 411 412 if (!q->tx_waiting) { 413 continue; 414 } 415 416 if (queue_started) { 417 if (q->tx_timer) { 418 timer_mod(q->tx_timer, 419 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 420 } else { 421 replay_bh_schedule_event(q->tx_bh); 422 } 423 } else { 424 if (q->tx_timer) { 425 timer_del(q->tx_timer); 426 } else { 427 qemu_bh_cancel(q->tx_bh); 428 } 429 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 430 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 431 vdev->vm_running) { 432 /* if tx is waiting we are likely have some packets in tx queue 433 * and disabled notification */ 434 q->tx_waiting = 0; 435 virtio_queue_set_notification(q->tx_vq, 1); 436 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 437 } 438 } 439 } 440 } 441 442 static void virtio_net_set_link_status(NetClientState *nc) 443 { 444 VirtIONet *n = qemu_get_nic_opaque(nc); 445 VirtIODevice *vdev = VIRTIO_DEVICE(n); 446 uint16_t old_status = n->status; 447 448 if (nc->link_down) 449 n->status &= ~VIRTIO_NET_S_LINK_UP; 450 else 451 n->status |= VIRTIO_NET_S_LINK_UP; 452 453 if (n->status != old_status) 454 virtio_notify_config(vdev); 455 456 virtio_net_set_status(vdev, vdev->status); 457 } 458 459 static void rxfilter_notify(NetClientState *nc) 460 { 461 VirtIONet *n = qemu_get_nic_opaque(nc); 462 463 if (nc->rxfilter_notify_enabled) { 464 char *path = object_get_canonical_path(OBJECT(n->qdev)); 465 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path); 466 g_free(path); 467 468 /* disable event notification to avoid events flooding */ 469 nc->rxfilter_notify_enabled = 0; 470 } 471 } 472 473 static intList *get_vlan_table(VirtIONet *n) 474 { 475 intList *list; 476 int i, j; 477 478 list = NULL; 479 for (i = 0; i < MAX_VLAN >> 5; i++) { 480 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 481 if (n->vlans[i] & (1U << j)) { 482 QAPI_LIST_PREPEND(list, (i << 5) + j); 483 } 484 } 485 } 486 487 return list; 488 } 489 490 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 491 { 492 VirtIONet *n = qemu_get_nic_opaque(nc); 493 VirtIODevice *vdev = VIRTIO_DEVICE(n); 494 RxFilterInfo *info; 495 strList *str_list; 496 int i; 497 498 info = g_malloc0(sizeof(*info)); 499 info->name = g_strdup(nc->name); 500 info->promiscuous = n->promisc; 501 502 if (n->nouni) { 503 info->unicast = RX_STATE_NONE; 504 } else if (n->alluni) { 505 info->unicast = RX_STATE_ALL; 506 } else { 507 info->unicast = RX_STATE_NORMAL; 508 } 509 510 if (n->nomulti) { 511 info->multicast = RX_STATE_NONE; 512 } else if (n->allmulti) { 513 info->multicast = RX_STATE_ALL; 514 } else { 515 info->multicast = RX_STATE_NORMAL; 516 } 517 518 info->broadcast_allowed = n->nobcast; 519 info->multicast_overflow = n->mac_table.multi_overflow; 520 info->unicast_overflow = n->mac_table.uni_overflow; 521 522 info->main_mac = qemu_mac_strdup_printf(n->mac); 523 524 str_list = NULL; 525 for (i = 0; i < n->mac_table.first_multi; i++) { 526 QAPI_LIST_PREPEND(str_list, 527 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 528 } 529 info->unicast_table = str_list; 530 531 str_list = NULL; 532 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 533 QAPI_LIST_PREPEND(str_list, 534 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 535 } 536 info->multicast_table = str_list; 537 info->vlan_table = get_vlan_table(n); 538 539 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 540 info->vlan = RX_STATE_ALL; 541 } else if (!info->vlan_table) { 542 info->vlan = RX_STATE_NONE; 543 } else { 544 info->vlan = RX_STATE_NORMAL; 545 } 546 547 /* enable event notification after query */ 548 nc->rxfilter_notify_enabled = 1; 549 550 return info; 551 } 552 553 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index) 554 { 555 VirtIONet *n = VIRTIO_NET(vdev); 556 NetClientState *nc; 557 558 /* validate queue_index and skip for cvq */ 559 if (queue_index >= n->max_queue_pairs * 2) { 560 return; 561 } 562 563 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 564 565 if (!nc->peer) { 566 return; 567 } 568 569 if (get_vhost_net(nc->peer) && 570 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 571 vhost_net_virtqueue_reset(vdev, nc, queue_index); 572 } 573 574 flush_or_purge_queued_packets(nc); 575 } 576 577 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index) 578 { 579 VirtIONet *n = VIRTIO_NET(vdev); 580 NetClientState *nc; 581 int r; 582 583 /* validate queue_index and skip for cvq */ 584 if (queue_index >= n->max_queue_pairs * 2) { 585 return; 586 } 587 588 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 589 590 if (!nc->peer || !vdev->vhost_started) { 591 return; 592 } 593 594 if (get_vhost_net(nc->peer) && 595 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 596 r = vhost_net_virtqueue_restart(vdev, nc, queue_index); 597 if (r < 0) { 598 error_report("unable to restart vhost net virtqueue: %d, " 599 "when resetting the queue", queue_index); 600 } 601 } 602 } 603 604 static void peer_test_vnet_hdr(VirtIONet *n) 605 { 606 NetClientState *nc = qemu_get_queue(n->nic); 607 if (!nc->peer) { 608 return; 609 } 610 611 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 612 } 613 614 static int peer_has_vnet_hdr(VirtIONet *n) 615 { 616 return n->has_vnet_hdr; 617 } 618 619 static int peer_has_ufo(VirtIONet *n) 620 { 621 if (!peer_has_vnet_hdr(n)) 622 return 0; 623 624 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 625 626 return n->has_ufo; 627 } 628 629 static int peer_has_uso(VirtIONet *n) 630 { 631 if (!peer_has_vnet_hdr(n)) { 632 return 0; 633 } 634 635 return qemu_has_uso(qemu_get_queue(n->nic)->peer); 636 } 637 638 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 639 int version_1, int hash_report) 640 { 641 int i; 642 NetClientState *nc; 643 644 n->mergeable_rx_bufs = mergeable_rx_bufs; 645 646 if (version_1) { 647 n->guest_hdr_len = hash_report ? 648 sizeof(struct virtio_net_hdr_v1_hash) : 649 sizeof(struct virtio_net_hdr_mrg_rxbuf); 650 n->rss_data.populate_hash = !!hash_report; 651 } else { 652 n->guest_hdr_len = n->mergeable_rx_bufs ? 653 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 654 sizeof(struct virtio_net_hdr); 655 n->rss_data.populate_hash = false; 656 } 657 658 for (i = 0; i < n->max_queue_pairs; i++) { 659 nc = qemu_get_subqueue(n->nic, i); 660 661 if (peer_has_vnet_hdr(n) && 662 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 663 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 664 n->host_hdr_len = n->guest_hdr_len; 665 } 666 } 667 } 668 669 static int virtio_net_max_tx_queue_size(VirtIONet *n) 670 { 671 NetClientState *peer = n->nic_conf.peers.ncs[0]; 672 673 /* 674 * Backends other than vhost-user or vhost-vdpa don't support max queue 675 * size. 676 */ 677 if (!peer) { 678 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 679 } 680 681 switch(peer->info->type) { 682 case NET_CLIENT_DRIVER_VHOST_USER: 683 case NET_CLIENT_DRIVER_VHOST_VDPA: 684 return VIRTQUEUE_MAX_SIZE; 685 default: 686 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 687 }; 688 } 689 690 static int peer_attach(VirtIONet *n, int index) 691 { 692 NetClientState *nc = qemu_get_subqueue(n->nic, index); 693 694 if (!nc->peer) { 695 return 0; 696 } 697 698 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 699 vhost_set_vring_enable(nc->peer, 1); 700 } 701 702 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 703 return 0; 704 } 705 706 if (n->max_queue_pairs == 1) { 707 return 0; 708 } 709 710 return tap_enable(nc->peer); 711 } 712 713 static int peer_detach(VirtIONet *n, int index) 714 { 715 NetClientState *nc = qemu_get_subqueue(n->nic, index); 716 717 if (!nc->peer) { 718 return 0; 719 } 720 721 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 722 vhost_set_vring_enable(nc->peer, 0); 723 } 724 725 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 726 return 0; 727 } 728 729 return tap_disable(nc->peer); 730 } 731 732 static void virtio_net_set_queue_pairs(VirtIONet *n) 733 { 734 int i; 735 int r; 736 737 if (n->nic->peer_deleted) { 738 return; 739 } 740 741 for (i = 0; i < n->max_queue_pairs; i++) { 742 if (i < n->curr_queue_pairs) { 743 r = peer_attach(n, i); 744 assert(!r); 745 } else { 746 r = peer_detach(n, i); 747 assert(!r); 748 } 749 } 750 } 751 752 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 753 754 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 755 Error **errp) 756 { 757 VirtIONet *n = VIRTIO_NET(vdev); 758 NetClientState *nc = qemu_get_queue(n->nic); 759 760 /* Firstly sync all virtio-net possible supported features */ 761 features |= n->host_features; 762 763 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 764 765 if (!peer_has_vnet_hdr(n)) { 766 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 767 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 768 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 769 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 770 771 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 772 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 773 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 774 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 775 776 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); 777 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); 778 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); 779 780 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 781 } 782 783 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 784 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 785 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 786 } 787 788 if (!peer_has_uso(n)) { 789 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); 790 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); 791 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); 792 } 793 794 if (!get_vhost_net(nc->peer)) { 795 return features; 796 } 797 798 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 799 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 800 } 801 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 802 vdev->backend_features = features; 803 804 if (n->mtu_bypass_backend && 805 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 806 features |= (1ULL << VIRTIO_NET_F_MTU); 807 } 808 809 /* 810 * Since GUEST_ANNOUNCE is emulated the feature bit could be set without 811 * enabled. This happens in the vDPA case. 812 * 813 * Make sure the feature set is not incoherent, as the driver could refuse 814 * to start. 815 * 816 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, 817 * helping guest to notify the new location with vDPA devices that does not 818 * support it. 819 */ 820 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { 821 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); 822 } 823 824 return features; 825 } 826 827 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 828 { 829 uint64_t features = 0; 830 831 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 832 * but also these: */ 833 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 834 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 835 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 836 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 837 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 838 839 return features; 840 } 841 842 static void virtio_net_apply_guest_offloads(VirtIONet *n) 843 { 844 qemu_set_offload(qemu_get_queue(n->nic)->peer, 845 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 846 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 847 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 848 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 849 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), 850 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), 851 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6))); 852 } 853 854 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) 855 { 856 static const uint64_t guest_offloads_mask = 857 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 858 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 859 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 860 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 861 (1ULL << VIRTIO_NET_F_GUEST_UFO) | 862 (1ULL << VIRTIO_NET_F_GUEST_USO4) | 863 (1ULL << VIRTIO_NET_F_GUEST_USO6); 864 865 return guest_offloads_mask & features; 866 } 867 868 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) 869 { 870 VirtIODevice *vdev = VIRTIO_DEVICE(n); 871 return virtio_net_guest_offloads_by_features(vdev->guest_features); 872 } 873 874 typedef struct { 875 VirtIONet *n; 876 DeviceState *dev; 877 } FailoverDevice; 878 879 /** 880 * Set the failover primary device 881 * 882 * @opaque: FailoverId to setup 883 * @opts: opts for device we are handling 884 * @errp: returns an error if this function fails 885 */ 886 static int failover_set_primary(DeviceState *dev, void *opaque) 887 { 888 FailoverDevice *fdev = opaque; 889 PCIDevice *pci_dev = (PCIDevice *) 890 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 891 892 if (!pci_dev) { 893 return 0; 894 } 895 896 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 897 fdev->dev = dev; 898 return 1; 899 } 900 901 return 0; 902 } 903 904 /** 905 * Find the primary device for this failover virtio-net 906 * 907 * @n: VirtIONet device 908 * @errp: returns an error if this function fails 909 */ 910 static DeviceState *failover_find_primary_device(VirtIONet *n) 911 { 912 FailoverDevice fdev = { 913 .n = n, 914 }; 915 916 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 917 NULL, NULL, &fdev); 918 return fdev.dev; 919 } 920 921 static void failover_add_primary(VirtIONet *n, Error **errp) 922 { 923 Error *err = NULL; 924 DeviceState *dev = failover_find_primary_device(n); 925 926 if (dev) { 927 return; 928 } 929 930 if (!n->primary_opts) { 931 error_setg(errp, "Primary device not found"); 932 error_append_hint(errp, "Virtio-net failover will not work. Make " 933 "sure primary device has parameter" 934 " failover_pair_id=%s\n", n->netclient_name); 935 return; 936 } 937 938 dev = qdev_device_add_from_qdict(n->primary_opts, 939 n->primary_opts_from_json, 940 &err); 941 if (err) { 942 qobject_unref(n->primary_opts); 943 n->primary_opts = NULL; 944 } else { 945 object_unref(OBJECT(dev)); 946 } 947 error_propagate(errp, err); 948 } 949 950 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 951 { 952 VirtIONet *n = VIRTIO_NET(vdev); 953 Error *err = NULL; 954 int i; 955 956 if (n->mtu_bypass_backend && 957 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 958 features &= ~(1ULL << VIRTIO_NET_F_MTU); 959 } 960 961 virtio_net_set_multiqueue(n, 962 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 963 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 964 965 virtio_net_set_mrg_rx_bufs(n, 966 virtio_has_feature(features, 967 VIRTIO_NET_F_MRG_RXBUF), 968 virtio_has_feature(features, 969 VIRTIO_F_VERSION_1), 970 virtio_has_feature(features, 971 VIRTIO_NET_F_HASH_REPORT)); 972 973 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 974 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 975 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 976 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 977 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 978 979 if (n->has_vnet_hdr) { 980 n->curr_guest_offloads = 981 virtio_net_guest_offloads_by_features(features); 982 virtio_net_apply_guest_offloads(n); 983 } 984 985 for (i = 0; i < n->max_queue_pairs; i++) { 986 NetClientState *nc = qemu_get_subqueue(n->nic, i); 987 988 if (!get_vhost_net(nc->peer)) { 989 continue; 990 } 991 vhost_net_ack_features(get_vhost_net(nc->peer), features); 992 993 /* 994 * keep acked_features in NetVhostUserState up-to-date so it 995 * can't miss any features configured by guest virtio driver. 996 */ 997 vhost_net_save_acked_features(nc->peer); 998 } 999 1000 if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 1001 memset(n->vlans, 0xff, MAX_VLAN >> 3); 1002 } 1003 1004 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 1005 qapi_event_send_failover_negotiated(n->netclient_name); 1006 qatomic_set(&n->failover_primary_hidden, false); 1007 failover_add_primary(n, &err); 1008 if (err) { 1009 if (!qtest_enabled()) { 1010 warn_report_err(err); 1011 } else { 1012 error_free(err); 1013 } 1014 } 1015 } 1016 } 1017 1018 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 1019 struct iovec *iov, unsigned int iov_cnt) 1020 { 1021 uint8_t on; 1022 size_t s; 1023 NetClientState *nc = qemu_get_queue(n->nic); 1024 1025 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 1026 if (s != sizeof(on)) { 1027 return VIRTIO_NET_ERR; 1028 } 1029 1030 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 1031 n->promisc = on; 1032 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 1033 n->allmulti = on; 1034 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 1035 n->alluni = on; 1036 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 1037 n->nomulti = on; 1038 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 1039 n->nouni = on; 1040 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 1041 n->nobcast = on; 1042 } else { 1043 return VIRTIO_NET_ERR; 1044 } 1045 1046 rxfilter_notify(nc); 1047 1048 return VIRTIO_NET_OK; 1049 } 1050 1051 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 1052 struct iovec *iov, unsigned int iov_cnt) 1053 { 1054 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1055 uint64_t offloads; 1056 size_t s; 1057 1058 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 1059 return VIRTIO_NET_ERR; 1060 } 1061 1062 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 1063 if (s != sizeof(offloads)) { 1064 return VIRTIO_NET_ERR; 1065 } 1066 1067 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 1068 uint64_t supported_offloads; 1069 1070 offloads = virtio_ldq_p(vdev, &offloads); 1071 1072 if (!n->has_vnet_hdr) { 1073 return VIRTIO_NET_ERR; 1074 } 1075 1076 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1077 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1078 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1079 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1080 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1081 1082 supported_offloads = virtio_net_supported_guest_offloads(n); 1083 if (offloads & ~supported_offloads) { 1084 return VIRTIO_NET_ERR; 1085 } 1086 1087 n->curr_guest_offloads = offloads; 1088 virtio_net_apply_guest_offloads(n); 1089 1090 return VIRTIO_NET_OK; 1091 } else { 1092 return VIRTIO_NET_ERR; 1093 } 1094 } 1095 1096 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1097 struct iovec *iov, unsigned int iov_cnt) 1098 { 1099 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1100 struct virtio_net_ctrl_mac mac_data; 1101 size_t s; 1102 NetClientState *nc = qemu_get_queue(n->nic); 1103 1104 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1105 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1106 return VIRTIO_NET_ERR; 1107 } 1108 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1109 assert(s == sizeof(n->mac)); 1110 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1111 rxfilter_notify(nc); 1112 1113 return VIRTIO_NET_OK; 1114 } 1115 1116 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1117 return VIRTIO_NET_ERR; 1118 } 1119 1120 int in_use = 0; 1121 int first_multi = 0; 1122 uint8_t uni_overflow = 0; 1123 uint8_t multi_overflow = 0; 1124 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1125 1126 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1127 sizeof(mac_data.entries)); 1128 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1129 if (s != sizeof(mac_data.entries)) { 1130 goto error; 1131 } 1132 iov_discard_front(&iov, &iov_cnt, s); 1133 1134 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1135 goto error; 1136 } 1137 1138 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1139 s = iov_to_buf(iov, iov_cnt, 0, macs, 1140 mac_data.entries * ETH_ALEN); 1141 if (s != mac_data.entries * ETH_ALEN) { 1142 goto error; 1143 } 1144 in_use += mac_data.entries; 1145 } else { 1146 uni_overflow = 1; 1147 } 1148 1149 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1150 1151 first_multi = in_use; 1152 1153 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1154 sizeof(mac_data.entries)); 1155 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1156 if (s != sizeof(mac_data.entries)) { 1157 goto error; 1158 } 1159 1160 iov_discard_front(&iov, &iov_cnt, s); 1161 1162 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1163 goto error; 1164 } 1165 1166 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1167 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1168 mac_data.entries * ETH_ALEN); 1169 if (s != mac_data.entries * ETH_ALEN) { 1170 goto error; 1171 } 1172 in_use += mac_data.entries; 1173 } else { 1174 multi_overflow = 1; 1175 } 1176 1177 n->mac_table.in_use = in_use; 1178 n->mac_table.first_multi = first_multi; 1179 n->mac_table.uni_overflow = uni_overflow; 1180 n->mac_table.multi_overflow = multi_overflow; 1181 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1182 g_free(macs); 1183 rxfilter_notify(nc); 1184 1185 return VIRTIO_NET_OK; 1186 1187 error: 1188 g_free(macs); 1189 return VIRTIO_NET_ERR; 1190 } 1191 1192 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1193 struct iovec *iov, unsigned int iov_cnt) 1194 { 1195 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1196 uint16_t vid; 1197 size_t s; 1198 NetClientState *nc = qemu_get_queue(n->nic); 1199 1200 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1201 vid = virtio_lduw_p(vdev, &vid); 1202 if (s != sizeof(vid)) { 1203 return VIRTIO_NET_ERR; 1204 } 1205 1206 if (vid >= MAX_VLAN) 1207 return VIRTIO_NET_ERR; 1208 1209 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1210 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1211 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1212 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1213 else 1214 return VIRTIO_NET_ERR; 1215 1216 rxfilter_notify(nc); 1217 1218 return VIRTIO_NET_OK; 1219 } 1220 1221 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1222 struct iovec *iov, unsigned int iov_cnt) 1223 { 1224 trace_virtio_net_handle_announce(n->announce_timer.round); 1225 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1226 n->status & VIRTIO_NET_S_ANNOUNCE) { 1227 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1228 if (n->announce_timer.round) { 1229 qemu_announce_timer_step(&n->announce_timer); 1230 } 1231 return VIRTIO_NET_OK; 1232 } else { 1233 return VIRTIO_NET_ERR; 1234 } 1235 } 1236 1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1238 { 1239 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1240 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1241 return false; 1242 } 1243 1244 return nc->info->set_steering_ebpf(nc, prog_fd); 1245 } 1246 1247 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1248 struct EBPFRSSConfig *config) 1249 { 1250 config->redirect = data->redirect; 1251 config->populate_hash = data->populate_hash; 1252 config->hash_types = data->hash_types; 1253 config->indirections_len = data->indirections_len; 1254 config->default_queue = data->default_queue; 1255 } 1256 1257 static bool virtio_net_attach_ebpf_rss(VirtIONet *n) 1258 { 1259 struct EBPFRSSConfig config = {}; 1260 1261 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1262 return false; 1263 } 1264 1265 rss_data_to_rss_config(&n->rss_data, &config); 1266 1267 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1268 n->rss_data.indirections_table, n->rss_data.key, 1269 NULL)) { 1270 return false; 1271 } 1272 1273 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1274 return false; 1275 } 1276 1277 return true; 1278 } 1279 1280 static void virtio_net_detach_ebpf_rss(VirtIONet *n) 1281 { 1282 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1283 } 1284 1285 static void virtio_net_commit_rss_config(VirtIONet *n) 1286 { 1287 if (n->rss_data.enabled) { 1288 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 1289 if (n->rss_data.populate_hash) { 1290 virtio_net_detach_ebpf_rss(n); 1291 } else if (!virtio_net_attach_ebpf_rss(n)) { 1292 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1293 warn_report("Can't load eBPF RSS for vhost"); 1294 } else { 1295 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1296 n->rss_data.enabled_software_rss = true; 1297 } 1298 } 1299 1300 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1301 n->rss_data.indirections_len, 1302 sizeof(n->rss_data.key)); 1303 } else { 1304 virtio_net_detach_ebpf_rss(n); 1305 trace_virtio_net_rss_disable(); 1306 } 1307 } 1308 1309 static void virtio_net_disable_rss(VirtIONet *n) 1310 { 1311 if (!n->rss_data.enabled) { 1312 return; 1313 } 1314 1315 n->rss_data.enabled = false; 1316 virtio_net_commit_rss_config(n); 1317 } 1318 1319 static bool virtio_net_load_ebpf_fds(VirtIONet *n) 1320 { 1321 int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1}; 1322 int ret = true; 1323 int i = 0; 1324 1325 if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) { 1326 warn_report("Expected %d file descriptors but got %d", 1327 EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); 1328 return false; 1329 } 1330 1331 for (i = 0; i < n->nr_ebpf_rss_fds; i++) { 1332 fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], 1333 &error_warn); 1334 if (fds[i] < 0) { 1335 ret = false; 1336 goto exit; 1337 } 1338 } 1339 1340 ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], NULL); 1341 1342 exit: 1343 if (!ret) { 1344 for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) { 1345 close(fds[i]); 1346 } 1347 } 1348 1349 return ret; 1350 } 1351 1352 static bool virtio_net_load_ebpf(VirtIONet *n) 1353 { 1354 bool ret = false; 1355 1356 if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1357 if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) { 1358 ret = ebpf_rss_load(&n->ebpf_rss, NULL); 1359 } 1360 } 1361 1362 return ret; 1363 } 1364 1365 static void virtio_net_unload_ebpf(VirtIONet *n) 1366 { 1367 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1368 ebpf_rss_unload(&n->ebpf_rss); 1369 } 1370 1371 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1372 struct iovec *iov, 1373 unsigned int iov_cnt, 1374 bool do_rss) 1375 { 1376 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1377 struct virtio_net_rss_config cfg; 1378 size_t s, offset = 0, size_get; 1379 uint16_t queue_pairs, i; 1380 struct { 1381 uint16_t us; 1382 uint8_t b; 1383 } QEMU_PACKED temp; 1384 const char *err_msg = ""; 1385 uint32_t err_value = 0; 1386 1387 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1388 err_msg = "RSS is not negotiated"; 1389 goto error; 1390 } 1391 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1392 err_msg = "Hash report is not negotiated"; 1393 goto error; 1394 } 1395 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1396 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1397 if (s != size_get) { 1398 err_msg = "Short command buffer"; 1399 err_value = (uint32_t)s; 1400 goto error; 1401 } 1402 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1403 n->rss_data.indirections_len = 1404 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1405 n->rss_data.indirections_len++; 1406 if (!do_rss) { 1407 n->rss_data.indirections_len = 1; 1408 } 1409 if (!is_power_of_2(n->rss_data.indirections_len)) { 1410 err_msg = "Invalid size of indirection table"; 1411 err_value = n->rss_data.indirections_len; 1412 goto error; 1413 } 1414 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1415 err_msg = "Too large indirection table"; 1416 err_value = n->rss_data.indirections_len; 1417 goto error; 1418 } 1419 n->rss_data.default_queue = do_rss ? 1420 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1421 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1422 err_msg = "Invalid default queue"; 1423 err_value = n->rss_data.default_queue; 1424 goto error; 1425 } 1426 offset += size_get; 1427 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1428 g_free(n->rss_data.indirections_table); 1429 n->rss_data.indirections_table = g_malloc(size_get); 1430 if (!n->rss_data.indirections_table) { 1431 err_msg = "Can't allocate indirections table"; 1432 err_value = n->rss_data.indirections_len; 1433 goto error; 1434 } 1435 s = iov_to_buf(iov, iov_cnt, offset, 1436 n->rss_data.indirections_table, size_get); 1437 if (s != size_get) { 1438 err_msg = "Short indirection table buffer"; 1439 err_value = (uint32_t)s; 1440 goto error; 1441 } 1442 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1443 uint16_t val = n->rss_data.indirections_table[i]; 1444 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1445 } 1446 offset += size_get; 1447 size_get = sizeof(temp); 1448 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1449 if (s != size_get) { 1450 err_msg = "Can't get queue_pairs"; 1451 err_value = (uint32_t)s; 1452 goto error; 1453 } 1454 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1455 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1456 err_msg = "Invalid number of queue_pairs"; 1457 err_value = queue_pairs; 1458 goto error; 1459 } 1460 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1461 err_msg = "Invalid key size"; 1462 err_value = temp.b; 1463 goto error; 1464 } 1465 if (!temp.b && n->rss_data.hash_types) { 1466 err_msg = "No key provided"; 1467 err_value = 0; 1468 goto error; 1469 } 1470 if (!temp.b && !n->rss_data.hash_types) { 1471 virtio_net_disable_rss(n); 1472 return queue_pairs; 1473 } 1474 offset += size_get; 1475 size_get = temp.b; 1476 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1477 if (s != size_get) { 1478 err_msg = "Can get key buffer"; 1479 err_value = (uint32_t)s; 1480 goto error; 1481 } 1482 n->rss_data.enabled = true; 1483 virtio_net_commit_rss_config(n); 1484 return queue_pairs; 1485 error: 1486 trace_virtio_net_rss_error(err_msg, err_value); 1487 virtio_net_disable_rss(n); 1488 return 0; 1489 } 1490 1491 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1492 struct iovec *iov, unsigned int iov_cnt) 1493 { 1494 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1495 uint16_t queue_pairs; 1496 NetClientState *nc = qemu_get_queue(n->nic); 1497 1498 virtio_net_disable_rss(n); 1499 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1500 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1501 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1502 } 1503 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1504 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1505 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1506 struct virtio_net_ctrl_mq mq; 1507 size_t s; 1508 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1509 return VIRTIO_NET_ERR; 1510 } 1511 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1512 if (s != sizeof(mq)) { 1513 return VIRTIO_NET_ERR; 1514 } 1515 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1516 1517 } else { 1518 return VIRTIO_NET_ERR; 1519 } 1520 1521 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1522 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1523 queue_pairs > n->max_queue_pairs || 1524 !n->multiqueue) { 1525 return VIRTIO_NET_ERR; 1526 } 1527 1528 n->curr_queue_pairs = queue_pairs; 1529 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1530 /* 1531 * Avoid updating the backend for a vdpa device: We're only interested 1532 * in updating the device model queues. 1533 */ 1534 return VIRTIO_NET_OK; 1535 } 1536 /* stop the backend before changing the number of queue_pairs to avoid handling a 1537 * disabled queue */ 1538 virtio_net_set_status(vdev, vdev->status); 1539 virtio_net_set_queue_pairs(n); 1540 1541 return VIRTIO_NET_OK; 1542 } 1543 1544 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1545 const struct iovec *in_sg, unsigned in_num, 1546 const struct iovec *out_sg, 1547 unsigned out_num) 1548 { 1549 VirtIONet *n = VIRTIO_NET(vdev); 1550 struct virtio_net_ctrl_hdr ctrl; 1551 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1552 size_t s; 1553 struct iovec *iov, *iov2; 1554 1555 if (iov_size(in_sg, in_num) < sizeof(status) || 1556 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1557 virtio_error(vdev, "virtio-net ctrl missing headers"); 1558 return 0; 1559 } 1560 1561 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1562 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1563 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1564 if (s != sizeof(ctrl)) { 1565 status = VIRTIO_NET_ERR; 1566 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1567 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1568 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1569 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1570 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1571 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1572 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1573 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1574 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1575 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1576 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1577 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1578 } 1579 1580 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1581 assert(s == sizeof(status)); 1582 1583 g_free(iov2); 1584 return sizeof(status); 1585 } 1586 1587 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1588 { 1589 VirtQueueElement *elem; 1590 1591 for (;;) { 1592 size_t written; 1593 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1594 if (!elem) { 1595 break; 1596 } 1597 1598 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1599 elem->out_sg, elem->out_num); 1600 if (written > 0) { 1601 virtqueue_push(vq, elem, written); 1602 virtio_notify(vdev, vq); 1603 g_free(elem); 1604 } else { 1605 virtqueue_detach_element(vq, elem, 0); 1606 g_free(elem); 1607 break; 1608 } 1609 } 1610 } 1611 1612 /* RX */ 1613 1614 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1615 { 1616 VirtIONet *n = VIRTIO_NET(vdev); 1617 int queue_index = vq2q(virtio_get_queue_index(vq)); 1618 1619 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1620 } 1621 1622 static bool virtio_net_can_receive(NetClientState *nc) 1623 { 1624 VirtIONet *n = qemu_get_nic_opaque(nc); 1625 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1626 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1627 1628 if (!vdev->vm_running) { 1629 return false; 1630 } 1631 1632 if (nc->queue_index >= n->curr_queue_pairs) { 1633 return false; 1634 } 1635 1636 if (!virtio_queue_ready(q->rx_vq) || 1637 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1638 return false; 1639 } 1640 1641 return true; 1642 } 1643 1644 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1645 { 1646 int opaque; 1647 unsigned int in_bytes; 1648 VirtIONet *n = q->n; 1649 1650 while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) { 1651 opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL, 1652 bufsize, 0); 1653 /* Buffer is enough, disable notifiaction */ 1654 if (bufsize <= in_bytes) { 1655 break; 1656 } 1657 1658 if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) { 1659 /* Guest has added some buffers, try again */ 1660 continue; 1661 } else { 1662 return 0; 1663 } 1664 } 1665 1666 virtio_queue_set_notification(q->rx_vq, 0); 1667 1668 return 1; 1669 } 1670 1671 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1672 { 1673 virtio_tswap16s(vdev, &hdr->hdr_len); 1674 virtio_tswap16s(vdev, &hdr->gso_size); 1675 virtio_tswap16s(vdev, &hdr->csum_start); 1676 virtio_tswap16s(vdev, &hdr->csum_offset); 1677 } 1678 1679 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1680 * it never finds out that the packets don't have valid checksums. This 1681 * causes dhclient to get upset. Fedora's carried a patch for ages to 1682 * fix this with Xen but it hasn't appeared in an upstream release of 1683 * dhclient yet. 1684 * 1685 * To avoid breaking existing guests, we catch udp packets and add 1686 * checksums. This is terrible but it's better than hacking the guest 1687 * kernels. 1688 * 1689 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1690 * we should provide a mechanism to disable it to avoid polluting the host 1691 * cache. 1692 */ 1693 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1694 uint8_t *buf, size_t size) 1695 { 1696 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1697 (size > 27 && size < 1500) && /* normal sized MTU */ 1698 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1699 (buf[23] == 17) && /* ip.protocol == UDP */ 1700 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1701 net_checksum_calculate(buf, size, CSUM_UDP); 1702 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1703 } 1704 } 1705 1706 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1707 const void *buf, size_t size) 1708 { 1709 if (n->has_vnet_hdr) { 1710 /* FIXME this cast is evil */ 1711 void *wbuf = (void *)buf; 1712 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1713 size - n->host_hdr_len); 1714 1715 if (n->needs_vnet_hdr_swap) { 1716 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1717 } 1718 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1719 } else { 1720 struct virtio_net_hdr hdr = { 1721 .flags = 0, 1722 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1723 }; 1724 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1725 } 1726 } 1727 1728 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1729 { 1730 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1731 static const uint8_t vlan[] = {0x81, 0x00}; 1732 uint8_t *ptr = (uint8_t *)buf; 1733 int i; 1734 1735 if (n->promisc) 1736 return 1; 1737 1738 ptr += n->host_hdr_len; 1739 1740 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1741 int vid = lduw_be_p(ptr + 14) & 0xfff; 1742 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1743 return 0; 1744 } 1745 1746 if (ptr[0] & 1) { // multicast 1747 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1748 return !n->nobcast; 1749 } else if (n->nomulti) { 1750 return 0; 1751 } else if (n->allmulti || n->mac_table.multi_overflow) { 1752 return 1; 1753 } 1754 1755 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1756 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1757 return 1; 1758 } 1759 } 1760 } else { // unicast 1761 if (n->nouni) { 1762 return 0; 1763 } else if (n->alluni || n->mac_table.uni_overflow) { 1764 return 1; 1765 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1766 return 1; 1767 } 1768 1769 for (i = 0; i < n->mac_table.first_multi; i++) { 1770 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1771 return 1; 1772 } 1773 } 1774 } 1775 1776 return 0; 1777 } 1778 1779 static uint8_t virtio_net_get_hash_type(bool hasip4, 1780 bool hasip6, 1781 EthL4HdrProto l4hdr_proto, 1782 uint32_t types) 1783 { 1784 if (hasip4) { 1785 switch (l4hdr_proto) { 1786 case ETH_L4_HDR_PROTO_TCP: 1787 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 1788 return NetPktRssIpV4Tcp; 1789 } 1790 break; 1791 1792 case ETH_L4_HDR_PROTO_UDP: 1793 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 1794 return NetPktRssIpV4Udp; 1795 } 1796 break; 1797 1798 default: 1799 break; 1800 } 1801 1802 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1803 return NetPktRssIpV4; 1804 } 1805 } else if (hasip6) { 1806 switch (l4hdr_proto) { 1807 case ETH_L4_HDR_PROTO_TCP: 1808 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) { 1809 return NetPktRssIpV6TcpEx; 1810 } 1811 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 1812 return NetPktRssIpV6Tcp; 1813 } 1814 break; 1815 1816 case ETH_L4_HDR_PROTO_UDP: 1817 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) { 1818 return NetPktRssIpV6UdpEx; 1819 } 1820 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 1821 return NetPktRssIpV6Udp; 1822 } 1823 break; 1824 1825 default: 1826 break; 1827 } 1828 1829 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) { 1830 return NetPktRssIpV6Ex; 1831 } 1832 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 1833 return NetPktRssIpV6; 1834 } 1835 } 1836 return 0xff; 1837 } 1838 1839 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1840 size_t size, 1841 struct virtio_net_hdr_v1_hash *hdr) 1842 { 1843 VirtIONet *n = qemu_get_nic_opaque(nc); 1844 unsigned int index = nc->queue_index, new_index = index; 1845 struct NetRxPkt *pkt = n->rx_pkt; 1846 uint8_t net_hash_type; 1847 uint32_t hash; 1848 bool hasip4, hasip6; 1849 EthL4HdrProto l4hdr_proto; 1850 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1851 VIRTIO_NET_HASH_REPORT_IPv4, 1852 VIRTIO_NET_HASH_REPORT_TCPv4, 1853 VIRTIO_NET_HASH_REPORT_TCPv6, 1854 VIRTIO_NET_HASH_REPORT_IPv6, 1855 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1856 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1857 VIRTIO_NET_HASH_REPORT_UDPv4, 1858 VIRTIO_NET_HASH_REPORT_UDPv6, 1859 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1860 }; 1861 struct iovec iov = { 1862 .iov_base = (void *)buf, 1863 .iov_len = size 1864 }; 1865 1866 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); 1867 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); 1868 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, 1869 n->rss_data.hash_types); 1870 if (net_hash_type > NetPktRssIpV6UdpEx) { 1871 if (n->rss_data.populate_hash) { 1872 hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE; 1873 hdr->hash_report = 0; 1874 } 1875 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1876 } 1877 1878 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1879 1880 if (n->rss_data.populate_hash) { 1881 hdr->hash_value = hash; 1882 hdr->hash_report = reports[net_hash_type]; 1883 } 1884 1885 if (n->rss_data.redirect) { 1886 new_index = hash & (n->rss_data.indirections_len - 1); 1887 new_index = n->rss_data.indirections_table[new_index]; 1888 } 1889 1890 return (index == new_index) ? -1 : new_index; 1891 } 1892 1893 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1894 size_t size, bool no_rss) 1895 { 1896 VirtIONet *n = qemu_get_nic_opaque(nc); 1897 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1898 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1899 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1900 size_t lens[VIRTQUEUE_MAX_SIZE]; 1901 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1902 struct virtio_net_hdr_v1_hash extra_hdr; 1903 unsigned mhdr_cnt = 0; 1904 size_t offset, i, guest_offset, j; 1905 ssize_t err; 1906 1907 if (!virtio_net_can_receive(nc)) { 1908 return -1; 1909 } 1910 1911 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1912 int index = virtio_net_process_rss(nc, buf, size, &extra_hdr); 1913 if (index >= 0) { 1914 NetClientState *nc2 = 1915 qemu_get_subqueue(n->nic, index % n->curr_queue_pairs); 1916 return virtio_net_receive_rcu(nc2, buf, size, true); 1917 } 1918 } 1919 1920 /* hdr_len refers to the header we supply to the guest */ 1921 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1922 return 0; 1923 } 1924 1925 if (!receive_filter(n, buf, size)) 1926 return size; 1927 1928 offset = i = 0; 1929 1930 while (offset < size) { 1931 VirtQueueElement *elem; 1932 int len, total; 1933 const struct iovec *sg; 1934 1935 total = 0; 1936 1937 if (i == VIRTQUEUE_MAX_SIZE) { 1938 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1939 err = size; 1940 goto err; 1941 } 1942 1943 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1944 if (!elem) { 1945 if (i) { 1946 virtio_error(vdev, "virtio-net unexpected empty queue: " 1947 "i %zd mergeable %d offset %zd, size %zd, " 1948 "guest hdr len %zd, host hdr len %zd " 1949 "guest features 0x%" PRIx64, 1950 i, n->mergeable_rx_bufs, offset, size, 1951 n->guest_hdr_len, n->host_hdr_len, 1952 vdev->guest_features); 1953 } 1954 err = -1; 1955 goto err; 1956 } 1957 1958 if (elem->in_num < 1) { 1959 virtio_error(vdev, 1960 "virtio-net receive queue contains no in buffers"); 1961 virtqueue_detach_element(q->rx_vq, elem, 0); 1962 g_free(elem); 1963 err = -1; 1964 goto err; 1965 } 1966 1967 sg = elem->in_sg; 1968 if (i == 0) { 1969 assert(offset == 0); 1970 if (n->mergeable_rx_bufs) { 1971 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 1972 sg, elem->in_num, 1973 offsetof(typeof(extra_hdr), hdr.num_buffers), 1974 sizeof(extra_hdr.hdr.num_buffers)); 1975 } 1976 1977 receive_header(n, sg, elem->in_num, buf, size); 1978 if (n->rss_data.populate_hash) { 1979 offset = offsetof(typeof(extra_hdr), hash_value); 1980 iov_from_buf(sg, elem->in_num, offset, 1981 (char *)&extra_hdr + offset, 1982 sizeof(extra_hdr.hash_value) + 1983 sizeof(extra_hdr.hash_report)); 1984 } 1985 offset = n->host_hdr_len; 1986 total += n->guest_hdr_len; 1987 guest_offset = n->guest_hdr_len; 1988 } else { 1989 guest_offset = 0; 1990 } 1991 1992 /* copy in packet. ugh */ 1993 len = iov_from_buf(sg, elem->in_num, guest_offset, 1994 buf + offset, size - offset); 1995 total += len; 1996 offset += len; 1997 /* If buffers can't be merged, at this point we 1998 * must have consumed the complete packet. 1999 * Otherwise, drop it. */ 2000 if (!n->mergeable_rx_bufs && offset < size) { 2001 virtqueue_unpop(q->rx_vq, elem, total); 2002 g_free(elem); 2003 err = size; 2004 goto err; 2005 } 2006 2007 elems[i] = elem; 2008 lens[i] = total; 2009 i++; 2010 } 2011 2012 if (mhdr_cnt) { 2013 virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i); 2014 iov_from_buf(mhdr_sg, mhdr_cnt, 2015 0, 2016 &extra_hdr.hdr.num_buffers, 2017 sizeof extra_hdr.hdr.num_buffers); 2018 } 2019 2020 for (j = 0; j < i; j++) { 2021 /* signal other side */ 2022 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 2023 g_free(elems[j]); 2024 } 2025 2026 virtqueue_flush(q->rx_vq, i); 2027 virtio_notify(vdev, q->rx_vq); 2028 2029 return size; 2030 2031 err: 2032 for (j = 0; j < i; j++) { 2033 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 2034 g_free(elems[j]); 2035 } 2036 2037 return err; 2038 } 2039 2040 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 2041 size_t size) 2042 { 2043 RCU_READ_LOCK_GUARD(); 2044 2045 return virtio_net_receive_rcu(nc, buf, size, false); 2046 } 2047 2048 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 2049 const uint8_t *buf, 2050 VirtioNetRscUnit *unit) 2051 { 2052 uint16_t ip_hdrlen; 2053 struct ip_header *ip; 2054 2055 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 2056 + sizeof(struct eth_header)); 2057 unit->ip = (void *)ip; 2058 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 2059 unit->ip_plen = &ip->ip_len; 2060 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 2061 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2062 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 2063 } 2064 2065 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 2066 const uint8_t *buf, 2067 VirtioNetRscUnit *unit) 2068 { 2069 struct ip6_header *ip6; 2070 2071 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2072 + sizeof(struct eth_header)); 2073 unit->ip = ip6; 2074 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2075 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2076 + sizeof(struct ip6_header)); 2077 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2078 2079 /* There is a difference between payload length in ipv4 and v6, 2080 ip header is excluded in ipv6 */ 2081 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2082 } 2083 2084 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2085 VirtioNetRscSeg *seg) 2086 { 2087 int ret; 2088 struct virtio_net_hdr_v1 *h; 2089 2090 h = (struct virtio_net_hdr_v1 *)seg->buf; 2091 h->flags = 0; 2092 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2093 2094 if (seg->is_coalesced) { 2095 h->rsc.segments = seg->packets; 2096 h->rsc.dup_acks = seg->dup_ack; 2097 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2098 if (chain->proto == ETH_P_IP) { 2099 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2100 } else { 2101 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2102 } 2103 } 2104 2105 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2106 QTAILQ_REMOVE(&chain->buffers, seg, next); 2107 g_free(seg->buf); 2108 g_free(seg); 2109 2110 return ret; 2111 } 2112 2113 static void virtio_net_rsc_purge(void *opq) 2114 { 2115 VirtioNetRscSeg *seg, *rn; 2116 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2117 2118 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2119 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2120 chain->stat.purge_failed++; 2121 continue; 2122 } 2123 } 2124 2125 chain->stat.timer++; 2126 if (!QTAILQ_EMPTY(&chain->buffers)) { 2127 timer_mod(chain->drain_timer, 2128 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); 2129 } 2130 } 2131 2132 static void virtio_net_rsc_cleanup(VirtIONet *n) 2133 { 2134 VirtioNetRscChain *chain, *rn_chain; 2135 VirtioNetRscSeg *seg, *rn_seg; 2136 2137 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2138 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2139 QTAILQ_REMOVE(&chain->buffers, seg, next); 2140 g_free(seg->buf); 2141 g_free(seg); 2142 } 2143 2144 timer_free(chain->drain_timer); 2145 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2146 g_free(chain); 2147 } 2148 } 2149 2150 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2151 NetClientState *nc, 2152 const uint8_t *buf, size_t size) 2153 { 2154 uint16_t hdr_len; 2155 VirtioNetRscSeg *seg; 2156 2157 hdr_len = chain->n->guest_hdr_len; 2158 seg = g_new(VirtioNetRscSeg, 1); 2159 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2160 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2161 memcpy(seg->buf, buf, size); 2162 seg->size = size; 2163 seg->packets = 1; 2164 seg->dup_ack = 0; 2165 seg->is_coalesced = 0; 2166 seg->nc = nc; 2167 2168 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2169 chain->stat.cache++; 2170 2171 switch (chain->proto) { 2172 case ETH_P_IP: 2173 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2174 break; 2175 case ETH_P_IPV6: 2176 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2177 break; 2178 default: 2179 g_assert_not_reached(); 2180 } 2181 } 2182 2183 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2184 VirtioNetRscSeg *seg, 2185 const uint8_t *buf, 2186 struct tcp_header *n_tcp, 2187 struct tcp_header *o_tcp) 2188 { 2189 uint32_t nack, oack; 2190 uint16_t nwin, owin; 2191 2192 nack = htonl(n_tcp->th_ack); 2193 nwin = htons(n_tcp->th_win); 2194 oack = htonl(o_tcp->th_ack); 2195 owin = htons(o_tcp->th_win); 2196 2197 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2198 chain->stat.ack_out_of_win++; 2199 return RSC_FINAL; 2200 } else if (nack == oack) { 2201 /* duplicated ack or window probe */ 2202 if (nwin == owin) { 2203 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2204 chain->stat.dup_ack++; 2205 return RSC_FINAL; 2206 } else { 2207 /* Coalesce window update */ 2208 o_tcp->th_win = n_tcp->th_win; 2209 chain->stat.win_update++; 2210 return RSC_COALESCE; 2211 } 2212 } else { 2213 /* pure ack, go to 'C', finalize*/ 2214 chain->stat.pure_ack++; 2215 return RSC_FINAL; 2216 } 2217 } 2218 2219 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2220 VirtioNetRscSeg *seg, 2221 const uint8_t *buf, 2222 VirtioNetRscUnit *n_unit) 2223 { 2224 void *data; 2225 uint16_t o_ip_len; 2226 uint32_t nseq, oseq; 2227 VirtioNetRscUnit *o_unit; 2228 2229 o_unit = &seg->unit; 2230 o_ip_len = htons(*o_unit->ip_plen); 2231 nseq = htonl(n_unit->tcp->th_seq); 2232 oseq = htonl(o_unit->tcp->th_seq); 2233 2234 /* out of order or retransmitted. */ 2235 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2236 chain->stat.data_out_of_win++; 2237 return RSC_FINAL; 2238 } 2239 2240 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2241 if (nseq == oseq) { 2242 if ((o_unit->payload == 0) && n_unit->payload) { 2243 /* From no payload to payload, normal case, not a dup ack or etc */ 2244 chain->stat.data_after_pure_ack++; 2245 goto coalesce; 2246 } else { 2247 return virtio_net_rsc_handle_ack(chain, seg, buf, 2248 n_unit->tcp, o_unit->tcp); 2249 } 2250 } else if ((nseq - oseq) != o_unit->payload) { 2251 /* Not a consistent packet, out of order */ 2252 chain->stat.data_out_of_order++; 2253 return RSC_FINAL; 2254 } else { 2255 coalesce: 2256 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2257 chain->stat.over_size++; 2258 return RSC_FINAL; 2259 } 2260 2261 /* Here comes the right data, the payload length in v4/v6 is different, 2262 so use the field value to update and record the new data len */ 2263 o_unit->payload += n_unit->payload; /* update new data len */ 2264 2265 /* update field in ip header */ 2266 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2267 2268 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2269 for windows guest, while this may change the behavior for linux 2270 guest (only if it uses RSC feature). */ 2271 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2272 2273 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2274 o_unit->tcp->th_win = n_unit->tcp->th_win; 2275 2276 memmove(seg->buf + seg->size, data, n_unit->payload); 2277 seg->size += n_unit->payload; 2278 seg->packets++; 2279 chain->stat.coalesced++; 2280 return RSC_COALESCE; 2281 } 2282 } 2283 2284 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2285 VirtioNetRscSeg *seg, 2286 const uint8_t *buf, size_t size, 2287 VirtioNetRscUnit *unit) 2288 { 2289 struct ip_header *ip1, *ip2; 2290 2291 ip1 = (struct ip_header *)(unit->ip); 2292 ip2 = (struct ip_header *)(seg->unit.ip); 2293 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2294 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2295 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2296 chain->stat.no_match++; 2297 return RSC_NO_MATCH; 2298 } 2299 2300 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2301 } 2302 2303 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2304 VirtioNetRscSeg *seg, 2305 const uint8_t *buf, size_t size, 2306 VirtioNetRscUnit *unit) 2307 { 2308 struct ip6_header *ip1, *ip2; 2309 2310 ip1 = (struct ip6_header *)(unit->ip); 2311 ip2 = (struct ip6_header *)(seg->unit.ip); 2312 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2313 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2314 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2315 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2316 chain->stat.no_match++; 2317 return RSC_NO_MATCH; 2318 } 2319 2320 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2321 } 2322 2323 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2324 * to prevent out of order */ 2325 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2326 struct tcp_header *tcp) 2327 { 2328 uint16_t tcp_hdr; 2329 uint16_t tcp_flag; 2330 2331 tcp_flag = htons(tcp->th_offset_flags); 2332 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2333 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2334 if (tcp_flag & TH_SYN) { 2335 chain->stat.tcp_syn++; 2336 return RSC_BYPASS; 2337 } 2338 2339 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2340 chain->stat.tcp_ctrl_drain++; 2341 return RSC_FINAL; 2342 } 2343 2344 if (tcp_hdr > sizeof(struct tcp_header)) { 2345 chain->stat.tcp_all_opt++; 2346 return RSC_FINAL; 2347 } 2348 2349 return RSC_CANDIDATE; 2350 } 2351 2352 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2353 NetClientState *nc, 2354 const uint8_t *buf, size_t size, 2355 VirtioNetRscUnit *unit) 2356 { 2357 int ret; 2358 VirtioNetRscSeg *seg, *nseg; 2359 2360 if (QTAILQ_EMPTY(&chain->buffers)) { 2361 chain->stat.empty_cache++; 2362 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2363 timer_mod(chain->drain_timer, 2364 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); 2365 return size; 2366 } 2367 2368 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2369 if (chain->proto == ETH_P_IP) { 2370 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2371 } else { 2372 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2373 } 2374 2375 if (ret == RSC_FINAL) { 2376 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2377 /* Send failed */ 2378 chain->stat.final_failed++; 2379 return 0; 2380 } 2381 2382 /* Send current packet */ 2383 return virtio_net_do_receive(nc, buf, size); 2384 } else if (ret == RSC_NO_MATCH) { 2385 continue; 2386 } else { 2387 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2388 seg->is_coalesced = 1; 2389 return size; 2390 } 2391 } 2392 2393 chain->stat.no_match_cache++; 2394 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2395 return size; 2396 } 2397 2398 /* Drain a connection data, this is to avoid out of order segments */ 2399 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2400 NetClientState *nc, 2401 const uint8_t *buf, size_t size, 2402 uint16_t ip_start, uint16_t ip_size, 2403 uint16_t tcp_port) 2404 { 2405 VirtioNetRscSeg *seg, *nseg; 2406 uint32_t ppair1, ppair2; 2407 2408 ppair1 = *(uint32_t *)(buf + tcp_port); 2409 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2410 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2411 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2412 || (ppair1 != ppair2)) { 2413 continue; 2414 } 2415 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2416 chain->stat.drain_failed++; 2417 } 2418 2419 break; 2420 } 2421 2422 return virtio_net_do_receive(nc, buf, size); 2423 } 2424 2425 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2426 struct ip_header *ip, 2427 const uint8_t *buf, size_t size) 2428 { 2429 uint16_t ip_len; 2430 2431 /* Not an ipv4 packet */ 2432 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2433 chain->stat.ip_option++; 2434 return RSC_BYPASS; 2435 } 2436 2437 /* Don't handle packets with ip option */ 2438 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2439 chain->stat.ip_option++; 2440 return RSC_BYPASS; 2441 } 2442 2443 if (ip->ip_p != IPPROTO_TCP) { 2444 chain->stat.bypass_not_tcp++; 2445 return RSC_BYPASS; 2446 } 2447 2448 /* Don't handle packets with ip fragment */ 2449 if (!(htons(ip->ip_off) & IP_DF)) { 2450 chain->stat.ip_frag++; 2451 return RSC_BYPASS; 2452 } 2453 2454 /* Don't handle packets with ecn flag */ 2455 if (IPTOS_ECN(ip->ip_tos)) { 2456 chain->stat.ip_ecn++; 2457 return RSC_BYPASS; 2458 } 2459 2460 ip_len = htons(ip->ip_len); 2461 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2462 || ip_len > (size - chain->n->guest_hdr_len - 2463 sizeof(struct eth_header))) { 2464 chain->stat.ip_hacked++; 2465 return RSC_BYPASS; 2466 } 2467 2468 return RSC_CANDIDATE; 2469 } 2470 2471 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2472 NetClientState *nc, 2473 const uint8_t *buf, size_t size) 2474 { 2475 int32_t ret; 2476 uint16_t hdr_len; 2477 VirtioNetRscUnit unit; 2478 2479 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2480 2481 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2482 + sizeof(struct tcp_header))) { 2483 chain->stat.bypass_not_tcp++; 2484 return virtio_net_do_receive(nc, buf, size); 2485 } 2486 2487 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2488 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2489 != RSC_CANDIDATE) { 2490 return virtio_net_do_receive(nc, buf, size); 2491 } 2492 2493 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2494 if (ret == RSC_BYPASS) { 2495 return virtio_net_do_receive(nc, buf, size); 2496 } else if (ret == RSC_FINAL) { 2497 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2498 ((hdr_len + sizeof(struct eth_header)) + 12), 2499 VIRTIO_NET_IP4_ADDR_SIZE, 2500 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2501 } 2502 2503 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2504 } 2505 2506 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2507 struct ip6_header *ip6, 2508 const uint8_t *buf, size_t size) 2509 { 2510 uint16_t ip_len; 2511 2512 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2513 != IP_HEADER_VERSION_6) { 2514 return RSC_BYPASS; 2515 } 2516 2517 /* Both option and protocol is checked in this */ 2518 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2519 chain->stat.bypass_not_tcp++; 2520 return RSC_BYPASS; 2521 } 2522 2523 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2524 if (ip_len < sizeof(struct tcp_header) || 2525 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2526 - sizeof(struct ip6_header))) { 2527 chain->stat.ip_hacked++; 2528 return RSC_BYPASS; 2529 } 2530 2531 /* Don't handle packets with ecn flag */ 2532 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2533 chain->stat.ip_ecn++; 2534 return RSC_BYPASS; 2535 } 2536 2537 return RSC_CANDIDATE; 2538 } 2539 2540 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2541 const uint8_t *buf, size_t size) 2542 { 2543 int32_t ret; 2544 uint16_t hdr_len; 2545 VirtioNetRscChain *chain; 2546 VirtioNetRscUnit unit; 2547 2548 chain = opq; 2549 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2550 2551 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2552 + sizeof(tcp_header))) { 2553 return virtio_net_do_receive(nc, buf, size); 2554 } 2555 2556 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2557 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2558 unit.ip, buf, size)) { 2559 return virtio_net_do_receive(nc, buf, size); 2560 } 2561 2562 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2563 if (ret == RSC_BYPASS) { 2564 return virtio_net_do_receive(nc, buf, size); 2565 } else if (ret == RSC_FINAL) { 2566 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2567 ((hdr_len + sizeof(struct eth_header)) + 8), 2568 VIRTIO_NET_IP6_ADDR_SIZE, 2569 hdr_len + sizeof(struct eth_header) 2570 + sizeof(struct ip6_header)); 2571 } 2572 2573 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2574 } 2575 2576 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2577 NetClientState *nc, 2578 uint16_t proto) 2579 { 2580 VirtioNetRscChain *chain; 2581 2582 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2583 return NULL; 2584 } 2585 2586 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2587 if (chain->proto == proto) { 2588 return chain; 2589 } 2590 } 2591 2592 chain = g_malloc(sizeof(*chain)); 2593 chain->n = n; 2594 chain->proto = proto; 2595 if (proto == (uint16_t)ETH_P_IP) { 2596 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2597 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2598 } else { 2599 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2600 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2601 } 2602 chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2603 virtio_net_rsc_purge, chain); 2604 memset(&chain->stat, 0, sizeof(chain->stat)); 2605 2606 QTAILQ_INIT(&chain->buffers); 2607 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2608 2609 return chain; 2610 } 2611 2612 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2613 const uint8_t *buf, 2614 size_t size) 2615 { 2616 uint16_t proto; 2617 VirtioNetRscChain *chain; 2618 struct eth_header *eth; 2619 VirtIONet *n; 2620 2621 n = qemu_get_nic_opaque(nc); 2622 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2623 return virtio_net_do_receive(nc, buf, size); 2624 } 2625 2626 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2627 proto = htons(eth->h_proto); 2628 2629 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2630 if (chain) { 2631 chain->stat.received++; 2632 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2633 return virtio_net_rsc_receive4(chain, nc, buf, size); 2634 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2635 return virtio_net_rsc_receive6(chain, nc, buf, size); 2636 } 2637 } 2638 return virtio_net_do_receive(nc, buf, size); 2639 } 2640 2641 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2642 size_t size) 2643 { 2644 VirtIONet *n = qemu_get_nic_opaque(nc); 2645 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2646 return virtio_net_rsc_receive(nc, buf, size); 2647 } else { 2648 return virtio_net_do_receive(nc, buf, size); 2649 } 2650 } 2651 2652 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2653 2654 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2655 { 2656 VirtIONet *n = qemu_get_nic_opaque(nc); 2657 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2658 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2659 int ret; 2660 2661 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2662 virtio_notify(vdev, q->tx_vq); 2663 2664 g_free(q->async_tx.elem); 2665 q->async_tx.elem = NULL; 2666 2667 virtio_queue_set_notification(q->tx_vq, 1); 2668 ret = virtio_net_flush_tx(q); 2669 if (ret >= n->tx_burst) { 2670 /* 2671 * the flush has been stopped by tx_burst 2672 * we will not receive notification for the 2673 * remainining part, so re-schedule 2674 */ 2675 virtio_queue_set_notification(q->tx_vq, 0); 2676 if (q->tx_bh) { 2677 replay_bh_schedule_event(q->tx_bh); 2678 } else { 2679 timer_mod(q->tx_timer, 2680 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2681 } 2682 q->tx_waiting = 1; 2683 } 2684 } 2685 2686 /* TX */ 2687 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2688 { 2689 VirtIONet *n = q->n; 2690 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2691 VirtQueueElement *elem; 2692 int32_t num_packets = 0; 2693 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2694 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2695 return num_packets; 2696 } 2697 2698 if (q->async_tx.elem) { 2699 virtio_queue_set_notification(q->tx_vq, 0); 2700 return num_packets; 2701 } 2702 2703 for (;;) { 2704 ssize_t ret; 2705 unsigned int out_num; 2706 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2707 struct virtio_net_hdr vhdr; 2708 2709 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2710 if (!elem) { 2711 break; 2712 } 2713 2714 out_num = elem->out_num; 2715 out_sg = elem->out_sg; 2716 if (out_num < 1) { 2717 virtio_error(vdev, "virtio-net header not in first element"); 2718 goto detach; 2719 } 2720 2721 if (n->needs_vnet_hdr_swap) { 2722 if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) < 2723 sizeof(vhdr)) { 2724 virtio_error(vdev, "virtio-net header incorrect"); 2725 goto detach; 2726 } 2727 virtio_net_hdr_swap(vdev, &vhdr); 2728 sg2[0].iov_base = &vhdr; 2729 sg2[0].iov_len = sizeof(vhdr); 2730 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num, 2731 sizeof(vhdr), -1); 2732 if (out_num == VIRTQUEUE_MAX_SIZE) { 2733 goto drop; 2734 } 2735 out_num += 1; 2736 out_sg = sg2; 2737 } 2738 /* 2739 * If host wants to see the guest header as is, we can 2740 * pass it on unchanged. Otherwise, copy just the parts 2741 * that host is interested in. 2742 */ 2743 assert(n->host_hdr_len <= n->guest_hdr_len); 2744 if (n->host_hdr_len != n->guest_hdr_len) { 2745 if (iov_size(out_sg, out_num) < n->guest_hdr_len) { 2746 virtio_error(vdev, "virtio-net header is invalid"); 2747 goto detach; 2748 } 2749 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2750 out_sg, out_num, 2751 0, n->host_hdr_len); 2752 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2753 out_sg, out_num, 2754 n->guest_hdr_len, -1); 2755 out_num = sg_num; 2756 out_sg = sg; 2757 2758 if (out_num < 1) { 2759 virtio_error(vdev, "virtio-net nothing to send"); 2760 goto detach; 2761 } 2762 } 2763 2764 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2765 out_sg, out_num, virtio_net_tx_complete); 2766 if (ret == 0) { 2767 virtio_queue_set_notification(q->tx_vq, 0); 2768 q->async_tx.elem = elem; 2769 return -EBUSY; 2770 } 2771 2772 drop: 2773 virtqueue_push(q->tx_vq, elem, 0); 2774 virtio_notify(vdev, q->tx_vq); 2775 g_free(elem); 2776 2777 if (++num_packets >= n->tx_burst) { 2778 break; 2779 } 2780 } 2781 return num_packets; 2782 2783 detach: 2784 virtqueue_detach_element(q->tx_vq, elem, 0); 2785 g_free(elem); 2786 return -EINVAL; 2787 } 2788 2789 static void virtio_net_tx_timer(void *opaque); 2790 2791 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2792 { 2793 VirtIONet *n = VIRTIO_NET(vdev); 2794 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2795 2796 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2797 virtio_net_drop_tx_queue_data(vdev, vq); 2798 return; 2799 } 2800 2801 /* This happens when device was stopped but VCPU wasn't. */ 2802 if (!vdev->vm_running) { 2803 q->tx_waiting = 1; 2804 return; 2805 } 2806 2807 if (q->tx_waiting) { 2808 /* We already have queued packets, immediately flush */ 2809 timer_del(q->tx_timer); 2810 virtio_net_tx_timer(q); 2811 } else { 2812 /* re-arm timer to flush it (and more) on next tick */ 2813 timer_mod(q->tx_timer, 2814 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2815 q->tx_waiting = 1; 2816 virtio_queue_set_notification(vq, 0); 2817 } 2818 } 2819 2820 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2821 { 2822 VirtIONet *n = VIRTIO_NET(vdev); 2823 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2824 2825 if (unlikely(n->vhost_started)) { 2826 return; 2827 } 2828 2829 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2830 virtio_net_drop_tx_queue_data(vdev, vq); 2831 return; 2832 } 2833 2834 if (unlikely(q->tx_waiting)) { 2835 return; 2836 } 2837 q->tx_waiting = 1; 2838 /* This happens when device was stopped but VCPU wasn't. */ 2839 if (!vdev->vm_running) { 2840 return; 2841 } 2842 virtio_queue_set_notification(vq, 0); 2843 replay_bh_schedule_event(q->tx_bh); 2844 } 2845 2846 static void virtio_net_tx_timer(void *opaque) 2847 { 2848 VirtIONetQueue *q = opaque; 2849 VirtIONet *n = q->n; 2850 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2851 int ret; 2852 2853 /* This happens when device was stopped but BH wasn't. */ 2854 if (!vdev->vm_running) { 2855 /* Make sure tx waiting is set, so we'll run when restarted. */ 2856 assert(q->tx_waiting); 2857 return; 2858 } 2859 2860 q->tx_waiting = 0; 2861 2862 /* Just in case the driver is not ready on more */ 2863 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2864 return; 2865 } 2866 2867 ret = virtio_net_flush_tx(q); 2868 if (ret == -EBUSY || ret == -EINVAL) { 2869 return; 2870 } 2871 /* 2872 * If we flush a full burst of packets, assume there are 2873 * more coming and immediately rearm 2874 */ 2875 if (ret >= n->tx_burst) { 2876 q->tx_waiting = 1; 2877 timer_mod(q->tx_timer, 2878 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2879 return; 2880 } 2881 /* 2882 * If less than a full burst, re-enable notification and flush 2883 * anything that may have come in while we weren't looking. If 2884 * we find something, assume the guest is still active and rearm 2885 */ 2886 virtio_queue_set_notification(q->tx_vq, 1); 2887 ret = virtio_net_flush_tx(q); 2888 if (ret > 0) { 2889 virtio_queue_set_notification(q->tx_vq, 0); 2890 q->tx_waiting = 1; 2891 timer_mod(q->tx_timer, 2892 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2893 } 2894 } 2895 2896 static void virtio_net_tx_bh(void *opaque) 2897 { 2898 VirtIONetQueue *q = opaque; 2899 VirtIONet *n = q->n; 2900 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2901 int32_t ret; 2902 2903 /* This happens when device was stopped but BH wasn't. */ 2904 if (!vdev->vm_running) { 2905 /* Make sure tx waiting is set, so we'll run when restarted. */ 2906 assert(q->tx_waiting); 2907 return; 2908 } 2909 2910 q->tx_waiting = 0; 2911 2912 /* Just in case the driver is not ready on more */ 2913 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2914 return; 2915 } 2916 2917 ret = virtio_net_flush_tx(q); 2918 if (ret == -EBUSY || ret == -EINVAL) { 2919 return; /* Notification re-enable handled by tx_complete or device 2920 * broken */ 2921 } 2922 2923 /* If we flush a full burst of packets, assume there are 2924 * more coming and immediately reschedule */ 2925 if (ret >= n->tx_burst) { 2926 replay_bh_schedule_event(q->tx_bh); 2927 q->tx_waiting = 1; 2928 return; 2929 } 2930 2931 /* If less than a full burst, re-enable notification and flush 2932 * anything that may have come in while we weren't looking. If 2933 * we find something, assume the guest is still active and reschedule */ 2934 virtio_queue_set_notification(q->tx_vq, 1); 2935 ret = virtio_net_flush_tx(q); 2936 if (ret == -EINVAL) { 2937 return; 2938 } else if (ret > 0) { 2939 virtio_queue_set_notification(q->tx_vq, 0); 2940 replay_bh_schedule_event(q->tx_bh); 2941 q->tx_waiting = 1; 2942 } 2943 } 2944 2945 static void virtio_net_add_queue(VirtIONet *n, int index) 2946 { 2947 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2948 2949 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2950 virtio_net_handle_rx); 2951 2952 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2953 n->vqs[index].tx_vq = 2954 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2955 virtio_net_handle_tx_timer); 2956 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2957 virtio_net_tx_timer, 2958 &n->vqs[index]); 2959 } else { 2960 n->vqs[index].tx_vq = 2961 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2962 virtio_net_handle_tx_bh); 2963 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], 2964 &DEVICE(vdev)->mem_reentrancy_guard); 2965 } 2966 2967 n->vqs[index].tx_waiting = 0; 2968 n->vqs[index].n = n; 2969 } 2970 2971 static void virtio_net_del_queue(VirtIONet *n, int index) 2972 { 2973 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2974 VirtIONetQueue *q = &n->vqs[index]; 2975 NetClientState *nc = qemu_get_subqueue(n->nic, index); 2976 2977 qemu_purge_queued_packets(nc); 2978 2979 virtio_del_queue(vdev, index * 2); 2980 if (q->tx_timer) { 2981 timer_free(q->tx_timer); 2982 q->tx_timer = NULL; 2983 } else { 2984 qemu_bh_delete(q->tx_bh); 2985 q->tx_bh = NULL; 2986 } 2987 q->tx_waiting = 0; 2988 virtio_del_queue(vdev, index * 2 + 1); 2989 } 2990 2991 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 2992 { 2993 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2994 int old_num_queues = virtio_get_num_queues(vdev); 2995 int new_num_queues = new_max_queue_pairs * 2 + 1; 2996 int i; 2997 2998 assert(old_num_queues >= 3); 2999 assert(old_num_queues % 2 == 1); 3000 3001 if (old_num_queues == new_num_queues) { 3002 return; 3003 } 3004 3005 /* 3006 * We always need to remove and add ctrl vq if 3007 * old_num_queues != new_num_queues. Remove ctrl_vq first, 3008 * and then we only enter one of the following two loops. 3009 */ 3010 virtio_del_queue(vdev, old_num_queues - 1); 3011 3012 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 3013 /* new_num_queues < old_num_queues */ 3014 virtio_net_del_queue(n, i / 2); 3015 } 3016 3017 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 3018 /* new_num_queues > old_num_queues */ 3019 virtio_net_add_queue(n, i / 2); 3020 } 3021 3022 /* add ctrl_vq last */ 3023 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3024 } 3025 3026 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 3027 { 3028 int max = multiqueue ? n->max_queue_pairs : 1; 3029 3030 n->multiqueue = multiqueue; 3031 virtio_net_change_num_queue_pairs(n, max); 3032 3033 virtio_net_set_queue_pairs(n); 3034 } 3035 3036 static int virtio_net_post_load_device(void *opaque, int version_id) 3037 { 3038 VirtIONet *n = opaque; 3039 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3040 int i, link_down; 3041 3042 trace_virtio_net_post_load_device(); 3043 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 3044 virtio_vdev_has_feature(vdev, 3045 VIRTIO_F_VERSION_1), 3046 virtio_vdev_has_feature(vdev, 3047 VIRTIO_NET_F_HASH_REPORT)); 3048 3049 /* MAC_TABLE_ENTRIES may be different from the saved image */ 3050 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 3051 n->mac_table.in_use = 0; 3052 } 3053 3054 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 3055 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 3056 } 3057 3058 /* 3059 * curr_guest_offloads will be later overwritten by the 3060 * virtio_set_features_nocheck call done from the virtio_load. 3061 * Here we make sure it is preserved and restored accordingly 3062 * in the virtio_net_post_load_virtio callback. 3063 */ 3064 n->saved_guest_offloads = n->curr_guest_offloads; 3065 3066 virtio_net_set_queue_pairs(n); 3067 3068 /* Find the first multicast entry in the saved MAC filter */ 3069 for (i = 0; i < n->mac_table.in_use; i++) { 3070 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 3071 break; 3072 } 3073 } 3074 n->mac_table.first_multi = i; 3075 3076 /* nc.link_down can't be migrated, so infer link_down according 3077 * to link status bit in n->status */ 3078 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 3079 for (i = 0; i < n->max_queue_pairs; i++) { 3080 qemu_get_subqueue(n->nic, i)->link_down = link_down; 3081 } 3082 3083 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3084 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3085 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3086 QEMU_CLOCK_VIRTUAL, 3087 virtio_net_announce_timer, n); 3088 if (n->announce_timer.round) { 3089 timer_mod(n->announce_timer.tm, 3090 qemu_clock_get_ms(n->announce_timer.type)); 3091 } else { 3092 qemu_announce_timer_del(&n->announce_timer, false); 3093 } 3094 } 3095 3096 virtio_net_commit_rss_config(n); 3097 return 0; 3098 } 3099 3100 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3101 { 3102 VirtIONet *n = VIRTIO_NET(vdev); 3103 /* 3104 * The actual needed state is now in saved_guest_offloads, 3105 * see virtio_net_post_load_device for detail. 3106 * Restore it back and apply the desired offloads. 3107 */ 3108 n->curr_guest_offloads = n->saved_guest_offloads; 3109 if (peer_has_vnet_hdr(n)) { 3110 virtio_net_apply_guest_offloads(n); 3111 } 3112 3113 return 0; 3114 } 3115 3116 /* tx_waiting field of a VirtIONetQueue */ 3117 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3118 .name = "virtio-net-queue-tx_waiting", 3119 .fields = (const VMStateField[]) { 3120 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3121 VMSTATE_END_OF_LIST() 3122 }, 3123 }; 3124 3125 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3126 { 3127 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3128 } 3129 3130 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3131 { 3132 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3133 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3134 } 3135 3136 static bool mac_table_fits(void *opaque, int version_id) 3137 { 3138 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3139 } 3140 3141 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3142 { 3143 return !mac_table_fits(opaque, version_id); 3144 } 3145 3146 /* This temporary type is shared by all the WITH_TMP methods 3147 * although only some fields are used by each. 3148 */ 3149 struct VirtIONetMigTmp { 3150 VirtIONet *parent; 3151 VirtIONetQueue *vqs_1; 3152 uint16_t curr_queue_pairs_1; 3153 uint8_t has_ufo; 3154 uint32_t has_vnet_hdr; 3155 }; 3156 3157 /* The 2nd and subsequent tx_waiting flags are loaded later than 3158 * the 1st entry in the queue_pairs and only if there's more than one 3159 * entry. We use the tmp mechanism to calculate a temporary 3160 * pointer and count and also validate the count. 3161 */ 3162 3163 static int virtio_net_tx_waiting_pre_save(void *opaque) 3164 { 3165 struct VirtIONetMigTmp *tmp = opaque; 3166 3167 tmp->vqs_1 = tmp->parent->vqs + 1; 3168 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3169 if (tmp->parent->curr_queue_pairs == 0) { 3170 tmp->curr_queue_pairs_1 = 0; 3171 } 3172 3173 return 0; 3174 } 3175 3176 static int virtio_net_tx_waiting_pre_load(void *opaque) 3177 { 3178 struct VirtIONetMigTmp *tmp = opaque; 3179 3180 /* Reuse the pointer setup from save */ 3181 virtio_net_tx_waiting_pre_save(opaque); 3182 3183 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3184 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3185 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3186 3187 return -EINVAL; 3188 } 3189 3190 return 0; /* all good */ 3191 } 3192 3193 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3194 .name = "virtio-net-tx_waiting", 3195 .pre_load = virtio_net_tx_waiting_pre_load, 3196 .pre_save = virtio_net_tx_waiting_pre_save, 3197 .fields = (const VMStateField[]) { 3198 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3199 curr_queue_pairs_1, 3200 vmstate_virtio_net_queue_tx_waiting, 3201 struct VirtIONetQueue), 3202 VMSTATE_END_OF_LIST() 3203 }, 3204 }; 3205 3206 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3207 * flag set we need to check that we have it 3208 */ 3209 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3210 { 3211 struct VirtIONetMigTmp *tmp = opaque; 3212 3213 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3214 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3215 return -EINVAL; 3216 } 3217 3218 return 0; 3219 } 3220 3221 static int virtio_net_ufo_pre_save(void *opaque) 3222 { 3223 struct VirtIONetMigTmp *tmp = opaque; 3224 3225 tmp->has_ufo = tmp->parent->has_ufo; 3226 3227 return 0; 3228 } 3229 3230 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3231 .name = "virtio-net-ufo", 3232 .post_load = virtio_net_ufo_post_load, 3233 .pre_save = virtio_net_ufo_pre_save, 3234 .fields = (const VMStateField[]) { 3235 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3236 VMSTATE_END_OF_LIST() 3237 }, 3238 }; 3239 3240 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3241 * flag set we need to check that we have it 3242 */ 3243 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3244 { 3245 struct VirtIONetMigTmp *tmp = opaque; 3246 3247 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3248 error_report("virtio-net: saved image requires vnet_hdr=on"); 3249 return -EINVAL; 3250 } 3251 3252 return 0; 3253 } 3254 3255 static int virtio_net_vnet_pre_save(void *opaque) 3256 { 3257 struct VirtIONetMigTmp *tmp = opaque; 3258 3259 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3260 3261 return 0; 3262 } 3263 3264 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3265 .name = "virtio-net-vnet", 3266 .post_load = virtio_net_vnet_post_load, 3267 .pre_save = virtio_net_vnet_pre_save, 3268 .fields = (const VMStateField[]) { 3269 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3270 VMSTATE_END_OF_LIST() 3271 }, 3272 }; 3273 3274 static bool virtio_net_rss_needed(void *opaque) 3275 { 3276 return VIRTIO_NET(opaque)->rss_data.enabled; 3277 } 3278 3279 static const VMStateDescription vmstate_virtio_net_rss = { 3280 .name = "virtio-net-device/rss", 3281 .version_id = 1, 3282 .minimum_version_id = 1, 3283 .needed = virtio_net_rss_needed, 3284 .fields = (const VMStateField[]) { 3285 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3286 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3287 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3288 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3289 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3290 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3291 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3292 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3293 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3294 rss_data.indirections_len, 0, 3295 vmstate_info_uint16, uint16_t), 3296 VMSTATE_END_OF_LIST() 3297 }, 3298 }; 3299 3300 static const VMStateDescription vmstate_virtio_net_device = { 3301 .name = "virtio-net-device", 3302 .version_id = VIRTIO_NET_VM_VERSION, 3303 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3304 .post_load = virtio_net_post_load_device, 3305 .fields = (const VMStateField[]) { 3306 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3307 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3308 vmstate_virtio_net_queue_tx_waiting, 3309 VirtIONetQueue), 3310 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3311 VMSTATE_UINT16(status, VirtIONet), 3312 VMSTATE_UINT8(promisc, VirtIONet), 3313 VMSTATE_UINT8(allmulti, VirtIONet), 3314 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3315 3316 /* Guarded pair: If it fits we load it, else we throw it away 3317 * - can happen if source has a larger MAC table.; post-load 3318 * sets flags in this case. 3319 */ 3320 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3321 0, mac_table_fits, mac_table.in_use, 3322 ETH_ALEN), 3323 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3324 mac_table.in_use, ETH_ALEN), 3325 3326 /* Note: This is an array of uint32's that's always been saved as a 3327 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3328 * but based on the uint. 3329 */ 3330 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3331 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3332 vmstate_virtio_net_has_vnet), 3333 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3334 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3335 VMSTATE_UINT8(alluni, VirtIONet), 3336 VMSTATE_UINT8(nomulti, VirtIONet), 3337 VMSTATE_UINT8(nouni, VirtIONet), 3338 VMSTATE_UINT8(nobcast, VirtIONet), 3339 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3340 vmstate_virtio_net_has_ufo), 3341 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3342 vmstate_info_uint16_equal, uint16_t), 3343 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3344 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3345 vmstate_virtio_net_tx_waiting), 3346 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3347 has_ctrl_guest_offloads), 3348 VMSTATE_END_OF_LIST() 3349 }, 3350 .subsections = (const VMStateDescription * const []) { 3351 &vmstate_virtio_net_rss, 3352 NULL 3353 } 3354 }; 3355 3356 static NetClientInfo net_virtio_info = { 3357 .type = NET_CLIENT_DRIVER_NIC, 3358 .size = sizeof(NICState), 3359 .can_receive = virtio_net_can_receive, 3360 .receive = virtio_net_receive, 3361 .link_status_changed = virtio_net_set_link_status, 3362 .query_rx_filter = virtio_net_query_rxfilter, 3363 .announce = virtio_net_announce, 3364 }; 3365 3366 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3367 { 3368 VirtIONet *n = VIRTIO_NET(vdev); 3369 NetClientState *nc; 3370 assert(n->vhost_started); 3371 if (!n->multiqueue && idx == 2) { 3372 /* Must guard against invalid features and bogus queue index 3373 * from being set by malicious guest, or penetrated through 3374 * buggy migration stream. 3375 */ 3376 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3377 qemu_log_mask(LOG_GUEST_ERROR, 3378 "%s: bogus vq index ignored\n", __func__); 3379 return false; 3380 } 3381 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3382 } else { 3383 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3384 } 3385 /* 3386 * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3387 * as the macro of configure interrupt's IDX, If this driver does not 3388 * support, the function will return false 3389 */ 3390 3391 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3392 return vhost_net_config_pending(get_vhost_net(nc->peer)); 3393 } 3394 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3395 } 3396 3397 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3398 bool mask) 3399 { 3400 VirtIONet *n = VIRTIO_NET(vdev); 3401 NetClientState *nc; 3402 assert(n->vhost_started); 3403 if (!n->multiqueue && idx == 2) { 3404 /* Must guard against invalid features and bogus queue index 3405 * from being set by malicious guest, or penetrated through 3406 * buggy migration stream. 3407 */ 3408 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3409 qemu_log_mask(LOG_GUEST_ERROR, 3410 "%s: bogus vq index ignored\n", __func__); 3411 return; 3412 } 3413 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3414 } else { 3415 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3416 } 3417 /* 3418 *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3419 * as the macro of configure interrupt's IDX, If this driver does not 3420 * support, the function will return 3421 */ 3422 3423 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3424 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); 3425 return; 3426 } 3427 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); 3428 } 3429 3430 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3431 { 3432 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3433 3434 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3435 } 3436 3437 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3438 const char *type) 3439 { 3440 /* 3441 * The name can be NULL, the netclient name will be type.x. 3442 */ 3443 assert(type != NULL); 3444 3445 g_free(n->netclient_name); 3446 g_free(n->netclient_type); 3447 n->netclient_name = g_strdup(name); 3448 n->netclient_type = g_strdup(type); 3449 } 3450 3451 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3452 { 3453 HotplugHandler *hotplug_ctrl; 3454 PCIDevice *pci_dev; 3455 Error *err = NULL; 3456 3457 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3458 if (hotplug_ctrl) { 3459 pci_dev = PCI_DEVICE(dev); 3460 pci_dev->partially_hotplugged = true; 3461 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3462 if (err) { 3463 error_report_err(err); 3464 return false; 3465 } 3466 } else { 3467 return false; 3468 } 3469 return true; 3470 } 3471 3472 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3473 Error **errp) 3474 { 3475 Error *err = NULL; 3476 HotplugHandler *hotplug_ctrl; 3477 PCIDevice *pdev = PCI_DEVICE(dev); 3478 BusState *primary_bus; 3479 3480 if (!pdev->partially_hotplugged) { 3481 return true; 3482 } 3483 primary_bus = dev->parent_bus; 3484 if (!primary_bus) { 3485 error_setg(errp, "virtio_net: couldn't find primary bus"); 3486 return false; 3487 } 3488 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3489 qatomic_set(&n->failover_primary_hidden, false); 3490 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3491 if (hotplug_ctrl) { 3492 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3493 if (err) { 3494 goto out; 3495 } 3496 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3497 } 3498 pdev->partially_hotplugged = false; 3499 3500 out: 3501 error_propagate(errp, err); 3502 return !err; 3503 } 3504 3505 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e) 3506 { 3507 bool should_be_hidden; 3508 Error *err = NULL; 3509 DeviceState *dev = failover_find_primary_device(n); 3510 3511 if (!dev) { 3512 return; 3513 } 3514 3515 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3516 3517 if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) { 3518 if (failover_unplug_primary(n, dev)) { 3519 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3520 qapi_event_send_unplug_primary(dev->id); 3521 qatomic_set(&n->failover_primary_hidden, true); 3522 } else { 3523 warn_report("couldn't unplug primary device"); 3524 } 3525 } else if (e->type == MIG_EVENT_PRECOPY_FAILED) { 3526 /* We already unplugged the device let's plug it back */ 3527 if (!failover_replug_primary(n, dev, &err)) { 3528 if (err) { 3529 error_report_err(err); 3530 } 3531 } 3532 } 3533 } 3534 3535 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier, 3536 MigrationEvent *e, Error **errp) 3537 { 3538 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3539 virtio_net_handle_migration_primary(n, e); 3540 return 0; 3541 } 3542 3543 static bool failover_hide_primary_device(DeviceListener *listener, 3544 const QDict *device_opts, 3545 bool from_json, 3546 Error **errp) 3547 { 3548 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3549 const char *standby_id; 3550 3551 if (!device_opts) { 3552 return false; 3553 } 3554 3555 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3556 return false; 3557 } 3558 3559 if (!qdict_haskey(device_opts, "id")) { 3560 error_setg(errp, "Device with failover_pair_id needs to have id"); 3561 return false; 3562 } 3563 3564 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3565 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3566 return false; 3567 } 3568 3569 /* 3570 * The hide helper can be called several times for a given device. 3571 * Check there is only one primary for a virtio-net device but 3572 * don't duplicate the qdict several times if it's called for the same 3573 * device. 3574 */ 3575 if (n->primary_opts) { 3576 const char *old, *new; 3577 /* devices with failover_pair_id always have an id */ 3578 old = qdict_get_str(n->primary_opts, "id"); 3579 new = qdict_get_str(device_opts, "id"); 3580 if (strcmp(old, new) != 0) { 3581 error_setg(errp, "Cannot attach more than one primary device to " 3582 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3583 return false; 3584 } 3585 } else { 3586 n->primary_opts = qdict_clone_shallow(device_opts); 3587 n->primary_opts_from_json = from_json; 3588 } 3589 3590 /* failover_primary_hidden is set during feature negotiation */ 3591 return qatomic_read(&n->failover_primary_hidden); 3592 } 3593 3594 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3595 { 3596 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3597 VirtIONet *n = VIRTIO_NET(dev); 3598 NetClientState *nc; 3599 int i; 3600 3601 if (n->net_conf.mtu) { 3602 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3603 } 3604 3605 if (n->net_conf.duplex_str) { 3606 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3607 n->net_conf.duplex = DUPLEX_HALF; 3608 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3609 n->net_conf.duplex = DUPLEX_FULL; 3610 } else { 3611 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3612 return; 3613 } 3614 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3615 } else { 3616 n->net_conf.duplex = DUPLEX_UNKNOWN; 3617 } 3618 3619 if (n->net_conf.speed < SPEED_UNKNOWN) { 3620 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3621 return; 3622 } 3623 if (n->net_conf.speed >= 0) { 3624 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3625 } 3626 3627 if (n->failover) { 3628 n->primary_listener.hide_device = failover_hide_primary_device; 3629 qatomic_set(&n->failover_primary_hidden, true); 3630 device_listener_register(&n->primary_listener); 3631 migration_add_notifier(&n->migration_state, 3632 virtio_net_migration_state_notifier); 3633 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3634 } 3635 3636 virtio_net_set_config_size(n, n->host_features); 3637 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3638 3639 /* 3640 * We set a lower limit on RX queue size to what it always was. 3641 * Guests that want a smaller ring can always resize it without 3642 * help from us (using virtio 1 and up). 3643 */ 3644 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3645 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3646 !is_power_of_2(n->net_conf.rx_queue_size)) { 3647 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3648 "must be a power of 2 between %d and %d.", 3649 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3650 VIRTQUEUE_MAX_SIZE); 3651 virtio_cleanup(vdev); 3652 return; 3653 } 3654 3655 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3656 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || 3657 !is_power_of_2(n->net_conf.tx_queue_size)) { 3658 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3659 "must be a power of 2 between %d and %d", 3660 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3661 virtio_net_max_tx_queue_size(n)); 3662 virtio_cleanup(vdev); 3663 return; 3664 } 3665 3666 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3667 3668 /* 3669 * Figure out the datapath queue pairs since the backend could 3670 * provide control queue via peers as well. 3671 */ 3672 if (n->nic_conf.peers.queues) { 3673 for (i = 0; i < n->max_ncs; i++) { 3674 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3675 ++n->max_queue_pairs; 3676 } 3677 } 3678 } 3679 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3680 3681 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3682 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3683 "must be a positive integer less than %d.", 3684 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3685 virtio_cleanup(vdev); 3686 return; 3687 } 3688 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3689 n->curr_queue_pairs = 1; 3690 n->tx_timeout = n->net_conf.txtimer; 3691 3692 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3693 && strcmp(n->net_conf.tx, "bh")) { 3694 warn_report("virtio-net: " 3695 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3696 n->net_conf.tx); 3697 error_printf("Defaulting to \"bh\""); 3698 } 3699 3700 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3701 n->net_conf.tx_queue_size); 3702 3703 virtio_net_add_queue(n, 0); 3704 3705 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3706 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3707 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3708 n->status = VIRTIO_NET_S_LINK_UP; 3709 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3710 QEMU_CLOCK_VIRTUAL, 3711 virtio_net_announce_timer, n); 3712 n->announce_timer.round = 0; 3713 3714 if (n->netclient_type) { 3715 /* 3716 * Happen when virtio_net_set_netclient_name has been called. 3717 */ 3718 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3719 n->netclient_type, n->netclient_name, 3720 &dev->mem_reentrancy_guard, n); 3721 } else { 3722 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3723 object_get_typename(OBJECT(dev)), dev->id, 3724 &dev->mem_reentrancy_guard, n); 3725 } 3726 3727 for (i = 0; i < n->max_queue_pairs; i++) { 3728 n->nic->ncs[i].do_not_pad = true; 3729 } 3730 3731 peer_test_vnet_hdr(n); 3732 if (peer_has_vnet_hdr(n)) { 3733 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3734 } else { 3735 n->host_hdr_len = 0; 3736 } 3737 3738 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3739 3740 n->vqs[0].tx_waiting = 0; 3741 n->tx_burst = n->net_conf.txburst; 3742 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3743 n->promisc = 1; /* for compatibility */ 3744 3745 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3746 3747 n->vlans = g_malloc0(MAX_VLAN >> 3); 3748 3749 nc = qemu_get_queue(n->nic); 3750 nc->rxfilter_notify_enabled = 1; 3751 3752 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3753 struct virtio_net_config netcfg = {}; 3754 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3755 vhost_net_set_config(get_vhost_net(nc->peer), 3756 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); 3757 } 3758 QTAILQ_INIT(&n->rsc_chains); 3759 n->qdev = dev; 3760 3761 net_rx_pkt_init(&n->rx_pkt); 3762 3763 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3764 virtio_net_load_ebpf(n); 3765 } 3766 } 3767 3768 static void virtio_net_device_unrealize(DeviceState *dev) 3769 { 3770 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3771 VirtIONet *n = VIRTIO_NET(dev); 3772 int i, max_queue_pairs; 3773 3774 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3775 virtio_net_unload_ebpf(n); 3776 } 3777 3778 /* This will stop vhost backend if appropriate. */ 3779 virtio_net_set_status(vdev, 0); 3780 3781 g_free(n->netclient_name); 3782 n->netclient_name = NULL; 3783 g_free(n->netclient_type); 3784 n->netclient_type = NULL; 3785 3786 g_free(n->mac_table.macs); 3787 g_free(n->vlans); 3788 3789 if (n->failover) { 3790 qobject_unref(n->primary_opts); 3791 device_listener_unregister(&n->primary_listener); 3792 migration_remove_notifier(&n->migration_state); 3793 } else { 3794 assert(n->primary_opts == NULL); 3795 } 3796 3797 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3798 for (i = 0; i < max_queue_pairs; i++) { 3799 virtio_net_del_queue(n, i); 3800 } 3801 /* delete also control vq */ 3802 virtio_del_queue(vdev, max_queue_pairs * 2); 3803 qemu_announce_timer_del(&n->announce_timer, false); 3804 g_free(n->vqs); 3805 qemu_del_nic(n->nic); 3806 virtio_net_rsc_cleanup(n); 3807 g_free(n->rss_data.indirections_table); 3808 net_rx_pkt_uninit(n->rx_pkt); 3809 virtio_cleanup(vdev); 3810 } 3811 3812 static void virtio_net_reset(VirtIODevice *vdev) 3813 { 3814 VirtIONet *n = VIRTIO_NET(vdev); 3815 int i; 3816 3817 /* Reset back to compatibility mode */ 3818 n->promisc = 1; 3819 n->allmulti = 0; 3820 n->alluni = 0; 3821 n->nomulti = 0; 3822 n->nouni = 0; 3823 n->nobcast = 0; 3824 /* multiqueue is disabled by default */ 3825 n->curr_queue_pairs = 1; 3826 timer_del(n->announce_timer.tm); 3827 n->announce_timer.round = 0; 3828 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 3829 3830 /* Flush any MAC and VLAN filter table state */ 3831 n->mac_table.in_use = 0; 3832 n->mac_table.first_multi = 0; 3833 n->mac_table.multi_overflow = 0; 3834 n->mac_table.uni_overflow = 0; 3835 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 3836 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 3837 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 3838 memset(n->vlans, 0, MAX_VLAN >> 3); 3839 3840 /* Flush any async TX */ 3841 for (i = 0; i < n->max_queue_pairs; i++) { 3842 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 3843 } 3844 3845 virtio_net_disable_rss(n); 3846 } 3847 3848 static void virtio_net_instance_init(Object *obj) 3849 { 3850 VirtIONet *n = VIRTIO_NET(obj); 3851 3852 /* 3853 * The default config_size is sizeof(struct virtio_net_config). 3854 * Can be overridden with virtio_net_set_config_size. 3855 */ 3856 n->config_size = sizeof(struct virtio_net_config); 3857 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3858 "bootindex", "/ethernet-phy@0", 3859 DEVICE(n)); 3860 3861 ebpf_rss_init(&n->ebpf_rss); 3862 } 3863 3864 static int virtio_net_pre_save(void *opaque) 3865 { 3866 VirtIONet *n = opaque; 3867 3868 /* At this point, backend must be stopped, otherwise 3869 * it might keep writing to memory. */ 3870 assert(!n->vhost_started); 3871 3872 return 0; 3873 } 3874 3875 static bool primary_unplug_pending(void *opaque) 3876 { 3877 DeviceState *dev = opaque; 3878 DeviceState *primary; 3879 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3880 VirtIONet *n = VIRTIO_NET(vdev); 3881 3882 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3883 return false; 3884 } 3885 primary = failover_find_primary_device(n); 3886 return primary ? primary->pending_deleted_event : false; 3887 } 3888 3889 static bool dev_unplug_pending(void *opaque) 3890 { 3891 DeviceState *dev = opaque; 3892 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3893 3894 return vdc->primary_unplug_pending(dev); 3895 } 3896 3897 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3898 { 3899 VirtIONet *n = VIRTIO_NET(vdev); 3900 NetClientState *nc; 3901 struct vhost_net *net; 3902 3903 if (!n->nic) { 3904 return NULL; 3905 } 3906 3907 nc = qemu_get_queue(n->nic); 3908 if (!nc) { 3909 return NULL; 3910 } 3911 3912 net = get_vhost_net(nc->peer); 3913 if (!net) { 3914 return NULL; 3915 } 3916 3917 return &net->dev; 3918 } 3919 3920 static const VMStateDescription vmstate_virtio_net = { 3921 .name = "virtio-net", 3922 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3923 .version_id = VIRTIO_NET_VM_VERSION, 3924 .fields = (const VMStateField[]) { 3925 VMSTATE_VIRTIO_DEVICE, 3926 VMSTATE_END_OF_LIST() 3927 }, 3928 .pre_save = virtio_net_pre_save, 3929 .dev_unplug_pending = dev_unplug_pending, 3930 }; 3931 3932 static Property virtio_net_properties[] = { 3933 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3934 VIRTIO_NET_F_CSUM, true), 3935 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3936 VIRTIO_NET_F_GUEST_CSUM, true), 3937 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3938 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3939 VIRTIO_NET_F_GUEST_TSO4, true), 3940 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3941 VIRTIO_NET_F_GUEST_TSO6, true), 3942 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3943 VIRTIO_NET_F_GUEST_ECN, true), 3944 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3945 VIRTIO_NET_F_GUEST_UFO, true), 3946 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3947 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3948 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3949 VIRTIO_NET_F_HOST_TSO4, true), 3950 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3951 VIRTIO_NET_F_HOST_TSO6, true), 3952 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3953 VIRTIO_NET_F_HOST_ECN, true), 3954 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3955 VIRTIO_NET_F_HOST_UFO, true), 3956 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3957 VIRTIO_NET_F_MRG_RXBUF, true), 3958 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3959 VIRTIO_NET_F_STATUS, true), 3960 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3961 VIRTIO_NET_F_CTRL_VQ, true), 3962 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3963 VIRTIO_NET_F_CTRL_RX, true), 3964 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3965 VIRTIO_NET_F_CTRL_VLAN, true), 3966 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3967 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3968 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3969 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3970 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3971 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3972 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3973 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3974 VIRTIO_NET_F_RSS, false), 3975 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3976 VIRTIO_NET_F_HASH_REPORT, false), 3977 DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds, 3978 ebpf_rss_fds, qdev_prop_string, char*), 3979 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3980 VIRTIO_NET_F_RSC_EXT, false), 3981 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3982 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3983 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3984 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3985 TX_TIMER_INTERVAL), 3986 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3987 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3988 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3989 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3990 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3991 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3992 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3993 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3994 true), 3995 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3996 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3997 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3998 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features, 3999 VIRTIO_NET_F_GUEST_USO4, true), 4000 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features, 4001 VIRTIO_NET_F_GUEST_USO6, true), 4002 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, 4003 VIRTIO_NET_F_HOST_USO, true), 4004 DEFINE_PROP_END_OF_LIST(), 4005 }; 4006 4007 static void virtio_net_class_init(ObjectClass *klass, void *data) 4008 { 4009 DeviceClass *dc = DEVICE_CLASS(klass); 4010 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 4011 4012 device_class_set_props(dc, virtio_net_properties); 4013 dc->vmsd = &vmstate_virtio_net; 4014 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 4015 vdc->realize = virtio_net_device_realize; 4016 vdc->unrealize = virtio_net_device_unrealize; 4017 vdc->get_config = virtio_net_get_config; 4018 vdc->set_config = virtio_net_set_config; 4019 vdc->get_features = virtio_net_get_features; 4020 vdc->set_features = virtio_net_set_features; 4021 vdc->bad_features = virtio_net_bad_features; 4022 vdc->reset = virtio_net_reset; 4023 vdc->queue_reset = virtio_net_queue_reset; 4024 vdc->queue_enable = virtio_net_queue_enable; 4025 vdc->set_status = virtio_net_set_status; 4026 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 4027 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 4028 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 4029 vdc->post_load = virtio_net_post_load_virtio; 4030 vdc->vmsd = &vmstate_virtio_net_device; 4031 vdc->primary_unplug_pending = primary_unplug_pending; 4032 vdc->get_vhost = virtio_net_get_vhost; 4033 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb; 4034 } 4035 4036 static const TypeInfo virtio_net_info = { 4037 .name = TYPE_VIRTIO_NET, 4038 .parent = TYPE_VIRTIO_DEVICE, 4039 .instance_size = sizeof(VirtIONet), 4040 .instance_init = virtio_net_instance_init, 4041 .class_init = virtio_net_class_init, 4042 }; 4043 4044 static void virtio_register_types(void) 4045 { 4046 type_register_static(&virtio_net_info); 4047 } 4048 4049 type_init(virtio_register_types) 4050