1 /* 2 * Virtio Network Device 3 * 4 * Copyright IBM, Corp. 2007 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include "qemu/atomic.h" 16 #include "qemu/iov.h" 17 #include "qemu/log.h" 18 #include "qemu/main-loop.h" 19 #include "qemu/module.h" 20 #include "hw/virtio/virtio.h" 21 #include "net/net.h" 22 #include "net/checksum.h" 23 #include "net/tap.h" 24 #include "qemu/error-report.h" 25 #include "qemu/timer.h" 26 #include "qemu/option.h" 27 #include "qemu/option_int.h" 28 #include "qemu/config-file.h" 29 #include "qapi/qmp/qdict.h" 30 #include "hw/virtio/virtio-net.h" 31 #include "net/vhost_net.h" 32 #include "net/announce.h" 33 #include "hw/virtio/virtio-bus.h" 34 #include "qapi/error.h" 35 #include "qapi/qapi-events-net.h" 36 #include "hw/qdev-properties.h" 37 #include "qapi/qapi-types-migration.h" 38 #include "qapi/qapi-events-migration.h" 39 #include "hw/virtio/virtio-access.h" 40 #include "migration/misc.h" 41 #include "standard-headers/linux/ethtool.h" 42 #include "sysemu/sysemu.h" 43 #include "trace.h" 44 #include "monitor/qdev.h" 45 #include "monitor/monitor.h" 46 #include "hw/pci/pci_device.h" 47 #include "net_rx_pkt.h" 48 #include "hw/virtio/vhost.h" 49 #include "sysemu/qtest.h" 50 51 #define VIRTIO_NET_VM_VERSION 11 52 53 /* previously fixed value */ 54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 56 57 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ 58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 60 61 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ 62 63 #define VIRTIO_NET_TCP_FLAG 0x3F 64 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 65 66 /* IPv4 max payload, 16 bits in the header */ 67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) 68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 69 70 /* header length value in ip header without option */ 71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5 72 73 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ 74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD 75 76 /* Purge coalesced packets timer interval, This value affects the performance 77 a lot, and should be tuned carefully, '300000'(300us) is the recommended 78 value to pass the WHQL test, '50000' can gain 2x netperf throughput with 79 tso/gso/gro 'off'. */ 80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 81 82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ 83 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ 84 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ 85 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ 86 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ 87 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ 88 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ 89 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ 90 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) 91 92 static const VirtIOFeature feature_sizes[] = { 93 {.flags = 1ULL << VIRTIO_NET_F_MAC, 94 .end = endof(struct virtio_net_config, mac)}, 95 {.flags = 1ULL << VIRTIO_NET_F_STATUS, 96 .end = endof(struct virtio_net_config, status)}, 97 {.flags = 1ULL << VIRTIO_NET_F_MQ, 98 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 99 {.flags = 1ULL << VIRTIO_NET_F_MTU, 100 .end = endof(struct virtio_net_config, mtu)}, 101 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 102 .end = endof(struct virtio_net_config, duplex)}, 103 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), 104 .end = endof(struct virtio_net_config, supported_hash_types)}, 105 {} 106 }; 107 108 static const VirtIOConfigSizeParams cfg_size_params = { 109 .min_size = endof(struct virtio_net_config, mac), 110 .max_size = sizeof(struct virtio_net_config), 111 .feature_sizes = feature_sizes 112 }; 113 114 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) 115 { 116 VirtIONet *n = qemu_get_nic_opaque(nc); 117 118 return &n->vqs[nc->queue_index]; 119 } 120 121 static int vq2q(int queue_index) 122 { 123 return queue_index / 2; 124 } 125 126 static void flush_or_purge_queued_packets(NetClientState *nc) 127 { 128 if (!nc->peer) { 129 return; 130 } 131 132 qemu_flush_or_purge_queued_packets(nc->peer, true); 133 assert(!virtio_net_get_subqueue(nc)->async_tx.elem); 134 } 135 136 /* TODO 137 * - we could suppress RX interrupt if we were so inclined. 138 */ 139 140 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) 141 { 142 VirtIONet *n = VIRTIO_NET(vdev); 143 struct virtio_net_config netcfg; 144 NetClientState *nc = qemu_get_queue(n->nic); 145 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; 146 147 int ret = 0; 148 memset(&netcfg, 0 , sizeof(struct virtio_net_config)); 149 virtio_stw_p(vdev, &netcfg.status, n->status); 150 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); 151 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 152 memcpy(netcfg.mac, n->mac, ETH_ALEN); 153 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 154 netcfg.duplex = n->net_conf.duplex; 155 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; 156 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, 157 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 158 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); 159 virtio_stl_p(vdev, &netcfg.supported_hash_types, 160 VIRTIO_NET_RSS_SUPPORTED_HASHES); 161 memcpy(config, &netcfg, n->config_size); 162 163 /* 164 * Is this VDPA? No peer means not VDPA: there's no way to 165 * disconnect/reconnect a VDPA peer. 166 */ 167 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 168 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 169 n->config_size); 170 if (ret == -1) { 171 return; 172 } 173 174 /* 175 * Some NIC/kernel combinations present 0 as the mac address. As that 176 * is not a legal address, try to proceed with the address from the 177 * QEMU command line in the hope that the address has been configured 178 * correctly elsewhere - just not reported by the device. 179 */ 180 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { 181 info_report("Zero hardware mac address detected. Ignoring."); 182 memcpy(netcfg.mac, n->mac, ETH_ALEN); 183 } 184 185 netcfg.status |= virtio_tswap16(vdev, 186 n->status & VIRTIO_NET_S_ANNOUNCE); 187 memcpy(config, &netcfg, n->config_size); 188 } 189 } 190 191 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) 192 { 193 VirtIONet *n = VIRTIO_NET(vdev); 194 struct virtio_net_config netcfg = {}; 195 NetClientState *nc = qemu_get_queue(n->nic); 196 197 memcpy(&netcfg, config, n->config_size); 198 199 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) && 200 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && 201 memcmp(netcfg.mac, n->mac, ETH_ALEN)) { 202 memcpy(n->mac, netcfg.mac, ETH_ALEN); 203 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 204 } 205 206 /* 207 * Is this VDPA? No peer means not VDPA: there's no way to 208 * disconnect/reconnect a VDPA peer. 209 */ 210 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 211 vhost_net_set_config(get_vhost_net(nc->peer), 212 (uint8_t *)&netcfg, 0, n->config_size, 213 VHOST_SET_CONFIG_TYPE_FRONTEND); 214 } 215 } 216 217 static bool virtio_net_started(VirtIONet *n, uint8_t status) 218 { 219 VirtIODevice *vdev = VIRTIO_DEVICE(n); 220 return (status & VIRTIO_CONFIG_S_DRIVER_OK) && 221 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; 222 } 223 224 static void virtio_net_announce_notify(VirtIONet *net) 225 { 226 VirtIODevice *vdev = VIRTIO_DEVICE(net); 227 trace_virtio_net_announce_notify(); 228 229 net->status |= VIRTIO_NET_S_ANNOUNCE; 230 virtio_notify_config(vdev); 231 } 232 233 static void virtio_net_announce_timer(void *opaque) 234 { 235 VirtIONet *n = opaque; 236 trace_virtio_net_announce_timer(n->announce_timer.round); 237 238 n->announce_timer.round--; 239 virtio_net_announce_notify(n); 240 } 241 242 static void virtio_net_announce(NetClientState *nc) 243 { 244 VirtIONet *n = qemu_get_nic_opaque(nc); 245 VirtIODevice *vdev = VIRTIO_DEVICE(n); 246 247 /* 248 * Make sure the virtio migration announcement timer isn't running 249 * If it is, let it trigger announcement so that we do not cause 250 * confusion. 251 */ 252 if (n->announce_timer.round) { 253 return; 254 } 255 256 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 257 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 258 virtio_net_announce_notify(n); 259 } 260 } 261 262 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) 263 { 264 VirtIODevice *vdev = VIRTIO_DEVICE(n); 265 NetClientState *nc = qemu_get_queue(n->nic); 266 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 267 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? 268 n->max_ncs - n->max_queue_pairs : 0; 269 270 if (!get_vhost_net(nc->peer)) { 271 return; 272 } 273 274 if ((virtio_net_started(n, status) && !nc->peer->link_down) == 275 !!n->vhost_started) { 276 return; 277 } 278 if (!n->vhost_started) { 279 int r, i; 280 281 if (n->needs_vnet_hdr_swap) { 282 error_report("backend does not support %s vnet headers; " 283 "falling back on userspace virtio", 284 virtio_is_big_endian(vdev) ? "BE" : "LE"); 285 return; 286 } 287 288 /* Any packets outstanding? Purge them to avoid touching rings 289 * when vhost is running. 290 */ 291 for (i = 0; i < queue_pairs; i++) { 292 NetClientState *qnc = qemu_get_subqueue(n->nic, i); 293 294 /* Purge both directions: TX and RX. */ 295 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc); 296 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer); 297 } 298 299 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) { 300 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu); 301 if (r < 0) { 302 error_report("%uBytes MTU not supported by the backend", 303 n->net_conf.mtu); 304 305 return; 306 } 307 } 308 309 n->vhost_started = 1; 310 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); 311 if (r < 0) { 312 error_report("unable to start vhost net: %d: " 313 "falling back on userspace virtio", -r); 314 n->vhost_started = 0; 315 } 316 } else { 317 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); 318 n->vhost_started = 0; 319 } 320 } 321 322 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, 323 NetClientState *peer, 324 bool enable) 325 { 326 if (virtio_is_big_endian(vdev)) { 327 return qemu_set_vnet_be(peer, enable); 328 } else { 329 return qemu_set_vnet_le(peer, enable); 330 } 331 } 332 333 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, 334 int queue_pairs, bool enable) 335 { 336 int i; 337 338 for (i = 0; i < queue_pairs; i++) { 339 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && 340 enable) { 341 while (--i >= 0) { 342 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); 343 } 344 345 return true; 346 } 347 } 348 349 return false; 350 } 351 352 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) 353 { 354 VirtIODevice *vdev = VIRTIO_DEVICE(n); 355 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 356 357 if (virtio_net_started(n, status)) { 358 /* Before using the device, we tell the network backend about the 359 * endianness to use when parsing vnet headers. If the backend 360 * can't do it, we fallback onto fixing the headers in the core 361 * virtio-net code. 362 */ 363 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, 364 queue_pairs, true); 365 } else if (virtio_net_started(n, vdev->status)) { 366 /* After using the device, we need to reset the network backend to 367 * the default (guest native endianness), otherwise the guest may 368 * lose network connectivity if it is rebooted into a different 369 * endianness. 370 */ 371 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); 372 } 373 } 374 375 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) 376 { 377 unsigned int dropped = virtqueue_drop_all(vq); 378 if (dropped) { 379 virtio_notify(vdev, vq); 380 } 381 } 382 383 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) 384 { 385 VirtIONet *n = VIRTIO_NET(vdev); 386 VirtIONetQueue *q; 387 int i; 388 uint8_t queue_status; 389 390 virtio_net_vnet_endian_status(n, status); 391 virtio_net_vhost_status(n, status); 392 393 for (i = 0; i < n->max_queue_pairs; i++) { 394 NetClientState *ncs = qemu_get_subqueue(n->nic, i); 395 bool queue_started; 396 q = &n->vqs[i]; 397 398 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { 399 queue_status = 0; 400 } else { 401 queue_status = status; 402 } 403 queue_started = 404 virtio_net_started(n, queue_status) && !n->vhost_started; 405 406 if (queue_started) { 407 qemu_flush_queued_packets(ncs); 408 } 409 410 if (!q->tx_waiting) { 411 continue; 412 } 413 414 if (queue_started) { 415 if (q->tx_timer) { 416 timer_mod(q->tx_timer, 417 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 418 } else { 419 qemu_bh_schedule(q->tx_bh); 420 } 421 } else { 422 if (q->tx_timer) { 423 timer_del(q->tx_timer); 424 } else { 425 qemu_bh_cancel(q->tx_bh); 426 } 427 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 && 428 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) && 429 vdev->vm_running) { 430 /* if tx is waiting we are likely have some packets in tx queue 431 * and disabled notification */ 432 q->tx_waiting = 0; 433 virtio_queue_set_notification(q->tx_vq, 1); 434 virtio_net_drop_tx_queue_data(vdev, q->tx_vq); 435 } 436 } 437 } 438 } 439 440 static void virtio_net_set_link_status(NetClientState *nc) 441 { 442 VirtIONet *n = qemu_get_nic_opaque(nc); 443 VirtIODevice *vdev = VIRTIO_DEVICE(n); 444 uint16_t old_status = n->status; 445 446 if (nc->link_down) 447 n->status &= ~VIRTIO_NET_S_LINK_UP; 448 else 449 n->status |= VIRTIO_NET_S_LINK_UP; 450 451 if (n->status != old_status) 452 virtio_notify_config(vdev); 453 454 virtio_net_set_status(vdev, vdev->status); 455 } 456 457 static void rxfilter_notify(NetClientState *nc) 458 { 459 VirtIONet *n = qemu_get_nic_opaque(nc); 460 461 if (nc->rxfilter_notify_enabled) { 462 char *path = object_get_canonical_path(OBJECT(n->qdev)); 463 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path); 464 g_free(path); 465 466 /* disable event notification to avoid events flooding */ 467 nc->rxfilter_notify_enabled = 0; 468 } 469 } 470 471 static intList *get_vlan_table(VirtIONet *n) 472 { 473 intList *list; 474 int i, j; 475 476 list = NULL; 477 for (i = 0; i < MAX_VLAN >> 5; i++) { 478 for (j = 0; n->vlans[i] && j <= 0x1f; j++) { 479 if (n->vlans[i] & (1U << j)) { 480 QAPI_LIST_PREPEND(list, (i << 5) + j); 481 } 482 } 483 } 484 485 return list; 486 } 487 488 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) 489 { 490 VirtIONet *n = qemu_get_nic_opaque(nc); 491 VirtIODevice *vdev = VIRTIO_DEVICE(n); 492 RxFilterInfo *info; 493 strList *str_list; 494 int i; 495 496 info = g_malloc0(sizeof(*info)); 497 info->name = g_strdup(nc->name); 498 info->promiscuous = n->promisc; 499 500 if (n->nouni) { 501 info->unicast = RX_STATE_NONE; 502 } else if (n->alluni) { 503 info->unicast = RX_STATE_ALL; 504 } else { 505 info->unicast = RX_STATE_NORMAL; 506 } 507 508 if (n->nomulti) { 509 info->multicast = RX_STATE_NONE; 510 } else if (n->allmulti) { 511 info->multicast = RX_STATE_ALL; 512 } else { 513 info->multicast = RX_STATE_NORMAL; 514 } 515 516 info->broadcast_allowed = n->nobcast; 517 info->multicast_overflow = n->mac_table.multi_overflow; 518 info->unicast_overflow = n->mac_table.uni_overflow; 519 520 info->main_mac = qemu_mac_strdup_printf(n->mac); 521 522 str_list = NULL; 523 for (i = 0; i < n->mac_table.first_multi; i++) { 524 QAPI_LIST_PREPEND(str_list, 525 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 526 } 527 info->unicast_table = str_list; 528 529 str_list = NULL; 530 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 531 QAPI_LIST_PREPEND(str_list, 532 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN)); 533 } 534 info->multicast_table = str_list; 535 info->vlan_table = get_vlan_table(n); 536 537 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) { 538 info->vlan = RX_STATE_ALL; 539 } else if (!info->vlan_table) { 540 info->vlan = RX_STATE_NONE; 541 } else { 542 info->vlan = RX_STATE_NORMAL; 543 } 544 545 /* enable event notification after query */ 546 nc->rxfilter_notify_enabled = 1; 547 548 return info; 549 } 550 551 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index) 552 { 553 VirtIONet *n = VIRTIO_NET(vdev); 554 NetClientState *nc; 555 556 /* validate queue_index and skip for cvq */ 557 if (queue_index >= n->max_queue_pairs * 2) { 558 return; 559 } 560 561 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 562 563 if (!nc->peer) { 564 return; 565 } 566 567 if (get_vhost_net(nc->peer) && 568 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 569 vhost_net_virtqueue_reset(vdev, nc, queue_index); 570 } 571 572 flush_or_purge_queued_packets(nc); 573 } 574 575 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index) 576 { 577 VirtIONet *n = VIRTIO_NET(vdev); 578 NetClientState *nc; 579 int r; 580 581 /* validate queue_index and skip for cvq */ 582 if (queue_index >= n->max_queue_pairs * 2) { 583 return; 584 } 585 586 nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); 587 588 if (!nc->peer || !vdev->vhost_started) { 589 return; 590 } 591 592 if (get_vhost_net(nc->peer) && 593 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { 594 r = vhost_net_virtqueue_restart(vdev, nc, queue_index); 595 if (r < 0) { 596 error_report("unable to restart vhost net virtqueue: %d, " 597 "when resetting the queue", queue_index); 598 } 599 } 600 } 601 602 static void virtio_net_reset(VirtIODevice *vdev) 603 { 604 VirtIONet *n = VIRTIO_NET(vdev); 605 int i; 606 607 /* Reset back to compatibility mode */ 608 n->promisc = 1; 609 n->allmulti = 0; 610 n->alluni = 0; 611 n->nomulti = 0; 612 n->nouni = 0; 613 n->nobcast = 0; 614 /* multiqueue is disabled by default */ 615 n->curr_queue_pairs = 1; 616 timer_del(n->announce_timer.tm); 617 n->announce_timer.round = 0; 618 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 619 620 /* Flush any MAC and VLAN filter table state */ 621 n->mac_table.in_use = 0; 622 n->mac_table.first_multi = 0; 623 n->mac_table.multi_overflow = 0; 624 n->mac_table.uni_overflow = 0; 625 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); 626 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); 627 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 628 memset(n->vlans, 0, MAX_VLAN >> 3); 629 630 /* Flush any async TX */ 631 for (i = 0; i < n->max_queue_pairs; i++) { 632 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); 633 } 634 } 635 636 static void peer_test_vnet_hdr(VirtIONet *n) 637 { 638 NetClientState *nc = qemu_get_queue(n->nic); 639 if (!nc->peer) { 640 return; 641 } 642 643 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer); 644 } 645 646 static int peer_has_vnet_hdr(VirtIONet *n) 647 { 648 return n->has_vnet_hdr; 649 } 650 651 static int peer_has_ufo(VirtIONet *n) 652 { 653 if (!peer_has_vnet_hdr(n)) 654 return 0; 655 656 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer); 657 658 return n->has_ufo; 659 } 660 661 static int peer_has_uso(VirtIONet *n) 662 { 663 if (!peer_has_vnet_hdr(n)) { 664 return 0; 665 } 666 667 return qemu_has_uso(qemu_get_queue(n->nic)->peer); 668 } 669 670 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, 671 int version_1, int hash_report) 672 { 673 int i; 674 NetClientState *nc; 675 676 n->mergeable_rx_bufs = mergeable_rx_bufs; 677 678 /* 679 * Note: when extending the vnet header, please make sure to 680 * change the vnet header copying logic in virtio_net_flush_tx() 681 * as well. 682 */ 683 if (version_1) { 684 n->guest_hdr_len = hash_report ? 685 sizeof(struct virtio_net_hdr_v1_hash) : 686 sizeof(struct virtio_net_hdr_mrg_rxbuf); 687 n->rss_data.populate_hash = !!hash_report; 688 } else { 689 n->guest_hdr_len = n->mergeable_rx_bufs ? 690 sizeof(struct virtio_net_hdr_mrg_rxbuf) : 691 sizeof(struct virtio_net_hdr); 692 } 693 694 for (i = 0; i < n->max_queue_pairs; i++) { 695 nc = qemu_get_subqueue(n->nic, i); 696 697 if (peer_has_vnet_hdr(n) && 698 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) { 699 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len); 700 n->host_hdr_len = n->guest_hdr_len; 701 } 702 } 703 } 704 705 static int virtio_net_max_tx_queue_size(VirtIONet *n) 706 { 707 NetClientState *peer = n->nic_conf.peers.ncs[0]; 708 709 /* 710 * Backends other than vhost-user or vhost-vdpa don't support max queue 711 * size. 712 */ 713 if (!peer) { 714 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 715 } 716 717 switch(peer->info->type) { 718 case NET_CLIENT_DRIVER_VHOST_USER: 719 case NET_CLIENT_DRIVER_VHOST_VDPA: 720 return VIRTQUEUE_MAX_SIZE; 721 default: 722 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; 723 }; 724 } 725 726 static int peer_attach(VirtIONet *n, int index) 727 { 728 NetClientState *nc = qemu_get_subqueue(n->nic, index); 729 730 if (!nc->peer) { 731 return 0; 732 } 733 734 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 735 vhost_set_vring_enable(nc->peer, 1); 736 } 737 738 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 739 return 0; 740 } 741 742 if (n->max_queue_pairs == 1) { 743 return 0; 744 } 745 746 return tap_enable(nc->peer); 747 } 748 749 static int peer_detach(VirtIONet *n, int index) 750 { 751 NetClientState *nc = qemu_get_subqueue(n->nic, index); 752 753 if (!nc->peer) { 754 return 0; 755 } 756 757 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { 758 vhost_set_vring_enable(nc->peer, 0); 759 } 760 761 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { 762 return 0; 763 } 764 765 return tap_disable(nc->peer); 766 } 767 768 static void virtio_net_set_queue_pairs(VirtIONet *n) 769 { 770 int i; 771 int r; 772 773 if (n->nic->peer_deleted) { 774 return; 775 } 776 777 for (i = 0; i < n->max_queue_pairs; i++) { 778 if (i < n->curr_queue_pairs) { 779 r = peer_attach(n, i); 780 assert(!r); 781 } else { 782 r = peer_detach(n, i); 783 assert(!r); 784 } 785 } 786 } 787 788 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); 789 790 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, 791 Error **errp) 792 { 793 VirtIONet *n = VIRTIO_NET(vdev); 794 NetClientState *nc = qemu_get_queue(n->nic); 795 796 /* Firstly sync all virtio-net possible supported features */ 797 features |= n->host_features; 798 799 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 800 801 if (!peer_has_vnet_hdr(n)) { 802 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); 803 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); 804 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); 805 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); 806 807 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); 808 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); 809 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); 810 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); 811 812 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); 813 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); 814 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); 815 816 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); 817 } 818 819 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { 820 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); 821 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); 822 } 823 824 if (!peer_has_uso(n)) { 825 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); 826 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); 827 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); 828 } 829 830 if (!get_vhost_net(nc->peer)) { 831 return features; 832 } 833 834 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 835 virtio_clear_feature(&features, VIRTIO_NET_F_RSS); 836 } 837 features = vhost_net_get_features(get_vhost_net(nc->peer), features); 838 vdev->backend_features = features; 839 840 if (n->mtu_bypass_backend && 841 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { 842 features |= (1ULL << VIRTIO_NET_F_MTU); 843 } 844 845 /* 846 * Since GUEST_ANNOUNCE is emulated the feature bit could be set without 847 * enabled. This happens in the vDPA case. 848 * 849 * Make sure the feature set is not incoherent, as the driver could refuse 850 * to start. 851 * 852 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, 853 * helping guest to notify the new location with vDPA devices that does not 854 * support it. 855 */ 856 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { 857 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); 858 } 859 860 return features; 861 } 862 863 static uint64_t virtio_net_bad_features(VirtIODevice *vdev) 864 { 865 uint64_t features = 0; 866 867 /* Linux kernel 2.6.25. It understood MAC (as everyone must), 868 * but also these: */ 869 virtio_add_feature(&features, VIRTIO_NET_F_MAC); 870 virtio_add_feature(&features, VIRTIO_NET_F_CSUM); 871 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4); 872 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6); 873 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN); 874 875 return features; 876 } 877 878 static void virtio_net_apply_guest_offloads(VirtIONet *n) 879 { 880 qemu_set_offload(qemu_get_queue(n->nic)->peer, 881 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), 882 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), 883 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), 884 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), 885 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), 886 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), 887 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6))); 888 } 889 890 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) 891 { 892 static const uint64_t guest_offloads_mask = 893 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | 894 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | 895 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | 896 (1ULL << VIRTIO_NET_F_GUEST_ECN) | 897 (1ULL << VIRTIO_NET_F_GUEST_UFO) | 898 (1ULL << VIRTIO_NET_F_GUEST_USO4) | 899 (1ULL << VIRTIO_NET_F_GUEST_USO6); 900 901 return guest_offloads_mask & features; 902 } 903 904 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) 905 { 906 VirtIODevice *vdev = VIRTIO_DEVICE(n); 907 return virtio_net_guest_offloads_by_features(vdev->guest_features); 908 } 909 910 typedef struct { 911 VirtIONet *n; 912 DeviceState *dev; 913 } FailoverDevice; 914 915 /** 916 * Set the failover primary device 917 * 918 * @opaque: FailoverId to setup 919 * @opts: opts for device we are handling 920 * @errp: returns an error if this function fails 921 */ 922 static int failover_set_primary(DeviceState *dev, void *opaque) 923 { 924 FailoverDevice *fdev = opaque; 925 PCIDevice *pci_dev = (PCIDevice *) 926 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); 927 928 if (!pci_dev) { 929 return 0; 930 } 931 932 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { 933 fdev->dev = dev; 934 return 1; 935 } 936 937 return 0; 938 } 939 940 /** 941 * Find the primary device for this failover virtio-net 942 * 943 * @n: VirtIONet device 944 * @errp: returns an error if this function fails 945 */ 946 static DeviceState *failover_find_primary_device(VirtIONet *n) 947 { 948 FailoverDevice fdev = { 949 .n = n, 950 }; 951 952 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, 953 NULL, NULL, &fdev); 954 return fdev.dev; 955 } 956 957 static void failover_add_primary(VirtIONet *n, Error **errp) 958 { 959 Error *err = NULL; 960 DeviceState *dev = failover_find_primary_device(n); 961 962 if (dev) { 963 return; 964 } 965 966 if (!n->primary_opts) { 967 error_setg(errp, "Primary device not found"); 968 error_append_hint(errp, "Virtio-net failover will not work. Make " 969 "sure primary device has parameter" 970 " failover_pair_id=%s\n", n->netclient_name); 971 return; 972 } 973 974 dev = qdev_device_add_from_qdict(n->primary_opts, 975 n->primary_opts_from_json, 976 &err); 977 if (err) { 978 qobject_unref(n->primary_opts); 979 n->primary_opts = NULL; 980 } else { 981 object_unref(OBJECT(dev)); 982 } 983 error_propagate(errp, err); 984 } 985 986 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) 987 { 988 VirtIONet *n = VIRTIO_NET(vdev); 989 Error *err = NULL; 990 int i; 991 992 if (n->mtu_bypass_backend && 993 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { 994 features &= ~(1ULL << VIRTIO_NET_F_MTU); 995 } 996 997 virtio_net_set_multiqueue(n, 998 virtio_has_feature(features, VIRTIO_NET_F_RSS) || 999 virtio_has_feature(features, VIRTIO_NET_F_MQ)); 1000 1001 virtio_net_set_mrg_rx_bufs(n, 1002 virtio_has_feature(features, 1003 VIRTIO_NET_F_MRG_RXBUF), 1004 virtio_has_feature(features, 1005 VIRTIO_F_VERSION_1), 1006 virtio_has_feature(features, 1007 VIRTIO_NET_F_HASH_REPORT)); 1008 1009 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 1010 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); 1011 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && 1012 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); 1013 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); 1014 1015 if (n->has_vnet_hdr) { 1016 n->curr_guest_offloads = 1017 virtio_net_guest_offloads_by_features(features); 1018 virtio_net_apply_guest_offloads(n); 1019 } 1020 1021 for (i = 0; i < n->max_queue_pairs; i++) { 1022 NetClientState *nc = qemu_get_subqueue(n->nic, i); 1023 1024 if (!get_vhost_net(nc->peer)) { 1025 continue; 1026 } 1027 vhost_net_ack_features(get_vhost_net(nc->peer), features); 1028 1029 /* 1030 * keep acked_features in NetVhostUserState up-to-date so it 1031 * can't miss any features configured by guest virtio driver. 1032 */ 1033 vhost_net_save_acked_features(nc->peer); 1034 } 1035 1036 if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { 1037 memset(n->vlans, 0xff, MAX_VLAN >> 3); 1038 } 1039 1040 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { 1041 qapi_event_send_failover_negotiated(n->netclient_name); 1042 qatomic_set(&n->failover_primary_hidden, false); 1043 failover_add_primary(n, &err); 1044 if (err) { 1045 if (!qtest_enabled()) { 1046 warn_report_err(err); 1047 } else { 1048 error_free(err); 1049 } 1050 } 1051 } 1052 } 1053 1054 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, 1055 struct iovec *iov, unsigned int iov_cnt) 1056 { 1057 uint8_t on; 1058 size_t s; 1059 NetClientState *nc = qemu_get_queue(n->nic); 1060 1061 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)); 1062 if (s != sizeof(on)) { 1063 return VIRTIO_NET_ERR; 1064 } 1065 1066 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) { 1067 n->promisc = on; 1068 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) { 1069 n->allmulti = on; 1070 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) { 1071 n->alluni = on; 1072 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) { 1073 n->nomulti = on; 1074 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) { 1075 n->nouni = on; 1076 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) { 1077 n->nobcast = on; 1078 } else { 1079 return VIRTIO_NET_ERR; 1080 } 1081 1082 rxfilter_notify(nc); 1083 1084 return VIRTIO_NET_OK; 1085 } 1086 1087 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, 1088 struct iovec *iov, unsigned int iov_cnt) 1089 { 1090 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1091 uint64_t offloads; 1092 size_t s; 1093 1094 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 1095 return VIRTIO_NET_ERR; 1096 } 1097 1098 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)); 1099 if (s != sizeof(offloads)) { 1100 return VIRTIO_NET_ERR; 1101 } 1102 1103 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { 1104 uint64_t supported_offloads; 1105 1106 offloads = virtio_ldq_p(vdev, &offloads); 1107 1108 if (!n->has_vnet_hdr) { 1109 return VIRTIO_NET_ERR; 1110 } 1111 1112 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1113 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4); 1114 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) && 1115 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6); 1116 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT); 1117 1118 supported_offloads = virtio_net_supported_guest_offloads(n); 1119 if (offloads & ~supported_offloads) { 1120 return VIRTIO_NET_ERR; 1121 } 1122 1123 n->curr_guest_offloads = offloads; 1124 virtio_net_apply_guest_offloads(n); 1125 1126 return VIRTIO_NET_OK; 1127 } else { 1128 return VIRTIO_NET_ERR; 1129 } 1130 } 1131 1132 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, 1133 struct iovec *iov, unsigned int iov_cnt) 1134 { 1135 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1136 struct virtio_net_ctrl_mac mac_data; 1137 size_t s; 1138 NetClientState *nc = qemu_get_queue(n->nic); 1139 1140 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) { 1141 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) { 1142 return VIRTIO_NET_ERR; 1143 } 1144 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac)); 1145 assert(s == sizeof(n->mac)); 1146 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); 1147 rxfilter_notify(nc); 1148 1149 return VIRTIO_NET_OK; 1150 } 1151 1152 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) { 1153 return VIRTIO_NET_ERR; 1154 } 1155 1156 int in_use = 0; 1157 int first_multi = 0; 1158 uint8_t uni_overflow = 0; 1159 uint8_t multi_overflow = 0; 1160 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 1161 1162 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1163 sizeof(mac_data.entries)); 1164 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1165 if (s != sizeof(mac_data.entries)) { 1166 goto error; 1167 } 1168 iov_discard_front(&iov, &iov_cnt, s); 1169 1170 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) { 1171 goto error; 1172 } 1173 1174 if (mac_data.entries <= MAC_TABLE_ENTRIES) { 1175 s = iov_to_buf(iov, iov_cnt, 0, macs, 1176 mac_data.entries * ETH_ALEN); 1177 if (s != mac_data.entries * ETH_ALEN) { 1178 goto error; 1179 } 1180 in_use += mac_data.entries; 1181 } else { 1182 uni_overflow = 1; 1183 } 1184 1185 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN); 1186 1187 first_multi = in_use; 1188 1189 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries, 1190 sizeof(mac_data.entries)); 1191 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries); 1192 if (s != sizeof(mac_data.entries)) { 1193 goto error; 1194 } 1195 1196 iov_discard_front(&iov, &iov_cnt, s); 1197 1198 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) { 1199 goto error; 1200 } 1201 1202 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) { 1203 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], 1204 mac_data.entries * ETH_ALEN); 1205 if (s != mac_data.entries * ETH_ALEN) { 1206 goto error; 1207 } 1208 in_use += mac_data.entries; 1209 } else { 1210 multi_overflow = 1; 1211 } 1212 1213 n->mac_table.in_use = in_use; 1214 n->mac_table.first_multi = first_multi; 1215 n->mac_table.uni_overflow = uni_overflow; 1216 n->mac_table.multi_overflow = multi_overflow; 1217 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN); 1218 g_free(macs); 1219 rxfilter_notify(nc); 1220 1221 return VIRTIO_NET_OK; 1222 1223 error: 1224 g_free(macs); 1225 return VIRTIO_NET_ERR; 1226 } 1227 1228 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd, 1229 struct iovec *iov, unsigned int iov_cnt) 1230 { 1231 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1232 uint16_t vid; 1233 size_t s; 1234 NetClientState *nc = qemu_get_queue(n->nic); 1235 1236 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid)); 1237 vid = virtio_lduw_p(vdev, &vid); 1238 if (s != sizeof(vid)) { 1239 return VIRTIO_NET_ERR; 1240 } 1241 1242 if (vid >= MAX_VLAN) 1243 return VIRTIO_NET_ERR; 1244 1245 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) 1246 n->vlans[vid >> 5] |= (1U << (vid & 0x1f)); 1247 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) 1248 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f)); 1249 else 1250 return VIRTIO_NET_ERR; 1251 1252 rxfilter_notify(nc); 1253 1254 return VIRTIO_NET_OK; 1255 } 1256 1257 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, 1258 struct iovec *iov, unsigned int iov_cnt) 1259 { 1260 trace_virtio_net_handle_announce(n->announce_timer.round); 1261 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK && 1262 n->status & VIRTIO_NET_S_ANNOUNCE) { 1263 n->status &= ~VIRTIO_NET_S_ANNOUNCE; 1264 if (n->announce_timer.round) { 1265 qemu_announce_timer_step(&n->announce_timer); 1266 } 1267 return VIRTIO_NET_OK; 1268 } else { 1269 return VIRTIO_NET_ERR; 1270 } 1271 } 1272 1273 static void virtio_net_detach_epbf_rss(VirtIONet *n); 1274 1275 static void virtio_net_disable_rss(VirtIONet *n) 1276 { 1277 if (n->rss_data.enabled) { 1278 trace_virtio_net_rss_disable(); 1279 } 1280 n->rss_data.enabled = false; 1281 1282 virtio_net_detach_epbf_rss(n); 1283 } 1284 1285 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) 1286 { 1287 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); 1288 if (nc == NULL || nc->info->set_steering_ebpf == NULL) { 1289 return false; 1290 } 1291 1292 return nc->info->set_steering_ebpf(nc, prog_fd); 1293 } 1294 1295 static void rss_data_to_rss_config(struct VirtioNetRssData *data, 1296 struct EBPFRSSConfig *config) 1297 { 1298 config->redirect = data->redirect; 1299 config->populate_hash = data->populate_hash; 1300 config->hash_types = data->hash_types; 1301 config->indirections_len = data->indirections_len; 1302 config->default_queue = data->default_queue; 1303 } 1304 1305 static bool virtio_net_attach_epbf_rss(VirtIONet *n) 1306 { 1307 struct EBPFRSSConfig config = {}; 1308 1309 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { 1310 return false; 1311 } 1312 1313 rss_data_to_rss_config(&n->rss_data, &config); 1314 1315 if (!ebpf_rss_set_all(&n->ebpf_rss, &config, 1316 n->rss_data.indirections_table, n->rss_data.key)) { 1317 return false; 1318 } 1319 1320 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { 1321 return false; 1322 } 1323 1324 return true; 1325 } 1326 1327 static void virtio_net_detach_epbf_rss(VirtIONet *n) 1328 { 1329 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1330 } 1331 1332 static bool virtio_net_load_ebpf_fds(VirtIONet *n) 1333 { 1334 int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1}; 1335 int ret = true; 1336 int i = 0; 1337 1338 if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) { 1339 warn_report("Expected %d file descriptors but got %d", 1340 EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); 1341 return false; 1342 } 1343 1344 for (i = 0; i < n->nr_ebpf_rss_fds; i++) { 1345 fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], 1346 &error_warn); 1347 if (fds[i] < 0) { 1348 ret = false; 1349 goto exit; 1350 } 1351 } 1352 1353 ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]); 1354 1355 exit: 1356 if (!ret) { 1357 for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) { 1358 close(fds[i]); 1359 } 1360 } 1361 1362 return ret; 1363 } 1364 1365 static bool virtio_net_load_ebpf(VirtIONet *n) 1366 { 1367 bool ret = false; 1368 1369 if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { 1370 if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) { 1371 ret = ebpf_rss_load(&n->ebpf_rss); 1372 } 1373 } 1374 1375 return ret; 1376 } 1377 1378 static void virtio_net_unload_ebpf(VirtIONet *n) 1379 { 1380 virtio_net_attach_ebpf_to_backend(n->nic, -1); 1381 ebpf_rss_unload(&n->ebpf_rss); 1382 } 1383 1384 static uint16_t virtio_net_handle_rss(VirtIONet *n, 1385 struct iovec *iov, 1386 unsigned int iov_cnt, 1387 bool do_rss) 1388 { 1389 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1390 struct virtio_net_rss_config cfg; 1391 size_t s, offset = 0, size_get; 1392 uint16_t queue_pairs, i; 1393 struct { 1394 uint16_t us; 1395 uint8_t b; 1396 } QEMU_PACKED temp; 1397 const char *err_msg = ""; 1398 uint32_t err_value = 0; 1399 1400 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { 1401 err_msg = "RSS is not negotiated"; 1402 goto error; 1403 } 1404 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { 1405 err_msg = "Hash report is not negotiated"; 1406 goto error; 1407 } 1408 size_get = offsetof(struct virtio_net_rss_config, indirection_table); 1409 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); 1410 if (s != size_get) { 1411 err_msg = "Short command buffer"; 1412 err_value = (uint32_t)s; 1413 goto error; 1414 } 1415 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); 1416 n->rss_data.indirections_len = 1417 virtio_lduw_p(vdev, &cfg.indirection_table_mask); 1418 n->rss_data.indirections_len++; 1419 if (!do_rss) { 1420 n->rss_data.indirections_len = 1; 1421 } 1422 if (!is_power_of_2(n->rss_data.indirections_len)) { 1423 err_msg = "Invalid size of indirection table"; 1424 err_value = n->rss_data.indirections_len; 1425 goto error; 1426 } 1427 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { 1428 err_msg = "Too large indirection table"; 1429 err_value = n->rss_data.indirections_len; 1430 goto error; 1431 } 1432 n->rss_data.default_queue = do_rss ? 1433 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; 1434 if (n->rss_data.default_queue >= n->max_queue_pairs) { 1435 err_msg = "Invalid default queue"; 1436 err_value = n->rss_data.default_queue; 1437 goto error; 1438 } 1439 offset += size_get; 1440 size_get = sizeof(uint16_t) * n->rss_data.indirections_len; 1441 g_free(n->rss_data.indirections_table); 1442 n->rss_data.indirections_table = g_malloc(size_get); 1443 if (!n->rss_data.indirections_table) { 1444 err_msg = "Can't allocate indirections table"; 1445 err_value = n->rss_data.indirections_len; 1446 goto error; 1447 } 1448 s = iov_to_buf(iov, iov_cnt, offset, 1449 n->rss_data.indirections_table, size_get); 1450 if (s != size_get) { 1451 err_msg = "Short indirection table buffer"; 1452 err_value = (uint32_t)s; 1453 goto error; 1454 } 1455 for (i = 0; i < n->rss_data.indirections_len; ++i) { 1456 uint16_t val = n->rss_data.indirections_table[i]; 1457 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); 1458 } 1459 offset += size_get; 1460 size_get = sizeof(temp); 1461 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); 1462 if (s != size_get) { 1463 err_msg = "Can't get queue_pairs"; 1464 err_value = (uint32_t)s; 1465 goto error; 1466 } 1467 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; 1468 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { 1469 err_msg = "Invalid number of queue_pairs"; 1470 err_value = queue_pairs; 1471 goto error; 1472 } 1473 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { 1474 err_msg = "Invalid key size"; 1475 err_value = temp.b; 1476 goto error; 1477 } 1478 if (!temp.b && n->rss_data.hash_types) { 1479 err_msg = "No key provided"; 1480 err_value = 0; 1481 goto error; 1482 } 1483 if (!temp.b && !n->rss_data.hash_types) { 1484 virtio_net_disable_rss(n); 1485 return queue_pairs; 1486 } 1487 offset += size_get; 1488 size_get = temp.b; 1489 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); 1490 if (s != size_get) { 1491 err_msg = "Can get key buffer"; 1492 err_value = (uint32_t)s; 1493 goto error; 1494 } 1495 n->rss_data.enabled = true; 1496 1497 if (!n->rss_data.populate_hash) { 1498 if (!virtio_net_attach_epbf_rss(n)) { 1499 /* EBPF must be loaded for vhost */ 1500 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 1501 warn_report("Can't load eBPF RSS for vhost"); 1502 goto error; 1503 } 1504 /* fallback to software RSS */ 1505 warn_report("Can't load eBPF RSS - fallback to software RSS"); 1506 n->rss_data.enabled_software_rss = true; 1507 } 1508 } else { 1509 /* use software RSS for hash populating */ 1510 /* and detach eBPF if was loaded before */ 1511 virtio_net_detach_epbf_rss(n); 1512 n->rss_data.enabled_software_rss = true; 1513 } 1514 1515 trace_virtio_net_rss_enable(n->rss_data.hash_types, 1516 n->rss_data.indirections_len, 1517 temp.b); 1518 return queue_pairs; 1519 error: 1520 trace_virtio_net_rss_error(err_msg, err_value); 1521 virtio_net_disable_rss(n); 1522 return 0; 1523 } 1524 1525 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, 1526 struct iovec *iov, unsigned int iov_cnt) 1527 { 1528 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1529 uint16_t queue_pairs; 1530 NetClientState *nc = qemu_get_queue(n->nic); 1531 1532 virtio_net_disable_rss(n); 1533 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { 1534 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); 1535 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR; 1536 } 1537 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { 1538 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); 1539 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { 1540 struct virtio_net_ctrl_mq mq; 1541 size_t s; 1542 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { 1543 return VIRTIO_NET_ERR; 1544 } 1545 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); 1546 if (s != sizeof(mq)) { 1547 return VIRTIO_NET_ERR; 1548 } 1549 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); 1550 1551 } else { 1552 return VIRTIO_NET_ERR; 1553 } 1554 1555 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1556 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || 1557 queue_pairs > n->max_queue_pairs || 1558 !n->multiqueue) { 1559 return VIRTIO_NET_ERR; 1560 } 1561 1562 n->curr_queue_pairs = queue_pairs; 1563 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 1564 /* 1565 * Avoid updating the backend for a vdpa device: We're only interested 1566 * in updating the device model queues. 1567 */ 1568 return VIRTIO_NET_OK; 1569 } 1570 /* stop the backend before changing the number of queue_pairs to avoid handling a 1571 * disabled queue */ 1572 virtio_net_set_status(vdev, vdev->status); 1573 virtio_net_set_queue_pairs(n); 1574 1575 return VIRTIO_NET_OK; 1576 } 1577 1578 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, 1579 const struct iovec *in_sg, unsigned in_num, 1580 const struct iovec *out_sg, 1581 unsigned out_num) 1582 { 1583 VirtIONet *n = VIRTIO_NET(vdev); 1584 struct virtio_net_ctrl_hdr ctrl; 1585 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1586 size_t s; 1587 struct iovec *iov, *iov2; 1588 1589 if (iov_size(in_sg, in_num) < sizeof(status) || 1590 iov_size(out_sg, out_num) < sizeof(ctrl)) { 1591 virtio_error(vdev, "virtio-net ctrl missing headers"); 1592 return 0; 1593 } 1594 1595 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); 1596 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); 1597 iov_discard_front(&iov, &out_num, sizeof(ctrl)); 1598 if (s != sizeof(ctrl)) { 1599 status = VIRTIO_NET_ERR; 1600 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { 1601 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); 1602 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { 1603 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); 1604 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { 1605 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); 1606 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { 1607 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); 1608 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { 1609 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); 1610 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { 1611 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); 1612 } 1613 1614 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); 1615 assert(s == sizeof(status)); 1616 1617 g_free(iov2); 1618 return sizeof(status); 1619 } 1620 1621 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) 1622 { 1623 VirtQueueElement *elem; 1624 1625 for (;;) { 1626 size_t written; 1627 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 1628 if (!elem) { 1629 break; 1630 } 1631 1632 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, 1633 elem->out_sg, elem->out_num); 1634 if (written > 0) { 1635 virtqueue_push(vq, elem, written); 1636 virtio_notify(vdev, vq); 1637 g_free(elem); 1638 } else { 1639 virtqueue_detach_element(vq, elem, 0); 1640 g_free(elem); 1641 break; 1642 } 1643 } 1644 } 1645 1646 /* RX */ 1647 1648 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) 1649 { 1650 VirtIONet *n = VIRTIO_NET(vdev); 1651 int queue_index = vq2q(virtio_get_queue_index(vq)); 1652 1653 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); 1654 } 1655 1656 static bool virtio_net_can_receive(NetClientState *nc) 1657 { 1658 VirtIONet *n = qemu_get_nic_opaque(nc); 1659 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1660 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1661 1662 if (!vdev->vm_running) { 1663 return false; 1664 } 1665 1666 if (nc->queue_index >= n->curr_queue_pairs) { 1667 return false; 1668 } 1669 1670 if (!virtio_queue_ready(q->rx_vq) || 1671 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 1672 return false; 1673 } 1674 1675 return true; 1676 } 1677 1678 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) 1679 { 1680 VirtIONet *n = q->n; 1681 if (virtio_queue_empty(q->rx_vq) || 1682 (n->mergeable_rx_bufs && 1683 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1684 virtio_queue_set_notification(q->rx_vq, 1); 1685 1686 /* To avoid a race condition where the guest has made some buffers 1687 * available after the above check but before notification was 1688 * enabled, check for available buffers again. 1689 */ 1690 if (virtio_queue_empty(q->rx_vq) || 1691 (n->mergeable_rx_bufs && 1692 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { 1693 return 0; 1694 } 1695 } 1696 1697 virtio_queue_set_notification(q->rx_vq, 0); 1698 return 1; 1699 } 1700 1701 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) 1702 { 1703 virtio_tswap16s(vdev, &hdr->hdr_len); 1704 virtio_tswap16s(vdev, &hdr->gso_size); 1705 virtio_tswap16s(vdev, &hdr->csum_start); 1706 virtio_tswap16s(vdev, &hdr->csum_offset); 1707 } 1708 1709 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so 1710 * it never finds out that the packets don't have valid checksums. This 1711 * causes dhclient to get upset. Fedora's carried a patch for ages to 1712 * fix this with Xen but it hasn't appeared in an upstream release of 1713 * dhclient yet. 1714 * 1715 * To avoid breaking existing guests, we catch udp packets and add 1716 * checksums. This is terrible but it's better than hacking the guest 1717 * kernels. 1718 * 1719 * N.B. if we introduce a zero-copy API, this operation is no longer free so 1720 * we should provide a mechanism to disable it to avoid polluting the host 1721 * cache. 1722 */ 1723 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, 1724 uint8_t *buf, size_t size) 1725 { 1726 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ 1727 (size > 27 && size < 1500) && /* normal sized MTU */ 1728 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ 1729 (buf[23] == 17) && /* ip.protocol == UDP */ 1730 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ 1731 net_checksum_calculate(buf, size, CSUM_UDP); 1732 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM; 1733 } 1734 } 1735 1736 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, 1737 const void *buf, size_t size) 1738 { 1739 if (n->has_vnet_hdr) { 1740 /* FIXME this cast is evil */ 1741 void *wbuf = (void *)buf; 1742 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, 1743 size - n->host_hdr_len); 1744 1745 if (n->needs_vnet_hdr_swap) { 1746 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); 1747 } 1748 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); 1749 } else { 1750 struct virtio_net_hdr hdr = { 1751 .flags = 0, 1752 .gso_type = VIRTIO_NET_HDR_GSO_NONE 1753 }; 1754 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr); 1755 } 1756 } 1757 1758 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) 1759 { 1760 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; 1761 static const uint8_t vlan[] = {0x81, 0x00}; 1762 uint8_t *ptr = (uint8_t *)buf; 1763 int i; 1764 1765 if (n->promisc) 1766 return 1; 1767 1768 ptr += n->host_hdr_len; 1769 1770 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) { 1771 int vid = lduw_be_p(ptr + 14) & 0xfff; 1772 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) 1773 return 0; 1774 } 1775 1776 if (ptr[0] & 1) { // multicast 1777 if (!memcmp(ptr, bcast, sizeof(bcast))) { 1778 return !n->nobcast; 1779 } else if (n->nomulti) { 1780 return 0; 1781 } else if (n->allmulti || n->mac_table.multi_overflow) { 1782 return 1; 1783 } 1784 1785 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) { 1786 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1787 return 1; 1788 } 1789 } 1790 } else { // unicast 1791 if (n->nouni) { 1792 return 0; 1793 } else if (n->alluni || n->mac_table.uni_overflow) { 1794 return 1; 1795 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) { 1796 return 1; 1797 } 1798 1799 for (i = 0; i < n->mac_table.first_multi; i++) { 1800 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) { 1801 return 1; 1802 } 1803 } 1804 } 1805 1806 return 0; 1807 } 1808 1809 static uint8_t virtio_net_get_hash_type(bool hasip4, 1810 bool hasip6, 1811 EthL4HdrProto l4hdr_proto, 1812 uint32_t types) 1813 { 1814 if (hasip4) { 1815 switch (l4hdr_proto) { 1816 case ETH_L4_HDR_PROTO_TCP: 1817 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { 1818 return NetPktRssIpV4Tcp; 1819 } 1820 break; 1821 1822 case ETH_L4_HDR_PROTO_UDP: 1823 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { 1824 return NetPktRssIpV4Udp; 1825 } 1826 break; 1827 1828 default: 1829 break; 1830 } 1831 1832 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { 1833 return NetPktRssIpV4; 1834 } 1835 } else if (hasip6) { 1836 switch (l4hdr_proto) { 1837 case ETH_L4_HDR_PROTO_TCP: 1838 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) { 1839 return NetPktRssIpV6TcpEx; 1840 } 1841 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { 1842 return NetPktRssIpV6Tcp; 1843 } 1844 break; 1845 1846 case ETH_L4_HDR_PROTO_UDP: 1847 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) { 1848 return NetPktRssIpV6UdpEx; 1849 } 1850 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { 1851 return NetPktRssIpV6Udp; 1852 } 1853 break; 1854 1855 default: 1856 break; 1857 } 1858 1859 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) { 1860 return NetPktRssIpV6Ex; 1861 } 1862 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { 1863 return NetPktRssIpV6; 1864 } 1865 } 1866 return 0xff; 1867 } 1868 1869 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, 1870 uint32_t hash) 1871 { 1872 struct virtio_net_hdr_v1_hash *hdr = (void *)buf; 1873 hdr->hash_value = hash; 1874 hdr->hash_report = report; 1875 } 1876 1877 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, 1878 size_t size) 1879 { 1880 VirtIONet *n = qemu_get_nic_opaque(nc); 1881 unsigned int index = nc->queue_index, new_index = index; 1882 struct NetRxPkt *pkt = n->rx_pkt; 1883 uint8_t net_hash_type; 1884 uint32_t hash; 1885 bool hasip4, hasip6; 1886 EthL4HdrProto l4hdr_proto; 1887 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { 1888 VIRTIO_NET_HASH_REPORT_IPv4, 1889 VIRTIO_NET_HASH_REPORT_TCPv4, 1890 VIRTIO_NET_HASH_REPORT_TCPv6, 1891 VIRTIO_NET_HASH_REPORT_IPv6, 1892 VIRTIO_NET_HASH_REPORT_IPv6_EX, 1893 VIRTIO_NET_HASH_REPORT_TCPv6_EX, 1894 VIRTIO_NET_HASH_REPORT_UDPv4, 1895 VIRTIO_NET_HASH_REPORT_UDPv6, 1896 VIRTIO_NET_HASH_REPORT_UDPv6_EX 1897 }; 1898 struct iovec iov = { 1899 .iov_base = (void *)buf, 1900 .iov_len = size 1901 }; 1902 1903 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); 1904 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); 1905 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, 1906 n->rss_data.hash_types); 1907 if (net_hash_type > NetPktRssIpV6UdpEx) { 1908 if (n->rss_data.populate_hash) { 1909 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); 1910 } 1911 return n->rss_data.redirect ? n->rss_data.default_queue : -1; 1912 } 1913 1914 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); 1915 1916 if (n->rss_data.populate_hash) { 1917 virtio_set_packet_hash(buf, reports[net_hash_type], hash); 1918 } 1919 1920 if (n->rss_data.redirect) { 1921 new_index = hash & (n->rss_data.indirections_len - 1); 1922 new_index = n->rss_data.indirections_table[new_index]; 1923 } 1924 1925 return (index == new_index) ? -1 : new_index; 1926 } 1927 1928 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, 1929 size_t size, bool no_rss) 1930 { 1931 VirtIONet *n = qemu_get_nic_opaque(nc); 1932 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 1933 VirtIODevice *vdev = VIRTIO_DEVICE(n); 1934 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; 1935 size_t lens[VIRTQUEUE_MAX_SIZE]; 1936 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; 1937 struct virtio_net_hdr_mrg_rxbuf mhdr; 1938 unsigned mhdr_cnt = 0; 1939 size_t offset, i, guest_offset, j; 1940 ssize_t err; 1941 1942 if (!virtio_net_can_receive(nc)) { 1943 return -1; 1944 } 1945 1946 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { 1947 int index = virtio_net_process_rss(nc, buf, size); 1948 if (index >= 0) { 1949 NetClientState *nc2 = qemu_get_subqueue(n->nic, index); 1950 return virtio_net_receive_rcu(nc2, buf, size, true); 1951 } 1952 } 1953 1954 /* hdr_len refers to the header we supply to the guest */ 1955 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { 1956 return 0; 1957 } 1958 1959 if (!receive_filter(n, buf, size)) 1960 return size; 1961 1962 offset = i = 0; 1963 1964 while (offset < size) { 1965 VirtQueueElement *elem; 1966 int len, total; 1967 const struct iovec *sg; 1968 1969 total = 0; 1970 1971 if (i == VIRTQUEUE_MAX_SIZE) { 1972 virtio_error(vdev, "virtio-net unexpected long buffer chain"); 1973 err = size; 1974 goto err; 1975 } 1976 1977 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); 1978 if (!elem) { 1979 if (i) { 1980 virtio_error(vdev, "virtio-net unexpected empty queue: " 1981 "i %zd mergeable %d offset %zd, size %zd, " 1982 "guest hdr len %zd, host hdr len %zd " 1983 "guest features 0x%" PRIx64, 1984 i, n->mergeable_rx_bufs, offset, size, 1985 n->guest_hdr_len, n->host_hdr_len, 1986 vdev->guest_features); 1987 } 1988 err = -1; 1989 goto err; 1990 } 1991 1992 if (elem->in_num < 1) { 1993 virtio_error(vdev, 1994 "virtio-net receive queue contains no in buffers"); 1995 virtqueue_detach_element(q->rx_vq, elem, 0); 1996 g_free(elem); 1997 err = -1; 1998 goto err; 1999 } 2000 2001 sg = elem->in_sg; 2002 if (i == 0) { 2003 assert(offset == 0); 2004 if (n->mergeable_rx_bufs) { 2005 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg), 2006 sg, elem->in_num, 2007 offsetof(typeof(mhdr), num_buffers), 2008 sizeof(mhdr.num_buffers)); 2009 } 2010 2011 receive_header(n, sg, elem->in_num, buf, size); 2012 if (n->rss_data.populate_hash) { 2013 offset = sizeof(mhdr); 2014 iov_from_buf(sg, elem->in_num, offset, 2015 buf + offset, n->host_hdr_len - sizeof(mhdr)); 2016 } 2017 offset = n->host_hdr_len; 2018 total += n->guest_hdr_len; 2019 guest_offset = n->guest_hdr_len; 2020 } else { 2021 guest_offset = 0; 2022 } 2023 2024 /* copy in packet. ugh */ 2025 len = iov_from_buf(sg, elem->in_num, guest_offset, 2026 buf + offset, size - offset); 2027 total += len; 2028 offset += len; 2029 /* If buffers can't be merged, at this point we 2030 * must have consumed the complete packet. 2031 * Otherwise, drop it. */ 2032 if (!n->mergeable_rx_bufs && offset < size) { 2033 virtqueue_unpop(q->rx_vq, elem, total); 2034 g_free(elem); 2035 err = size; 2036 goto err; 2037 } 2038 2039 elems[i] = elem; 2040 lens[i] = total; 2041 i++; 2042 } 2043 2044 if (mhdr_cnt) { 2045 virtio_stw_p(vdev, &mhdr.num_buffers, i); 2046 iov_from_buf(mhdr_sg, mhdr_cnt, 2047 0, 2048 &mhdr.num_buffers, sizeof mhdr.num_buffers); 2049 } 2050 2051 for (j = 0; j < i; j++) { 2052 /* signal other side */ 2053 virtqueue_fill(q->rx_vq, elems[j], lens[j], j); 2054 g_free(elems[j]); 2055 } 2056 2057 virtqueue_flush(q->rx_vq, i); 2058 virtio_notify(vdev, q->rx_vq); 2059 2060 return size; 2061 2062 err: 2063 for (j = 0; j < i; j++) { 2064 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); 2065 g_free(elems[j]); 2066 } 2067 2068 return err; 2069 } 2070 2071 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, 2072 size_t size) 2073 { 2074 RCU_READ_LOCK_GUARD(); 2075 2076 return virtio_net_receive_rcu(nc, buf, size, false); 2077 } 2078 2079 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, 2080 const uint8_t *buf, 2081 VirtioNetRscUnit *unit) 2082 { 2083 uint16_t ip_hdrlen; 2084 struct ip_header *ip; 2085 2086 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len 2087 + sizeof(struct eth_header)); 2088 unit->ip = (void *)ip; 2089 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; 2090 unit->ip_plen = &ip->ip_len; 2091 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); 2092 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2093 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; 2094 } 2095 2096 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, 2097 const uint8_t *buf, 2098 VirtioNetRscUnit *unit) 2099 { 2100 struct ip6_header *ip6; 2101 2102 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len 2103 + sizeof(struct eth_header)); 2104 unit->ip = ip6; 2105 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2106 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) 2107 + sizeof(struct ip6_header)); 2108 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; 2109 2110 /* There is a difference between payload length in ipv4 and v6, 2111 ip header is excluded in ipv6 */ 2112 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; 2113 } 2114 2115 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, 2116 VirtioNetRscSeg *seg) 2117 { 2118 int ret; 2119 struct virtio_net_hdr_v1 *h; 2120 2121 h = (struct virtio_net_hdr_v1 *)seg->buf; 2122 h->flags = 0; 2123 h->gso_type = VIRTIO_NET_HDR_GSO_NONE; 2124 2125 if (seg->is_coalesced) { 2126 h->rsc.segments = seg->packets; 2127 h->rsc.dup_acks = seg->dup_ack; 2128 h->flags = VIRTIO_NET_HDR_F_RSC_INFO; 2129 if (chain->proto == ETH_P_IP) { 2130 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2131 } else { 2132 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2133 } 2134 } 2135 2136 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); 2137 QTAILQ_REMOVE(&chain->buffers, seg, next); 2138 g_free(seg->buf); 2139 g_free(seg); 2140 2141 return ret; 2142 } 2143 2144 static void virtio_net_rsc_purge(void *opq) 2145 { 2146 VirtioNetRscSeg *seg, *rn; 2147 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq; 2148 2149 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) { 2150 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2151 chain->stat.purge_failed++; 2152 continue; 2153 } 2154 } 2155 2156 chain->stat.timer++; 2157 if (!QTAILQ_EMPTY(&chain->buffers)) { 2158 timer_mod(chain->drain_timer, 2159 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2160 } 2161 } 2162 2163 static void virtio_net_rsc_cleanup(VirtIONet *n) 2164 { 2165 VirtioNetRscChain *chain, *rn_chain; 2166 VirtioNetRscSeg *seg, *rn_seg; 2167 2168 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) { 2169 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) { 2170 QTAILQ_REMOVE(&chain->buffers, seg, next); 2171 g_free(seg->buf); 2172 g_free(seg); 2173 } 2174 2175 timer_free(chain->drain_timer); 2176 QTAILQ_REMOVE(&n->rsc_chains, chain, next); 2177 g_free(chain); 2178 } 2179 } 2180 2181 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain, 2182 NetClientState *nc, 2183 const uint8_t *buf, size_t size) 2184 { 2185 uint16_t hdr_len; 2186 VirtioNetRscSeg *seg; 2187 2188 hdr_len = chain->n->guest_hdr_len; 2189 seg = g_new(VirtioNetRscSeg, 1); 2190 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header) 2191 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); 2192 memcpy(seg->buf, buf, size); 2193 seg->size = size; 2194 seg->packets = 1; 2195 seg->dup_ack = 0; 2196 seg->is_coalesced = 0; 2197 seg->nc = nc; 2198 2199 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); 2200 chain->stat.cache++; 2201 2202 switch (chain->proto) { 2203 case ETH_P_IP: 2204 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); 2205 break; 2206 case ETH_P_IPV6: 2207 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); 2208 break; 2209 default: 2210 g_assert_not_reached(); 2211 } 2212 } 2213 2214 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain, 2215 VirtioNetRscSeg *seg, 2216 const uint8_t *buf, 2217 struct tcp_header *n_tcp, 2218 struct tcp_header *o_tcp) 2219 { 2220 uint32_t nack, oack; 2221 uint16_t nwin, owin; 2222 2223 nack = htonl(n_tcp->th_ack); 2224 nwin = htons(n_tcp->th_win); 2225 oack = htonl(o_tcp->th_ack); 2226 owin = htons(o_tcp->th_win); 2227 2228 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) { 2229 chain->stat.ack_out_of_win++; 2230 return RSC_FINAL; 2231 } else if (nack == oack) { 2232 /* duplicated ack or window probe */ 2233 if (nwin == owin) { 2234 /* duplicated ack, add dup ack count due to whql test up to 1 */ 2235 chain->stat.dup_ack++; 2236 return RSC_FINAL; 2237 } else { 2238 /* Coalesce window update */ 2239 o_tcp->th_win = n_tcp->th_win; 2240 chain->stat.win_update++; 2241 return RSC_COALESCE; 2242 } 2243 } else { 2244 /* pure ack, go to 'C', finalize*/ 2245 chain->stat.pure_ack++; 2246 return RSC_FINAL; 2247 } 2248 } 2249 2250 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, 2251 VirtioNetRscSeg *seg, 2252 const uint8_t *buf, 2253 VirtioNetRscUnit *n_unit) 2254 { 2255 void *data; 2256 uint16_t o_ip_len; 2257 uint32_t nseq, oseq; 2258 VirtioNetRscUnit *o_unit; 2259 2260 o_unit = &seg->unit; 2261 o_ip_len = htons(*o_unit->ip_plen); 2262 nseq = htonl(n_unit->tcp->th_seq); 2263 oseq = htonl(o_unit->tcp->th_seq); 2264 2265 /* out of order or retransmitted. */ 2266 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) { 2267 chain->stat.data_out_of_win++; 2268 return RSC_FINAL; 2269 } 2270 2271 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen; 2272 if (nseq == oseq) { 2273 if ((o_unit->payload == 0) && n_unit->payload) { 2274 /* From no payload to payload, normal case, not a dup ack or etc */ 2275 chain->stat.data_after_pure_ack++; 2276 goto coalesce; 2277 } else { 2278 return virtio_net_rsc_handle_ack(chain, seg, buf, 2279 n_unit->tcp, o_unit->tcp); 2280 } 2281 } else if ((nseq - oseq) != o_unit->payload) { 2282 /* Not a consistent packet, out of order */ 2283 chain->stat.data_out_of_order++; 2284 return RSC_FINAL; 2285 } else { 2286 coalesce: 2287 if ((o_ip_len + n_unit->payload) > chain->max_payload) { 2288 chain->stat.over_size++; 2289 return RSC_FINAL; 2290 } 2291 2292 /* Here comes the right data, the payload length in v4/v6 is different, 2293 so use the field value to update and record the new data len */ 2294 o_unit->payload += n_unit->payload; /* update new data len */ 2295 2296 /* update field in ip header */ 2297 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); 2298 2299 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced 2300 for windows guest, while this may change the behavior for linux 2301 guest (only if it uses RSC feature). */ 2302 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags; 2303 2304 o_unit->tcp->th_ack = n_unit->tcp->th_ack; 2305 o_unit->tcp->th_win = n_unit->tcp->th_win; 2306 2307 memmove(seg->buf + seg->size, data, n_unit->payload); 2308 seg->size += n_unit->payload; 2309 seg->packets++; 2310 chain->stat.coalesced++; 2311 return RSC_COALESCE; 2312 } 2313 } 2314 2315 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain, 2316 VirtioNetRscSeg *seg, 2317 const uint8_t *buf, size_t size, 2318 VirtioNetRscUnit *unit) 2319 { 2320 struct ip_header *ip1, *ip2; 2321 2322 ip1 = (struct ip_header *)(unit->ip); 2323 ip2 = (struct ip_header *)(seg->unit.ip); 2324 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst) 2325 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2326 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2327 chain->stat.no_match++; 2328 return RSC_NO_MATCH; 2329 } 2330 2331 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2332 } 2333 2334 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain, 2335 VirtioNetRscSeg *seg, 2336 const uint8_t *buf, size_t size, 2337 VirtioNetRscUnit *unit) 2338 { 2339 struct ip6_header *ip1, *ip2; 2340 2341 ip1 = (struct ip6_header *)(unit->ip); 2342 ip2 = (struct ip6_header *)(seg->unit.ip); 2343 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) 2344 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) 2345 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) 2346 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { 2347 chain->stat.no_match++; 2348 return RSC_NO_MATCH; 2349 } 2350 2351 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); 2352 } 2353 2354 /* Packets with 'SYN' should bypass, other flag should be sent after drain 2355 * to prevent out of order */ 2356 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain, 2357 struct tcp_header *tcp) 2358 { 2359 uint16_t tcp_hdr; 2360 uint16_t tcp_flag; 2361 2362 tcp_flag = htons(tcp->th_offset_flags); 2363 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; 2364 tcp_flag &= VIRTIO_NET_TCP_FLAG; 2365 if (tcp_flag & TH_SYN) { 2366 chain->stat.tcp_syn++; 2367 return RSC_BYPASS; 2368 } 2369 2370 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) { 2371 chain->stat.tcp_ctrl_drain++; 2372 return RSC_FINAL; 2373 } 2374 2375 if (tcp_hdr > sizeof(struct tcp_header)) { 2376 chain->stat.tcp_all_opt++; 2377 return RSC_FINAL; 2378 } 2379 2380 return RSC_CANDIDATE; 2381 } 2382 2383 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, 2384 NetClientState *nc, 2385 const uint8_t *buf, size_t size, 2386 VirtioNetRscUnit *unit) 2387 { 2388 int ret; 2389 VirtioNetRscSeg *seg, *nseg; 2390 2391 if (QTAILQ_EMPTY(&chain->buffers)) { 2392 chain->stat.empty_cache++; 2393 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2394 timer_mod(chain->drain_timer, 2395 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); 2396 return size; 2397 } 2398 2399 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2400 if (chain->proto == ETH_P_IP) { 2401 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); 2402 } else { 2403 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); 2404 } 2405 2406 if (ret == RSC_FINAL) { 2407 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2408 /* Send failed */ 2409 chain->stat.final_failed++; 2410 return 0; 2411 } 2412 2413 /* Send current packet */ 2414 return virtio_net_do_receive(nc, buf, size); 2415 } else if (ret == RSC_NO_MATCH) { 2416 continue; 2417 } else { 2418 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ 2419 seg->is_coalesced = 1; 2420 return size; 2421 } 2422 } 2423 2424 chain->stat.no_match_cache++; 2425 virtio_net_rsc_cache_buf(chain, nc, buf, size); 2426 return size; 2427 } 2428 2429 /* Drain a connection data, this is to avoid out of order segments */ 2430 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain, 2431 NetClientState *nc, 2432 const uint8_t *buf, size_t size, 2433 uint16_t ip_start, uint16_t ip_size, 2434 uint16_t tcp_port) 2435 { 2436 VirtioNetRscSeg *seg, *nseg; 2437 uint32_t ppair1, ppair2; 2438 2439 ppair1 = *(uint32_t *)(buf + tcp_port); 2440 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { 2441 ppair2 = *(uint32_t *)(seg->buf + tcp_port); 2442 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size) 2443 || (ppair1 != ppair2)) { 2444 continue; 2445 } 2446 if (virtio_net_rsc_drain_seg(chain, seg) == 0) { 2447 chain->stat.drain_failed++; 2448 } 2449 2450 break; 2451 } 2452 2453 return virtio_net_do_receive(nc, buf, size); 2454 } 2455 2456 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain, 2457 struct ip_header *ip, 2458 const uint8_t *buf, size_t size) 2459 { 2460 uint16_t ip_len; 2461 2462 /* Not an ipv4 packet */ 2463 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) { 2464 chain->stat.ip_option++; 2465 return RSC_BYPASS; 2466 } 2467 2468 /* Don't handle packets with ip option */ 2469 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) { 2470 chain->stat.ip_option++; 2471 return RSC_BYPASS; 2472 } 2473 2474 if (ip->ip_p != IPPROTO_TCP) { 2475 chain->stat.bypass_not_tcp++; 2476 return RSC_BYPASS; 2477 } 2478 2479 /* Don't handle packets with ip fragment */ 2480 if (!(htons(ip->ip_off) & IP_DF)) { 2481 chain->stat.ip_frag++; 2482 return RSC_BYPASS; 2483 } 2484 2485 /* Don't handle packets with ecn flag */ 2486 if (IPTOS_ECN(ip->ip_tos)) { 2487 chain->stat.ip_ecn++; 2488 return RSC_BYPASS; 2489 } 2490 2491 ip_len = htons(ip->ip_len); 2492 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header)) 2493 || ip_len > (size - chain->n->guest_hdr_len - 2494 sizeof(struct eth_header))) { 2495 chain->stat.ip_hacked++; 2496 return RSC_BYPASS; 2497 } 2498 2499 return RSC_CANDIDATE; 2500 } 2501 2502 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain, 2503 NetClientState *nc, 2504 const uint8_t *buf, size_t size) 2505 { 2506 int32_t ret; 2507 uint16_t hdr_len; 2508 VirtioNetRscUnit unit; 2509 2510 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2511 2512 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header) 2513 + sizeof(struct tcp_header))) { 2514 chain->stat.bypass_not_tcp++; 2515 return virtio_net_do_receive(nc, buf, size); 2516 } 2517 2518 virtio_net_rsc_extract_unit4(chain, buf, &unit); 2519 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size) 2520 != RSC_CANDIDATE) { 2521 return virtio_net_do_receive(nc, buf, size); 2522 } 2523 2524 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2525 if (ret == RSC_BYPASS) { 2526 return virtio_net_do_receive(nc, buf, size); 2527 } else if (ret == RSC_FINAL) { 2528 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2529 ((hdr_len + sizeof(struct eth_header)) + 12), 2530 VIRTIO_NET_IP4_ADDR_SIZE, 2531 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)); 2532 } 2533 2534 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2535 } 2536 2537 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain, 2538 struct ip6_header *ip6, 2539 const uint8_t *buf, size_t size) 2540 { 2541 uint16_t ip_len; 2542 2543 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) 2544 != IP_HEADER_VERSION_6) { 2545 return RSC_BYPASS; 2546 } 2547 2548 /* Both option and protocol is checked in this */ 2549 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { 2550 chain->stat.bypass_not_tcp++; 2551 return RSC_BYPASS; 2552 } 2553 2554 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); 2555 if (ip_len < sizeof(struct tcp_header) || 2556 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header) 2557 - sizeof(struct ip6_header))) { 2558 chain->stat.ip_hacked++; 2559 return RSC_BYPASS; 2560 } 2561 2562 /* Don't handle packets with ecn flag */ 2563 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) { 2564 chain->stat.ip_ecn++; 2565 return RSC_BYPASS; 2566 } 2567 2568 return RSC_CANDIDATE; 2569 } 2570 2571 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc, 2572 const uint8_t *buf, size_t size) 2573 { 2574 int32_t ret; 2575 uint16_t hdr_len; 2576 VirtioNetRscChain *chain; 2577 VirtioNetRscUnit unit; 2578 2579 chain = opq; 2580 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; 2581 2582 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) 2583 + sizeof(tcp_header))) { 2584 return virtio_net_do_receive(nc, buf, size); 2585 } 2586 2587 virtio_net_rsc_extract_unit6(chain, buf, &unit); 2588 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain, 2589 unit.ip, buf, size)) { 2590 return virtio_net_do_receive(nc, buf, size); 2591 } 2592 2593 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); 2594 if (ret == RSC_BYPASS) { 2595 return virtio_net_do_receive(nc, buf, size); 2596 } else if (ret == RSC_FINAL) { 2597 return virtio_net_rsc_drain_flow(chain, nc, buf, size, 2598 ((hdr_len + sizeof(struct eth_header)) + 8), 2599 VIRTIO_NET_IP6_ADDR_SIZE, 2600 hdr_len + sizeof(struct eth_header) 2601 + sizeof(struct ip6_header)); 2602 } 2603 2604 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); 2605 } 2606 2607 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, 2608 NetClientState *nc, 2609 uint16_t proto) 2610 { 2611 VirtioNetRscChain *chain; 2612 2613 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { 2614 return NULL; 2615 } 2616 2617 QTAILQ_FOREACH(chain, &n->rsc_chains, next) { 2618 if (chain->proto == proto) { 2619 return chain; 2620 } 2621 } 2622 2623 chain = g_malloc(sizeof(*chain)); 2624 chain->n = n; 2625 chain->proto = proto; 2626 if (proto == (uint16_t)ETH_P_IP) { 2627 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; 2628 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 2629 } else { 2630 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; 2631 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 2632 } 2633 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, 2634 virtio_net_rsc_purge, chain); 2635 memset(&chain->stat, 0, sizeof(chain->stat)); 2636 2637 QTAILQ_INIT(&chain->buffers); 2638 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); 2639 2640 return chain; 2641 } 2642 2643 static ssize_t virtio_net_rsc_receive(NetClientState *nc, 2644 const uint8_t *buf, 2645 size_t size) 2646 { 2647 uint16_t proto; 2648 VirtioNetRscChain *chain; 2649 struct eth_header *eth; 2650 VirtIONet *n; 2651 2652 n = qemu_get_nic_opaque(nc); 2653 if (size < (n->host_hdr_len + sizeof(struct eth_header))) { 2654 return virtio_net_do_receive(nc, buf, size); 2655 } 2656 2657 eth = (struct eth_header *)(buf + n->guest_hdr_len); 2658 proto = htons(eth->h_proto); 2659 2660 chain = virtio_net_rsc_lookup_chain(n, nc, proto); 2661 if (chain) { 2662 chain->stat.received++; 2663 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) { 2664 return virtio_net_rsc_receive4(chain, nc, buf, size); 2665 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) { 2666 return virtio_net_rsc_receive6(chain, nc, buf, size); 2667 } 2668 } 2669 return virtio_net_do_receive(nc, buf, size); 2670 } 2671 2672 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 2673 size_t size) 2674 { 2675 VirtIONet *n = qemu_get_nic_opaque(nc); 2676 if ((n->rsc4_enabled || n->rsc6_enabled)) { 2677 return virtio_net_rsc_receive(nc, buf, size); 2678 } else { 2679 return virtio_net_do_receive(nc, buf, size); 2680 } 2681 } 2682 2683 static int32_t virtio_net_flush_tx(VirtIONetQueue *q); 2684 2685 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) 2686 { 2687 VirtIONet *n = qemu_get_nic_opaque(nc); 2688 VirtIONetQueue *q = virtio_net_get_subqueue(nc); 2689 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2690 int ret; 2691 2692 virtqueue_push(q->tx_vq, q->async_tx.elem, 0); 2693 virtio_notify(vdev, q->tx_vq); 2694 2695 g_free(q->async_tx.elem); 2696 q->async_tx.elem = NULL; 2697 2698 virtio_queue_set_notification(q->tx_vq, 1); 2699 ret = virtio_net_flush_tx(q); 2700 if (ret >= n->tx_burst) { 2701 /* 2702 * the flush has been stopped by tx_burst 2703 * we will not receive notification for the 2704 * remainining part, so re-schedule 2705 */ 2706 virtio_queue_set_notification(q->tx_vq, 0); 2707 if (q->tx_bh) { 2708 qemu_bh_schedule(q->tx_bh); 2709 } else { 2710 timer_mod(q->tx_timer, 2711 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2712 } 2713 q->tx_waiting = 1; 2714 } 2715 } 2716 2717 /* TX */ 2718 static int32_t virtio_net_flush_tx(VirtIONetQueue *q) 2719 { 2720 VirtIONet *n = q->n; 2721 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2722 VirtQueueElement *elem; 2723 int32_t num_packets = 0; 2724 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq)); 2725 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2726 return num_packets; 2727 } 2728 2729 if (q->async_tx.elem) { 2730 virtio_queue_set_notification(q->tx_vq, 0); 2731 return num_packets; 2732 } 2733 2734 for (;;) { 2735 ssize_t ret; 2736 unsigned int out_num; 2737 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; 2738 struct virtio_net_hdr_v1_hash vhdr; 2739 2740 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); 2741 if (!elem) { 2742 break; 2743 } 2744 2745 out_num = elem->out_num; 2746 out_sg = elem->out_sg; 2747 if (out_num < 1) { 2748 virtio_error(vdev, "virtio-net header not in first element"); 2749 virtqueue_detach_element(q->tx_vq, elem, 0); 2750 g_free(elem); 2751 return -EINVAL; 2752 } 2753 2754 if (n->has_vnet_hdr) { 2755 if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) < 2756 n->guest_hdr_len) { 2757 virtio_error(vdev, "virtio-net header incorrect"); 2758 virtqueue_detach_element(q->tx_vq, elem, 0); 2759 g_free(elem); 2760 return -EINVAL; 2761 } 2762 if (n->needs_vnet_hdr_swap) { 2763 virtio_net_hdr_swap(vdev, (void *) &vhdr); 2764 sg2[0].iov_base = &vhdr; 2765 sg2[0].iov_len = n->guest_hdr_len; 2766 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, 2767 out_sg, out_num, 2768 n->guest_hdr_len, -1); 2769 if (out_num == VIRTQUEUE_MAX_SIZE) { 2770 goto drop; 2771 } 2772 out_num += 1; 2773 out_sg = sg2; 2774 } 2775 } 2776 /* 2777 * If host wants to see the guest header as is, we can 2778 * pass it on unchanged. Otherwise, copy just the parts 2779 * that host is interested in. 2780 */ 2781 assert(n->host_hdr_len <= n->guest_hdr_len); 2782 if (n->host_hdr_len != n->guest_hdr_len) { 2783 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg), 2784 out_sg, out_num, 2785 0, n->host_hdr_len); 2786 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num, 2787 out_sg, out_num, 2788 n->guest_hdr_len, -1); 2789 out_num = sg_num; 2790 out_sg = sg; 2791 } 2792 2793 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index), 2794 out_sg, out_num, virtio_net_tx_complete); 2795 if (ret == 0) { 2796 virtio_queue_set_notification(q->tx_vq, 0); 2797 q->async_tx.elem = elem; 2798 return -EBUSY; 2799 } 2800 2801 drop: 2802 virtqueue_push(q->tx_vq, elem, 0); 2803 virtio_notify(vdev, q->tx_vq); 2804 g_free(elem); 2805 2806 if (++num_packets >= n->tx_burst) { 2807 break; 2808 } 2809 } 2810 return num_packets; 2811 } 2812 2813 static void virtio_net_tx_timer(void *opaque); 2814 2815 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) 2816 { 2817 VirtIONet *n = VIRTIO_NET(vdev); 2818 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2819 2820 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2821 virtio_net_drop_tx_queue_data(vdev, vq); 2822 return; 2823 } 2824 2825 /* This happens when device was stopped but VCPU wasn't. */ 2826 if (!vdev->vm_running) { 2827 q->tx_waiting = 1; 2828 return; 2829 } 2830 2831 if (q->tx_waiting) { 2832 /* We already have queued packets, immediately flush */ 2833 timer_del(q->tx_timer); 2834 virtio_net_tx_timer(q); 2835 } else { 2836 /* re-arm timer to flush it (and more) on next tick */ 2837 timer_mod(q->tx_timer, 2838 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2839 q->tx_waiting = 1; 2840 virtio_queue_set_notification(vq, 0); 2841 } 2842 } 2843 2844 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) 2845 { 2846 VirtIONet *n = VIRTIO_NET(vdev); 2847 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; 2848 2849 if (unlikely(n->vhost_started)) { 2850 return; 2851 } 2852 2853 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { 2854 virtio_net_drop_tx_queue_data(vdev, vq); 2855 return; 2856 } 2857 2858 if (unlikely(q->tx_waiting)) { 2859 return; 2860 } 2861 q->tx_waiting = 1; 2862 /* This happens when device was stopped but VCPU wasn't. */ 2863 if (!vdev->vm_running) { 2864 return; 2865 } 2866 virtio_queue_set_notification(vq, 0); 2867 qemu_bh_schedule(q->tx_bh); 2868 } 2869 2870 static void virtio_net_tx_timer(void *opaque) 2871 { 2872 VirtIONetQueue *q = opaque; 2873 VirtIONet *n = q->n; 2874 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2875 int ret; 2876 2877 /* This happens when device was stopped but BH wasn't. */ 2878 if (!vdev->vm_running) { 2879 /* Make sure tx waiting is set, so we'll run when restarted. */ 2880 assert(q->tx_waiting); 2881 return; 2882 } 2883 2884 q->tx_waiting = 0; 2885 2886 /* Just in case the driver is not ready on more */ 2887 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { 2888 return; 2889 } 2890 2891 ret = virtio_net_flush_tx(q); 2892 if (ret == -EBUSY || ret == -EINVAL) { 2893 return; 2894 } 2895 /* 2896 * If we flush a full burst of packets, assume there are 2897 * more coming and immediately rearm 2898 */ 2899 if (ret >= n->tx_burst) { 2900 q->tx_waiting = 1; 2901 timer_mod(q->tx_timer, 2902 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2903 return; 2904 } 2905 /* 2906 * If less than a full burst, re-enable notification and flush 2907 * anything that may have come in while we weren't looking. If 2908 * we find something, assume the guest is still active and rearm 2909 */ 2910 virtio_queue_set_notification(q->tx_vq, 1); 2911 ret = virtio_net_flush_tx(q); 2912 if (ret > 0) { 2913 virtio_queue_set_notification(q->tx_vq, 0); 2914 q->tx_waiting = 1; 2915 timer_mod(q->tx_timer, 2916 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); 2917 } 2918 } 2919 2920 static void virtio_net_tx_bh(void *opaque) 2921 { 2922 VirtIONetQueue *q = opaque; 2923 VirtIONet *n = q->n; 2924 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2925 int32_t ret; 2926 2927 /* This happens when device was stopped but BH wasn't. */ 2928 if (!vdev->vm_running) { 2929 /* Make sure tx waiting is set, so we'll run when restarted. */ 2930 assert(q->tx_waiting); 2931 return; 2932 } 2933 2934 q->tx_waiting = 0; 2935 2936 /* Just in case the driver is not ready on more */ 2937 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { 2938 return; 2939 } 2940 2941 ret = virtio_net_flush_tx(q); 2942 if (ret == -EBUSY || ret == -EINVAL) { 2943 return; /* Notification re-enable handled by tx_complete or device 2944 * broken */ 2945 } 2946 2947 /* If we flush a full burst of packets, assume there are 2948 * more coming and immediately reschedule */ 2949 if (ret >= n->tx_burst) { 2950 qemu_bh_schedule(q->tx_bh); 2951 q->tx_waiting = 1; 2952 return; 2953 } 2954 2955 /* If less than a full burst, re-enable notification and flush 2956 * anything that may have come in while we weren't looking. If 2957 * we find something, assume the guest is still active and reschedule */ 2958 virtio_queue_set_notification(q->tx_vq, 1); 2959 ret = virtio_net_flush_tx(q); 2960 if (ret == -EINVAL) { 2961 return; 2962 } else if (ret > 0) { 2963 virtio_queue_set_notification(q->tx_vq, 0); 2964 qemu_bh_schedule(q->tx_bh); 2965 q->tx_waiting = 1; 2966 } 2967 } 2968 2969 static void virtio_net_add_queue(VirtIONet *n, int index) 2970 { 2971 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2972 2973 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, 2974 virtio_net_handle_rx); 2975 2976 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { 2977 n->vqs[index].tx_vq = 2978 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2979 virtio_net_handle_tx_timer); 2980 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 2981 virtio_net_tx_timer, 2982 &n->vqs[index]); 2983 } else { 2984 n->vqs[index].tx_vq = 2985 virtio_add_queue(vdev, n->net_conf.tx_queue_size, 2986 virtio_net_handle_tx_bh); 2987 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], 2988 &DEVICE(vdev)->mem_reentrancy_guard); 2989 } 2990 2991 n->vqs[index].tx_waiting = 0; 2992 n->vqs[index].n = n; 2993 } 2994 2995 static void virtio_net_del_queue(VirtIONet *n, int index) 2996 { 2997 VirtIODevice *vdev = VIRTIO_DEVICE(n); 2998 VirtIONetQueue *q = &n->vqs[index]; 2999 NetClientState *nc = qemu_get_subqueue(n->nic, index); 3000 3001 qemu_purge_queued_packets(nc); 3002 3003 virtio_del_queue(vdev, index * 2); 3004 if (q->tx_timer) { 3005 timer_free(q->tx_timer); 3006 q->tx_timer = NULL; 3007 } else { 3008 qemu_bh_delete(q->tx_bh); 3009 q->tx_bh = NULL; 3010 } 3011 q->tx_waiting = 0; 3012 virtio_del_queue(vdev, index * 2 + 1); 3013 } 3014 3015 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) 3016 { 3017 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3018 int old_num_queues = virtio_get_num_queues(vdev); 3019 int new_num_queues = new_max_queue_pairs * 2 + 1; 3020 int i; 3021 3022 assert(old_num_queues >= 3); 3023 assert(old_num_queues % 2 == 1); 3024 3025 if (old_num_queues == new_num_queues) { 3026 return; 3027 } 3028 3029 /* 3030 * We always need to remove and add ctrl vq if 3031 * old_num_queues != new_num_queues. Remove ctrl_vq first, 3032 * and then we only enter one of the following two loops. 3033 */ 3034 virtio_del_queue(vdev, old_num_queues - 1); 3035 3036 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) { 3037 /* new_num_queues < old_num_queues */ 3038 virtio_net_del_queue(n, i / 2); 3039 } 3040 3041 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) { 3042 /* new_num_queues > old_num_queues */ 3043 virtio_net_add_queue(n, i / 2); 3044 } 3045 3046 /* add ctrl_vq last */ 3047 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3048 } 3049 3050 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) 3051 { 3052 int max = multiqueue ? n->max_queue_pairs : 1; 3053 3054 n->multiqueue = multiqueue; 3055 virtio_net_change_num_queue_pairs(n, max); 3056 3057 virtio_net_set_queue_pairs(n); 3058 } 3059 3060 static int virtio_net_post_load_device(void *opaque, int version_id) 3061 { 3062 VirtIONet *n = opaque; 3063 VirtIODevice *vdev = VIRTIO_DEVICE(n); 3064 int i, link_down; 3065 3066 trace_virtio_net_post_load_device(); 3067 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, 3068 virtio_vdev_has_feature(vdev, 3069 VIRTIO_F_VERSION_1), 3070 virtio_vdev_has_feature(vdev, 3071 VIRTIO_NET_F_HASH_REPORT)); 3072 3073 /* MAC_TABLE_ENTRIES may be different from the saved image */ 3074 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { 3075 n->mac_table.in_use = 0; 3076 } 3077 3078 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { 3079 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); 3080 } 3081 3082 /* 3083 * curr_guest_offloads will be later overwritten by the 3084 * virtio_set_features_nocheck call done from the virtio_load. 3085 * Here we make sure it is preserved and restored accordingly 3086 * in the virtio_net_post_load_virtio callback. 3087 */ 3088 n->saved_guest_offloads = n->curr_guest_offloads; 3089 3090 virtio_net_set_queue_pairs(n); 3091 3092 /* Find the first multicast entry in the saved MAC filter */ 3093 for (i = 0; i < n->mac_table.in_use; i++) { 3094 if (n->mac_table.macs[i * ETH_ALEN] & 1) { 3095 break; 3096 } 3097 } 3098 n->mac_table.first_multi = i; 3099 3100 /* nc.link_down can't be migrated, so infer link_down according 3101 * to link status bit in n->status */ 3102 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; 3103 for (i = 0; i < n->max_queue_pairs; i++) { 3104 qemu_get_subqueue(n->nic, i)->link_down = link_down; 3105 } 3106 3107 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) && 3108 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3109 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3110 QEMU_CLOCK_VIRTUAL, 3111 virtio_net_announce_timer, n); 3112 if (n->announce_timer.round) { 3113 timer_mod(n->announce_timer.tm, 3114 qemu_clock_get_ms(n->announce_timer.type)); 3115 } else { 3116 qemu_announce_timer_del(&n->announce_timer, false); 3117 } 3118 } 3119 3120 if (n->rss_data.enabled) { 3121 n->rss_data.enabled_software_rss = n->rss_data.populate_hash; 3122 if (!n->rss_data.populate_hash) { 3123 if (!virtio_net_attach_epbf_rss(n)) { 3124 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { 3125 warn_report("Can't post-load eBPF RSS for vhost"); 3126 } else { 3127 warn_report("Can't post-load eBPF RSS - " 3128 "fallback to software RSS"); 3129 n->rss_data.enabled_software_rss = true; 3130 } 3131 } 3132 } 3133 3134 trace_virtio_net_rss_enable(n->rss_data.hash_types, 3135 n->rss_data.indirections_len, 3136 sizeof(n->rss_data.key)); 3137 } else { 3138 trace_virtio_net_rss_disable(); 3139 } 3140 return 0; 3141 } 3142 3143 static int virtio_net_post_load_virtio(VirtIODevice *vdev) 3144 { 3145 VirtIONet *n = VIRTIO_NET(vdev); 3146 /* 3147 * The actual needed state is now in saved_guest_offloads, 3148 * see virtio_net_post_load_device for detail. 3149 * Restore it back and apply the desired offloads. 3150 */ 3151 n->curr_guest_offloads = n->saved_guest_offloads; 3152 if (peer_has_vnet_hdr(n)) { 3153 virtio_net_apply_guest_offloads(n); 3154 } 3155 3156 return 0; 3157 } 3158 3159 /* tx_waiting field of a VirtIONetQueue */ 3160 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { 3161 .name = "virtio-net-queue-tx_waiting", 3162 .fields = (const VMStateField[]) { 3163 VMSTATE_UINT32(tx_waiting, VirtIONetQueue), 3164 VMSTATE_END_OF_LIST() 3165 }, 3166 }; 3167 3168 static bool max_queue_pairs_gt_1(void *opaque, int version_id) 3169 { 3170 return VIRTIO_NET(opaque)->max_queue_pairs > 1; 3171 } 3172 3173 static bool has_ctrl_guest_offloads(void *opaque, int version_id) 3174 { 3175 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque), 3176 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3177 } 3178 3179 static bool mac_table_fits(void *opaque, int version_id) 3180 { 3181 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES; 3182 } 3183 3184 static bool mac_table_doesnt_fit(void *opaque, int version_id) 3185 { 3186 return !mac_table_fits(opaque, version_id); 3187 } 3188 3189 /* This temporary type is shared by all the WITH_TMP methods 3190 * although only some fields are used by each. 3191 */ 3192 struct VirtIONetMigTmp { 3193 VirtIONet *parent; 3194 VirtIONetQueue *vqs_1; 3195 uint16_t curr_queue_pairs_1; 3196 uint8_t has_ufo; 3197 uint32_t has_vnet_hdr; 3198 }; 3199 3200 /* The 2nd and subsequent tx_waiting flags are loaded later than 3201 * the 1st entry in the queue_pairs and only if there's more than one 3202 * entry. We use the tmp mechanism to calculate a temporary 3203 * pointer and count and also validate the count. 3204 */ 3205 3206 static int virtio_net_tx_waiting_pre_save(void *opaque) 3207 { 3208 struct VirtIONetMigTmp *tmp = opaque; 3209 3210 tmp->vqs_1 = tmp->parent->vqs + 1; 3211 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; 3212 if (tmp->parent->curr_queue_pairs == 0) { 3213 tmp->curr_queue_pairs_1 = 0; 3214 } 3215 3216 return 0; 3217 } 3218 3219 static int virtio_net_tx_waiting_pre_load(void *opaque) 3220 { 3221 struct VirtIONetMigTmp *tmp = opaque; 3222 3223 /* Reuse the pointer setup from save */ 3224 virtio_net_tx_waiting_pre_save(opaque); 3225 3226 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { 3227 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", 3228 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); 3229 3230 return -EINVAL; 3231 } 3232 3233 return 0; /* all good */ 3234 } 3235 3236 static const VMStateDescription vmstate_virtio_net_tx_waiting = { 3237 .name = "virtio-net-tx_waiting", 3238 .pre_load = virtio_net_tx_waiting_pre_load, 3239 .pre_save = virtio_net_tx_waiting_pre_save, 3240 .fields = (const VMStateField[]) { 3241 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, 3242 curr_queue_pairs_1, 3243 vmstate_virtio_net_queue_tx_waiting, 3244 struct VirtIONetQueue), 3245 VMSTATE_END_OF_LIST() 3246 }, 3247 }; 3248 3249 /* the 'has_ufo' flag is just tested; if the incoming stream has the 3250 * flag set we need to check that we have it 3251 */ 3252 static int virtio_net_ufo_post_load(void *opaque, int version_id) 3253 { 3254 struct VirtIONetMigTmp *tmp = opaque; 3255 3256 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) { 3257 error_report("virtio-net: saved image requires TUN_F_UFO support"); 3258 return -EINVAL; 3259 } 3260 3261 return 0; 3262 } 3263 3264 static int virtio_net_ufo_pre_save(void *opaque) 3265 { 3266 struct VirtIONetMigTmp *tmp = opaque; 3267 3268 tmp->has_ufo = tmp->parent->has_ufo; 3269 3270 return 0; 3271 } 3272 3273 static const VMStateDescription vmstate_virtio_net_has_ufo = { 3274 .name = "virtio-net-ufo", 3275 .post_load = virtio_net_ufo_post_load, 3276 .pre_save = virtio_net_ufo_pre_save, 3277 .fields = (const VMStateField[]) { 3278 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp), 3279 VMSTATE_END_OF_LIST() 3280 }, 3281 }; 3282 3283 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the 3284 * flag set we need to check that we have it 3285 */ 3286 static int virtio_net_vnet_post_load(void *opaque, int version_id) 3287 { 3288 struct VirtIONetMigTmp *tmp = opaque; 3289 3290 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) { 3291 error_report("virtio-net: saved image requires vnet_hdr=on"); 3292 return -EINVAL; 3293 } 3294 3295 return 0; 3296 } 3297 3298 static int virtio_net_vnet_pre_save(void *opaque) 3299 { 3300 struct VirtIONetMigTmp *tmp = opaque; 3301 3302 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr; 3303 3304 return 0; 3305 } 3306 3307 static const VMStateDescription vmstate_virtio_net_has_vnet = { 3308 .name = "virtio-net-vnet", 3309 .post_load = virtio_net_vnet_post_load, 3310 .pre_save = virtio_net_vnet_pre_save, 3311 .fields = (const VMStateField[]) { 3312 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp), 3313 VMSTATE_END_OF_LIST() 3314 }, 3315 }; 3316 3317 static bool virtio_net_rss_needed(void *opaque) 3318 { 3319 return VIRTIO_NET(opaque)->rss_data.enabled; 3320 } 3321 3322 static const VMStateDescription vmstate_virtio_net_rss = { 3323 .name = "virtio-net-device/rss", 3324 .version_id = 1, 3325 .minimum_version_id = 1, 3326 .needed = virtio_net_rss_needed, 3327 .fields = (const VMStateField[]) { 3328 VMSTATE_BOOL(rss_data.enabled, VirtIONet), 3329 VMSTATE_BOOL(rss_data.redirect, VirtIONet), 3330 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), 3331 VMSTATE_UINT32(rss_data.hash_types, VirtIONet), 3332 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), 3333 VMSTATE_UINT16(rss_data.default_queue, VirtIONet), 3334 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, 3335 VIRTIO_NET_RSS_MAX_KEY_SIZE), 3336 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, 3337 rss_data.indirections_len, 0, 3338 vmstate_info_uint16, uint16_t), 3339 VMSTATE_END_OF_LIST() 3340 }, 3341 }; 3342 3343 static const VMStateDescription vmstate_virtio_net_device = { 3344 .name = "virtio-net-device", 3345 .version_id = VIRTIO_NET_VM_VERSION, 3346 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3347 .post_load = virtio_net_post_load_device, 3348 .fields = (const VMStateField[]) { 3349 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN), 3350 VMSTATE_STRUCT_POINTER(vqs, VirtIONet, 3351 vmstate_virtio_net_queue_tx_waiting, 3352 VirtIONetQueue), 3353 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet), 3354 VMSTATE_UINT16(status, VirtIONet), 3355 VMSTATE_UINT8(promisc, VirtIONet), 3356 VMSTATE_UINT8(allmulti, VirtIONet), 3357 VMSTATE_UINT32(mac_table.in_use, VirtIONet), 3358 3359 /* Guarded pair: If it fits we load it, else we throw it away 3360 * - can happen if source has a larger MAC table.; post-load 3361 * sets flags in this case. 3362 */ 3363 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet, 3364 0, mac_table_fits, mac_table.in_use, 3365 ETH_ALEN), 3366 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0, 3367 mac_table.in_use, ETH_ALEN), 3368 3369 /* Note: This is an array of uint32's that's always been saved as a 3370 * buffer; hold onto your endiannesses; it's actually used as a bitmap 3371 * but based on the uint. 3372 */ 3373 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3), 3374 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3375 vmstate_virtio_net_has_vnet), 3376 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet), 3377 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet), 3378 VMSTATE_UINT8(alluni, VirtIONet), 3379 VMSTATE_UINT8(nomulti, VirtIONet), 3380 VMSTATE_UINT8(nouni, VirtIONet), 3381 VMSTATE_UINT8(nobcast, VirtIONet), 3382 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3383 vmstate_virtio_net_has_ufo), 3384 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, 3385 vmstate_info_uint16_equal, uint16_t), 3386 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), 3387 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, 3388 vmstate_virtio_net_tx_waiting), 3389 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, 3390 has_ctrl_guest_offloads), 3391 VMSTATE_END_OF_LIST() 3392 }, 3393 .subsections = (const VMStateDescription * const []) { 3394 &vmstate_virtio_net_rss, 3395 NULL 3396 } 3397 }; 3398 3399 static NetClientInfo net_virtio_info = { 3400 .type = NET_CLIENT_DRIVER_NIC, 3401 .size = sizeof(NICState), 3402 .can_receive = virtio_net_can_receive, 3403 .receive = virtio_net_receive, 3404 .link_status_changed = virtio_net_set_link_status, 3405 .query_rx_filter = virtio_net_query_rxfilter, 3406 .announce = virtio_net_announce, 3407 }; 3408 3409 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) 3410 { 3411 VirtIONet *n = VIRTIO_NET(vdev); 3412 NetClientState *nc; 3413 assert(n->vhost_started); 3414 if (!n->multiqueue && idx == 2) { 3415 /* Must guard against invalid features and bogus queue index 3416 * from being set by malicious guest, or penetrated through 3417 * buggy migration stream. 3418 */ 3419 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3420 qemu_log_mask(LOG_GUEST_ERROR, 3421 "%s: bogus vq index ignored\n", __func__); 3422 return false; 3423 } 3424 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3425 } else { 3426 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3427 } 3428 /* 3429 * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3430 * as the macro of configure interrupt's IDX, If this driver does not 3431 * support, the function will return false 3432 */ 3433 3434 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3435 return vhost_net_config_pending(get_vhost_net(nc->peer)); 3436 } 3437 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); 3438 } 3439 3440 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, 3441 bool mask) 3442 { 3443 VirtIONet *n = VIRTIO_NET(vdev); 3444 NetClientState *nc; 3445 assert(n->vhost_started); 3446 if (!n->multiqueue && idx == 2) { 3447 /* Must guard against invalid features and bogus queue index 3448 * from being set by malicious guest, or penetrated through 3449 * buggy migration stream. 3450 */ 3451 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { 3452 qemu_log_mask(LOG_GUEST_ERROR, 3453 "%s: bogus vq index ignored\n", __func__); 3454 return; 3455 } 3456 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); 3457 } else { 3458 nc = qemu_get_subqueue(n->nic, vq2q(idx)); 3459 } 3460 /* 3461 *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 3462 * as the macro of configure interrupt's IDX, If this driver does not 3463 * support, the function will return 3464 */ 3465 3466 if (idx == VIRTIO_CONFIG_IRQ_IDX) { 3467 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); 3468 return; 3469 } 3470 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); 3471 } 3472 3473 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) 3474 { 3475 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); 3476 3477 n->config_size = virtio_get_config_size(&cfg_size_params, host_features); 3478 } 3479 3480 void virtio_net_set_netclient_name(VirtIONet *n, const char *name, 3481 const char *type) 3482 { 3483 /* 3484 * The name can be NULL, the netclient name will be type.x. 3485 */ 3486 assert(type != NULL); 3487 3488 g_free(n->netclient_name); 3489 g_free(n->netclient_type); 3490 n->netclient_name = g_strdup(name); 3491 n->netclient_type = g_strdup(type); 3492 } 3493 3494 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) 3495 { 3496 HotplugHandler *hotplug_ctrl; 3497 PCIDevice *pci_dev; 3498 Error *err = NULL; 3499 3500 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3501 if (hotplug_ctrl) { 3502 pci_dev = PCI_DEVICE(dev); 3503 pci_dev->partially_hotplugged = true; 3504 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); 3505 if (err) { 3506 error_report_err(err); 3507 return false; 3508 } 3509 } else { 3510 return false; 3511 } 3512 return true; 3513 } 3514 3515 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, 3516 Error **errp) 3517 { 3518 Error *err = NULL; 3519 HotplugHandler *hotplug_ctrl; 3520 PCIDevice *pdev = PCI_DEVICE(dev); 3521 BusState *primary_bus; 3522 3523 if (!pdev->partially_hotplugged) { 3524 return true; 3525 } 3526 primary_bus = dev->parent_bus; 3527 if (!primary_bus) { 3528 error_setg(errp, "virtio_net: couldn't find primary bus"); 3529 return false; 3530 } 3531 qdev_set_parent_bus(dev, primary_bus, &error_abort); 3532 qatomic_set(&n->failover_primary_hidden, false); 3533 hotplug_ctrl = qdev_get_hotplug_handler(dev); 3534 if (hotplug_ctrl) { 3535 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); 3536 if (err) { 3537 goto out; 3538 } 3539 hotplug_handler_plug(hotplug_ctrl, dev, &err); 3540 } 3541 pdev->partially_hotplugged = false; 3542 3543 out: 3544 error_propagate(errp, err); 3545 return !err; 3546 } 3547 3548 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e) 3549 { 3550 bool should_be_hidden; 3551 Error *err = NULL; 3552 DeviceState *dev = failover_find_primary_device(n); 3553 3554 if (!dev) { 3555 return; 3556 } 3557 3558 should_be_hidden = qatomic_read(&n->failover_primary_hidden); 3559 3560 if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) { 3561 if (failover_unplug_primary(n, dev)) { 3562 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); 3563 qapi_event_send_unplug_primary(dev->id); 3564 qatomic_set(&n->failover_primary_hidden, true); 3565 } else { 3566 warn_report("couldn't unplug primary device"); 3567 } 3568 } else if (e->type == MIG_EVENT_PRECOPY_FAILED) { 3569 /* We already unplugged the device let's plug it back */ 3570 if (!failover_replug_primary(n, dev, &err)) { 3571 if (err) { 3572 error_report_err(err); 3573 } 3574 } 3575 } 3576 } 3577 3578 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier, 3579 MigrationEvent *e, Error **errp) 3580 { 3581 VirtIONet *n = container_of(notifier, VirtIONet, migration_state); 3582 virtio_net_handle_migration_primary(n, e); 3583 return 0; 3584 } 3585 3586 static bool failover_hide_primary_device(DeviceListener *listener, 3587 const QDict *device_opts, 3588 bool from_json, 3589 Error **errp) 3590 { 3591 VirtIONet *n = container_of(listener, VirtIONet, primary_listener); 3592 const char *standby_id; 3593 3594 if (!device_opts) { 3595 return false; 3596 } 3597 3598 if (!qdict_haskey(device_opts, "failover_pair_id")) { 3599 return false; 3600 } 3601 3602 if (!qdict_haskey(device_opts, "id")) { 3603 error_setg(errp, "Device with failover_pair_id needs to have id"); 3604 return false; 3605 } 3606 3607 standby_id = qdict_get_str(device_opts, "failover_pair_id"); 3608 if (g_strcmp0(standby_id, n->netclient_name) != 0) { 3609 return false; 3610 } 3611 3612 /* 3613 * The hide helper can be called several times for a given device. 3614 * Check there is only one primary for a virtio-net device but 3615 * don't duplicate the qdict several times if it's called for the same 3616 * device. 3617 */ 3618 if (n->primary_opts) { 3619 const char *old, *new; 3620 /* devices with failover_pair_id always have an id */ 3621 old = qdict_get_str(n->primary_opts, "id"); 3622 new = qdict_get_str(device_opts, "id"); 3623 if (strcmp(old, new) != 0) { 3624 error_setg(errp, "Cannot attach more than one primary device to " 3625 "'%s': '%s' and '%s'", n->netclient_name, old, new); 3626 return false; 3627 } 3628 } else { 3629 n->primary_opts = qdict_clone_shallow(device_opts); 3630 n->primary_opts_from_json = from_json; 3631 } 3632 3633 /* failover_primary_hidden is set during feature negotiation */ 3634 return qatomic_read(&n->failover_primary_hidden); 3635 } 3636 3637 static void virtio_net_device_realize(DeviceState *dev, Error **errp) 3638 { 3639 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3640 VirtIONet *n = VIRTIO_NET(dev); 3641 NetClientState *nc; 3642 int i; 3643 3644 if (n->net_conf.mtu) { 3645 n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 3646 } 3647 3648 if (n->net_conf.duplex_str) { 3649 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 3650 n->net_conf.duplex = DUPLEX_HALF; 3651 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 3652 n->net_conf.duplex = DUPLEX_FULL; 3653 } else { 3654 error_setg(errp, "'duplex' must be 'half' or 'full'"); 3655 return; 3656 } 3657 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3658 } else { 3659 n->net_conf.duplex = DUPLEX_UNKNOWN; 3660 } 3661 3662 if (n->net_conf.speed < SPEED_UNKNOWN) { 3663 error_setg(errp, "'speed' must be between 0 and INT_MAX"); 3664 return; 3665 } 3666 if (n->net_conf.speed >= 0) { 3667 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 3668 } 3669 3670 if (n->failover) { 3671 n->primary_listener.hide_device = failover_hide_primary_device; 3672 qatomic_set(&n->failover_primary_hidden, true); 3673 device_listener_register(&n->primary_listener); 3674 migration_add_notifier(&n->migration_state, 3675 virtio_net_migration_state_notifier); 3676 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); 3677 } 3678 3679 virtio_net_set_config_size(n, n->host_features); 3680 virtio_init(vdev, VIRTIO_ID_NET, n->config_size); 3681 3682 /* 3683 * We set a lower limit on RX queue size to what it always was. 3684 * Guests that want a smaller ring can always resize it without 3685 * help from us (using virtio 1 and up). 3686 */ 3687 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || 3688 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || 3689 !is_power_of_2(n->net_conf.rx_queue_size)) { 3690 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " 3691 "must be a power of 2 between %d and %d.", 3692 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, 3693 VIRTQUEUE_MAX_SIZE); 3694 virtio_cleanup(vdev); 3695 return; 3696 } 3697 3698 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || 3699 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || 3700 !is_power_of_2(n->net_conf.tx_queue_size)) { 3701 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " 3702 "must be a power of 2 between %d and %d", 3703 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, 3704 virtio_net_max_tx_queue_size(n)); 3705 virtio_cleanup(vdev); 3706 return; 3707 } 3708 3709 n->max_ncs = MAX(n->nic_conf.peers.queues, 1); 3710 3711 /* 3712 * Figure out the datapath queue pairs since the backend could 3713 * provide control queue via peers as well. 3714 */ 3715 if (n->nic_conf.peers.queues) { 3716 for (i = 0; i < n->max_ncs; i++) { 3717 if (n->nic_conf.peers.ncs[i]->is_datapath) { 3718 ++n->max_queue_pairs; 3719 } 3720 } 3721 } 3722 n->max_queue_pairs = MAX(n->max_queue_pairs, 1); 3723 3724 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { 3725 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " 3726 "must be a positive integer less than %d.", 3727 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); 3728 virtio_cleanup(vdev); 3729 return; 3730 } 3731 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs); 3732 n->curr_queue_pairs = 1; 3733 n->tx_timeout = n->net_conf.txtimer; 3734 3735 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") 3736 && strcmp(n->net_conf.tx, "bh")) { 3737 warn_report("virtio-net: " 3738 "Unknown option tx=%s, valid options: \"timer\" \"bh\"", 3739 n->net_conf.tx); 3740 error_printf("Defaulting to \"bh\""); 3741 } 3742 3743 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), 3744 n->net_conf.tx_queue_size); 3745 3746 for (i = 0; i < n->max_queue_pairs; i++) { 3747 virtio_net_add_queue(n, i); 3748 } 3749 3750 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl); 3751 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); 3752 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); 3753 n->status = VIRTIO_NET_S_LINK_UP; 3754 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(), 3755 QEMU_CLOCK_VIRTUAL, 3756 virtio_net_announce_timer, n); 3757 n->announce_timer.round = 0; 3758 3759 if (n->netclient_type) { 3760 /* 3761 * Happen when virtio_net_set_netclient_name has been called. 3762 */ 3763 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3764 n->netclient_type, n->netclient_name, 3765 &dev->mem_reentrancy_guard, n); 3766 } else { 3767 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf, 3768 object_get_typename(OBJECT(dev)), dev->id, 3769 &dev->mem_reentrancy_guard, n); 3770 } 3771 3772 for (i = 0; i < n->max_queue_pairs; i++) { 3773 n->nic->ncs[i].do_not_pad = true; 3774 } 3775 3776 peer_test_vnet_hdr(n); 3777 if (peer_has_vnet_hdr(n)) { 3778 n->host_hdr_len = sizeof(struct virtio_net_hdr); 3779 } else { 3780 n->host_hdr_len = 0; 3781 } 3782 3783 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a); 3784 3785 n->vqs[0].tx_waiting = 0; 3786 n->tx_burst = n->net_conf.txburst; 3787 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); 3788 n->promisc = 1; /* for compatibility */ 3789 3790 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); 3791 3792 n->vlans = g_malloc0(MAX_VLAN >> 3); 3793 3794 nc = qemu_get_queue(n->nic); 3795 nc->rxfilter_notify_enabled = 1; 3796 3797 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { 3798 struct virtio_net_config netcfg = {}; 3799 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); 3800 vhost_net_set_config(get_vhost_net(nc->peer), 3801 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); 3802 } 3803 QTAILQ_INIT(&n->rsc_chains); 3804 n->qdev = dev; 3805 3806 net_rx_pkt_init(&n->rx_pkt); 3807 3808 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3809 virtio_net_load_ebpf(n); 3810 } 3811 } 3812 3813 static void virtio_net_device_unrealize(DeviceState *dev) 3814 { 3815 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3816 VirtIONet *n = VIRTIO_NET(dev); 3817 int i, max_queue_pairs; 3818 3819 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { 3820 virtio_net_unload_ebpf(n); 3821 } 3822 3823 /* This will stop vhost backend if appropriate. */ 3824 virtio_net_set_status(vdev, 0); 3825 3826 g_free(n->netclient_name); 3827 n->netclient_name = NULL; 3828 g_free(n->netclient_type); 3829 n->netclient_type = NULL; 3830 3831 g_free(n->mac_table.macs); 3832 g_free(n->vlans); 3833 3834 if (n->failover) { 3835 qobject_unref(n->primary_opts); 3836 device_listener_unregister(&n->primary_listener); 3837 migration_remove_notifier(&n->migration_state); 3838 } else { 3839 assert(n->primary_opts == NULL); 3840 } 3841 3842 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; 3843 for (i = 0; i < max_queue_pairs; i++) { 3844 virtio_net_del_queue(n, i); 3845 } 3846 /* delete also control vq */ 3847 virtio_del_queue(vdev, max_queue_pairs * 2); 3848 qemu_announce_timer_del(&n->announce_timer, false); 3849 g_free(n->vqs); 3850 qemu_del_nic(n->nic); 3851 virtio_net_rsc_cleanup(n); 3852 g_free(n->rss_data.indirections_table); 3853 net_rx_pkt_uninit(n->rx_pkt); 3854 virtio_cleanup(vdev); 3855 } 3856 3857 static void virtio_net_instance_init(Object *obj) 3858 { 3859 VirtIONet *n = VIRTIO_NET(obj); 3860 3861 /* 3862 * The default config_size is sizeof(struct virtio_net_config). 3863 * Can be overridden with virtio_net_set_config_size. 3864 */ 3865 n->config_size = sizeof(struct virtio_net_config); 3866 device_add_bootindex_property(obj, &n->nic_conf.bootindex, 3867 "bootindex", "/ethernet-phy@0", 3868 DEVICE(n)); 3869 3870 ebpf_rss_init(&n->ebpf_rss); 3871 } 3872 3873 static int virtio_net_pre_save(void *opaque) 3874 { 3875 VirtIONet *n = opaque; 3876 3877 /* At this point, backend must be stopped, otherwise 3878 * it might keep writing to memory. */ 3879 assert(!n->vhost_started); 3880 3881 return 0; 3882 } 3883 3884 static bool primary_unplug_pending(void *opaque) 3885 { 3886 DeviceState *dev = opaque; 3887 DeviceState *primary; 3888 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 3889 VirtIONet *n = VIRTIO_NET(vdev); 3890 3891 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { 3892 return false; 3893 } 3894 primary = failover_find_primary_device(n); 3895 return primary ? primary->pending_deleted_event : false; 3896 } 3897 3898 static bool dev_unplug_pending(void *opaque) 3899 { 3900 DeviceState *dev = opaque; 3901 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); 3902 3903 return vdc->primary_unplug_pending(dev); 3904 } 3905 3906 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) 3907 { 3908 VirtIONet *n = VIRTIO_NET(vdev); 3909 NetClientState *nc = qemu_get_queue(n->nic); 3910 struct vhost_net *net = get_vhost_net(nc->peer); 3911 return &net->dev; 3912 } 3913 3914 static const VMStateDescription vmstate_virtio_net = { 3915 .name = "virtio-net", 3916 .minimum_version_id = VIRTIO_NET_VM_VERSION, 3917 .version_id = VIRTIO_NET_VM_VERSION, 3918 .fields = (const VMStateField[]) { 3919 VMSTATE_VIRTIO_DEVICE, 3920 VMSTATE_END_OF_LIST() 3921 }, 3922 .pre_save = virtio_net_pre_save, 3923 .dev_unplug_pending = dev_unplug_pending, 3924 }; 3925 3926 static Property virtio_net_properties[] = { 3927 DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 3928 VIRTIO_NET_F_CSUM, true), 3929 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 3930 VIRTIO_NET_F_GUEST_CSUM, true), 3931 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 3932 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 3933 VIRTIO_NET_F_GUEST_TSO4, true), 3934 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 3935 VIRTIO_NET_F_GUEST_TSO6, true), 3936 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 3937 VIRTIO_NET_F_GUEST_ECN, true), 3938 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 3939 VIRTIO_NET_F_GUEST_UFO, true), 3940 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 3941 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 3942 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features, 3943 VIRTIO_NET_F_HOST_TSO4, true), 3944 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 3945 VIRTIO_NET_F_HOST_TSO6, true), 3946 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 3947 VIRTIO_NET_F_HOST_ECN, true), 3948 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 3949 VIRTIO_NET_F_HOST_UFO, true), 3950 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 3951 VIRTIO_NET_F_MRG_RXBUF, true), 3952 DEFINE_PROP_BIT64("status", VirtIONet, host_features, 3953 VIRTIO_NET_F_STATUS, true), 3954 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 3955 VIRTIO_NET_F_CTRL_VQ, true), 3956 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 3957 VIRTIO_NET_F_CTRL_RX, true), 3958 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 3959 VIRTIO_NET_F_CTRL_VLAN, true), 3960 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 3961 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 3962 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 3963 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 3964 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 3965 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 3966 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 3967 DEFINE_PROP_BIT64("rss", VirtIONet, host_features, 3968 VIRTIO_NET_F_RSS, false), 3969 DEFINE_PROP_BIT64("hash", VirtIONet, host_features, 3970 VIRTIO_NET_F_HASH_REPORT, false), 3971 DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds, 3972 ebpf_rss_fds, qdev_prop_string, char*), 3973 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, 3974 VIRTIO_NET_F_RSC_EXT, false), 3975 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, 3976 VIRTIO_NET_RSC_DEFAULT_INTERVAL), 3977 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 3978 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 3979 TX_TIMER_INTERVAL), 3980 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), 3981 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), 3982 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 3983 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), 3984 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 3985 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), 3986 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 3987 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 3988 true), 3989 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 3990 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 3991 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), 3992 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features, 3993 VIRTIO_NET_F_GUEST_USO4, true), 3994 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features, 3995 VIRTIO_NET_F_GUEST_USO6, true), 3996 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, 3997 VIRTIO_NET_F_HOST_USO, true), 3998 DEFINE_PROP_END_OF_LIST(), 3999 }; 4000 4001 static void virtio_net_class_init(ObjectClass *klass, void *data) 4002 { 4003 DeviceClass *dc = DEVICE_CLASS(klass); 4004 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 4005 4006 device_class_set_props(dc, virtio_net_properties); 4007 dc->vmsd = &vmstate_virtio_net; 4008 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 4009 vdc->realize = virtio_net_device_realize; 4010 vdc->unrealize = virtio_net_device_unrealize; 4011 vdc->get_config = virtio_net_get_config; 4012 vdc->set_config = virtio_net_set_config; 4013 vdc->get_features = virtio_net_get_features; 4014 vdc->set_features = virtio_net_set_features; 4015 vdc->bad_features = virtio_net_bad_features; 4016 vdc->reset = virtio_net_reset; 4017 vdc->queue_reset = virtio_net_queue_reset; 4018 vdc->queue_enable = virtio_net_queue_enable; 4019 vdc->set_status = virtio_net_set_status; 4020 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; 4021 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; 4022 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); 4023 vdc->post_load = virtio_net_post_load_virtio; 4024 vdc->vmsd = &vmstate_virtio_net_device; 4025 vdc->primary_unplug_pending = primary_unplug_pending; 4026 vdc->get_vhost = virtio_net_get_vhost; 4027 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb; 4028 } 4029 4030 static const TypeInfo virtio_net_info = { 4031 .name = TYPE_VIRTIO_NET, 4032 .parent = TYPE_VIRTIO_DEVICE, 4033 .instance_size = sizeof(VirtIONet), 4034 .instance_init = virtio_net_instance_init, 4035 .class_init = virtio_net_class_init, 4036 }; 4037 4038 static void virtio_register_types(void) 4039 { 4040 type_register_static(&virtio_net_info); 4041 } 4042 4043 type_init(virtio_register_types) 4044