xref: /openbmc/qemu/hw/net/virtio-net.c (revision ea06a006)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
48 #define VIRTIO_NET_VM_VERSION    11
49 
50 #define MAC_TABLE_ENTRIES    64
51 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
52 
53 /* previously fixed value */
54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
56 
57 /* for now, only allow larger queues; with virtio-1, guest can downsize */
58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
60 
61 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
62 
63 #define VIRTIO_NET_TCP_FLAG         0x3F
64 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
65 
66 /* IPv4 max payload, 16 bits in the header */
67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
69 
70 /* header length value in ip header without option */
71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
72 
73 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
75 
76 /* Purge coalesced packets timer interval. This value affects performance
77    significantly and should be tuned carefully; '300000' (300us) is the
78    recommended value to pass the WHQL test, while '50000' can gain 2x netperf
79    throughput with tso/gso/gro 'off'. */
80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
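/*
 * Illustration (assuming the virtio-net-pci frontend and its "guest_rsc_ext"
 * and "rsc_interval" properties, defined later in this file): the interval
 * can be tuned from the command line, e.g.
 *   -device virtio-net-pci,guest_rsc_ext=on,rsc_interval=300000
 */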
81 
82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
83                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
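/*
 * Each entry maps a feature bit to the end offset of the last config-space
 * field that feature needs; the config size exposed to the guest is the
 * largest such offset among the negotiated features.
 */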
92 static const VirtIOFeature feature_sizes[] = {
93     {.flags = 1ULL << VIRTIO_NET_F_MAC,
94      .end = endof(struct virtio_net_config, mac)},
95     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
96      .end = endof(struct virtio_net_config, status)},
97     {.flags = 1ULL << VIRTIO_NET_F_MQ,
98      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
99     {.flags = 1ULL << VIRTIO_NET_F_MTU,
100      .end = endof(struct virtio_net_config, mtu)},
101     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
102      .end = endof(struct virtio_net_config, duplex)},
103     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
104      .end = endof(struct virtio_net_config, supported_hash_types)},
105     {}
106 };
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
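/*
 * RX and TX virtqueues of a pair are adjacent (RX at index 2 * q, TX at
 * index 2 * q + 1), so dividing the vq index by two yields the queue-pair
 * index.
 */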
115 static int vq2q(int queue_index)
116 {
117     return queue_index / 2;
118 }
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126     VirtIONet *n = VIRTIO_NET(vdev);
127     struct virtio_net_config netcfg;
128     NetClientState *nc = qemu_get_queue(n->nic);
129     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
130 
131     int ret = 0;
132     memset(&netcfg, 0, sizeof(struct virtio_net_config));
133     virtio_stw_p(vdev, &netcfg.status, n->status);
134     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
135     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
136     memcpy(netcfg.mac, n->mac, ETH_ALEN);
137     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
138     netcfg.duplex = n->net_conf.duplex;
139     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
140     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
141                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
142                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
143     virtio_stl_p(vdev, &netcfg.supported_hash_types,
144                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
145     memcpy(config, &netcfg, n->config_size);
146 
147     /*
148      * Is this VDPA? No peer means not VDPA: there's no way to
149      * disconnect/reconnect a VDPA peer.
150      */
151     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
152         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
153                                    n->config_size);
154         if (ret != -1) {
155             /*
156              * Some NIC/kernel combinations present 0 as the mac address.  As
157              * that is not a legal address, try to proceed with the
158              * address from the QEMU command line in the hope that the
159              * address has been configured correctly elsewhere - just not
160              * reported by the device.
161              */
162             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
163                 info_report("Zero hardware mac address detected. Ignoring.");
164                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
165             }
166             memcpy(config, &netcfg, n->config_size);
167         }
168     }
169 }
170 
171 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
172 {
173     VirtIONet *n = VIRTIO_NET(vdev);
174     struct virtio_net_config netcfg = {};
175     NetClientState *nc = qemu_get_queue(n->nic);
176 
177     memcpy(&netcfg, config, n->config_size);
178 
179     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
180         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
181         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
182         memcpy(n->mac, netcfg.mac, ETH_ALEN);
183         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
184     }
185 
186     /*
187      * Is this VDPA? No peer means not VDPA: there's no way to
188      * disconnect/reconnect a VDPA peer.
189      */
190     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
191         vhost_net_set_config(get_vhost_net(nc->peer),
192                              (uint8_t *)&netcfg, 0, n->config_size,
193                              VHOST_SET_CONFIG_TYPE_MASTER);
194     }
195 }
196 
197 static bool virtio_net_started(VirtIONet *n, uint8_t status)
198 {
199     VirtIODevice *vdev = VIRTIO_DEVICE(n);
200     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
201         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
202 }
203 
204 static void virtio_net_announce_notify(VirtIONet *net)
205 {
206     VirtIODevice *vdev = VIRTIO_DEVICE(net);
207     trace_virtio_net_announce_notify();
208 
209     net->status |= VIRTIO_NET_S_ANNOUNCE;
210     virtio_notify_config(vdev);
211 }
212 
213 static void virtio_net_announce_timer(void *opaque)
214 {
215     VirtIONet *n = opaque;
216     trace_virtio_net_announce_timer(n->announce_timer.round);
217 
218     n->announce_timer.round--;
219     virtio_net_announce_notify(n);
220 }
221 
222 static void virtio_net_announce(NetClientState *nc)
223 {
224     VirtIONet *n = qemu_get_nic_opaque(nc);
225     VirtIODevice *vdev = VIRTIO_DEVICE(n);
226 
227     /*
228      * Make sure the virtio migration announcement timer isn't running.
229      * If it is, let it trigger the announcement so that we do not cause
230      * confusion.
231      */
232     if (n->announce_timer.round) {
233         return;
234     }
235 
236     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
237         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
238         virtio_net_announce_notify(n);
239     }
240 }
241 
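/*
 * Start or stop the vhost backend so it tracks the combined device, link
 * and vm-running state.  If vhost cannot be used (pending vnet header
 * swaps, an unsupported MTU, or a start failure) we fall back to the
 * userspace virtio datapath.
 */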
242 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
243 {
244     VirtIODevice *vdev = VIRTIO_DEVICE(n);
245     NetClientState *nc = qemu_get_queue(n->nic);
246     int queues = n->multiqueue ? n->max_queues : 1;
247 
248     if (!get_vhost_net(nc->peer)) {
249         return;
250     }
251 
252     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
253         !!n->vhost_started) {
254         return;
255     }
256     if (!n->vhost_started) {
257         int r, i;
258 
259         if (n->needs_vnet_hdr_swap) {
260             error_report("backend does not support %s vnet headers; "
261                          "falling back on userspace virtio",
262                          virtio_is_big_endian(vdev) ? "BE" : "LE");
263             return;
264         }
265 
266         /* Any packets outstanding? Purge them to avoid touching rings
267          * when vhost is running.
268          */
269         for (i = 0; i < queues; i++) {
270             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
271 
272             /* Purge both directions: TX and RX. */
273             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
274             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
275         }
276 
277         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
278             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
279             if (r < 0) {
280                 error_report("%uBytes MTU not supported by the backend",
281                              n->net_conf.mtu);
282 
283                 return;
284             }
285         }
286 
287         n->vhost_started = 1;
288         r = vhost_net_start(vdev, n->nic->ncs, queues);
289         if (r < 0) {
290             error_report("unable to start vhost net: %d: "
291                          "falling back on userspace virtio", -r);
292             n->vhost_started = 0;
293         }
294     } else {
295         vhost_net_stop(vdev, n->nic->ncs, queues);
296         n->vhost_started = 0;
297     }
298 }
299 
300 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
301                                           NetClientState *peer,
302                                           bool enable)
303 {
304     if (virtio_is_big_endian(vdev)) {
305         return qemu_set_vnet_be(peer, enable);
306     } else {
307         return qemu_set_vnet_le(peer, enable);
308     }
309 }
310 
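/*
 * Returns true when the backend could not be switched to the requested
 * endianness and QEMU must swap vnet headers itself; a failure while
 * enabling rolls back the queues that were already switched.
 */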
311 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
312                                        int queues, bool enable)
313 {
314     int i;
315 
316     for (i = 0; i < queues; i++) {
317         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
318             enable) {
319             while (--i >= 0) {
320                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
321             }
322 
323             return true;
324         }
325     }
326 
327     return false;
328 }
329 
330 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
331 {
332     VirtIODevice *vdev = VIRTIO_DEVICE(n);
333     int queues = n->multiqueue ? n->max_queues : 1;
334 
335     if (virtio_net_started(n, status)) {
336         /* Before using the device, we tell the network backend about the
337          * endianness to use when parsing vnet headers. If the backend
338          * can't do it, we fall back on fixing the headers in the core
339          * virtio-net code.
340          */
341         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
342                                                             queues, true);
343     } else if (virtio_net_started(n, vdev->status)) {
344         /* After using the device, we need to reset the network backend to
345          * the default (guest native endianness), otherwise the guest may
346          * lose network connectivity if it is rebooted into a different
347          * endianness.
348          */
349         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
350     }
351 }
352 
353 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
354 {
355     unsigned int dropped = virtqueue_drop_all(vq);
356     if (dropped) {
357         virtio_notify(vdev, vq);
358     }
359 }
360 
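/*
 * Propagate a status change to each queue: flush pending packets for
 * queues that just started, and arm or cancel the TX timer / bottom half;
 * when the link went down, pending TX data is dropped so the rings stay
 * clean.
 */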
361 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
362 {
363     VirtIONet *n = VIRTIO_NET(vdev);
364     VirtIONetQueue *q;
365     int i;
366     uint8_t queue_status;
367 
368     virtio_net_vnet_endian_status(n, status);
369     virtio_net_vhost_status(n, status);
370 
371     for (i = 0; i < n->max_queues; i++) {
372         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
373         bool queue_started;
374         q = &n->vqs[i];
375 
376         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
377             queue_status = 0;
378         } else {
379             queue_status = status;
380         }
381         queue_started =
382             virtio_net_started(n, queue_status) && !n->vhost_started;
383 
384         if (queue_started) {
385             qemu_flush_queued_packets(ncs);
386         }
387 
388         if (!q->tx_waiting) {
389             continue;
390         }
391 
392         if (queue_started) {
393             if (q->tx_timer) {
394                 timer_mod(q->tx_timer,
395                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
396             } else {
397                 qemu_bh_schedule(q->tx_bh);
398             }
399         } else {
400             if (q->tx_timer) {
401                 timer_del(q->tx_timer);
402             } else {
403                 qemu_bh_cancel(q->tx_bh);
404             }
405             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
406                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
407                 vdev->vm_running) {
408                 /* If tx is waiting, we likely have some packets in the
409                  * tx queue and notifications disabled. */
410                 q->tx_waiting = 0;
411                 virtio_queue_set_notification(q->tx_vq, 1);
412                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
413             }
414         }
415     }
416 }
417 
418 static void virtio_net_set_link_status(NetClientState *nc)
419 {
420     VirtIONet *n = qemu_get_nic_opaque(nc);
421     VirtIODevice *vdev = VIRTIO_DEVICE(n);
422     uint16_t old_status = n->status;
423 
424     if (nc->link_down)
425         n->status &= ~VIRTIO_NET_S_LINK_UP;
426     else
427         n->status |= VIRTIO_NET_S_LINK_UP;
428 
429     if (n->status != old_status)
430         virtio_notify_config(vdev);
431 
432     virtio_net_set_status(vdev, vdev->status);
433 }
434 
435 static void rxfilter_notify(NetClientState *nc)
436 {
437     VirtIONet *n = qemu_get_nic_opaque(nc);
438 
439     if (nc->rxfilter_notify_enabled) {
440         char *path = object_get_canonical_path(OBJECT(n->qdev));
441         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
442                                               n->netclient_name, path);
443         g_free(path);
444 
445         /* disable event notification to avoid event flooding */
446         nc->rxfilter_notify_enabled = 0;
447     }
448 }
449 
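/*
 * The VLAN filter is a MAX_VLAN-bit bitmap stored in 32-bit words; turn
 * the set bits into an intList of VLAN ids for the QMP rx-filter query.
 */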
450 static intList *get_vlan_table(VirtIONet *n)
451 {
452     intList *list;
453     int i, j;
454 
455     list = NULL;
456     for (i = 0; i < MAX_VLAN >> 5; i++) {
457         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
458             if (n->vlans[i] & (1U << j)) {
459                 QAPI_LIST_PREPEND(list, (i << 5) + j);
460             }
461         }
462     }
463 
464     return list;
465 }
466 
467 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
468 {
469     VirtIONet *n = qemu_get_nic_opaque(nc);
470     VirtIODevice *vdev = VIRTIO_DEVICE(n);
471     RxFilterInfo *info;
472     strList *str_list;
473     int i;
474 
475     info = g_malloc0(sizeof(*info));
476     info->name = g_strdup(nc->name);
477     info->promiscuous = n->promisc;
478 
479     if (n->nouni) {
480         info->unicast = RX_STATE_NONE;
481     } else if (n->alluni) {
482         info->unicast = RX_STATE_ALL;
483     } else {
484         info->unicast = RX_STATE_NORMAL;
485     }
486 
487     if (n->nomulti) {
488         info->multicast = RX_STATE_NONE;
489     } else if (n->allmulti) {
490         info->multicast = RX_STATE_ALL;
491     } else {
492         info->multicast = RX_STATE_NORMAL;
493     }
494 
495     info->broadcast_allowed = !n->nobcast;
496     info->multicast_overflow = n->mac_table.multi_overflow;
497     info->unicast_overflow = n->mac_table.uni_overflow;
498 
499     info->main_mac = qemu_mac_strdup_printf(n->mac);
500 
501     str_list = NULL;
502     for (i = 0; i < n->mac_table.first_multi; i++) {
503         QAPI_LIST_PREPEND(str_list,
504                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
505     }
506     info->unicast_table = str_list;
507 
508     str_list = NULL;
509     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
510         QAPI_LIST_PREPEND(str_list,
511                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
512     }
513     info->multicast_table = str_list;
514     info->vlan_table = get_vlan_table(n);
515 
516     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
517         info->vlan = RX_STATE_ALL;
518     } else if (!info->vlan_table) {
519         info->vlan = RX_STATE_NONE;
520     } else {
521         info->vlan = RX_STATE_NORMAL;
522     }
523 
524     /* enable event notification after query */
525     nc->rxfilter_notify_enabled = 1;
526 
527     return info;
528 }
529 
530 static void virtio_net_reset(VirtIODevice *vdev)
531 {
532     VirtIONet *n = VIRTIO_NET(vdev);
533     int i;
534 
535     /* Reset back to compatibility mode */
536     n->promisc = 1;
537     n->allmulti = 0;
538     n->alluni = 0;
539     n->nomulti = 0;
540     n->nouni = 0;
541     n->nobcast = 0;
542     /* multiqueue is disabled by default */
543     n->curr_queues = 1;
544     timer_del(n->announce_timer.tm);
545     n->announce_timer.round = 0;
546     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
547 
548     /* Flush any MAC and VLAN filter table state */
549     n->mac_table.in_use = 0;
550     n->mac_table.first_multi = 0;
551     n->mac_table.multi_overflow = 0;
552     n->mac_table.uni_overflow = 0;
553     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
554     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
555     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
556     memset(n->vlans, 0, MAX_VLAN >> 3);
557 
558     /* Flush any async TX */
559     for (i = 0; i < n->max_queues; i++) {
560         NetClientState *nc = qemu_get_subqueue(n->nic, i);
561 
562         if (nc->peer) {
563             qemu_flush_or_purge_queued_packets(nc->peer, true);
564             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
565         }
566     }
567 }
568 
569 static void peer_test_vnet_hdr(VirtIONet *n)
570 {
571     NetClientState *nc = qemu_get_queue(n->nic);
572     if (!nc->peer) {
573         return;
574     }
575 
576     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
577 }
578 
579 static int peer_has_vnet_hdr(VirtIONet *n)
580 {
581     return n->has_vnet_hdr;
582 }
583 
584 static int peer_has_ufo(VirtIONet *n)
585 {
586     if (!peer_has_vnet_hdr(n))
587         return 0;
588 
589     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
590 
591     return n->has_ufo;
592 }
593 
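/*
 * Choose the guest-visible header layout: virtio 1.0 always uses the
 * mergeable-buffer header (extended with a hash when hash reporting is
 * negotiated), while legacy devices use it only when mergeable RX buffers
 * were negotiated.  The backend header length follows whenever the peer
 * supports it.
 */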
594 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
595                                        int version_1, int hash_report)
596 {
597     int i;
598     NetClientState *nc;
599 
600     n->mergeable_rx_bufs = mergeable_rx_bufs;
601 
602     if (version_1) {
603         n->guest_hdr_len = hash_report ?
604             sizeof(struct virtio_net_hdr_v1_hash) :
605             sizeof(struct virtio_net_hdr_mrg_rxbuf);
606         n->rss_data.populate_hash = !!hash_report;
607     } else {
608         n->guest_hdr_len = n->mergeable_rx_bufs ?
609             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
610             sizeof(struct virtio_net_hdr);
611     }
612 
613     for (i = 0; i < n->max_queues; i++) {
614         nc = qemu_get_subqueue(n->nic, i);
615 
616         if (peer_has_vnet_hdr(n) &&
617             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
618             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
619             n->host_hdr_len = n->guest_hdr_len;
620         }
621     }
622 }
623 
624 static int virtio_net_max_tx_queue_size(VirtIONet *n)
625 {
626     NetClientState *peer = n->nic_conf.peers.ncs[0];
627 
628     /*
629      * Backends other than vhost-user don't support a larger TX queue size.
630      */
631     if (!peer) {
632         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
633     }
634 
635     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
636         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
637     }
638 
639     return VIRTQUEUE_MAX_SIZE;
640 }
641 
642 static int peer_attach(VirtIONet *n, int index)
643 {
644     NetClientState *nc = qemu_get_subqueue(n->nic, index);
645 
646     if (!nc->peer) {
647         return 0;
648     }
649 
650     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
651         vhost_set_vring_enable(nc->peer, 1);
652     }
653 
654     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
655         return 0;
656     }
657 
658     if (n->max_queues == 1) {
659         return 0;
660     }
661 
662     return tap_enable(nc->peer);
663 }
664 
665 static int peer_detach(VirtIONet *n, int index)
666 {
667     NetClientState *nc = qemu_get_subqueue(n->nic, index);
668 
669     if (!nc->peer) {
670         return 0;
671     }
672 
673     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
674         vhost_set_vring_enable(nc->peer, 0);
675     }
676 
677     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
678         return 0;
679     }
680 
681     return tap_disable(nc->peer);
682 }
683 
684 static void virtio_net_set_queues(VirtIONet *n)
685 {
686     int i;
687     int r;
688 
689     if (n->nic->peer_deleted) {
690         return;
691     }
692 
693     for (i = 0; i < n->max_queues; i++) {
694         if (i < n->curr_queues) {
695             r = peer_attach(n, i);
696             assert(!r);
697         } else {
698             r = peer_detach(n, i);
699             assert(!r);
700         }
701     }
702 }
703 
704 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
705 
706 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
707                                         Error **errp)
708 {
709     VirtIONet *n = VIRTIO_NET(vdev);
710     NetClientState *nc = qemu_get_queue(n->nic);
711 
712     /* First, sync all features that virtio-net could possibly support */
713     features |= n->host_features;
714 
715     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
716 
717     if (!peer_has_vnet_hdr(n)) {
718         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
719         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
720         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
721         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
722 
723         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
724         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
725         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
726         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
727 
728         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
729     }
730 
731     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
732         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
733         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
734     }
735 
736     if (!get_vhost_net(nc->peer)) {
737         return features;
738     }
739 
740     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
741         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
742     }
743     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
744     vdev->backend_features = features;
745 
746     if (n->mtu_bypass_backend &&
747             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
748         features |= (1ULL << VIRTIO_NET_F_MTU);
749     }
750 
751     return features;
752 }
753 
754 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
755 {
756     uint64_t features = 0;
757 
758     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
759      * but also these: */
760     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
761     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
762     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
763     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
764     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
765 
766     return features;
767 }
768 
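/* Push the currently negotiated guest offload bits down to the peer. */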
769 static void virtio_net_apply_guest_offloads(VirtIONet *n)
770 {
771     qemu_set_offload(qemu_get_queue(n->nic)->peer,
772             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
773             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
774             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
775             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
776             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
777 }
778 
779 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
780 {
781     static const uint64_t guest_offloads_mask =
782         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
783         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
784         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
785         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
786         (1ULL << VIRTIO_NET_F_GUEST_UFO);
787 
788     return guest_offloads_mask & features;
789 }
790 
791 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
792 {
793     VirtIODevice *vdev = VIRTIO_DEVICE(n);
794     return virtio_net_guest_offloads_by_features(vdev->guest_features);
795 }
796 
797 typedef struct {
798     VirtIONet *n;
799     DeviceState *dev;
800 } FailoverDevice;
801 
802 /**
803  * qbus_walk_children() callback that looks for the failover primary device
804  *
805  * @dev: the device currently being visited
806  * @opaque: the FailoverDevice to fill in when a matching primary is found
807  * Returns: 1 to stop the walk once the primary device is found, 0 otherwise
808  */
809 static int failover_set_primary(DeviceState *dev, void *opaque)
810 {
811     FailoverDevice *fdev = opaque;
812     PCIDevice *pci_dev = (PCIDevice *)
813         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
814 
815     if (!pci_dev) {
816         return 0;
817     }
818 
819     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
820         fdev->dev = dev;
821         return 1;
822     }
823 
824     return 0;
825 }
826 
827 /**
828  * Find the primary device for this failover virtio-net
829  *
830  * @n: VirtIONet device
831  * Returns: the primary DeviceState, or NULL if it is not found
832  */
833 static DeviceState *failover_find_primary_device(VirtIONet *n)
834 {
835     FailoverDevice fdev = {
836         .n = n,
837     };
838 
839     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
840                        NULL, NULL, &fdev);
841     return fdev.dev;
842 }
843 
844 static void failover_add_primary(VirtIONet *n, Error **errp)
845 {
846     Error *err = NULL;
847     DeviceState *dev = failover_find_primary_device(n);
848 
849     if (dev) {
850         return;
851     }
852 
853     if (!n->primary_opts) {
854         error_setg(errp, "Primary device not found");
855         error_append_hint(errp, "Virtio-net failover will not work. Make "
856                           "sure primary device has parameter"
857                           " failover_pair_id=%s\n", n->netclient_name);
858         return;
859     }
860 
861     dev = qdev_device_add_from_qdict(n->primary_opts,
862                                      n->primary_opts_from_json,
863                                      &err);
864     if (err) {
865         qobject_unref(n->primary_opts);
866         n->primary_opts = NULL;
867     } else {
868         object_unref(OBJECT(dev));
869     }
870     error_propagate(errp, err);
871 }
872 
873 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
874 {
875     VirtIONet *n = VIRTIO_NET(vdev);
876     Error *err = NULL;
877     int i;
878 
879     if (n->mtu_bypass_backend &&
880             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
881         features &= ~(1ULL << VIRTIO_NET_F_MTU);
882     }
883 
884     virtio_net_set_multiqueue(n,
885                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
886                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
887 
888     virtio_net_set_mrg_rx_bufs(n,
889                                virtio_has_feature(features,
890                                                   VIRTIO_NET_F_MRG_RXBUF),
891                                virtio_has_feature(features,
892                                                   VIRTIO_F_VERSION_1),
893                                virtio_has_feature(features,
894                                                   VIRTIO_NET_F_HASH_REPORT));
895 
896     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
897         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
898     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
899         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
900     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
901 
902     if (n->has_vnet_hdr) {
903         n->curr_guest_offloads =
904             virtio_net_guest_offloads_by_features(features);
905         virtio_net_apply_guest_offloads(n);
906     }
907 
908     for (i = 0; i < n->max_queues; i++) {
909         NetClientState *nc = qemu_get_subqueue(n->nic, i);
910 
911         if (!get_vhost_net(nc->peer)) {
912             continue;
913         }
914         vhost_net_ack_features(get_vhost_net(nc->peer), features);
915     }
916 
917     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
918         memset(n->vlans, 0, MAX_VLAN >> 3);
919     } else {
920         memset(n->vlans, 0xff, MAX_VLAN >> 3);
921     }
922 
923     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
924         qapi_event_send_failover_negotiated(n->netclient_name);
925         qatomic_set(&n->failover_primary_hidden, false);
926         failover_add_primary(n, &err);
927         if (err) {
928             warn_report_err(err);
929         }
930     }
931 }
932 
933 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
934                                      struct iovec *iov, unsigned int iov_cnt)
935 {
936     uint8_t on;
937     size_t s;
938     NetClientState *nc = qemu_get_queue(n->nic);
939 
940     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
941     if (s != sizeof(on)) {
942         return VIRTIO_NET_ERR;
943     }
944 
945     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
946         n->promisc = on;
947     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
948         n->allmulti = on;
949     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
950         n->alluni = on;
951     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
952         n->nomulti = on;
953     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
954         n->nouni = on;
955     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
956         n->nobcast = on;
957     } else {
958         return VIRTIO_NET_ERR;
959     }
960 
961     rxfilter_notify(nc);
962 
963     return VIRTIO_NET_OK;
964 }
965 
966 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
967                                      struct iovec *iov, unsigned int iov_cnt)
968 {
969     VirtIODevice *vdev = VIRTIO_DEVICE(n);
970     uint64_t offloads;
971     size_t s;
972 
973     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
974         return VIRTIO_NET_ERR;
975     }
976 
977     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
978     if (s != sizeof(offloads)) {
979         return VIRTIO_NET_ERR;
980     }
981 
982     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
983         uint64_t supported_offloads;
984 
985         offloads = virtio_ldq_p(vdev, &offloads);
986 
987         if (!n->has_vnet_hdr) {
988             return VIRTIO_NET_ERR;
989         }
990 
991         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
992             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
993         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
994             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
995         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
996 
997         supported_offloads = virtio_net_supported_guest_offloads(n);
998         if (offloads & ~supported_offloads) {
999             return VIRTIO_NET_ERR;
1000         }
1001 
1002         n->curr_guest_offloads = offloads;
1003         virtio_net_apply_guest_offloads(n);
1004 
1005         return VIRTIO_NET_OK;
1006     } else {
1007         return VIRTIO_NET_ERR;
1008     }
1009 }
1010 
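/*
 * A MAC_TABLE_SET payload carries two virtio_net_ctrl_mac groups, unicast
 * entries first and then multicast; a group exceeding MAC_TABLE_ENTRIES
 * only sets the corresponding overflow flag, degrading the filter to
 * accept-all for that address class.
 */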
1011 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1012                                  struct iovec *iov, unsigned int iov_cnt)
1013 {
1014     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1015     struct virtio_net_ctrl_mac mac_data;
1016     size_t s;
1017     NetClientState *nc = qemu_get_queue(n->nic);
1018 
1019     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1020         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1021             return VIRTIO_NET_ERR;
1022         }
1023         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1024         assert(s == sizeof(n->mac));
1025         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1026         rxfilter_notify(nc);
1027 
1028         return VIRTIO_NET_OK;
1029     }
1030 
1031     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1032         return VIRTIO_NET_ERR;
1033     }
1034 
1035     int in_use = 0;
1036     int first_multi = 0;
1037     uint8_t uni_overflow = 0;
1038     uint8_t multi_overflow = 0;
1039     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1040 
1041     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1042                    sizeof(mac_data.entries));
1043     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1044     if (s != sizeof(mac_data.entries)) {
1045         goto error;
1046     }
1047     iov_discard_front(&iov, &iov_cnt, s);
1048 
1049     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1050         goto error;
1051     }
1052 
1053     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1054         s = iov_to_buf(iov, iov_cnt, 0, macs,
1055                        mac_data.entries * ETH_ALEN);
1056         if (s != mac_data.entries * ETH_ALEN) {
1057             goto error;
1058         }
1059         in_use += mac_data.entries;
1060     } else {
1061         uni_overflow = 1;
1062     }
1063 
1064     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1065 
1066     first_multi = in_use;
1067 
1068     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1069                    sizeof(mac_data.entries));
1070     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1071     if (s != sizeof(mac_data.entries)) {
1072         goto error;
1073     }
1074 
1075     iov_discard_front(&iov, &iov_cnt, s);
1076 
1077     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1078         goto error;
1079     }
1080 
1081     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1082         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1083                        mac_data.entries * ETH_ALEN);
1084         if (s != mac_data.entries * ETH_ALEN) {
1085             goto error;
1086         }
1087         in_use += mac_data.entries;
1088     } else {
1089         multi_overflow = 1;
1090     }
1091 
1092     n->mac_table.in_use = in_use;
1093     n->mac_table.first_multi = first_multi;
1094     n->mac_table.uni_overflow = uni_overflow;
1095     n->mac_table.multi_overflow = multi_overflow;
1096     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1097     g_free(macs);
1098     rxfilter_notify(nc);
1099 
1100     return VIRTIO_NET_OK;
1101 
1102 error:
1103     g_free(macs);
1104     return VIRTIO_NET_ERR;
1105 }
1106 
1107 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1108                                         struct iovec *iov, unsigned int iov_cnt)
1109 {
1110     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1111     uint16_t vid;
1112     size_t s;
1113     NetClientState *nc = qemu_get_queue(n->nic);
1114 
1115     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1116     vid = virtio_lduw_p(vdev, &vid);
1117     if (s != sizeof(vid)) {
1118         return VIRTIO_NET_ERR;
1119     }
1120 
1121     if (vid >= MAX_VLAN)
1122         return VIRTIO_NET_ERR;
1123 
1124     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1125         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1126     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1127         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1128     else
1129         return VIRTIO_NET_ERR;
1130 
1131     rxfilter_notify(nc);
1132 
1133     return VIRTIO_NET_OK;
1134 }
1135 
1136 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1137                                       struct iovec *iov, unsigned int iov_cnt)
1138 {
1139     trace_virtio_net_handle_announce(n->announce_timer.round);
1140     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1141         n->status & VIRTIO_NET_S_ANNOUNCE) {
1142         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1143         if (n->announce_timer.round) {
1144             qemu_announce_timer_step(&n->announce_timer);
1145         }
1146         return VIRTIO_NET_OK;
1147     } else {
1148         return VIRTIO_NET_ERR;
1149     }
1150 }
1151 
1152 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1153 
1154 static void virtio_net_disable_rss(VirtIONet *n)
1155 {
1156     if (n->rss_data.enabled) {
1157         trace_virtio_net_rss_disable();
1158     }
1159     n->rss_data.enabled = false;
1160 
1161     virtio_net_detach_epbf_rss(n);
1162 }
1163 
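/*
 * Attach the RSS steering eBPF program (or detach it when prog_fd is -1)
 * on the peer of queue 0; returns false if the backend has no
 * set_steering_ebpf hook.
 */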
1164 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1165 {
1166     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1167     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1168         return false;
1169     }
1170 
1171     return nc->info->set_steering_ebpf(nc, prog_fd);
1172 }
1173 
1174 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1175                                    struct EBPFRSSConfig *config)
1176 {
1177     config->redirect = data->redirect;
1178     config->populate_hash = data->populate_hash;
1179     config->hash_types = data->hash_types;
1180     config->indirections_len = data->indirections_len;
1181     config->default_queue = data->default_queue;
1182 }
1183 
1184 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1185 {
1186     struct EBPFRSSConfig config = {};
1187 
1188     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1189         return false;
1190     }
1191 
1192     rss_data_to_rss_config(&n->rss_data, &config);
1193 
1194     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1195                           n->rss_data.indirections_table, n->rss_data.key)) {
1196         return false;
1197     }
1198 
1199     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1200         return false;
1201     }
1202 
1203     return true;
1204 }
1205 
1206 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1207 {
1208     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1209 }
1210 
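/*
 * Attaching fd -1 doubles as a capability probe: it is a harmless detach,
 * but it fails when the backend lacks a set_steering_ebpf hook, in which
 * case loading the RSS program would be pointless.
 */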
1211 static bool virtio_net_load_ebpf(VirtIONet *n)
1212 {
1213     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1214         /* backend doesn't support steering eBPF */
1215         return false;
1216     }
1217 
1218     return ebpf_rss_load(&n->ebpf_rss);
1219 }
1220 
1221 static void virtio_net_unload_ebpf(VirtIONet *n)
1222 {
1223     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1224     ebpf_rss_unload(&n->ebpf_rss);
1225 }
1226 
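/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or _HASH_CONFIG command.
 * The payload follows struct virtio_net_rss_config: the fixed fields up to
 * the indirection table, the variable-length table itself, then max_tx_vq
 * and the hash key length ('temp' below), and finally the key.  Returns
 * the number of queue pairs to use, or 0 on error.
 */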
1227 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1228                                       struct iovec *iov,
1229                                       unsigned int iov_cnt,
1230                                       bool do_rss)
1231 {
1232     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1233     struct virtio_net_rss_config cfg;
1234     size_t s, offset = 0, size_get;
1235     uint16_t queues, i;
1236     struct {
1237         uint16_t us;
1238         uint8_t b;
1239     } QEMU_PACKED temp;
1240     const char *err_msg = "";
1241     uint32_t err_value = 0;
1242 
1243     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1244         err_msg = "RSS is not negotiated";
1245         goto error;
1246     }
1247     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1248         err_msg = "Hash report is not negotiated";
1249         goto error;
1250     }
1251     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1252     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1253     if (s != size_get) {
1254         err_msg = "Short command buffer";
1255         err_value = (uint32_t)s;
1256         goto error;
1257     }
1258     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1259     n->rss_data.indirections_len =
1260         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1261     n->rss_data.indirections_len++;
1262     if (!do_rss) {
1263         n->rss_data.indirections_len = 1;
1264     }
1265     if (!is_power_of_2(n->rss_data.indirections_len)) {
1266         err_msg = "Invalid size of indirection table";
1267         err_value = n->rss_data.indirections_len;
1268         goto error;
1269     }
1270     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1271         err_msg = "Too large indirection table";
1272         err_value = n->rss_data.indirections_len;
1273         goto error;
1274     }
1275     n->rss_data.default_queue = do_rss ?
1276         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1277     if (n->rss_data.default_queue >= n->max_queues) {
1278         err_msg = "Invalid default queue";
1279         err_value = n->rss_data.default_queue;
1280         goto error;
1281     }
1282     offset += size_get;
1283     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1284     g_free(n->rss_data.indirections_table);
1285     n->rss_data.indirections_table = g_malloc(size_get);
1286     if (!n->rss_data.indirections_table) {
1287         err_msg = "Can't allocate indirections table";
1288         err_value = n->rss_data.indirections_len;
1289         goto error;
1290     }
1291     s = iov_to_buf(iov, iov_cnt, offset,
1292                    n->rss_data.indirections_table, size_get);
1293     if (s != size_get) {
1294         err_msg = "Short indirection table buffer";
1295         err_value = (uint32_t)s;
1296         goto error;
1297     }
1298     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1299         uint16_t val = n->rss_data.indirections_table[i];
1300         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1301     }
1302     offset += size_get;
1303     size_get = sizeof(temp);
1304     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1305     if (s != size_get) {
1306         err_msg = "Can't get queues";
1307         err_value = (uint32_t)s;
1308         goto error;
1309     }
1310     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1311     if (queues == 0 || queues > n->max_queues) {
1312         err_msg = "Invalid number of queues";
1313         err_value = queues;
1314         goto error;
1315     }
1316     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1317         err_msg = "Invalid key size";
1318         err_value = temp.b;
1319         goto error;
1320     }
1321     if (!temp.b && n->rss_data.hash_types) {
1322         err_msg = "No key provided";
1323         err_value = 0;
1324         goto error;
1325     }
1326     if (!temp.b && !n->rss_data.hash_types) {
1327         virtio_net_disable_rss(n);
1328         return queues;
1329     }
1330     offset += size_get;
1331     size_get = temp.b;
1332     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1333     if (s != size_get) {
1334         err_msg = "Can't get key buffer";
1335         err_value = (uint32_t)s;
1336         goto error;
1337     }
1338     n->rss_data.enabled = true;
1339 
1340     if (!n->rss_data.populate_hash) {
1341         if (!virtio_net_attach_epbf_rss(n)) {
1342             /* EBPF must be loaded for vhost */
1343             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1344                 warn_report("Can't load eBPF RSS for vhost");
1345                 goto error;
1346             }
1347             /* fall back to software RSS */
1348             warn_report("Can't load eBPF RSS - falling back to software RSS");
1349             n->rss_data.enabled_software_rss = true;
1350         }
1351     } else {
1352         /* use software RSS for hash population and detach
1353          * eBPF if it was loaded before */
1354         virtio_net_detach_epbf_rss(n);
1355         n->rss_data.enabled_software_rss = true;
1356     }
1357 
1358     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1359                                 n->rss_data.indirections_len,
1360                                 temp.b);
1361     return queues;
1362 error:
1363     trace_virtio_net_rss_error(err_msg, err_value);
1364     virtio_net_disable_rss(n);
1365     return 0;
1366 }
1367 
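/*
 * VIRTIO_NET_CTRL_MQ dispatcher: RSS and hash-report configs go through
 * virtio_net_handle_rss(), while VQ_PAIRS_SET validates and applies a new
 * queue-pair count.
 */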
1368 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1369                                 struct iovec *iov, unsigned int iov_cnt)
1370 {
1371     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1372     uint16_t queues;
1373 
1374     virtio_net_disable_rss(n);
1375     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1376         queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1377         return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1378     }
1379     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1380         queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1381     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1382         struct virtio_net_ctrl_mq mq;
1383         size_t s;
1384         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1385             return VIRTIO_NET_ERR;
1386         }
1387         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1388         if (s != sizeof(mq)) {
1389             return VIRTIO_NET_ERR;
1390         }
1391         queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1392 
1393     } else {
1394         return VIRTIO_NET_ERR;
1395     }
1396 
1397     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1398         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1399         queues > n->max_queues ||
1400         !n->multiqueue) {
1401         return VIRTIO_NET_ERR;
1402     }
1403 
1404     n->curr_queues = queues;
1405     /* stop the backend before changing the number of queues to avoid handling a
1406      * disabled queue */
1407     virtio_net_set_status(vdev, vdev->status);
1408     virtio_net_set_queues(n);
1409 
1410     return VIRTIO_NET_OK;
1411 }
1412 
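/*
 * Control virtqueue handler: each element carries a virtio_net_ctrl_hdr
 * plus command-specific data in the out sg-list and expects a one-byte
 * ack written back through the in sg-list.
 */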
1413 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1414 {
1415     VirtIONet *n = VIRTIO_NET(vdev);
1416     struct virtio_net_ctrl_hdr ctrl;
1417     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1418     VirtQueueElement *elem;
1419     size_t s;
1420     struct iovec *iov, *iov2;
1421     unsigned int iov_cnt;
1422 
1423     for (;;) {
1424         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1425         if (!elem) {
1426             break;
1427         }
1428         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1429             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1430             virtio_error(vdev, "virtio-net ctrl missing headers");
1431             virtqueue_detach_element(vq, elem, 0);
1432             g_free(elem);
1433             break;
1434         }
1435 
1436         iov_cnt = elem->out_num;
1437         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1438         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1439         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1440         if (s != sizeof(ctrl)) {
1441             status = VIRTIO_NET_ERR;
1442         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1443             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1444         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1445             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1446         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1447             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1448         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1449             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1450         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1451             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1452         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1453             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1454         }
1455 
1456         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1457         assert(s == sizeof(status));
1458 
1459         virtqueue_push(vq, elem, sizeof(status));
1460         virtio_notify(vdev, vq);
1461         g_free(iov2);
1462         g_free(elem);
1463     }
1464 }
1465 
1466 /* RX */
1467 
1468 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1469 {
1470     VirtIONet *n = VIRTIO_NET(vdev);
1471     int queue_index = vq2q(virtio_get_queue_index(vq));
1472 
1473     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1474 }
1475 
1476 static bool virtio_net_can_receive(NetClientState *nc)
1477 {
1478     VirtIONet *n = qemu_get_nic_opaque(nc);
1479     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1480     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1481 
1482     if (!vdev->vm_running) {
1483         return false;
1484     }
1485 
1486     if (nc->queue_index >= n->curr_queues) {
1487         return false;
1488     }
1489 
1490     if (!virtio_queue_ready(q->rx_vq) ||
1491         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1492         return false;
1493     }
1494 
1495     return true;
1496 }
1497 
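/*
 * Returns 1 when the RX virtqueue can hold bufsize bytes, leaving guest
 * notifications disabled; otherwise re-enables notifications and returns 0
 * so the guest kicks us once buffers become available.
 */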
1498 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1499 {
1500     VirtIONet *n = q->n;
1501     if (virtio_queue_empty(q->rx_vq) ||
1502         (n->mergeable_rx_bufs &&
1503          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1504         virtio_queue_set_notification(q->rx_vq, 1);
1505 
1506         /* To avoid a race condition where the guest has made some buffers
1507          * available after the above check but before notification was
1508          * enabled, check for available buffers again.
1509          */
1510         if (virtio_queue_empty(q->rx_vq) ||
1511             (n->mergeable_rx_bufs &&
1512              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1513             return 0;
1514         }
1515     }
1516 
1517     virtio_queue_set_notification(q->rx_vq, 0);
1518     return 1;
1519 }
1520 
1521 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1522 {
1523     virtio_tswap16s(vdev, &hdr->hdr_len);
1524     virtio_tswap16s(vdev, &hdr->gso_size);
1525     virtio_tswap16s(vdev, &hdr->csum_start);
1526     virtio_tswap16s(vdev, &hdr->csum_offset);
1527 }
1528 
1529 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1530  * it never finds out that the packets don't have valid checksums.  This
1531  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1532  * fix this with Xen but it hasn't appeared in an upstream release of
1533  * dhclient yet.
1534  *
1535  * To avoid breaking existing guests, we catch udp packets and add
1536  * checksums.  This is terrible but it's better than hacking the guest
1537  * kernels.
1538  *
1539  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1540  * we should provide a mechanism to disable it to avoid polluting the host
1541  * cache.
1542  */
1543 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1544                                         uint8_t *buf, size_t size)
1545 {
1546     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1547         (size > 27 && size < 1500) && /* normal sized MTU */
1548         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1549         (buf[23] == 17) && /* ip.protocol == UDP */
1550         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1551         net_checksum_calculate(buf, size, CSUM_UDP);
1552         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1553     }
1554 }
1555 
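/*
 * Write the virtio header into the guest buffer: pass through (and, if
 * needed, byte-swap) the backend's vnet header, or synthesize an empty one
 * when the backend has none.
 */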
1556 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1557                            const void *buf, size_t size)
1558 {
1559     if (n->has_vnet_hdr) {
1560         /* FIXME this cast is evil */
1561         void *wbuf = (void *)buf;
1562         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1563                                     size - n->host_hdr_len);
1564 
1565         if (n->needs_vnet_hdr_swap) {
1566             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1567         }
1568         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1569     } else {
1570         struct virtio_net_hdr hdr = {
1571             .flags = 0,
1572             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1573         };
1574         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1575     }
1576 }
1577 
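/*
 * Decide whether a packet is delivered to the guest: returns 1 to accept
 * and 0 to drop, based on promiscuous mode, the VLAN bitmap and the
 * unicast/multicast MAC table state.
 */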
1578 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1579 {
1580     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1581     static const uint8_t vlan[] = {0x81, 0x00};
1582     uint8_t *ptr = (uint8_t *)buf;
1583     int i;
1584 
1585     if (n->promisc)
1586         return 1;
1587 
1588     ptr += n->host_hdr_len;
1589 
1590     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1591         int vid = lduw_be_p(ptr + 14) & 0xfff;
1592         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1593             return 0;
1594     }
1595 
1596     if (ptr[0] & 1) { /* multicast */
1597         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1598             return !n->nobcast;
1599         } else if (n->nomulti) {
1600             return 0;
1601         } else if (n->allmulti || n->mac_table.multi_overflow) {
1602             return 1;
1603         }
1604 
1605         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1606             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1607                 return 1;
1608             }
1609         }
1610     } else { /* unicast */
1611         if (n->nouni) {
1612             return 0;
1613         } else if (n->alluni || n->mac_table.uni_overflow) {
1614             return 1;
1615         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1616             return 1;
1617         }
1618 
1619         for (i = 0; i < n->mac_table.first_multi; i++) {
1620             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1621                 return 1;
1622             }
1623         }
1624     }
1625 
1626     return 0;
1627 }
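/*
 * Note on the loops above: mac_table.macs is a flat array of 6-byte
 * entries in which unicast addresses occupy slots [0, first_multi)
 * and multicast addresses occupy [first_multi, in_use), so each
 * branch only scans its own half of the table.
 */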
1628 
1629 static uint8_t virtio_net_get_hash_type(bool isip4,
1630                                         bool isip6,
1631                                         bool isudp,
1632                                         bool istcp,
1633                                         uint32_t types)
1634 {
1635     if (isip4) {
1636         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1637             return NetPktRssIpV4Tcp;
1638         }
1639         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1640             return NetPktRssIpV4Udp;
1641         }
1642         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1643             return NetPktRssIpV4;
1644         }
1645     } else if (isip6) {
1646         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1647                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1648 
1649         if (istcp && (types & mask)) {
1650             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1651                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1652         }
1653         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1654         if (isudp && (types & mask)) {
1655             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1656                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1657         }
1658         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1659         if (types & mask) {
1660             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1661                 NetPktRssIpV6Ex : NetPktRssIpV6;
1662         }
1663     }
1664     return 0xff;
1665 }
1666 
1667 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1668                                    uint32_t hash)
1669 {
1670     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1671     hdr->hash_value = hash;
1672     hdr->hash_report = report;
1673 }
1674 
1675 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1676                                   size_t size)
1677 {
1678     VirtIONet *n = qemu_get_nic_opaque(nc);
1679     unsigned int index = nc->queue_index, new_index = index;
1680     struct NetRxPkt *pkt = n->rx_pkt;
1681     uint8_t net_hash_type;
1682     uint32_t hash;
1683     bool isip4, isip6, isudp, istcp;
1684     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1685         VIRTIO_NET_HASH_REPORT_IPv4,
1686         VIRTIO_NET_HASH_REPORT_TCPv4,
1687         VIRTIO_NET_HASH_REPORT_TCPv6,
1688         VIRTIO_NET_HASH_REPORT_IPv6,
1689         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1690         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1691         VIRTIO_NET_HASH_REPORT_UDPv4,
1692         VIRTIO_NET_HASH_REPORT_UDPv6,
1693         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1694     };
1695 
1696     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1697                              size - n->host_hdr_len);
1698     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1699     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1700         istcp = isudp = false;
1701     }
1702     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1703         istcp = isudp = false;
1704     }
1705     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1706                                              n->rss_data.hash_types);
1707     if (net_hash_type > NetPktRssIpV6UdpEx) {
1708         if (n->rss_data.populate_hash) {
1709             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1710         }
1711         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1712     }
1713 
1714     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1715 
1716     if (n->rss_data.populate_hash) {
1717         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1718     }
1719 
1720     if (n->rss_data.redirect) {
1721         new_index = hash & (n->rss_data.indirections_len - 1);
1722         new_index = n->rss_data.indirections_table[new_index];
1723     }
1724 
1725     return (index == new_index) ? -1 : new_index;
1726 }
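/*
 * A worked example of the redirect step above: with an indirection
 * table of 128 entries and hash == 0x00563412, the mask keeps the low
 * 7 bits (0x12), and the queue number stored in slot 0x12 of the
 * indirection table becomes the new rx queue.  The mask is only valid
 * because indirections_len is constrained to a power of two when the
 * guest programs RSS elsewhere in this file.
 */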
1727 
1728 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1729                                       size_t size, bool no_rss)
1730 {
1731     VirtIONet *n = qemu_get_nic_opaque(nc);
1732     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1733     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1734     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1735     size_t lens[VIRTQUEUE_MAX_SIZE];
1736     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1737     struct virtio_net_hdr_mrg_rxbuf mhdr;
1738     unsigned mhdr_cnt = 0;
1739     size_t offset, i, guest_offset, j;
1740     ssize_t err;
1741 
1742     if (!virtio_net_can_receive(nc)) {
1743         return -1;
1744     }
1745 
1746     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1747         int index = virtio_net_process_rss(nc, buf, size);
1748         if (index >= 0) {
1749             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1750             return virtio_net_receive_rcu(nc2, buf, size, true);
1751         }
1752     }
1753 
1754     /* hdr_len refers to the header we supply to the guest */
1755     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1756         return 0;
1757     }
1758 
1759     if (!receive_filter(n, buf, size))
1760         return size;
1761 
1762     offset = i = 0;
1763 
1764     while (offset < size) {
1765         VirtQueueElement *elem;
1766         int len, total;
1767         const struct iovec *sg;
1768 
1769         total = 0;
1770 
1771         if (i == VIRTQUEUE_MAX_SIZE) {
1772             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1773             err = size;
1774             goto err;
1775         }
1776 
1777         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1778         if (!elem) {
1779             if (i) {
1780                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1781                              "i %zd mergeable %d offset %zd, size %zd, "
1782                              "guest hdr len %zd, host hdr len %zd "
1783                              "guest features 0x%" PRIx64,
1784                              i, n->mergeable_rx_bufs, offset, size,
1785                              n->guest_hdr_len, n->host_hdr_len,
1786                              vdev->guest_features);
1787             }
1788             err = -1;
1789             goto err;
1790         }
1791 
1792         if (elem->in_num < 1) {
1793             virtio_error(vdev,
1794                          "virtio-net receive queue contains no in buffers");
1795             virtqueue_detach_element(q->rx_vq, elem, 0);
1796             g_free(elem);
1797             err = -1;
1798             goto err;
1799         }
1800 
1801         sg = elem->in_sg;
1802         if (i == 0) {
1803             assert(offset == 0);
1804             if (n->mergeable_rx_bufs) {
1805                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1806                                     sg, elem->in_num,
1807                                     offsetof(typeof(mhdr), num_buffers),
1808                                     sizeof(mhdr.num_buffers));
1809             }
1810 
1811             receive_header(n, sg, elem->in_num, buf, size);
1812             if (n->rss_data.populate_hash) {
1813                 offset = sizeof(mhdr);
1814                 iov_from_buf(sg, elem->in_num, offset,
1815                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1816             }
1817             offset = n->host_hdr_len;
1818             total += n->guest_hdr_len;
1819             guest_offset = n->guest_hdr_len;
1820         } else {
1821             guest_offset = 0;
1822         }
1823 
1824         /* copy in packet.  ugh */
1825         len = iov_from_buf(sg, elem->in_num, guest_offset,
1826                            buf + offset, size - offset);
1827         total += len;
1828         offset += len;
1829         /* If buffers can't be merged, at this point we
1830          * must have consumed the complete packet.
1831          * Otherwise, drop it. */
1832         if (!n->mergeable_rx_bufs && offset < size) {
1833             virtqueue_unpop(q->rx_vq, elem, total);
1834             g_free(elem);
1835             err = size;
1836             goto err;
1837         }
1838 
1839         elems[i] = elem;
1840         lens[i] = total;
1841         i++;
1842     }
1843 
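    /*
     * Only now do we know how many descriptor chains the packet used,
     * so patch num_buffers back into the header area that was recorded
     * with iov_copy() on the first iteration.
     */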
1844     if (mhdr_cnt) {
1845         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1846         iov_from_buf(mhdr_sg, mhdr_cnt,
1847                      0,
1848                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1849     }
1850 
1851     for (j = 0; j < i; j++) {
1852         /* signal other side */
1853         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1854         g_free(elems[j]);
1855     }
1856 
1857     virtqueue_flush(q->rx_vq, i);
1858     virtio_notify(vdev, q->rx_vq);
1859 
1860     return size;
1861 
1862 err:
1863     for (j = 0; j < i; j++) {
1864         g_free(elems[j]);
1865     }
1866 
1867     return err;
1868 }
1869 
1870 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1871                                   size_t size)
1872 {
1873     RCU_READ_LOCK_GUARD();
1874 
1875     return virtio_net_receive_rcu(nc, buf, size, false);
1876 }
1877 
1878 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1879                                          const uint8_t *buf,
1880                                          VirtioNetRscUnit *unit)
1881 {
1882     uint16_t ip_hdrlen;
1883     struct ip_header *ip;
1884 
1885     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1886                               + sizeof(struct eth_header));
1887     unit->ip = (void *)ip;
1888     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1889     unit->ip_plen = &ip->ip_len;
1890     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1891     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1892     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1893 }
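/*
 * Note on the tcp_hdrlen computation above (and in the IPv6 variant
 * below): th_offset_flags keeps the TCP data offset in its top 4 bits,
 * counted in 32-bit words, so ((x & 0xF000) >> 12) * 4 collapses into
 * a single right shift by 10.
 */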
1894 
1895 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1896                                          const uint8_t *buf,
1897                                          VirtioNetRscUnit *unit)
1898 {
1899     struct ip6_header *ip6;
1900 
1901     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1902                                  + sizeof(struct eth_header));
1903     unit->ip = ip6;
1904     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1905     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1906                                         + sizeof(struct ip6_header));
1907     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1908 
1909     /* There is a difference between the payload length in IPv4 and IPv6:
1910        the IP header is excluded from the length in IPv6 */
1911     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1912 }
1913 
1914 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1915                                        VirtioNetRscSeg *seg)
1916 {
1917     int ret;
1918     struct virtio_net_hdr_v1 *h;
1919 
1920     h = (struct virtio_net_hdr_v1 *)seg->buf;
1921     h->flags = 0;
1922     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1923 
1924     if (seg->is_coalesced) {
1925         h->rsc.segments = seg->packets;
1926         h->rsc.dup_acks = seg->dup_ack;
1927         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1928         if (chain->proto == ETH_P_IP) {
1929             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1930         } else {
1931             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1932         }
1933     }
1934 
1935     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1936     QTAILQ_REMOVE(&chain->buffers, seg, next);
1937     g_free(seg->buf);
1938     g_free(seg);
1939 
1940     return ret;
1941 }
1942 
1943 static void virtio_net_rsc_purge(void *opq)
1944 {
1945     VirtioNetRscSeg *seg, *rn;
1946     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1947 
1948     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1949         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1950             chain->stat.purge_failed++;
1951             continue;
1952         }
1953     }
1954 
1955     chain->stat.timer++;
1956     if (!QTAILQ_EMPTY(&chain->buffers)) {
1957         timer_mod(chain->drain_timer,
1958               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1959     }
1960 }
1961 
1962 static void virtio_net_rsc_cleanup(VirtIONet *n)
1963 {
1964     VirtioNetRscChain *chain, *rn_chain;
1965     VirtioNetRscSeg *seg, *rn_seg;
1966 
1967     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1968         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1969             QTAILQ_REMOVE(&chain->buffers, seg, next);
1970             g_free(seg->buf);
1971             g_free(seg);
1972         }
1973 
1974         timer_free(chain->drain_timer);
1975         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1976         g_free(chain);
1977     }
1978 }
1979 
1980 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1981                                      NetClientState *nc,
1982                                      const uint8_t *buf, size_t size)
1983 {
1984     uint16_t hdr_len;
1985     VirtioNetRscSeg *seg;
1986 
1987     hdr_len = chain->n->guest_hdr_len;
1988     seg = g_malloc(sizeof(VirtioNetRscSeg));
1989     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1990         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1991     memcpy(seg->buf, buf, size);
1992     seg->size = size;
1993     seg->packets = 1;
1994     seg->dup_ack = 0;
1995     seg->is_coalesced = 0;
1996     seg->nc = nc;
1997 
1998     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1999     chain->stat.cache++;
2000 
2001     switch (chain->proto) {
2002     case ETH_P_IP:
2003         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2004         break;
2005     case ETH_P_IPV6:
2006         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2007         break;
2008     default:
2009         g_assert_not_reached();
2010     }
2011 }
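/*
 * The cached buffer above is sized for the worst case a chain can
 * coalesce into: headers plus VIRTIO_NET_MAX_TCP_PAYLOAD of data.
 * Since virtio_net_rsc_coalesce_data() finalizes a segment before it
 * would exceed chain->max_payload, the memmove() appends there cannot
 * overflow this allocation.
 */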
2012 
2013 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2014                                          VirtioNetRscSeg *seg,
2015                                          const uint8_t *buf,
2016                                          struct tcp_header *n_tcp,
2017                                          struct tcp_header *o_tcp)
2018 {
2019     uint32_t nack, oack;
2020     uint16_t nwin, owin;
2021 
2022     nack = htonl(n_tcp->th_ack);
2023     nwin = htons(n_tcp->th_win);
2024     oack = htonl(o_tcp->th_ack);
2025     owin = htons(o_tcp->th_win);
2026 
2027     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2028         chain->stat.ack_out_of_win++;
2029         return RSC_FINAL;
2030     } else if (nack == oack) {
2031         /* duplicated ack or window probe */
2032         if (nwin == owin) {
2033             /* duplicated ack; bump the dup-ack count (WHQL test expects up to 1) */
2034             chain->stat.dup_ack++;
2035             return RSC_FINAL;
2036         } else {
2037             /* Coalesce window update */
2038             o_tcp->th_win = n_tcp->th_win;
2039             chain->stat.win_update++;
2040             return RSC_COALESCE;
2041         }
2042     } else {
2043         /* pure ack, go to 'C', finalize */
2044         chain->stat.pure_ack++;
2045         return RSC_FINAL;
2046     }
2047 }
2048 
2049 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2050                                             VirtioNetRscSeg *seg,
2051                                             const uint8_t *buf,
2052                                             VirtioNetRscUnit *n_unit)
2053 {
2054     void *data;
2055     uint16_t o_ip_len;
2056     uint32_t nseq, oseq;
2057     VirtioNetRscUnit *o_unit;
2058 
2059     o_unit = &seg->unit;
2060     o_ip_len = htons(*o_unit->ip_plen);
2061     nseq = htonl(n_unit->tcp->th_seq);
2062     oseq = htonl(o_unit->tcp->th_seq);
2063 
2064     /* out of order or retransmitted. */
2065     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2066         chain->stat.data_out_of_win++;
2067         return RSC_FINAL;
2068     }
2069 
2070     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2071     if (nseq == oseq) {
2072         if ((o_unit->payload == 0) && n_unit->payload) {
2073             /* From no payload to payload: the normal case, not a dup ack etc. */
2074             chain->stat.data_after_pure_ack++;
2075             goto coalesce;
2076         } else {
2077             return virtio_net_rsc_handle_ack(chain, seg, buf,
2078                                              n_unit->tcp, o_unit->tcp);
2079         }
2080     } else if ((nseq - oseq) != o_unit->payload) {
2081         /* Not a consistent packet, out of order */
2082         chain->stat.data_out_of_order++;
2083         return RSC_FINAL;
2084     } else {
2085 coalesce:
2086         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2087             chain->stat.over_size++;
2088             return RSC_FINAL;
2089         }
2090 
2091         /* The data is in sequence.  The payload length field differs between
2092            v4 and v6, so use its value to update and record the new data len */
2093         o_unit->payload += n_unit->payload; /* update new data len */
2094 
2095         /* update field in ip header */
2096         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2097 
2098         /* Carry over the latest 'PUSH' flag: the WHQL test guide says 'PUSH'
2099            can be coalesced for a Windows guest, while this may change the
2100            behavior for a Linux guest (only if it uses the RSC feature). */
2101         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2102 
2103         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2104         o_unit->tcp->th_win = n_unit->tcp->th_win;
2105 
2106         memmove(seg->buf + seg->size, data, n_unit->payload);
2107         seg->size += n_unit->payload;
2108         seg->packets++;
2109         chain->stat.coalesced++;
2110         return RSC_COALESCE;
2111     }
2112 }
2113 
2114 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2115                                         VirtioNetRscSeg *seg,
2116                                         const uint8_t *buf, size_t size,
2117                                         VirtioNetRscUnit *unit)
2118 {
2119     struct ip_header *ip1, *ip2;
2120 
2121     ip1 = (struct ip_header *)(unit->ip);
2122     ip2 = (struct ip_header *)(seg->unit.ip);
2123     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2124         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2125         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2126         chain->stat.no_match++;
2127         return RSC_NO_MATCH;
2128     }
2129 
2130     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2131 }
2132 
2133 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2134                                         VirtioNetRscSeg *seg,
2135                                         const uint8_t *buf, size_t size,
2136                                         VirtioNetRscUnit *unit)
2137 {
2138     struct ip6_header *ip1, *ip2;
2139 
2140     ip1 = (struct ip6_header *)(unit->ip);
2141     ip2 = (struct ip6_header *)(seg->unit.ip);
2142     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2143         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2144         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2145         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2146             chain->stat.no_match++;
2147             return RSC_NO_MATCH;
2148     }
2149 
2150     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2151 }
2152 
2153 /* Packets with 'SYN' should bypass; packets with other control flags should
2154  * be sent only after draining the flow, to prevent out-of-order delivery */
2155 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2156                                          struct tcp_header *tcp)
2157 {
2158     uint16_t tcp_hdr;
2159     uint16_t tcp_flag;
2160 
2161     tcp_flag = htons(tcp->th_offset_flags);
2162     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2163     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2164     if (tcp_flag & TH_SYN) {
2165         chain->stat.tcp_syn++;
2166         return RSC_BYPASS;
2167     }
2168 
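    /*
     * tcp_flag was masked with VIRTIO_NET_TCP_FLAG (0x3F) above, which
     * already clears the two high flag bits (TH_ECE and TH_CWR), so the
     * test below effectively drains only on FIN, URG and RST; ECN-marked
     * flows are bypassed earlier at the IP layer in the sanity checks.
     */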
2169     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2170         chain->stat.tcp_ctrl_drain++;
2171         return RSC_FINAL;
2172     }
2173 
2174     if (tcp_hdr > sizeof(struct tcp_header)) {
2175         chain->stat.tcp_all_opt++;
2176         return RSC_FINAL;
2177     }
2178 
2179     return RSC_CANDIDATE;
2180 }
2181 
2182 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2183                                          NetClientState *nc,
2184                                          const uint8_t *buf, size_t size,
2185                                          VirtioNetRscUnit *unit)
2186 {
2187     int ret;
2188     VirtioNetRscSeg *seg, *nseg;
2189 
2190     if (QTAILQ_EMPTY(&chain->buffers)) {
2191         chain->stat.empty_cache++;
2192         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2193         timer_mod(chain->drain_timer,
2194               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2195         return size;
2196     }
2197 
2198     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2199         if (chain->proto == ETH_P_IP) {
2200             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2201         } else {
2202             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2203         }
2204 
2205         if (ret == RSC_FINAL) {
2206             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2207                 /* Send failed */
2208                 chain->stat.final_failed++;
2209                 return 0;
2210             }
2211 
2212             /* Send current packet */
2213             return virtio_net_do_receive(nc, buf, size);
2214         } else if (ret == RSC_NO_MATCH) {
2215             continue;
2216         } else {
2217             /* Coalesced; mark the flag so the checksum can be recalculated for IPv4 */
2218             seg->is_coalesced = 1;
2219             return size;
2220         }
2221     }
2222 
2223     chain->stat.no_match_cache++;
2224     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2225     return size;
2226 }
2227 
2228 /* Drain a connection's cached data; this avoids out-of-order segments */
2229 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2230                                         NetClientState *nc,
2231                                         const uint8_t *buf, size_t size,
2232                                         uint16_t ip_start, uint16_t ip_size,
2233                                         uint16_t tcp_port)
2234 {
2235     VirtioNetRscSeg *seg, *nseg;
2236     uint32_t ppair1, ppair2;
2237 
2238     ppair1 = *(uint32_t *)(buf + tcp_port);
2239     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2240         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2241         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2242             || (ppair1 != ppair2)) {
2243             continue;
2244         }
2245         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2246             chain->stat.drain_failed++;
2247         }
2248 
2249         break;
2250     }
2251 
2252     return virtio_net_do_receive(nc, buf, size);
2253 }
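/*
 * For the IPv4 caller, ip_start points at the saddr/daddr pair 12 bytes
 * into the IP header, ip_size is 8 (two IPv4 addresses) and tcp_port
 * points at the 4 bytes holding the TCP source and destination ports,
 * so one memcmp plus one 32-bit compare matches the full 4-tuple of the
 * flow.  The IPv6 caller does the same with 32 bytes of addresses.
 */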
2254 
2255 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2256                                             struct ip_header *ip,
2257                                             const uint8_t *buf, size_t size)
2258 {
2259     uint16_t ip_len;
2260 
2261     /* Not an ipv4 packet */
2262     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2263         chain->stat.ip_option++;
2264         return RSC_BYPASS;
2265     }
2266 
2267     /* Don't handle packets with ip option */
2268     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2269         chain->stat.ip_option++;
2270         return RSC_BYPASS;
2271     }
2272 
2273     if (ip->ip_p != IPPROTO_TCP) {
2274         chain->stat.bypass_not_tcp++;
2275         return RSC_BYPASS;
2276     }
2277 
2278     /* Don't handle packets with ip fragment */
2279     if (!(htons(ip->ip_off) & IP_DF)) {
2280         chain->stat.ip_frag++;
2281         return RSC_BYPASS;
2282     }
2283 
2284     /* Don't handle packets with ecn flag */
2285     if (IPTOS_ECN(ip->ip_tos)) {
2286         chain->stat.ip_ecn++;
2287         return RSC_BYPASS;
2288     }
2289 
2290     ip_len = htons(ip->ip_len);
2291     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2292         || ip_len > (size - chain->n->guest_hdr_len -
2293                      sizeof(struct eth_header))) {
2294         chain->stat.ip_hacked++;
2295         return RSC_BYPASS;
2296     }
2297 
2298     return RSC_CANDIDATE;
2299 }
2300 
2301 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2302                                       NetClientState *nc,
2303                                       const uint8_t *buf, size_t size)
2304 {
2305     int32_t ret;
2306     uint16_t hdr_len;
2307     VirtioNetRscUnit unit;
2308 
2309     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2310 
2311     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2312         + sizeof(struct tcp_header))) {
2313         chain->stat.bypass_not_tcp++;
2314         return virtio_net_do_receive(nc, buf, size);
2315     }
2316 
2317     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2318     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2319         != RSC_CANDIDATE) {
2320         return virtio_net_do_receive(nc, buf, size);
2321     }
2322 
2323     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2324     if (ret == RSC_BYPASS) {
2325         return virtio_net_do_receive(nc, buf, size);
2326     } else if (ret == RSC_FINAL) {
2327         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2328                 ((hdr_len + sizeof(struct eth_header)) + 12),
2329                 VIRTIO_NET_IP4_ADDR_SIZE,
2330                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2331     }
2332 
2333     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2334 }
2335 
2336 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2337                                             struct ip6_header *ip6,
2338                                             const uint8_t *buf, size_t size)
2339 {
2340     uint16_t ip_len;
2341 
2342     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2343         != IP_HEADER_VERSION_6) {
2344         return RSC_BYPASS;
2345     }
2346 
2347     /* Both options and protocol are checked by this test */
2348     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2349         chain->stat.bypass_not_tcp++;
2350         return RSC_BYPASS;
2351     }
2352 
2353     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2354     if (ip_len < sizeof(struct tcp_header) ||
2355         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2356                   - sizeof(struct ip6_header))) {
2357         chain->stat.ip_hacked++;
2358         return RSC_BYPASS;
2359     }
2360 
2361     /* Don't handle packets with ecn flag */
2362     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2363         chain->stat.ip_ecn++;
2364         return RSC_BYPASS;
2365     }
2366 
2367     return RSC_CANDIDATE;
2368 }
2369 
2370 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2371                                       const uint8_t *buf, size_t size)
2372 {
2373     int32_t ret;
2374     uint16_t hdr_len;
2375     VirtioNetRscChain *chain;
2376     VirtioNetRscUnit unit;
2377 
2378     chain = (VirtioNetRscChain *)opq;
2379     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2380 
2381     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2382         + sizeof(struct tcp_header))) {
2383         return virtio_net_do_receive(nc, buf, size);
2384     }
2385 
2386     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2387     if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2388         != RSC_CANDIDATE) {
2389         return virtio_net_do_receive(nc, buf, size);
2390     }
2391 
2392     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2393     if (ret == RSC_BYPASS) {
2394         return virtio_net_do_receive(nc, buf, size);
2395     } else if (ret == RSC_FINAL) {
2396         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2397                 ((hdr_len + sizeof(struct eth_header)) + 8),
2398                 VIRTIO_NET_IP6_ADDR_SIZE,
2399                 hdr_len + sizeof(struct eth_header)
2400                 + sizeof(struct ip6_header));
2401     }
2402 
2403     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2404 }
2405 
2406 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2407                                                       NetClientState *nc,
2408                                                       uint16_t proto)
2409 {
2410     VirtioNetRscChain *chain;
2411 
2412     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2413         return NULL;
2414     }
2415 
2416     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2417         if (chain->proto == proto) {
2418             return chain;
2419         }
2420     }
2421 
2422     chain = g_malloc(sizeof(*chain));
2423     chain->n = n;
2424     chain->proto = proto;
2425     if (proto == (uint16_t)ETH_P_IP) {
2426         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2427         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2428     } else {
2429         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2430         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2431     }
2432     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2433                                       virtio_net_rsc_purge, chain);
2434     memset(&chain->stat, 0, sizeof(chain->stat));
2435 
2436     QTAILQ_INIT(&chain->buffers);
2437     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2438 
2439     return chain;
2440 }
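/*
 * There is at most one chain per protocol (one for IPv4, one for IPv6),
 * created lazily on the first matching packet and torn down in
 * virtio_net_rsc_cleanup().
 */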
2441 
2442 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2443                                       const uint8_t *buf,
2444                                       size_t size)
2445 {
2446     uint16_t proto;
2447     VirtioNetRscChain *chain;
2448     struct eth_header *eth;
2449     VirtIONet *n;
2450 
2451     n = qemu_get_nic_opaque(nc);
2452     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2453         return virtio_net_do_receive(nc, buf, size);
2454     }
2455 
2456     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2457     proto = htons(eth->h_proto);
2458 
2459     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2460     if (chain) {
2461         chain->stat.received++;
2462         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2463             return virtio_net_rsc_receive4(chain, nc, buf, size);
2464         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2465             return virtio_net_rsc_receive6(chain, nc, buf, size);
2466         }
2467     }
2468     return virtio_net_do_receive(nc, buf, size);
2469 }
2470 
2471 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2472                                   size_t size)
2473 {
2474     VirtIONet *n = qemu_get_nic_opaque(nc);
2475     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2476         return virtio_net_rsc_receive(nc, buf, size);
2477     } else {
2478         return virtio_net_do_receive(nc, buf, size);
2479     }
2480 }
2481 
2482 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2483 
2484 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2485 {
2486     VirtIONet *n = qemu_get_nic_opaque(nc);
2487     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2488     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2489 
2490     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2491     virtio_notify(vdev, q->tx_vq);
2492 
2493     g_free(q->async_tx.elem);
2494     q->async_tx.elem = NULL;
2495 
2496     virtio_queue_set_notification(q->tx_vq, 1);
2497     virtio_net_flush_tx(q);
2498 }
2499 
2500 /* TX */
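/*
 * Flush up to tx_burst packets from the TX virtqueue.  Returns the
 * number of packets flushed, -EBUSY if a transmit went asynchronous
 * (virtio_net_tx_complete() restarts the flush when it lands), or
 * -EINVAL if the device has been marked broken.
 */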
2501 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2502 {
2503     VirtIONet *n = q->n;
2504     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2505     VirtQueueElement *elem;
2506     int32_t num_packets = 0;
2507     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2508     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2509         return num_packets;
2510     }
2511 
2512     if (q->async_tx.elem) {
2513         virtio_queue_set_notification(q->tx_vq, 0);
2514         return num_packets;
2515     }
2516 
2517     for (;;) {
2518         ssize_t ret;
2519         unsigned int out_num;
2520         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2521         struct virtio_net_hdr_mrg_rxbuf mhdr;
2522 
2523         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2524         if (!elem) {
2525             break;
2526         }
2527 
2528         out_num = elem->out_num;
2529         out_sg = elem->out_sg;
2530         if (out_num < 1) {
2531             virtio_error(vdev, "virtio-net header not in first element");
2532             virtqueue_detach_element(q->tx_vq, elem, 0);
2533             g_free(elem);
2534             return -EINVAL;
2535         }
2536 
2537         if (n->has_vnet_hdr) {
2538             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2539                 n->guest_hdr_len) {
2540                 virtio_error(vdev, "virtio-net header incorrect");
2541                 virtqueue_detach_element(q->tx_vq, elem, 0);
2542                 g_free(elem);
2543                 return -EINVAL;
2544             }
2545             if (n->needs_vnet_hdr_swap) {
2546                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2547                 sg2[0].iov_base = &mhdr;
2548                 sg2[0].iov_len = n->guest_hdr_len;
2549                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2550                                    out_sg, out_num,
2551                                    n->guest_hdr_len, -1);
2552                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2553                     goto drop;
2554                 }
2555                 out_num += 1;
2556                 out_sg = sg2;
2557             }
2558         }
2559         /*
2560          * If host wants to see the guest header as is, we can
2561          * pass it on unchanged. Otherwise, copy just the parts
2562          * that host is interested in.
2563          */
2564         assert(n->host_hdr_len <= n->guest_hdr_len);
2565         if (n->host_hdr_len != n->guest_hdr_len) {
2566             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2567                                        out_sg, out_num,
2568                                        0, n->host_hdr_len);
2569             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2570                              out_sg, out_num,
2571                              n->guest_hdr_len, -1);
2572             out_num = sg_num;
2573             out_sg = sg;
2574         }
2575 
2576         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2577                                       out_sg, out_num, virtio_net_tx_complete);
2578         if (ret == 0) {
2579             virtio_queue_set_notification(q->tx_vq, 0);
2580             q->async_tx.elem = elem;
2581             return -EBUSY;
2582         }
2583 
2584 drop:
2585         virtqueue_push(q->tx_vq, elem, 0);
2586         virtio_notify(vdev, q->tx_vq);
2587         g_free(elem);
2588 
2589         if (++num_packets >= n->tx_burst) {
2590             break;
2591         }
2592     }
2593     return num_packets;
2594 }
2595 
2596 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2597 {
2598     VirtIONet *n = VIRTIO_NET(vdev);
2599     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2600 
2601     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2602         virtio_net_drop_tx_queue_data(vdev, vq);
2603         return;
2604     }
2605 
2606     /* This happens when device was stopped but VCPU wasn't. */
2607     if (!vdev->vm_running) {
2608         q->tx_waiting = 1;
2609         return;
2610     }
2611 
2612     if (q->tx_waiting) {
2613         virtio_queue_set_notification(vq, 1);
2614         timer_del(q->tx_timer);
2615         q->tx_waiting = 0;
2616         if (virtio_net_flush_tx(q) == -EINVAL) {
2617             return;
2618         }
2619     } else {
2620         timer_mod(q->tx_timer,
2621                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2622         q->tx_waiting = 1;
2623         virtio_queue_set_notification(vq, 0);
2624     }
2625 }
2626 
2627 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2628 {
2629     VirtIONet *n = VIRTIO_NET(vdev);
2630     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2631 
2632     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2633         virtio_net_drop_tx_queue_data(vdev, vq);
2634         return;
2635     }
2636 
2637     if (unlikely(q->tx_waiting)) {
2638         return;
2639     }
2640     q->tx_waiting = 1;
2641     /* This happens when device was stopped but VCPU wasn't. */
2642     if (!vdev->vm_running) {
2643         return;
2644     }
2645     virtio_queue_set_notification(vq, 0);
2646     qemu_bh_schedule(q->tx_bh);
2647 }
2648 
2649 static void virtio_net_tx_timer(void *opaque)
2650 {
2651     VirtIONetQueue *q = opaque;
2652     VirtIONet *n = q->n;
2653     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2654     /* This happens when device was stopped but BH wasn't. */
2655     if (!vdev->vm_running) {
2656         /* Make sure tx waiting is set, so we'll run when restarted. */
2657         assert(q->tx_waiting);
2658         return;
2659     }
2660 
2661     q->tx_waiting = 0;
2662 
2663     /* Just in case the driver is not ready anymore */
2664     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2665         return;
2666     }
2667 
2668     virtio_queue_set_notification(q->tx_vq, 1);
2669     virtio_net_flush_tx(q);
2670 }
2671 
2672 static void virtio_net_tx_bh(void *opaque)
2673 {
2674     VirtIONetQueue *q = opaque;
2675     VirtIONet *n = q->n;
2676     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2677     int32_t ret;
2678 
2679     /* This happens when device was stopped but BH wasn't. */
2680     if (!vdev->vm_running) {
2681         /* Make sure tx waiting is set, so we'll run when restarted. */
2682         assert(q->tx_waiting);
2683         return;
2684     }
2685 
2686     q->tx_waiting = 0;
2687 
2688     /* Just in case the driver is not ready anymore */
2689     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2690         return;
2691     }
2692 
2693     ret = virtio_net_flush_tx(q);
2694     if (ret == -EBUSY || ret == -EINVAL) {
2695         return; /* Notification re-enable handled by tx_complete or device
2696                  * broken */
2697     }
2698 
2699     /* If we flush a full burst of packets, assume there are
2700      * more coming and immediately reschedule */
2701     if (ret >= n->tx_burst) {
2702         qemu_bh_schedule(q->tx_bh);
2703         q->tx_waiting = 1;
2704         return;
2705     }
2706 
2707     /* If less than a full burst, re-enable notification and flush
2708      * anything that may have come in while we weren't looking.  If
2709      * we find something, assume the guest is still active and reschedule */
2710     virtio_queue_set_notification(q->tx_vq, 1);
2711     ret = virtio_net_flush_tx(q);
2712     if (ret == -EINVAL) {
2713         return;
2714     } else if (ret > 0) {
2715         virtio_queue_set_notification(q->tx_vq, 0);
2716         qemu_bh_schedule(q->tx_bh);
2717         q->tx_waiting = 1;
2718     }
2719 }
2720 
2721 static void virtio_net_add_queue(VirtIONet *n, int index)
2722 {
2723     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2724 
2725     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2726                                            virtio_net_handle_rx);
2727 
2728     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2729         n->vqs[index].tx_vq =
2730             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2731                              virtio_net_handle_tx_timer);
2732         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2733                                               virtio_net_tx_timer,
2734                                               &n->vqs[index]);
2735     } else {
2736         n->vqs[index].tx_vq =
2737             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2738                              virtio_net_handle_tx_bh);
2739         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2740     }
2741 
2742     n->vqs[index].tx_waiting = 0;
2743     n->vqs[index].n = n;
2744 }
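/*
 * Virtqueue layout implied above and in virtio_net_del_queue(): queue
 * 2*i is the rx vq of data queue pair i, queue 2*i + 1 is its tx vq,
 * and the control vq always sits last, which is why
 * virtio_net_change_num_queues() removes and re-adds it whenever the
 * pair count changes.
 */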
2745 
2746 static void virtio_net_del_queue(VirtIONet *n, int index)
2747 {
2748     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2749     VirtIONetQueue *q = &n->vqs[index];
2750     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2751 
2752     qemu_purge_queued_packets(nc);
2753 
2754     virtio_del_queue(vdev, index * 2);
2755     if (q->tx_timer) {
2756         timer_free(q->tx_timer);
2757         q->tx_timer = NULL;
2758     } else {
2759         qemu_bh_delete(q->tx_bh);
2760         q->tx_bh = NULL;
2761     }
2762     q->tx_waiting = 0;
2763     virtio_del_queue(vdev, index * 2 + 1);
2764 }
2765 
2766 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2767 {
2768     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2769     int old_num_queues = virtio_get_num_queues(vdev);
2770     int new_num_queues = new_max_queues * 2 + 1;
2771     int i;
2772 
2773     assert(old_num_queues >= 3);
2774     assert(old_num_queues % 2 == 1);
2775 
2776     if (old_num_queues == new_num_queues) {
2777         return;
2778     }
2779 
2780     /*
2781      * We always need to remove and add ctrl vq if
2782      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2783      * and then we only enter one of the following two loops.
2784      */
2785     virtio_del_queue(vdev, old_num_queues - 1);
2786 
2787     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2788         /* new_num_queues < old_num_queues */
2789         virtio_net_del_queue(n, i / 2);
2790     }
2791 
2792     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2793         /* new_num_queues > old_num_queues */
2794         virtio_net_add_queue(n, i / 2);
2795     }
2796 
2797     /* add ctrl_vq last */
2798     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2799 }
2800 
2801 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2802 {
2803     int max = multiqueue ? n->max_queues : 1;
2804 
2805     n->multiqueue = multiqueue;
2806     virtio_net_change_num_queues(n, max);
2807 
2808     virtio_net_set_queues(n);
2809 }
2810 
2811 static int virtio_net_post_load_device(void *opaque, int version_id)
2812 {
2813     VirtIONet *n = opaque;
2814     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2815     int i, link_down;
2816 
2817     trace_virtio_net_post_load_device();
2818     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2819                                virtio_vdev_has_feature(vdev,
2820                                                        VIRTIO_F_VERSION_1),
2821                                virtio_vdev_has_feature(vdev,
2822                                                        VIRTIO_NET_F_HASH_REPORT));
2823 
2824     /* MAC_TABLE_ENTRIES may be different from the saved image */
2825     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2826         n->mac_table.in_use = 0;
2827     }
2828 
2829     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2830         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2831     }
2832 
2833     /*
2834      * curr_guest_offloads will be later overwritten by the
2835      * virtio_set_features_nocheck call done from the virtio_load.
2836      * Here we make sure it is preserved and restored accordingly
2837      * in the virtio_net_post_load_virtio callback.
2838      */
2839     n->saved_guest_offloads = n->curr_guest_offloads;
2840 
2841     virtio_net_set_queues(n);
2842 
2843     /* Find the first multicast entry in the saved MAC filter */
2844     for (i = 0; i < n->mac_table.in_use; i++) {
2845         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2846             break;
2847         }
2848     }
2849     n->mac_table.first_multi = i;
2850 
2851     /* nc.link_down can't be migrated, so infer link_down from the
2852      * link status bit in n->status */
2853     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2854     for (i = 0; i < n->max_queues; i++) {
2855         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2856     }
2857 
2858     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2859         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2860         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2861                                   QEMU_CLOCK_VIRTUAL,
2862                                   virtio_net_announce_timer, n);
2863         if (n->announce_timer.round) {
2864             timer_mod(n->announce_timer.tm,
2865                       qemu_clock_get_ms(n->announce_timer.type));
2866         } else {
2867             qemu_announce_timer_del(&n->announce_timer, false);
2868         }
2869     }
2870 
2871     if (n->rss_data.enabled) {
2872         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2873         if (!n->rss_data.populate_hash) {
2874             if (!virtio_net_attach_epbf_rss(n)) {
2875                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2876                     warn_report("Can't post-load eBPF RSS for vhost");
2877                 } else {
2878                     warn_report("Can't post-load eBPF RSS - "
2879                                 "fallback to software RSS");
2880                     n->rss_data.enabled_software_rss = true;
2881                 }
2882             }
2883         }
2884 
2885         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2886                                     n->rss_data.indirections_len,
2887                                     sizeof(n->rss_data.key));
2888     } else {
2889         trace_virtio_net_rss_disable();
2890     }
2891     return 0;
2892 }
2893 
2894 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2895 {
2896     VirtIONet *n = VIRTIO_NET(vdev);
2897     /*
2898      * The actual needed state is now in saved_guest_offloads,
2899      * see virtio_net_post_load_device for detail.
2900      * Restore it back and apply the desired offloads.
2901      */
2902     n->curr_guest_offloads = n->saved_guest_offloads;
2903     if (peer_has_vnet_hdr(n)) {
2904         virtio_net_apply_guest_offloads(n);
2905     }
2906 
2907     return 0;
2908 }
2909 
2910 /* tx_waiting field of a VirtIONetQueue */
2911 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2912     .name = "virtio-net-queue-tx_waiting",
2913     .fields = (VMStateField[]) {
2914         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2915         VMSTATE_END_OF_LIST()
2916     },
2917 };
2918 
2919 static bool max_queues_gt_1(void *opaque, int version_id)
2920 {
2921     return VIRTIO_NET(opaque)->max_queues > 1;
2922 }
2923 
2924 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2925 {
2926     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2927                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2928 }
2929 
2930 static bool mac_table_fits(void *opaque, int version_id)
2931 {
2932     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2933 }
2934 
2935 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2936 {
2937     return !mac_table_fits(opaque, version_id);
2938 }
2939 
2940 /* This temporary type is shared by all the WITH_TMP methods
2941  * although only some fields are used by each.
2942  */
2943 struct VirtIONetMigTmp {
2944     VirtIONet      *parent;
2945     VirtIONetQueue *vqs_1;
2946     uint16_t        curr_queues_1;
2947     uint8_t         has_ufo;
2948     uint32_t        has_vnet_hdr;
2949 };
2950 
2951 /* The 2nd and subsequent tx_waiting flags are loaded later than
2952  * the 1st entry in the queues and only if there's more than one
2953  * entry.  We use the tmp mechanism to calculate a temporary
2954  * pointer and count and also validate the count.
2955  */
2956 
2957 static int virtio_net_tx_waiting_pre_save(void *opaque)
2958 {
2959     struct VirtIONetMigTmp *tmp = opaque;
2960 
2961     tmp->vqs_1 = tmp->parent->vqs + 1;
2962     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2963     if (tmp->parent->curr_queues == 0) {
2964         tmp->curr_queues_1 = 0;
2965     }
2966 
2967     return 0;
2968 }
2969 
2970 static int virtio_net_tx_waiting_pre_load(void *opaque)
2971 {
2972     struct VirtIONetMigTmp *tmp = opaque;
2973 
2974     /* Reuse the pointer setup from save */
2975     virtio_net_tx_waiting_pre_save(opaque);
2976 
2977     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2978         error_report("virtio-net: curr_queues %x > max_queues %x",
2979             tmp->parent->curr_queues, tmp->parent->max_queues);
2980 
2981         return -EINVAL;
2982     }
2983 
2984     return 0; /* all good */
2985 }
2986 
2987 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2988     .name      = "virtio-net-tx_waiting",
2989     .pre_load  = virtio_net_tx_waiting_pre_load,
2990     .pre_save  = virtio_net_tx_waiting_pre_save,
2991     .fields    = (VMStateField[]) {
2992         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2993                                      curr_queues_1,
2994                                      vmstate_virtio_net_queue_tx_waiting,
2995                                      struct VirtIONetQueue),
2996         VMSTATE_END_OF_LIST()
2997     },
2998 };
2999 
3000 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3001  * flag set we need to check that we have it
3002  */
3003 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3004 {
3005     struct VirtIONetMigTmp *tmp = opaque;
3006 
3007     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3008         error_report("virtio-net: saved image requires TUN_F_UFO support");
3009         return -EINVAL;
3010     }
3011 
3012     return 0;
3013 }
3014 
3015 static int virtio_net_ufo_pre_save(void *opaque)
3016 {
3017     struct VirtIONetMigTmp *tmp = opaque;
3018 
3019     tmp->has_ufo = tmp->parent->has_ufo;
3020 
3021     return 0;
3022 }
3023 
3024 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3025     .name      = "virtio-net-ufo",
3026     .post_load = virtio_net_ufo_post_load,
3027     .pre_save  = virtio_net_ufo_pre_save,
3028     .fields    = (VMStateField[]) {
3029         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3030         VMSTATE_END_OF_LIST()
3031     },
3032 };
3033 
3034 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3035  * flag set we need to check that we have it
3036  */
3037 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3038 {
3039     struct VirtIONetMigTmp *tmp = opaque;
3040 
3041     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3042         error_report("virtio-net: saved image requires vnet_hdr=on");
3043         return -EINVAL;
3044     }
3045 
3046     return 0;
3047 }
3048 
3049 static int virtio_net_vnet_pre_save(void *opaque)
3050 {
3051     struct VirtIONetMigTmp *tmp = opaque;
3052 
3053     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3054 
3055     return 0;
3056 }
3057 
3058 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3059     .name      = "virtio-net-vnet",
3060     .post_load = virtio_net_vnet_post_load,
3061     .pre_save  = virtio_net_vnet_pre_save,
3062     .fields    = (VMStateField[]) {
3063         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3064         VMSTATE_END_OF_LIST()
3065     },
3066 };
3067 
3068 static bool virtio_net_rss_needed(void *opaque)
3069 {
3070     return VIRTIO_NET(opaque)->rss_data.enabled;
3071 }
3072 
3073 static const VMStateDescription vmstate_virtio_net_rss = {
3074     .name      = "virtio-net-device/rss",
3075     .version_id = 1,
3076     .minimum_version_id = 1,
3077     .needed = virtio_net_rss_needed,
3078     .fields = (VMStateField[]) {
3079         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3080         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3081         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3082         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3083         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3084         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3085         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3086                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3087         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3088                                     rss_data.indirections_len, 0,
3089                                     vmstate_info_uint16, uint16_t),
3090         VMSTATE_END_OF_LIST()
3091     },
3092 };
3093 
3094 static const VMStateDescription vmstate_virtio_net_device = {
3095     .name = "virtio-net-device",
3096     .version_id = VIRTIO_NET_VM_VERSION,
3097     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3098     .post_load = virtio_net_post_load_device,
3099     .fields = (VMStateField[]) {
3100         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3101         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3102                                vmstate_virtio_net_queue_tx_waiting,
3103                                VirtIONetQueue),
3104         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3105         VMSTATE_UINT16(status, VirtIONet),
3106         VMSTATE_UINT8(promisc, VirtIONet),
3107         VMSTATE_UINT8(allmulti, VirtIONet),
3108         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3109 
3110         /* Guarded pair: if it fits we load it, else we throw it away
3111          * - can happen if the source has a larger MAC table; post-load
3112          * sets flags in this case.
3113          */
3114         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3115                                 0, mac_table_fits, mac_table.in_use,
3116                                  ETH_ALEN),
3117         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3118                                      mac_table.in_use, ETH_ALEN),
3119 
3120         /* Note: This is an array of uint32's that's always been saved as a
3121          * buffer; hold onto your endiannesses; it's actually used as a bitmap
3122          * but based on the uint.
3123          */
3124         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3125         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3126                          vmstate_virtio_net_has_vnet),
3127         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3128         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3129         VMSTATE_UINT8(alluni, VirtIONet),
3130         VMSTATE_UINT8(nomulti, VirtIONet),
3131         VMSTATE_UINT8(nouni, VirtIONet),
3132         VMSTATE_UINT8(nobcast, VirtIONet),
3133         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3134                          vmstate_virtio_net_has_ufo),
3135         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3136                             vmstate_info_uint16_equal, uint16_t),
3137         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3138         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3139                          vmstate_virtio_net_tx_waiting),
3140         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3141                             has_ctrl_guest_offloads),
3142         VMSTATE_END_OF_LIST()
3143     },
3144     .subsections = (const VMStateDescription * []) {
3145         &vmstate_virtio_net_rss,
3146         NULL
3147     }
3148 };
3149 
3150 static NetClientInfo net_virtio_info = {
3151     .type = NET_CLIENT_DRIVER_NIC,
3152     .size = sizeof(NICState),
3153     .can_receive = virtio_net_can_receive,
3154     .receive = virtio_net_receive,
3155     .link_status_changed = virtio_net_set_link_status,
3156     .query_rx_filter = virtio_net_query_rxfilter,
3157     .announce = virtio_net_announce,
3158 };
3159 
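/* With vhost running, guest notifier state lives in the backend: query
 * whether an interrupt is pending for this virtqueue there. */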
3160 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3161 {
3162     VirtIONet *n = VIRTIO_NET(vdev);
3163     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3164     assert(n->vhost_started);
3165     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3166 }
3167 
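/* Likewise, masking/unmasking of guest notifiers is forwarded to the
 * vhost backend while it owns the virtqueues. */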
3168 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3169                                            bool mask)
3170 {
3171     VirtIONet *n = VIRTIO_NET(vdev);
3172     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3173     assert(n->vhost_started);
3174     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3175                              vdev, idx, mask);
3176 }
3177 
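/* The MAC field is always exposed in the config space, so force
 * VIRTIO_NET_F_MAC on before sizing the config for the feature set. */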
3178 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3179 {
3180     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3181 
3182     n->config_size = virtio_feature_get_config_size(feature_sizes,
3183                                                     host_features);
3184 }
3185 
3186 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3187                                    const char *type)
3188 {
3189     /*
3190      * The name can be NULL; the netclient name will then be "type.x".
3191      */
3192     assert(type != NULL);
3193 
3194     g_free(n->netclient_name);
3195     g_free(n->netclient_type);
3196     n->netclient_name = g_strdup(name);
3197     n->netclient_type = g_strdup(type);
3198 }
3199 
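/* Ask the primary's hotplug controller to unplug it. Marking it
 * partially_hotplugged keeps the device object around so it can be
 * replugged if the migration fails. */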
3200 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3201 {
3202     HotplugHandler *hotplug_ctrl;
3203     PCIDevice *pci_dev;
3204     Error *err = NULL;
3205 
3206     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3207     if (hotplug_ctrl) {
3208         pci_dev = PCI_DEVICE(dev);
3209         pci_dev->partially_hotplugged = true;
3210         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3211         if (err) {
3212             error_report_err(err);
3213             return false;
3214         }
3215     } else {
3216         return false;
3217     }
3218     return true;
3219 }
3220 
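/* Undo a partial unplug of the primary (e.g. after a failed migration):
 * reattach it to its bus and run the hotplug handler's pre_plug/plug
 * hooks again. */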
3221 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3222                                     Error **errp)
3223 {
3224     Error *err = NULL;
3225     HotplugHandler *hotplug_ctrl;
3226     PCIDevice *pdev = PCI_DEVICE(dev);
3227     BusState *primary_bus;
3228 
3229     if (!pdev->partially_hotplugged) {
3230         return true;
3231     }
3232     primary_bus = dev->parent_bus;
3233     if (!primary_bus) {
3234         error_setg(errp, "virtio_net: couldn't find primary bus");
3235         return false;
3236     }
3237     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3238     qatomic_set(&n->failover_primary_hidden, false);
3239     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3240     if (hotplug_ctrl) {
3241         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3242         if (err) {
3243             goto out;
3244         }
3245         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3246     }
3247     pdev->partially_hotplugged = false;
3248 
3249 out:
3250     error_propagate(errp, err);
3251     return !err;
3252 }
3253 
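/* Drive the failover primary across migration: unplug it while migration
 * is being set up, and plug it back if the migration fails. */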
3254 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3255 {
3256     bool should_be_hidden;
3257     Error *err = NULL;
3258     DeviceState *dev = failover_find_primary_device(n);
3259 
3260     if (!dev) {
3261         return;
3262     }
3263 
3264     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3265 
3266     if (migration_in_setup(s) && !should_be_hidden) {
3267         if (failover_unplug_primary(n, dev)) {
3268             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3269             qapi_event_send_unplug_primary(dev->id);
3270             qatomic_set(&n->failover_primary_hidden, true);
3271         } else {
3272             warn_report("couldn't unplug primary device");
3273         }
3274     } else if (migration_has_failed(s)) {
3275         /* We already unplugged the device; let's plug it back. */
3276         if (!failover_replug_primary(n, dev, &err)) {
3277             if (err) {
3278                 error_report_err(err);
3279             }
3280         }
3281     }
3282 }
3283 
3284 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3285 {
3286     MigrationState *s = data;
3287     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3288     virtio_net_handle_migration_primary(n, s);
3289 }
3290 
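/* DeviceListener hook: hide a would-be primary (matched via its
 * failover_pair_id) until the guest has negotiated VIRTIO_NET_F_STANDBY;
 * its options are saved so it can be hotplugged later. */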
3291 static bool failover_hide_primary_device(DeviceListener *listener,
3292                                          const QDict *device_opts,
3293                                          bool from_json,
3294                                          Error **errp)
3295 {
3296     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3297     const char *standby_id;
3298 
3299     if (!device_opts) {
3300         return false;
3301     }
3302     standby_id = qdict_get_try_str(device_opts, "failover_pair_id");
3303     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3304         return false;
3305     }
3306 
3307     if (n->primary_opts) {
3308         error_setg(errp, "Cannot attach more than one primary device to '%s'",
3309                    n->netclient_name);
3310         return false;
3311     }
3312 
3313     n->primary_opts = qdict_clone_shallow(device_opts);
3314     n->primary_opts_from_json = from_json;
3315 
3316     /* failover_primary_hidden is set during feature negotiation */
3317     return qatomic_read(&n->failover_primary_hidden);
3318 }
3319 
3320 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3321 {
3322     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3323     VirtIONet *n = VIRTIO_NET(dev);
3324     NetClientState *nc;
3325     int i;
3326 
3327     if (n->net_conf.mtu) {
3328         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3329     }
3330 
3331     if (n->net_conf.duplex_str) {
3332         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3333             n->net_conf.duplex = DUPLEX_HALF;
3334         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3335             n->net_conf.duplex = DUPLEX_FULL;
3336         } else {
3337             error_setg(errp, "'duplex' must be 'half' or 'full'");
3338             return;
3339         }
3340         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3341     } else {
3342         n->net_conf.duplex = DUPLEX_UNKNOWN;
3343     }
3344 
3345     if (n->net_conf.speed < SPEED_UNKNOWN) {
3346         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3347         return;
3348     }
3349     if (n->net_conf.speed >= 0) {
3350         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3351     }
3352 
3353     if (n->failover) {
3354         n->primary_listener.hide_device = failover_hide_primary_device;
3355         qatomic_set(&n->failover_primary_hidden, true);
3356         device_listener_register(&n->primary_listener);
3357         n->migration_state.notify = virtio_net_migration_state_notifier;
3358         add_migration_state_change_notifier(&n->migration_state);
3359         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3360     }
3361 
3362     virtio_net_set_config_size(n, n->host_features);
3363     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3364 
3365     /*
3366      * We set a lower limit on RX queue size to what it always was.
3367      * Guests that want a smaller ring can always resize it without
3368      * help from us (using virtio 1 and up).
3369      */
3370     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3371         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3372         !is_power_of_2(n->net_conf.rx_queue_size)) {
3373         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3374                    "must be a power of 2 between %d and %d.",
3375                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3376                    VIRTQUEUE_MAX_SIZE);
3377         virtio_cleanup(vdev);
3378         return;
3379     }
3380 
3381     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3382         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3383         !is_power_of_2(n->net_conf.tx_queue_size)) {
3384         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3385                    "must be a power of 2 between %d and %d",
3386                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3387                    VIRTQUEUE_MAX_SIZE);
3388         virtio_cleanup(vdev);
3389         return;
3390     }
3391 
3392     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3393     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3394         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3395                    "must be a positive integer less than %d.",
3396                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3397         virtio_cleanup(vdev);
3398         return;
3399     }
3400     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3401     n->curr_queues = 1;
3402     n->tx_timeout = n->net_conf.txtimer;
3403 
3404     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3405                        && strcmp(n->net_conf.tx, "bh")) {
3406         warn_report("virtio-net: "
3407                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3408                     n->net_conf.tx);
3409         error_printf("Defaulting to \"bh\"\n");
3410     }
3411 
3412     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3413                                     n->net_conf.tx_queue_size);
3414 
3415     for (i = 0; i < n->max_queues; i++) {
3416         virtio_net_add_queue(n, i);
3417     }
3418 
3419     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3420     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3421     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3422     n->status = VIRTIO_NET_S_LINK_UP;
3423     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3424                               QEMU_CLOCK_VIRTUAL,
3425                               virtio_net_announce_timer, n);
3426     n->announce_timer.round = 0;
3427 
3428     if (n->netclient_type) {
3429         /*
3430          * This happens when virtio_net_set_netclient_name has been called.
3431          */
3432         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3433                               n->netclient_type, n->netclient_name, n);
3434     } else {
3435         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3436                               object_get_typename(OBJECT(dev)), dev->id, n);
3437     }
3438 
3439     for (i = 0; i < n->max_queues; i++) {
3440         n->nic->ncs[i].do_not_pad = true;
3441     }
3442 
3443     peer_test_vnet_hdr(n);
3444     if (peer_has_vnet_hdr(n)) {
3445         for (i = 0; i < n->max_queues; i++) {
3446             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3447         }
3448         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3449     } else {
3450         n->host_hdr_len = 0;
3451     }
3452 
3453     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3454 
3455     n->vqs[0].tx_waiting = 0;
3456     n->tx_burst = n->net_conf.txburst;
3457     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3458     n->promisc = 1; /* for compatibility */
3459 
3460     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3461 
3462     n->vlans = g_malloc0(MAX_VLAN >> 3);
3463 
3464     nc = qemu_get_queue(n->nic);
3465     nc->rxfilter_notify_enabled = 1;
3466 
3467     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3468         struct virtio_net_config netcfg = {};
3469         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3470         vhost_net_set_config(get_vhost_net(nc->peer),
3471             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3472     }
3473     QTAILQ_INIT(&n->rsc_chains);
3474     n->qdev = dev;
3475 
3476     net_rx_pkt_init(&n->rx_pkt, false);
3477 
3478     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3479         virtio_net_load_ebpf(n);
3480     }
3481 }
3482 
3483 static void virtio_net_device_unrealize(DeviceState *dev)
3484 {
3485     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3486     VirtIONet *n = VIRTIO_NET(dev);
3487     int i, max_queues;
3488 
3489     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3490         virtio_net_unload_ebpf(n);
3491     }
3492 
3493     /* This will stop the vhost backend if appropriate. */
3494     virtio_net_set_status(vdev, 0);
3495 
3496     g_free(n->netclient_name);
3497     n->netclient_name = NULL;
3498     g_free(n->netclient_type);
3499     n->netclient_type = NULL;
3500 
3501     g_free(n->mac_table.macs);
3502     g_free(n->vlans);
3503 
3504     if (n->failover) {
3505         qobject_unref(n->primary_opts);
3506         device_listener_unregister(&n->primary_listener);
3507         remove_migration_state_change_notifier(&n->migration_state);
3508     } else {
3509         assert(n->primary_opts == NULL);
3510     }
3511 
3512     max_queues = n->multiqueue ? n->max_queues : 1;
3513     for (i = 0; i < max_queues; i++) {
3514         virtio_net_del_queue(n, i);
3515     }
3516     /* also delete the control vq */
3517     virtio_del_queue(vdev, max_queues * 2);
3518     qemu_announce_timer_del(&n->announce_timer, false);
3519     g_free(n->vqs);
3520     qemu_del_nic(n->nic);
3521     virtio_net_rsc_cleanup(n);
3522     g_free(n->rss_data.indirections_table);
3523     net_rx_pkt_uninit(n->rx_pkt);
3524     virtio_cleanup(vdev);
3525 }
3526 
3527 static void virtio_net_instance_init(Object *obj)
3528 {
3529     VirtIONet *n = VIRTIO_NET(obj);
3530 
3531     /*
3532      * The default config_size is sizeof(struct virtio_net_config).
3533      * It can be overridden with virtio_net_set_config_size.
3534      */
3535     n->config_size = sizeof(struct virtio_net_config);
3536     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3537                                   "bootindex", "/ethernet-phy@0",
3538                                   DEVICE(n));
3539 
3540     ebpf_rss_init(&n->ebpf_rss);
3541 }
3542 
3543 static int virtio_net_pre_save(void *opaque)
3544 {
3545     VirtIONet *n = opaque;
3546 
3547     /* At this point the backend must be stopped, otherwise
3548      * it might keep writing to memory. */
3549     assert(!n->vhost_started);
3550 
3551     return 0;
3552 }
3553 
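/* Report whether the failover primary still has an unplug outstanding,
 * so migration can wait for the guest to release the device. */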
3554 static bool primary_unplug_pending(void *opaque)
3555 {
3556     DeviceState *dev = opaque;
3557     DeviceState *primary;
3558     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3559     VirtIONet *n = VIRTIO_NET(vdev);
3560 
3561     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3562         return false;
3563     }
3564     primary = failover_find_primary_device(n);
3565     return primary ? primary->pending_deleted_event : false;
3566 }
3567 
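/* VMSD .dev_unplug_pending hook; simply forwards to the virtio device
 * class callback (primary_unplug_pending above). */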
3568 static bool dev_unplug_pending(void *opaque)
3569 {
3570     DeviceState *dev = opaque;
3571     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3572 
3573     return vdc->primary_unplug_pending(dev);
3574 }
3575 
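/* Outer vmstate: wraps the generic virtio save/load; the device-specific
 * fields above are reached through VirtioDeviceClass::vmsd. */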
3576 static const VMStateDescription vmstate_virtio_net = {
3577     .name = "virtio-net",
3578     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3579     .version_id = VIRTIO_NET_VM_VERSION,
3580     .fields = (VMStateField[]) {
3581         VMSTATE_VIRTIO_DEVICE,
3582         VMSTATE_END_OF_LIST()
3583     },
3584     .pre_save = virtio_net_pre_save,
3585     .dev_unplug_pending = dev_unplug_pending,
3586 };
3587 
3588 static Property virtio_net_properties[] = {
3589     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3590                     VIRTIO_NET_F_CSUM, true),
3591     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3592                     VIRTIO_NET_F_GUEST_CSUM, true),
3593     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3594     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3595                     VIRTIO_NET_F_GUEST_TSO4, true),
3596     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3597                     VIRTIO_NET_F_GUEST_TSO6, true),
3598     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3599                     VIRTIO_NET_F_GUEST_ECN, true),
3600     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3601                     VIRTIO_NET_F_GUEST_UFO, true),
3602     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3603                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3604     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3605                     VIRTIO_NET_F_HOST_TSO4, true),
3606     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3607                     VIRTIO_NET_F_HOST_TSO6, true),
3608     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3609                     VIRTIO_NET_F_HOST_ECN, true),
3610     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3611                     VIRTIO_NET_F_HOST_UFO, true),
3612     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3613                     VIRTIO_NET_F_MRG_RXBUF, true),
3614     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3615                     VIRTIO_NET_F_STATUS, true),
3616     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3617                     VIRTIO_NET_F_CTRL_VQ, true),
3618     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3619                     VIRTIO_NET_F_CTRL_RX, true),
3620     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3621                     VIRTIO_NET_F_CTRL_VLAN, true),
3622     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3623                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3624     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3625                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3626     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3627                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3628     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3629     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3630                     VIRTIO_NET_F_RSS, false),
3631     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3632                     VIRTIO_NET_F_HASH_REPORT, false),
3633     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3634                     VIRTIO_NET_F_RSC_EXT, false),
3635     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3636                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3637     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3638     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3639                        TX_TIMER_INTERVAL),
3640     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3641     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3642     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3643                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3644     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3645                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3646     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3647     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3648                      true),
3649     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3650     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3651     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3652     DEFINE_PROP_END_OF_LIST(),
3653 };
3654 
3655 static void virtio_net_class_init(ObjectClass *klass, void *data)
3656 {
3657     DeviceClass *dc = DEVICE_CLASS(klass);
3658     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3659 
3660     device_class_set_props(dc, virtio_net_properties);
3661     dc->vmsd = &vmstate_virtio_net;
3662     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3663     vdc->realize = virtio_net_device_realize;
3664     vdc->unrealize = virtio_net_device_unrealize;
3665     vdc->get_config = virtio_net_get_config;
3666     vdc->set_config = virtio_net_set_config;
3667     vdc->get_features = virtio_net_get_features;
3668     vdc->set_features = virtio_net_set_features;
3669     vdc->bad_features = virtio_net_bad_features;
3670     vdc->reset = virtio_net_reset;
3671     vdc->set_status = virtio_net_set_status;
3672     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3673     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3674     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3675     vdc->post_load = virtio_net_post_load_virtio;
3676     vdc->vmsd = &vmstate_virtio_net_device;
3677     vdc->primary_unplug_pending = primary_unplug_pending;
3678 }
3679 
3680 static const TypeInfo virtio_net_info = {
3681     .name = TYPE_VIRTIO_NET,
3682     .parent = TYPE_VIRTIO_DEVICE,
3683     .instance_size = sizeof(VirtIONet),
3684     .instance_init = virtio_net_instance_init,
3685     .class_init = virtio_net_class_init,
3686 };
3687 
3688 static void virtio_register_types(void)
3689 {
3690     type_register_static(&virtio_net_info);
3691 }
3692 
3693 type_init(virtio_register_types)
3694