xref: /openbmc/qemu/hw/net/virtio-net.c (revision 0fbb5d2d)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 #include "sysemu/qtest.h"
48 
49 #define VIRTIO_NET_VM_VERSION    11
50 
51 #define MAC_TABLE_ENTRIES    64
52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
53 
54 /* previously a fixed value; now the default, since queue sizes are configurable */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue sizes; with virtio-1, the guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* IHL field value for an IPv4 header without options (in 32-bit words) */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval. This value affects performance
78    significantly and should be tuned carefully: '300000' (300us) is the
79    recommended value for passing the WHQL test, while '50000' can gain 2x
80    netperf throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
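/*
 * Each entry maps a feature bit (or bits) to the last virtio_net_config
 * field that the feature makes visible; the table is used to size the
 * device config space according to the negotiated feature set.
 */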
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
110 {
111     VirtIONet *n = qemu_get_nic_opaque(nc);
112 
113     return &n->vqs[nc->queue_index];
114 }
115 
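/*
 * Virtqueues come in RX/TX pairs: queue pair q uses virtqueue index 2*q
 * for RX and 2*q + 1 for TX, so dividing a virtqueue index by two yields
 * the owning queue pair.
 */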
116 static int vq2q(int queue_index)
117 {
118     return queue_index / 2;
119 }
120 
121 /* TODO
122  * - we could suppress RX interrupt if we were so inclined.
123  */
124 
125 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
126 {
127     VirtIONet *n = VIRTIO_NET(vdev);
128     struct virtio_net_config netcfg;
129     NetClientState *nc = qemu_get_queue(n->nic);
130     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
131 
132     int ret = 0;
133     memset(&netcfg, 0, sizeof(struct virtio_net_config));
134     virtio_stw_p(vdev, &netcfg.status, n->status);
135     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
136     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
137     memcpy(netcfg.mac, n->mac, ETH_ALEN);
138     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
139     netcfg.duplex = n->net_conf.duplex;
140     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
141     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
142                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
143                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
144     virtio_stl_p(vdev, &netcfg.supported_hash_types,
145                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
146     memcpy(config, &netcfg, n->config_size);
147 
148     /*
149      * Is this VDPA? No peer means not VDPA: there's no way to
150      * disconnect/reconnect a VDPA peer.
151      */
152     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
153         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
154                                    n->config_size);
155         if (ret != -1) {
156             /*
157              * Some NIC/kernel combinations present 0 as the mac address.  As
158              * that is not a legal address, try to proceed with the
159              * address from the QEMU command line in the hope that the
160              * address has been configured correctly elsewhere - just not
161              * reported by the device.
162              */
163             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
164                 info_report("Zero hardware mac address detected. Ignoring.");
165                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
166             }
167             memcpy(config, &netcfg, n->config_size);
168         }
169     }
170 }
171 
172 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
173 {
174     VirtIONet *n = VIRTIO_NET(vdev);
175     struct virtio_net_config netcfg = {};
176     NetClientState *nc = qemu_get_queue(n->nic);
177 
178     memcpy(&netcfg, config, n->config_size);
179 
180     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
181         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
182         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
183         memcpy(n->mac, netcfg.mac, ETH_ALEN);
184         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
185     }
186 
187     /*
188      * Is this VDPA? No peer means not VDPA: there's no way to
189      * disconnect/reconnect a VDPA peer.
190      */
191     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
192         vhost_net_set_config(get_vhost_net(nc->peer),
193                              (uint8_t *)&netcfg, 0, n->config_size,
194                              VHOST_SET_CONFIG_TYPE_MASTER);
195     }
196 }
197 
198 static bool virtio_net_started(VirtIONet *n, uint8_t status)
199 {
200     VirtIODevice *vdev = VIRTIO_DEVICE(n);
201     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
202         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
203 }
204 
205 static void virtio_net_announce_notify(VirtIONet *net)
206 {
207     VirtIODevice *vdev = VIRTIO_DEVICE(net);
208     trace_virtio_net_announce_notify();
209 
210     net->status |= VIRTIO_NET_S_ANNOUNCE;
211     virtio_notify_config(vdev);
212 }
213 
214 static void virtio_net_announce_timer(void *opaque)
215 {
216     VirtIONet *n = opaque;
217     trace_virtio_net_announce_timer(n->announce_timer.round);
218 
219     n->announce_timer.round--;
220     virtio_net_announce_notify(n);
221 }
222 
223 static void virtio_net_announce(NetClientState *nc)
224 {
225     VirtIONet *n = qemu_get_nic_opaque(nc);
226     VirtIODevice *vdev = VIRTIO_DEVICE(n);
227 
228     /*
229  * Make sure the virtio migration announcement timer isn't running.
230  * If it is, let it trigger the announcement so that we do not cause
231      * confusion.
232      */
233     if (n->announce_timer.round) {
234         return;
235     }
236 
237     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
238         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
239         virtio_net_announce_notify(n);
240     }
241 }
242 
243 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
244 {
245     VirtIODevice *vdev = VIRTIO_DEVICE(n);
246     NetClientState *nc = qemu_get_queue(n->nic);
247     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
248     int cvq = n->max_ncs - n->max_queue_pairs;
249 
250     if (!get_vhost_net(nc->peer)) {
251         return;
252     }
253 
254     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
255         !!n->vhost_started) {
256         return;
257     }
258     if (!n->vhost_started) {
259         int r, i;
260 
261         if (n->needs_vnet_hdr_swap) {
262             error_report("backend does not support %s vnet headers; "
263                          "falling back on userspace virtio",
264                          virtio_is_big_endian(vdev) ? "BE" : "LE");
265             return;
266         }
267 
268         /* Any packets outstanding? Purge them to avoid touching rings
269          * when vhost is running.
270          */
271         for (i = 0; i < queue_pairs; i++) {
272             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
273 
274             /* Purge both directions: TX and RX. */
275             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
276             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
277         }
278 
279         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
280             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
281             if (r < 0) {
282                 error_report("backend does not support an MTU of %u bytes",
283                              n->net_conf.mtu);
284 
285                 return;
286             }
287         }
288 
289         n->vhost_started = 1;
290         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
291         if (r < 0) {
292             error_report("unable to start vhost net: %d: "
293                          "falling back on userspace virtio", -r);
294             n->vhost_started = 0;
295         }
296     } else {
297         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
298         n->vhost_started = 0;
299     }
300 }
301 
302 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
303                                           NetClientState *peer,
304                                           bool enable)
305 {
306     if (virtio_is_big_endian(vdev)) {
307         return qemu_set_vnet_be(peer, enable);
308     } else {
309         return qemu_set_vnet_le(peer, enable);
310     }
311 }
312 
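/*
 * Returns true when enabling the requested endianness fails on some peer
 * (after rolling back peers that were already switched); virtio-net then
 * has to swap vnet header fields itself.
 */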
313 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
314                                        int queue_pairs, bool enable)
315 {
316     int i;
317 
318     for (i = 0; i < queue_pairs; i++) {
319         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
320             enable) {
321             while (--i >= 0) {
322                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
323             }
324 
325             return true;
326         }
327     }
328 
329     return false;
330 }
331 
332 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
333 {
334     VirtIODevice *vdev = VIRTIO_DEVICE(n);
335     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
336 
337     if (virtio_net_started(n, status)) {
338         /* Before using the device, we tell the network backend about the
339          * endianness to use when parsing vnet headers. If the backend
340          * can't do it, we fall back to fixing the headers in the core
341          * virtio-net code.
342          */
343         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
344                                                             queue_pairs, true);
345     } else if (virtio_net_started(n, vdev->status)) {
346         /* After using the device, we need to reset the network backend to
347          * the default (guest native endianness), otherwise the guest may
348          * lose network connectivity if it is rebooted into a different
349          * endianness.
350          */
351         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
352     }
353 }
354 
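/*
 * Drop every outstanding element in the TX queue and, if anything was
 * dropped, notify the guest so it can reclaim the buffers.
 */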
355 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
356 {
357     unsigned int dropped = virtqueue_drop_all(vq);
358     if (dropped) {
359         virtio_notify(vdev, vq);
360     }
361 }
362 
363 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
364 {
365     VirtIONet *n = VIRTIO_NET(vdev);
366     VirtIONetQueue *q;
367     int i;
368     uint8_t queue_status;
369 
370     virtio_net_vnet_endian_status(n, status);
371     virtio_net_vhost_status(n, status);
372 
373     for (i = 0; i < n->max_queue_pairs; i++) {
374         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
375         bool queue_started;
376         q = &n->vqs[i];
377 
378         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
379             queue_status = 0;
380         } else {
381             queue_status = status;
382         }
383         queue_started =
384             virtio_net_started(n, queue_status) && !n->vhost_started;
385 
386         if (queue_started) {
387             qemu_flush_queued_packets(ncs);
388         }
389 
390         if (!q->tx_waiting) {
391             continue;
392         }
393 
394         if (queue_started) {
395             if (q->tx_timer) {
396                 timer_mod(q->tx_timer,
397                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
398             } else {
399                 qemu_bh_schedule(q->tx_bh);
400             }
401         } else {
402             if (q->tx_timer) {
403                 timer_del(q->tx_timer);
404             } else {
405                 qemu_bh_cancel(q->tx_bh);
406             }
407             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
408                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
409                 vdev->vm_running) {
410                 /* If tx is waiting, we likely have some packets in the
411                  * tx queue and notification disabled */
412                 q->tx_waiting = 0;
413                 virtio_queue_set_notification(q->tx_vq, 1);
414                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
415             }
416         }
417     }
418 }
419 
420 static void virtio_net_set_link_status(NetClientState *nc)
421 {
422     VirtIONet *n = qemu_get_nic_opaque(nc);
423     VirtIODevice *vdev = VIRTIO_DEVICE(n);
424     uint16_t old_status = n->status;
425 
426     if (nc->link_down)
427         n->status &= ~VIRTIO_NET_S_LINK_UP;
428     else
429         n->status |= VIRTIO_NET_S_LINK_UP;
430 
431     if (n->status != old_status)
432         virtio_notify_config(vdev);
433 
434     virtio_net_set_status(vdev, vdev->status);
435 }
436 
437 static void rxfilter_notify(NetClientState *nc)
438 {
439     VirtIONet *n = qemu_get_nic_opaque(nc);
440 
441     if (nc->rxfilter_notify_enabled) {
442         char *path = object_get_canonical_path(OBJECT(n->qdev));
443         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
444                                               n->netclient_name, path);
445         g_free(path);
446 
447         /* disable event notification to avoid event flooding */
448         nc->rxfilter_notify_enabled = 0;
449     }
450 }
451 
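/*
 * Build a QAPI intList of every VLAN id whose bit is set in the vlans
 * bitmap (MAX_VLAN bits, stored as 32-bit words).
 */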
452 static intList *get_vlan_table(VirtIONet *n)
453 {
454     intList *list;
455     int i, j;
456 
457     list = NULL;
458     for (i = 0; i < MAX_VLAN >> 5; i++) {
459         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
460             if (n->vlans[i] & (1U << j)) {
461                 QAPI_LIST_PREPEND(list, (i << 5) + j);
462             }
463         }
464     }
465 
466     return list;
467 }
468 
469 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
470 {
471     VirtIONet *n = qemu_get_nic_opaque(nc);
472     VirtIODevice *vdev = VIRTIO_DEVICE(n);
473     RxFilterInfo *info;
474     strList *str_list;
475     int i;
476 
477     info = g_malloc0(sizeof(*info));
478     info->name = g_strdup(nc->name);
479     info->promiscuous = n->promisc;
480 
481     if (n->nouni) {
482         info->unicast = RX_STATE_NONE;
483     } else if (n->alluni) {
484         info->unicast = RX_STATE_ALL;
485     } else {
486         info->unicast = RX_STATE_NORMAL;
487     }
488 
489     if (n->nomulti) {
490         info->multicast = RX_STATE_NONE;
491     } else if (n->allmulti) {
492         info->multicast = RX_STATE_ALL;
493     } else {
494         info->multicast = RX_STATE_NORMAL;
495     }
496 
497     info->broadcast_allowed = !n->nobcast;
498     info->multicast_overflow = n->mac_table.multi_overflow;
499     info->unicast_overflow = n->mac_table.uni_overflow;
500 
501     info->main_mac = qemu_mac_strdup_printf(n->mac);
502 
503     str_list = NULL;
504     for (i = 0; i < n->mac_table.first_multi; i++) {
505         QAPI_LIST_PREPEND(str_list,
506                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
507     }
508     info->unicast_table = str_list;
509 
510     str_list = NULL;
511     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
512         QAPI_LIST_PREPEND(str_list,
513                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
514     }
515     info->multicast_table = str_list;
516     info->vlan_table = get_vlan_table(n);
517 
518     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
519         info->vlan = RX_STATE_ALL;
520     } else if (!info->vlan_table) {
521         info->vlan = RX_STATE_NONE;
522     } else {
523         info->vlan = RX_STATE_NORMAL;
524     }
525 
526     /* enable event notification after query */
527     nc->rxfilter_notify_enabled = 1;
528 
529     return info;
530 }
531 
532 static void virtio_net_reset(VirtIODevice *vdev)
533 {
534     VirtIONet *n = VIRTIO_NET(vdev);
535     int i;
536 
537     /* Reset back to compatibility mode */
538     n->promisc = 1;
539     n->allmulti = 0;
540     n->alluni = 0;
541     n->nomulti = 0;
542     n->nouni = 0;
543     n->nobcast = 0;
544     /* multiqueue is disabled by default */
545     n->curr_queue_pairs = 1;
546     timer_del(n->announce_timer.tm);
547     n->announce_timer.round = 0;
548     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
549 
550     /* Flush any MAC and VLAN filter table state */
551     n->mac_table.in_use = 0;
552     n->mac_table.first_multi = 0;
553     n->mac_table.multi_overflow = 0;
554     n->mac_table.uni_overflow = 0;
555     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
556     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
557     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
558     memset(n->vlans, 0, MAX_VLAN >> 3);
559 
560     /* Flush any async TX */
561     for (i = 0; i < n->max_queue_pairs; i++) {
562         NetClientState *nc = qemu_get_subqueue(n->nic, i);
563 
564         if (nc->peer) {
565             qemu_flush_or_purge_queued_packets(nc->peer, true);
566             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
567         }
568     }
569 }
570 
571 static void peer_test_vnet_hdr(VirtIONet *n)
572 {
573     NetClientState *nc = qemu_get_queue(n->nic);
574     if (!nc->peer) {
575         return;
576     }
577 
578     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
579 }
580 
581 static int peer_has_vnet_hdr(VirtIONet *n)
582 {
583     return n->has_vnet_hdr;
584 }
585 
586 static int peer_has_ufo(VirtIONet *n)
587 {
588     if (!peer_has_vnet_hdr(n))
589         return 0;
590 
591     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
592 
593     return n->has_ufo;
594 }
595 
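/*
 * Choose the vnet header size implied by the negotiated features: with
 * VIRTIO_F_VERSION_1 this is the hash-report header when hash reporting
 * was negotiated and the mergeable-rxbuf header otherwise; legacy guests
 * get the mergeable or basic header depending on VIRTIO_NET_F_MRG_RXBUF.
 * The host header length is raised to match wherever the peer supports it.
 */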
596 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
597                                        int version_1, int hash_report)
598 {
599     int i;
600     NetClientState *nc;
601 
602     n->mergeable_rx_bufs = mergeable_rx_bufs;
603 
604     if (version_1) {
605         n->guest_hdr_len = hash_report ?
606             sizeof(struct virtio_net_hdr_v1_hash) :
607             sizeof(struct virtio_net_hdr_mrg_rxbuf);
608         n->rss_data.populate_hash = !!hash_report;
609     } else {
610         n->guest_hdr_len = n->mergeable_rx_bufs ?
611             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
612             sizeof(struct virtio_net_hdr);
613     }
614 
615     for (i = 0; i < n->max_queue_pairs; i++) {
616         nc = qemu_get_subqueue(n->nic, i);
617 
618         if (peer_has_vnet_hdr(n) &&
619             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
620             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
621             n->host_hdr_len = n->guest_hdr_len;
622         }
623     }
624 }
625 
626 static int virtio_net_max_tx_queue_size(VirtIONet *n)
627 {
628     NetClientState *peer = n->nic_conf.peers.ncs[0];
629 
630     /*
631      * Only vhost-user backends support TX queue sizes above the default.
632      */
633     if (!peer) {
634         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
635     }
636 
637     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
638         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
639     }
640 
641     return VIRTQUEUE_MAX_SIZE;
642 }
643 
644 static int peer_attach(VirtIONet *n, int index)
645 {
646     NetClientState *nc = qemu_get_subqueue(n->nic, index);
647 
648     if (!nc->peer) {
649         return 0;
650     }
651 
652     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
653         vhost_set_vring_enable(nc->peer, 1);
654     }
655 
656     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
657         return 0;
658     }
659 
660     if (n->max_queue_pairs == 1) {
661         return 0;
662     }
663 
664     return tap_enable(nc->peer);
665 }
666 
667 static int peer_detach(VirtIONet *n, int index)
668 {
669     NetClientState *nc = qemu_get_subqueue(n->nic, index);
670 
671     if (!nc->peer) {
672         return 0;
673     }
674 
675     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
676         vhost_set_vring_enable(nc->peer, 0);
677     }
678 
679     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
680         return 0;
681     }
682 
683     return tap_disable(nc->peer);
684 }
685 
686 static void virtio_net_set_queue_pairs(VirtIONet *n)
687 {
688     int i;
689     int r;
690 
691     if (n->nic->peer_deleted) {
692         return;
693     }
694 
695     for (i = 0; i < n->max_queue_pairs; i++) {
696         if (i < n->curr_queue_pairs) {
697             r = peer_attach(n, i);
698             assert(!r);
699         } else {
700             r = peer_detach(n, i);
701             assert(!r);
702         }
703     }
704 }
705 
706 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
707 
708 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
709                                         Error **errp)
710 {
711     VirtIONet *n = VIRTIO_NET(vdev);
712     NetClientState *nc = qemu_get_queue(n->nic);
713 
714     /* First, sync all features that virtio-net could possibly support */
715     features |= n->host_features;
716 
717     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
718 
719     if (!peer_has_vnet_hdr(n)) {
720         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
721         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
722         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
723         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
724 
725         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
726         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
727         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
728         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
729 
730         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
731     }
732 
733     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
734         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
735         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
736     }
737 
738     if (!get_vhost_net(nc->peer)) {
739         return features;
740     }
741 
742     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
743         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
744     }
745     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
746     vdev->backend_features = features;
747 
748     if (n->mtu_bypass_backend &&
749             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
750         features |= (1ULL << VIRTIO_NET_F_MTU);
751     }
752 
753     return features;
754 }
755 
756 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
757 {
758     uint64_t features = 0;
759 
760     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
761      * but also these: */
762     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
763     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
764     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
765     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
766     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
767 
768     return features;
769 }
770 
771 static void virtio_net_apply_guest_offloads(VirtIONet *n)
772 {
773     qemu_set_offload(qemu_get_queue(n->nic)->peer,
774             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
775             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
776             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
777             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
778             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
779 }
780 
781 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
782 {
783     static const uint64_t guest_offloads_mask =
784         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
785         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
786         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
787         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
788         (1ULL << VIRTIO_NET_F_GUEST_UFO);
789 
790     return guest_offloads_mask & features;
791 }
792 
793 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
794 {
795     VirtIODevice *vdev = VIRTIO_DEVICE(n);
796     return virtio_net_guest_offloads_by_features(vdev->guest_features);
797 }
798 
799 typedef struct {
800     VirtIONet *n;
801     DeviceState *dev;
802 } FailoverDevice;
803 
804 /**
805  * Set the failover primary device
806  *
807  * @dev: device being checked during the bus walk
808  * @opaque: FailoverDevice to fill in when the primary is found
810  */
811 static int failover_set_primary(DeviceState *dev, void *opaque)
812 {
813     FailoverDevice *fdev = opaque;
814     PCIDevice *pci_dev = (PCIDevice *)
815         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
816 
817     if (!pci_dev) {
818         return 0;
819     }
820 
821     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
822         fdev->dev = dev;
823         return 1;
824     }
825 
826     return 0;
827 }
828 
829 /**
830  * Find the primary device for this failover virtio-net
831  *
832  * @n: VirtIONet device
834  */
835 static DeviceState *failover_find_primary_device(VirtIONet *n)
836 {
837     FailoverDevice fdev = {
838         .n = n,
839     };
840 
841     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
842                        NULL, NULL, &fdev);
843     return fdev.dev;
844 }
845 
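/*
 * Hotplug the failover primary from the stored device options unless it
 * is already present. If the hotplug fails, the stored options are
 * dropped so a broken configuration is not retried on the next attempt.
 */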
846 static void failover_add_primary(VirtIONet *n, Error **errp)
847 {
848     Error *err = NULL;
849     DeviceState *dev = failover_find_primary_device(n);
850 
851     if (dev) {
852         return;
853     }
854 
855     if (!n->primary_opts) {
856         error_setg(errp, "Primary device not found");
857         error_append_hint(errp, "Virtio-net failover will not work. Make "
858                           "sure primary device has parameter"
859                           " failover_pair_id=%s\n", n->netclient_name);
860         return;
861     }
862 
863     dev = qdev_device_add_from_qdict(n->primary_opts,
864                                      n->primary_opts_from_json,
865                                      &err);
866     if (err) {
867         qobject_unref(n->primary_opts);
868         n->primary_opts = NULL;
869     } else {
870         object_unref(OBJECT(dev));
871     }
872     error_propagate(errp, err);
873 }
874 
875 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
876 {
877     VirtIONet *n = VIRTIO_NET(vdev);
878     Error *err = NULL;
879     int i;
880 
881     if (n->mtu_bypass_backend &&
882             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
883         features &= ~(1ULL << VIRTIO_NET_F_MTU);
884     }
885 
886     virtio_net_set_multiqueue(n,
887                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
888                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
889 
890     virtio_net_set_mrg_rx_bufs(n,
891                                virtio_has_feature(features,
892                                                   VIRTIO_NET_F_MRG_RXBUF),
893                                virtio_has_feature(features,
894                                                   VIRTIO_F_VERSION_1),
895                                virtio_has_feature(features,
896                                                   VIRTIO_NET_F_HASH_REPORT));
897 
898     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
899         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
900     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
901         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
902     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
903 
904     if (n->has_vnet_hdr) {
905         n->curr_guest_offloads =
906             virtio_net_guest_offloads_by_features(features);
907         virtio_net_apply_guest_offloads(n);
908     }
909 
910     for (i = 0; i < n->max_queue_pairs; i++) {
911         NetClientState *nc = qemu_get_subqueue(n->nic, i);
912 
913         if (!get_vhost_net(nc->peer)) {
914             continue;
915         }
916         vhost_net_ack_features(get_vhost_net(nc->peer), features);
917     }
918 
919     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
920         memset(n->vlans, 0, MAX_VLAN >> 3);
921     } else {
922         memset(n->vlans, 0xff, MAX_VLAN >> 3);
923     }
924 
925     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
926         qapi_event_send_failover_negotiated(n->netclient_name);
927         qatomic_set(&n->failover_primary_hidden, false);
928         failover_add_primary(n, &err);
929         if (err) {
930             if (!qtest_enabled()) {
931                 warn_report_err(err);
932             } else {
933                 error_free(err);
934             }
935         }
936     }
937 }
938 
939 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
940                                      struct iovec *iov, unsigned int iov_cnt)
941 {
942     uint8_t on;
943     size_t s;
944     NetClientState *nc = qemu_get_queue(n->nic);
945 
946     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
947     if (s != sizeof(on)) {
948         return VIRTIO_NET_ERR;
949     }
950 
951     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
952         n->promisc = on;
953     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
954         n->allmulti = on;
955     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
956         n->alluni = on;
957     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
958         n->nomulti = on;
959     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
960         n->nouni = on;
961     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
962         n->nobcast = on;
963     } else {
964         return VIRTIO_NET_ERR;
965     }
966 
967     rxfilter_notify(nc);
968 
969     return VIRTIO_NET_OK;
970 }
971 
972 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
973                                      struct iovec *iov, unsigned int iov_cnt)
974 {
975     VirtIODevice *vdev = VIRTIO_DEVICE(n);
976     uint64_t offloads;
977     size_t s;
978 
979     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
980         return VIRTIO_NET_ERR;
981     }
982 
983     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
984     if (s != sizeof(offloads)) {
985         return VIRTIO_NET_ERR;
986     }
987 
988     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
989         uint64_t supported_offloads;
990 
991         offloads = virtio_ldq_p(vdev, &offloads);
992 
993         if (!n->has_vnet_hdr) {
994             return VIRTIO_NET_ERR;
995         }
996 
997         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
998             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
999         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1000             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1001         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1002 
1003         supported_offloads = virtio_net_supported_guest_offloads(n);
1004         if (offloads & ~supported_offloads) {
1005             return VIRTIO_NET_ERR;
1006         }
1007 
1008         n->curr_guest_offloads = offloads;
1009         virtio_net_apply_guest_offloads(n);
1010 
1011         return VIRTIO_NET_OK;
1012     } else {
1013         return VIRTIO_NET_ERR;
1014     }
1015 }
1016 
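/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
 * back to back, unicast entries first:
 *
 *   le32 entries; u8 macs[entries][ETH_ALEN];    unicast filter
 *   le32 entries; u8 macs[entries][ETH_ALEN];    multicast filter
 *
 * A table that exceeds MAC_TABLE_ENTRIES is not stored; the
 * corresponding overflow flag is set instead.
 */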
1017 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1018                                  struct iovec *iov, unsigned int iov_cnt)
1019 {
1020     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1021     struct virtio_net_ctrl_mac mac_data;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024 
1025     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1026         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1027             return VIRTIO_NET_ERR;
1028         }
1029         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1030         assert(s == sizeof(n->mac));
1031         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1032         rxfilter_notify(nc);
1033 
1034         return VIRTIO_NET_OK;
1035     }
1036 
1037     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1038         return VIRTIO_NET_ERR;
1039     }
1040 
1041     int in_use = 0;
1042     int first_multi = 0;
1043     uint8_t uni_overflow = 0;
1044     uint8_t multi_overflow = 0;
1045     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1046 
1047     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1048                    sizeof(mac_data.entries));
1049     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1050     if (s != sizeof(mac_data.entries)) {
1051         goto error;
1052     }
1053     iov_discard_front(&iov, &iov_cnt, s);
1054 
1055     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1056         goto error;
1057     }
1058 
1059     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1060         s = iov_to_buf(iov, iov_cnt, 0, macs,
1061                        mac_data.entries * ETH_ALEN);
1062         if (s != mac_data.entries * ETH_ALEN) {
1063             goto error;
1064         }
1065         in_use += mac_data.entries;
1066     } else {
1067         uni_overflow = 1;
1068     }
1069 
1070     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1071 
1072     first_multi = in_use;
1073 
1074     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1075                    sizeof(mac_data.entries));
1076     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1077     if (s != sizeof(mac_data.entries)) {
1078         goto error;
1079     }
1080 
1081     iov_discard_front(&iov, &iov_cnt, s);
1082 
1083     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1084         goto error;
1085     }
1086 
1087     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1088         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1089                        mac_data.entries * ETH_ALEN);
1090         if (s != mac_data.entries * ETH_ALEN) {
1091             goto error;
1092         }
1093         in_use += mac_data.entries;
1094     } else {
1095         multi_overflow = 1;
1096     }
1097 
1098     n->mac_table.in_use = in_use;
1099     n->mac_table.first_multi = first_multi;
1100     n->mac_table.uni_overflow = uni_overflow;
1101     n->mac_table.multi_overflow = multi_overflow;
1102     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1103     g_free(macs);
1104     rxfilter_notify(nc);
1105 
1106     return VIRTIO_NET_OK;
1107 
1108 error:
1109     g_free(macs);
1110     return VIRTIO_NET_ERR;
1111 }
1112 
1113 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1114                                         struct iovec *iov, unsigned int iov_cnt)
1115 {
1116     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1117     uint16_t vid;
1118     size_t s;
1119     NetClientState *nc = qemu_get_queue(n->nic);
1120 
1121     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1122     vid = virtio_lduw_p(vdev, &vid);
1123     if (s != sizeof(vid)) {
1124         return VIRTIO_NET_ERR;
1125     }
1126 
1127     if (vid >= MAX_VLAN)
1128         return VIRTIO_NET_ERR;
1129 
1130     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1131         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1132     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1133         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1134     else
1135         return VIRTIO_NET_ERR;
1136 
1137     rxfilter_notify(nc);
1138 
1139     return VIRTIO_NET_OK;
1140 }
1141 
1142 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1143                                       struct iovec *iov, unsigned int iov_cnt)
1144 {
1145     trace_virtio_net_handle_announce(n->announce_timer.round);
1146     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1147         n->status & VIRTIO_NET_S_ANNOUNCE) {
1148         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1149         if (n->announce_timer.round) {
1150             qemu_announce_timer_step(&n->announce_timer);
1151         }
1152         return VIRTIO_NET_OK;
1153     } else {
1154         return VIRTIO_NET_ERR;
1155     }
1156 }
1157 
1158 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1159 
1160 static void virtio_net_disable_rss(VirtIONet *n)
1161 {
1162     if (n->rss_data.enabled) {
1163         trace_virtio_net_rss_disable();
1164     }
1165     n->rss_data.enabled = false;
1166 
1167     virtio_net_detach_epbf_rss(n);
1168 }
1169 
1170 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1171 {
1172     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1173     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1174         return false;
1175     }
1176 
1177     return nc->info->set_steering_ebpf(nc, prog_fd);
1178 }
1179 
1180 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1181                                    struct EBPFRSSConfig *config)
1182 {
1183     config->redirect = data->redirect;
1184     config->populate_hash = data->populate_hash;
1185     config->hash_types = data->hash_types;
1186     config->indirections_len = data->indirections_len;
1187     config->default_queue = data->default_queue;
1188 }
1189 
1190 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1191 {
1192     struct EBPFRSSConfig config = {};
1193 
1194     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1195         return false;
1196     }
1197 
1198     rss_data_to_rss_config(&n->rss_data, &config);
1199 
1200     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1201                           n->rss_data.indirections_table, n->rss_data.key)) {
1202         return false;
1203     }
1204 
1205     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1206         return false;
1207     }
1208 
1209     return true;
1210 }
1211 
1212 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1213 {
1214     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1215 }
1216 
1217 static bool virtio_net_load_ebpf(VirtIONet *n)
1218 {
1219     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1220         /* backend doesn't support steering eBPF */
1221         return false;
1222     }
1223 
1224     return ebpf_rss_load(&n->ebpf_rss);
1225 }
1226 
1227 static void virtio_net_unload_ebpf(VirtIONet *n)
1228 {
1229     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1230     ebpf_rss_unload(&n->ebpf_rss);
1231 }
1232 
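/*
 * Parse a virtio_net_rss_config command, used both for
 * VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) and
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG (hash reporting only). The layout as
 * consumed below is:
 *
 *   le32 hash_types; le16 indirection_table_mask; le16 unclassified_queue;
 *   le16 indirection_table[mask + 1]; le16 max_tx_vq;
 *   u8 hash_key_length; u8 hash_key_data[hash_key_length];
 *
 * Returns the number of queue pairs to use, or 0 on error.
 */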
1233 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1234                                       struct iovec *iov,
1235                                       unsigned int iov_cnt,
1236                                       bool do_rss)
1237 {
1238     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1239     struct virtio_net_rss_config cfg;
1240     size_t s, offset = 0, size_get;
1241     uint16_t queue_pairs, i;
1242     struct {
1243         uint16_t us;
1244         uint8_t b;
1245     } QEMU_PACKED temp;
1246     const char *err_msg = "";
1247     uint32_t err_value = 0;
1248 
1249     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1250         err_msg = "RSS is not negotiated";
1251         goto error;
1252     }
1253     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1254         err_msg = "Hash report is not negotiated";
1255         goto error;
1256     }
1257     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1258     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1259     if (s != size_get) {
1260         err_msg = "Short command buffer";
1261         err_value = (uint32_t)s;
1262         goto error;
1263     }
1264     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1265     n->rss_data.indirections_len =
1266         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1267     n->rss_data.indirections_len++;
1268     if (!do_rss) {
1269         n->rss_data.indirections_len = 1;
1270     }
1271     if (!is_power_of_2(n->rss_data.indirections_len)) {
1272         err_msg = "Invalid size of indirection table";
1273         err_value = n->rss_data.indirections_len;
1274         goto error;
1275     }
1276     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1277         err_msg = "Too large indirection table";
1278         err_value = n->rss_data.indirections_len;
1279         goto error;
1280     }
1281     n->rss_data.default_queue = do_rss ?
1282         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1283     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1284         err_msg = "Invalid default queue";
1285         err_value = n->rss_data.default_queue;
1286         goto error;
1287     }
1288     offset += size_get;
1289     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1290     g_free(n->rss_data.indirections_table);
1291     n->rss_data.indirections_table = g_malloc(size_get);
1292     if (!n->rss_data.indirections_table) {
1293         err_msg = "Can't allocate indirections table";
1294         err_value = n->rss_data.indirections_len;
1295         goto error;
1296     }
1297     s = iov_to_buf(iov, iov_cnt, offset,
1298                    n->rss_data.indirections_table, size_get);
1299     if (s != size_get) {
1300         err_msg = "Short indirection table buffer";
1301         err_value = (uint32_t)s;
1302         goto error;
1303     }
1304     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1305         uint16_t val = n->rss_data.indirections_table[i];
1306         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1307     }
1308     offset += size_get;
1309     size_get = sizeof(temp);
1310     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1311     if (s != size_get) {
1312         err_msg = "Can't get queue_pairs";
1313         err_value = (uint32_t)s;
1314         goto error;
1315     }
1316     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1317     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1318         err_msg = "Invalid number of queue_pairs";
1319         err_value = queue_pairs;
1320         goto error;
1321     }
1322     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1323         err_msg = "Invalid key size";
1324         err_value = temp.b;
1325         goto error;
1326     }
1327     if (!temp.b && n->rss_data.hash_types) {
1328         err_msg = "No key provided";
1329         err_value = 0;
1330         goto error;
1331     }
1332     if (!temp.b && !n->rss_data.hash_types) {
1333         virtio_net_disable_rss(n);
1334         return queue_pairs;
1335     }
1336     offset += size_get;
1337     size_get = temp.b;
1338     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1339     if (s != size_get) {
1340         err_msg = "Can't get key buffer";
1341         err_value = (uint32_t)s;
1342         goto error;
1343     }
1344     n->rss_data.enabled = true;
1345 
1346     if (!n->rss_data.populate_hash) {
1347         if (!virtio_net_attach_epbf_rss(n)) {
1348             /* EBPF must be loaded for vhost */
1349             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1350                 warn_report("Can't load eBPF RSS for vhost");
1351                 goto error;
1352             }
1353             /* fall back to software RSS */
1354             warn_report("Can't load eBPF RSS - falling back to software RSS");
1355             n->rss_data.enabled_software_rss = true;
1356         }
1357     } else {
1358         /* use software RSS for hash population, and detach
1359          * eBPF if it was loaded before */
1360         virtio_net_detach_epbf_rss(n);
1361         n->rss_data.enabled_software_rss = true;
1362     }
1363 
1364     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1365                                 n->rss_data.indirections_len,
1366                                 temp.b);
1367     return queue_pairs;
1368 error:
1369     trace_virtio_net_rss_error(err_msg, err_value);
1370     virtio_net_disable_rss(n);
1371     return 0;
1372 }
1373 
1374 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1375                                 struct iovec *iov, unsigned int iov_cnt)
1376 {
1377     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1378     uint16_t queue_pairs;
1379 
1380     virtio_net_disable_rss(n);
1381     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1382         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1383         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1384     }
1385     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1386         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1387     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1388         struct virtio_net_ctrl_mq mq;
1389         size_t s;
1390         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1391             return VIRTIO_NET_ERR;
1392         }
1393         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1394         if (s != sizeof(mq)) {
1395             return VIRTIO_NET_ERR;
1396         }
1397         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1398 
1399     } else {
1400         return VIRTIO_NET_ERR;
1401     }
1402 
1403     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1404         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1405         queue_pairs > n->max_queue_pairs ||
1406         !n->multiqueue) {
1407         return VIRTIO_NET_ERR;
1408     }
1409 
1410     n->curr_queue_pairs = queue_pairs;
1411     /* Stop the backend before changing the number of queue_pairs to
1412      * avoid handling a disabled queue */
1413     virtio_net_set_status(vdev, vdev->status);
1414     virtio_net_set_queue_pairs(n);
1415 
1416     return VIRTIO_NET_OK;
1417 }
1418 
1419 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1420 {
1421     VirtIONet *n = VIRTIO_NET(vdev);
1422     struct virtio_net_ctrl_hdr ctrl;
1423     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1424     VirtQueueElement *elem;
1425     size_t s;
1426     struct iovec *iov, *iov2;
1427     unsigned int iov_cnt;
1428 
1429     for (;;) {
1430         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1431         if (!elem) {
1432             break;
1433         }
1434         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1435             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1436             virtio_error(vdev, "virtio-net ctrl missing headers");
1437             virtqueue_detach_element(vq, elem, 0);
1438             g_free(elem);
1439             break;
1440         }
1441 
1442         iov_cnt = elem->out_num;
1443         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1444         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1445         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1446         if (s != sizeof(ctrl)) {
1447             status = VIRTIO_NET_ERR;
1448         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1449             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1450         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1451             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1452         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1453             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1454         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1455             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1456         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1457             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1458         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1459             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1460         }
1461 
1462         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1463         assert(s == sizeof(status));
1464 
1465         virtqueue_push(vq, elem, sizeof(status));
1466         virtio_notify(vdev, vq);
1467         g_free(iov2);
1468         g_free(elem);
1469     }
1470 }
1471 
1472 /* RX */
1473 
1474 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1475 {
1476     VirtIONet *n = VIRTIO_NET(vdev);
1477     int queue_index = vq2q(virtio_get_queue_index(vq));
1478 
1479     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1480 }
1481 
1482 static bool virtio_net_can_receive(NetClientState *nc)
1483 {
1484     VirtIONet *n = qemu_get_nic_opaque(nc);
1485     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1486     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1487 
1488     if (!vdev->vm_running) {
1489         return false;
1490     }
1491 
1492     if (nc->queue_index >= n->curr_queue_pairs) {
1493         return false;
1494     }
1495 
1496     if (!virtio_queue_ready(q->rx_vq) ||
1497         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1498         return false;
1499     }
1500 
1501     return true;
1502 }
1503 
1504 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1505 {
1506     VirtIONet *n = q->n;
1507     if (virtio_queue_empty(q->rx_vq) ||
1508         (n->mergeable_rx_bufs &&
1509          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1510         virtio_queue_set_notification(q->rx_vq, 1);
1511 
1512         /* To avoid a race condition where the guest has made some buffers
1513          * available after the above check but before notification was
1514          * enabled, check for available buffers again.
1515          */
1516         if (virtio_queue_empty(q->rx_vq) ||
1517             (n->mergeable_rx_bufs &&
1518              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1519             return 0;
1520         }
1521     }
1522 
1523     virtio_queue_set_notification(q->rx_vq, 0);
1524     return 1;
1525 }
1526 
1527 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1528 {
1529     virtio_tswap16s(vdev, &hdr->hdr_len);
1530     virtio_tswap16s(vdev, &hdr->gso_size);
1531     virtio_tswap16s(vdev, &hdr->csum_start);
1532     virtio_tswap16s(vdev, &hdr->csum_offset);
1533 }
1534 
1535 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1536  * it never finds out that the packets don't have valid checksums.  This
1537  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1538  * fix this with Xen but it hasn't appeared in an upstream release of
1539  * dhclient yet.
1540  *
1541  * To avoid breaking existing guests, we catch udp packets and add
1542  * checksums.  This is terrible but it's better than hacking the guest
1543  * kernels.
1544  *
1545  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1546  * we should provide a mechanism to disable it to avoid polluting the host
1547  * cache.
1548  */
1549 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1550                                         uint8_t *buf, size_t size)
1551 {
1552     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1553         (size > 27 && size < 1500) && /* normal sized MTU */
1554         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1555         (buf[23] == 17) && /* ip.protocol == UDP */
1556         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1557         net_checksum_calculate(buf, size, CSUM_UDP);
1558         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1559     }
1560 }
1561 
1562 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1563                            const void *buf, size_t size)
1564 {
1565     if (n->has_vnet_hdr) {
1566         /* FIXME this cast is evil */
1567         void *wbuf = (void *)buf;
1568         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1569                                     size - n->host_hdr_len);
1570 
1571         if (n->needs_vnet_hdr_swap) {
1572             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1573         }
1574         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1575     } else {
1576         struct virtio_net_hdr hdr = {
1577             .flags = 0,
1578             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1579         };
1580         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1581     }
1582 }
1583 
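/*
 * Apply the RX filter to a packet: returns 1 if the packet should be
 * delivered and 0 if it should be dropped, honouring promiscuous mode,
 * the VLAN table and the unicast/multicast/broadcast MAC filters.
 */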
1584 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1585 {
1586     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1587     static const uint8_t vlan[] = {0x81, 0x00};
1588     uint8_t *ptr = (uint8_t *)buf;
1589     int i;
1590 
1591     if (n->promisc)
1592         return 1;
1593 
1594     ptr += n->host_hdr_len;
1595 
1596     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1597         int vid = lduw_be_p(ptr + 14) & 0xfff;
1598         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1599             return 0;
1600     }
1601 
1602     if (ptr[0] & 1) { /* multicast */
1603         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1604             return !n->nobcast;
1605         } else if (n->nomulti) {
1606             return 0;
1607         } else if (n->allmulti || n->mac_table.multi_overflow) {
1608             return 1;
1609         }
1610 
1611         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1612             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1613                 return 1;
1614             }
1615         }
1616     } else { /* unicast */
1617         if (n->nouni) {
1618             return 0;
1619         } else if (n->alluni || n->mac_table.uni_overflow) {
1620             return 1;
1621         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1622             return 1;
1623         }
1624 
1625         for (i = 0; i < n->mac_table.first_multi; i++) {
1626             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1627                 return 1;
1628             }
1629         }
1630     }
1631 
1632     return 0;
1633 }
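
/*
 * A sketch of the VLAN lookup above: n->vlans is a bitmap of MAX_VLAN
 * (4096) bits held in 128 uint32 words, so for a 12-bit VLAN id 'vid':
 *
 *     word = vid >> 5;            (vid 100 -> word 3)
 *     bit  = vid & 0x1f;          (vid 100 -> bit 4)
 *     pass = n->vlans[word] & (1U << bit);
 */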
1634 
1635 static uint8_t virtio_net_get_hash_type(bool isip4,
1636                                         bool isip6,
1637                                         bool isudp,
1638                                         bool istcp,
1639                                         uint32_t types)
1640 {
1641     if (isip4) {
1642         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1643             return NetPktRssIpV4Tcp;
1644         }
1645         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1646             return NetPktRssIpV4Udp;
1647         }
1648         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1649             return NetPktRssIpV4;
1650         }
1651     } else if (isip6) {
1652         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1653                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1654 
1655         if (istcp && (types & mask)) {
1656             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1657                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1658         }
1659         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1660         if (isudp && (types & mask)) {
1661             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1662                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1663         }
1664         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1665         if (types & mask) {
1666             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1667                 NetPktRssIpV6Ex : NetPktRssIpV6;
1668         }
1669     }
1670     return 0xff;
1671 }
1672 
1673 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1674                                    uint32_t hash)
1675 {
1676     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1677     hdr->hash_value = hash;
1678     hdr->hash_report = report;
1679 }
1680 
1681 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1682                                   size_t size)
1683 {
1684     VirtIONet *n = qemu_get_nic_opaque(nc);
1685     unsigned int index = nc->queue_index, new_index = index;
1686     struct NetRxPkt *pkt = n->rx_pkt;
1687     uint8_t net_hash_type;
1688     uint32_t hash;
1689     bool isip4, isip6, isudp, istcp;
1690     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1691         VIRTIO_NET_HASH_REPORT_IPv4,
1692         VIRTIO_NET_HASH_REPORT_TCPv4,
1693         VIRTIO_NET_HASH_REPORT_TCPv6,
1694         VIRTIO_NET_HASH_REPORT_IPv6,
1695         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1696         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1697         VIRTIO_NET_HASH_REPORT_UDPv4,
1698         VIRTIO_NET_HASH_REPORT_UDPv6,
1699         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1700     };
1701 
1702     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1703                              size - n->host_hdr_len);
1704     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1705     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1706         istcp = isudp = false;
1707     }
1708     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1709         istcp = isudp = false;
1710     }
1711     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1712                                              n->rss_data.hash_types);
1713     if (net_hash_type > NetPktRssIpV6UdpEx) {
1714         if (n->rss_data.populate_hash) {
1715             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1716         }
1717         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1718     }
1719 
1720     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1721 
1722     if (n->rss_data.populate_hash) {
1723         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1724     }
1725 
1726     if (n->rss_data.redirect) {
1727         new_index = hash & (n->rss_data.indirections_len - 1);
1728         new_index = n->rss_data.indirections_table[new_index];
1729     }
1730 
1731     return (index == new_index) ? -1 : new_index;
1732 }
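
/*
 * A worked example of the redirection above, assuming the power-of-two
 * table length implied by the virtio-net RSS command's table mask: with
 * indirections_len == 128 and hash == 0x1234abcd, 'hash & 127' == 0x4d,
 * so the packet is steered to indirections_table[0x4d].  If that equals
 * the receiving queue, -1 is returned and no requeueing is needed.
 */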
1733 
1734 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1735                                       size_t size, bool no_rss)
1736 {
1737     VirtIONet *n = qemu_get_nic_opaque(nc);
1738     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1739     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1740     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1741     size_t lens[VIRTQUEUE_MAX_SIZE];
1742     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1743     struct virtio_net_hdr_mrg_rxbuf mhdr;
1744     unsigned mhdr_cnt = 0;
1745     size_t offset, i, guest_offset, j;
1746     ssize_t err;
1747 
1748     if (!virtio_net_can_receive(nc)) {
1749         return -1;
1750     }
1751 
1752     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1753         int index = virtio_net_process_rss(nc, buf, size);
1754         if (index >= 0) {
1755             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1756             return virtio_net_receive_rcu(nc2, buf, size, true);
1757         }
1758     }
1759 
1760     /* hdr_len refers to the header we supply to the guest */
1761     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1762         return 0;
1763     }
1764 
1765     if (!receive_filter(n, buf, size))
1766         return size;
1767 
1768     offset = i = 0;
1769 
1770     while (offset < size) {
1771         VirtQueueElement *elem;
1772         int len, total;
1773         const struct iovec *sg;
1774 
1775         total = 0;
1776 
1777         if (i == VIRTQUEUE_MAX_SIZE) {
1778             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1779             err = size;
1780             goto err;
1781         }
1782 
1783         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1784         if (!elem) {
1785             if (i) {
1786                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1787                              "i %zd mergeable %d offset %zd, size %zd, "
1788                              "guest hdr len %zd, host hdr len %zd "
1789                              "guest features 0x%" PRIx64,
1790                              i, n->mergeable_rx_bufs, offset, size,
1791                              n->guest_hdr_len, n->host_hdr_len,
1792                              vdev->guest_features);
1793             }
1794             err = -1;
1795             goto err;
1796         }
1797 
1798         if (elem->in_num < 1) {
1799             virtio_error(vdev,
1800                          "virtio-net receive queue contains no in buffers");
1801             virtqueue_detach_element(q->rx_vq, elem, 0);
1802             g_free(elem);
1803             err = -1;
1804             goto err;
1805         }
1806 
1807         sg = elem->in_sg;
1808         if (i == 0) {
1809             assert(offset == 0);
1810             if (n->mergeable_rx_bufs) {
1811                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1812                                     sg, elem->in_num,
1813                                     offsetof(typeof(mhdr), num_buffers),
1814                                     sizeof(mhdr.num_buffers));
1815             }
1816 
1817             receive_header(n, sg, elem->in_num, buf, size);
1818             if (n->rss_data.populate_hash) {
1819                 offset = sizeof(mhdr);
1820                 iov_from_buf(sg, elem->in_num, offset,
1821                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1822             }
1823             offset = n->host_hdr_len;
1824             total += n->guest_hdr_len;
1825             guest_offset = n->guest_hdr_len;
1826         } else {
1827             guest_offset = 0;
1828         }
1829 
1830         /* copy in packet.  ugh */
1831         len = iov_from_buf(sg, elem->in_num, guest_offset,
1832                            buf + offset, size - offset);
1833         total += len;
1834         offset += len;
1835         /* If buffers can't be merged, at this point we
1836          * must have consumed the complete packet.
1837          * Otherwise, drop it. */
1838         if (!n->mergeable_rx_bufs && offset < size) {
1839             virtqueue_unpop(q->rx_vq, elem, total);
1840             g_free(elem);
1841             err = size;
1842             goto err;
1843         }
1844 
1845         elems[i] = elem;
1846         lens[i] = total;
1847         i++;
1848     }
1849 
1850     if (mhdr_cnt) {
1851         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1852         iov_from_buf(mhdr_sg, mhdr_cnt,
1853                      0,
1854                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1855     }
1856 
1857     for (j = 0; j < i; j++) {
1858         /* signal other side */
1859         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1860         g_free(elems[j]);
1861     }
1862 
1863     virtqueue_flush(q->rx_vq, i);
1864     virtio_notify(vdev, q->rx_vq);
1865 
1866     return size;
1867 
1868 err:
1869     for (j = 0; j < i; j++) {
1870         g_free(elems[j]);
1871     }
1872 
1873     return err;
1874 }
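
/*
 * A sketch of the mergeable-rx path above: a packet larger than one
 * buffer, say 3000 bytes into 1500-byte buffers, consumes several
 * descriptor chains.  The virtio-net header goes into the first chain
 * only; mhdr_sg keeps an iovec slice aimed at its num_buffers field, and
 * once the copy loop knows the final chain count it patches num_buffers
 * in place before virtqueue_fill()/virtqueue_flush() hand everything
 * back to the guest.
 */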
1875 
1876 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1877                                   size_t size)
1878 {
1879     RCU_READ_LOCK_GUARD();
1880 
1881     return virtio_net_receive_rcu(nc, buf, size, false);
1882 }
1883 
1884 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1885                                          const uint8_t *buf,
1886                                          VirtioNetRscUnit *unit)
1887 {
1888     uint16_t ip_hdrlen;
1889     struct ip_header *ip;
1890 
1891     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1892                               + sizeof(struct eth_header));
1893     unit->ip = (void *)ip;
1894     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1895     unit->ip_plen = &ip->ip_len;
1896     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1897     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1898     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1899 }
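
/*
 * The header-length arithmetic above, for illustration: ip_ver_len 0x45
 * is version 4 with IHL 5, so (0x45 & 0xF) << 2 == 20 bytes.  The TCP
 * data offset sits in the top nibble of th_offset_flags and counts
 * 32-bit words, hence (flags & 0xF000) >> 10 == ((flags >> 12) << 2);
 * e.g. 0x5000 -> a 20-byte TCP header.
 */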
1900 
1901 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1902                                          const uint8_t *buf,
1903                                          VirtioNetRscUnit *unit)
1904 {
1905     struct ip6_header *ip6;
1906 
1907     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1908                                  + sizeof(struct eth_header));
1909     unit->ip = ip6;
1910     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1911     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1912                                         + sizeof(struct ip6_header));
1913     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1914 
1915     /* Note that the payload length differs between IPv4 and IPv6:
1916        the IP header is excluded from the payload length in IPv6 */
1917     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1918 }
1919 
1920 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1921                                        VirtioNetRscSeg *seg)
1922 {
1923     int ret;
1924     struct virtio_net_hdr_v1 *h;
1925 
1926     h = (struct virtio_net_hdr_v1 *)seg->buf;
1927     h->flags = 0;
1928     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1929 
1930     if (seg->is_coalesced) {
1931         h->rsc.segments = seg->packets;
1932         h->rsc.dup_acks = seg->dup_ack;
1933         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1934         if (chain->proto == ETH_P_IP) {
1935             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1936         } else {
1937             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1938         }
1939     }
1940 
1941     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1942     QTAILQ_REMOVE(&chain->buffers, seg, next);
1943     g_free(seg->buf);
1944     g_free(seg);
1945 
1946     return ret;
1947 }
1948 
1949 static void virtio_net_rsc_purge(void *opq)
1950 {
1951     VirtioNetRscSeg *seg, *rn;
1952     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1953 
1954     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1955         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1956             chain->stat.purge_failed++;
1957             continue;
1958         }
1959     }
1960 
1961     chain->stat.timer++;
1962     if (!QTAILQ_EMPTY(&chain->buffers)) {
1963         timer_mod(chain->drain_timer,
1964               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1965     }
1966 }
1967 
1968 static void virtio_net_rsc_cleanup(VirtIONet *n)
1969 {
1970     VirtioNetRscChain *chain, *rn_chain;
1971     VirtioNetRscSeg *seg, *rn_seg;
1972 
1973     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1974         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1975             QTAILQ_REMOVE(&chain->buffers, seg, next);
1976             g_free(seg->buf);
1977             g_free(seg);
1978         }
1979 
1980         timer_free(chain->drain_timer);
1981         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1982         g_free(chain);
1983     }
1984 }
1985 
1986 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1987                                      NetClientState *nc,
1988                                      const uint8_t *buf, size_t size)
1989 {
1990     uint16_t hdr_len;
1991     VirtioNetRscSeg *seg;
1992 
1993     hdr_len = chain->n->guest_hdr_len;
1994     seg = g_malloc(sizeof(VirtioNetRscSeg));
1995     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1996         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1997     memcpy(seg->buf, buf, size);
1998     seg->size = size;
1999     seg->packets = 1;
2000     seg->dup_ack = 0;
2001     seg->is_coalesced = 0;
2002     seg->nc = nc;
2003 
2004     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2005     chain->stat.cache++;
2006 
2007     switch (chain->proto) {
2008     case ETH_P_IP:
2009         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2010         break;
2011     case ETH_P_IPV6:
2012         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2013         break;
2014     default:
2015         g_assert_not_reached();
2016     }
2017 }
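
/*
 * Note that seg->buf above is sized for the worst case (guest header +
 * Ethernet + IPv6 header + VIRTIO_NET_MAX_TCP_PAYLOAD) rather than for
 * 'size', so later coalesced payloads can be appended in place without
 * reallocating.
 */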
2018 
2019 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2020                                          VirtioNetRscSeg *seg,
2021                                          const uint8_t *buf,
2022                                          struct tcp_header *n_tcp,
2023                                          struct tcp_header *o_tcp)
2024 {
2025     uint32_t nack, oack;
2026     uint16_t nwin, owin;
2027 
2028     nack = htonl(n_tcp->th_ack);
2029     nwin = htons(n_tcp->th_win);
2030     oack = htonl(o_tcp->th_ack);
2031     owin = htons(o_tcp->th_win);
2032 
2033     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2034         chain->stat.ack_out_of_win++;
2035         return RSC_FINAL;
2036     } else if (nack == oack) {
2037         /* duplicated ack or window probe */
2038         if (nwin == owin) {
2039             /* duplicated ack; count it, the WHQL test expects up to 1 dup ack */
2040             chain->stat.dup_ack++;
2041             return RSC_FINAL;
2042         } else {
2043             /* Coalesce window update */
2044             o_tcp->th_win = n_tcp->th_win;
2045             chain->stat.win_update++;
2046             return RSC_COALESCE;
2047         }
2048     } else {
2049         /* pure ack with a new ack number, finalize the segment */
2050         chain->stat.pure_ack++;
2051         return RSC_FINAL;
2052     }
2053 }
2054 
2055 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2056                                             VirtioNetRscSeg *seg,
2057                                             const uint8_t *buf,
2058                                             VirtioNetRscUnit *n_unit)
2059 {
2060     void *data;
2061     uint16_t o_ip_len;
2062     uint32_t nseq, oseq;
2063     VirtioNetRscUnit *o_unit;
2064 
2065     o_unit = &seg->unit;
2066     o_ip_len = htons(*o_unit->ip_plen);
2067     nseq = htonl(n_unit->tcp->th_seq);
2068     oseq = htonl(o_unit->tcp->th_seq);
2069 
2070     /* out of order or retransmitted. */
2071     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2072         chain->stat.data_out_of_win++;
2073         return RSC_FINAL;
2074     }
2075 
2076     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2077     if (nseq == oseq) {
2078         if ((o_unit->payload == 0) && n_unit->payload) {
2079             /* Transition from no payload to payload, the normal case, not a dup ack */
2080             chain->stat.data_after_pure_ack++;
2081             goto coalesce;
2082         } else {
2083             return virtio_net_rsc_handle_ack(chain, seg, buf,
2084                                              n_unit->tcp, o_unit->tcp);
2085         }
2086     } else if ((nseq - oseq) != o_unit->payload) {
2087         /* Not a consistent packet, out of order */
2088         chain->stat.data_out_of_order++;
2089         return RSC_FINAL;
2090     } else {
2091 coalesce:
2092         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2093             chain->stat.over_size++;
2094             return RSC_FINAL;
2095         }
2096 
2097         /* The right data arrived; the payload length field differs between
2098            v4 and v6, so use the field value to update and record the new len */
2099         o_unit->payload += n_unit->payload; /* update new data len */
2100 
2101         /* update field in ip header */
2102         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2103 
2104         /* Carry the 'PUSH' flag over; the WHQL test guide says 'PUSH' can be
2105            coalesced for Windows guests, while this may change the behavior for
2106            Linux guests (only if they use the RSC feature). */
2107         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2108 
2109         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2110         o_unit->tcp->th_win = n_unit->tcp->th_win;
2111 
2112         memmove(seg->buf + seg->size, data, n_unit->payload);
2113         seg->size += n_unit->payload;
2114         seg->packets++;
2115         chain->stat.coalesced++;
2116         return RSC_COALESCE;
2117     }
2118 }
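
/*
 * The sequence check above, with example numbers: if the cached segment
 * starts at oseq == 1000 and carries a 500-byte payload, only a new
 * segment with nseq == 1500 is appended (nseq - oseq == o_unit->payload).
 * nseq == 1000 is handled as an ack/window case; anything else counts as
 * out of order and finalizes the cached segment.
 */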
2119 
2120 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2121                                         VirtioNetRscSeg *seg,
2122                                         const uint8_t *buf, size_t size,
2123                                         VirtioNetRscUnit *unit)
2124 {
2125     struct ip_header *ip1, *ip2;
2126 
2127     ip1 = (struct ip_header *)(unit->ip);
2128     ip2 = (struct ip_header *)(seg->unit.ip);
2129     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2130         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2131         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2132         chain->stat.no_match++;
2133         return RSC_NO_MATCH;
2134     }
2135 
2136     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2137 }
2138 
2139 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2140                                         VirtioNetRscSeg *seg,
2141                                         const uint8_t *buf, size_t size,
2142                                         VirtioNetRscUnit *unit)
2143 {
2144     struct ip6_header *ip1, *ip2;
2145 
2146     ip1 = (struct ip6_header *)(unit->ip);
2147     ip2 = (struct ip6_header *)(seg->unit.ip);
2148     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2149         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2150         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2151         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2152             chain->stat.no_match++;
2153             return RSC_NO_MATCH;
2154     }
2155 
2156     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2157 }
2158 
2159 /* Packets with 'SYN' should bypass; packets with any other control flag
2160  * should only be sent after draining the chain, to prevent reordering */
2161 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2162                                          struct tcp_header *tcp)
2163 {
2164     uint16_t tcp_hdr;
2165     uint16_t tcp_flag;
2166 
2167     tcp_flag = htons(tcp->th_offset_flags);
2168     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2169     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2170     if (tcp_flag & TH_SYN) {
2171         chain->stat.tcp_syn++;
2172         return RSC_BYPASS;
2173     }
2174 
2175     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2176         chain->stat.tcp_ctrl_drain++;
2177         return RSC_FINAL;
2178     }
2179 
2180     if (tcp_hdr > sizeof(struct tcp_header)) {
2181         chain->stat.tcp_all_opt++;
2182         return RSC_FINAL;
2183     }
2184 
2185     return RSC_CANDIDATE;
2186 }
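
/*
 * For illustration: VIRTIO_NET_TCP_FLAG (0x3f) keeps the six classic
 * flag bits (FIN/SYN/RST/PSH/ACK/URG) of the byte-swapped
 * th_offset_flags.  A bare ACK with no TCP options has
 * th_offset_flags == 0x5010, so tcp_hdr == 20 and tcp_flag == TH_ACK,
 * which makes it an RSC candidate.
 */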
2187 
2188 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2189                                          NetClientState *nc,
2190                                          const uint8_t *buf, size_t size,
2191                                          VirtioNetRscUnit *unit)
2192 {
2193     int ret;
2194     VirtioNetRscSeg *seg, *nseg;
2195 
2196     if (QTAILQ_EMPTY(&chain->buffers)) {
2197         chain->stat.empty_cache++;
2198         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2199         timer_mod(chain->drain_timer,
2200               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2201         return size;
2202     }
2203 
2204     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2205         if (chain->proto == ETH_P_IP) {
2206             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2207         } else {
2208             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2209         }
2210 
2211         if (ret == RSC_FINAL) {
2212             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2213                 /* Send failed */
2214                 chain->stat.final_failed++;
2215                 return 0;
2216             }
2217 
2218             /* Send current packet */
2219             return virtio_net_do_receive(nc, buf, size);
2220         } else if (ret == RSC_NO_MATCH) {
2221             continue;
2222         } else {
2223             /* Coalesced; mark the flag so the cksum is recalculated for ipv4 */
2224             seg->is_coalesced = 1;
2225             return size;
2226         }
2227     }
2228 
2229     chain->stat.no_match_cache++;
2230     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2231     return size;
2232 }
2233 
2234 /* Drain a connection data, this is to avoid out of order segments */
2235 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2236                                         NetClientState *nc,
2237                                         const uint8_t *buf, size_t size,
2238                                         uint16_t ip_start, uint16_t ip_size,
2239                                         uint16_t tcp_port)
2240 {
2241     VirtioNetRscSeg *seg, *nseg;
2242     uint32_t ppair1, ppair2;
2243 
2244     ppair1 = *(uint32_t *)(buf + tcp_port);
2245     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2246         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2247         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2248             || (ppair1 != ppair2)) {
2249             continue;
2250         }
2251         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2252             chain->stat.drain_failed++;
2253         }
2254 
2255         break;
2256     }
2257 
2258     return virtio_net_do_receive(nc, buf, size);
2259 }
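
/*
 * Flow matching in the helper above, assuming an IPv4 caller: ip_start
 * points at the 8 bytes of saddr + daddr (offset hdr_len + 14 + 12) and
 * tcp_port at the start of the TCP header, so the single 32-bit load
 * compares the 16-bit source and destination ports in one go.
 */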
2260 
2261 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2262                                             struct ip_header *ip,
2263                                             const uint8_t *buf, size_t size)
2264 {
2265     uint16_t ip_len;
2266 
2267     /* Not an ipv4 packet */
2268     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2269         chain->stat.ip_option++;
2270         return RSC_BYPASS;
2271     }
2272 
2273     /* Don't handle packets with ip option */
2274     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2275         chain->stat.ip_option++;
2276         return RSC_BYPASS;
2277     }
2278 
2279     if (ip->ip_p != IPPROTO_TCP) {
2280         chain->stat.bypass_not_tcp++;
2281         return RSC_BYPASS;
2282     }
2283 
2284     /* Don't handle packets with ip fragment */
2285     if (!(htons(ip->ip_off) & IP_DF)) {
2286         chain->stat.ip_frag++;
2287         return RSC_BYPASS;
2288     }
2289 
2290     /* Don't handle packets with ecn flag */
2291     if (IPTOS_ECN(ip->ip_tos)) {
2292         chain->stat.ip_ecn++;
2293         return RSC_BYPASS;
2294     }
2295 
2296     ip_len = htons(ip->ip_len);
2297     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2298         || ip_len > (size - chain->n->guest_hdr_len -
2299                      sizeof(struct eth_header))) {
2300         chain->stat.ip_hacked++;
2301         return RSC_BYPASS;
2302     }
2303 
2304     return RSC_CANDIDATE;
2305 }
2306 
2307 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2308                                       NetClientState *nc,
2309                                       const uint8_t *buf, size_t size)
2310 {
2311     int32_t ret;
2312     uint16_t hdr_len;
2313     VirtioNetRscUnit unit;
2314 
2315     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2316 
2317     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2318         + sizeof(struct tcp_header))) {
2319         chain->stat.bypass_not_tcp++;
2320         return virtio_net_do_receive(nc, buf, size);
2321     }
2322 
2323     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2324     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2325         != RSC_CANDIDATE) {
2326         return virtio_net_do_receive(nc, buf, size);
2327     }
2328 
2329     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2330     if (ret == RSC_BYPASS) {
2331         return virtio_net_do_receive(nc, buf, size);
2332     } else if (ret == RSC_FINAL) {
2333         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2334                 ((hdr_len + sizeof(struct eth_header)) + 12),
2335                 VIRTIO_NET_IP4_ADDR_SIZE,
2336                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2337     }
2338 
2339     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2340 }
2341 
2342 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2343                                             struct ip6_header *ip6,
2344                                             const uint8_t *buf, size_t size)
2345 {
2346     uint16_t ip_len;
2347 
2348     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2349         != IP_HEADER_VERSION_6) {
2350         return RSC_BYPASS;
2351     }
2352 
2353     /* Both extension headers and the protocol are checked by this test */
2354     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2355         chain->stat.bypass_not_tcp++;
2356         return RSC_BYPASS;
2357     }
2358 
2359     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2360     if (ip_len < sizeof(struct tcp_header) ||
2361         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2362                   - sizeof(struct ip6_header))) {
2363         chain->stat.ip_hacked++;
2364         return RSC_BYPASS;
2365     }
2366 
2367     /* Don't handle packets with ecn flag */
2368     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2369         chain->stat.ip_ecn++;
2370         return RSC_BYPASS;
2371     }
2372 
2373     return RSC_CANDIDATE;
2374 }
2375 
2376 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2377                                       const uint8_t *buf, size_t size)
2378 {
2379     int32_t ret;
2380     uint16_t hdr_len;
2381     VirtioNetRscChain *chain;
2382     VirtioNetRscUnit unit;
2383 
2384     chain = (VirtioNetRscChain *)opq;
2385     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2386 
2387     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2388         + sizeof(struct tcp_header))) {
2389         return virtio_net_do_receive(nc, buf, size);
2390     }
2391 
2392     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2393     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2394                                                  unit.ip, buf, size)) {
2395         return virtio_net_do_receive(nc, buf, size);
2396     }
2397 
2398     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2399     if (ret == RSC_BYPASS) {
2400         return virtio_net_do_receive(nc, buf, size);
2401     } else if (ret == RSC_FINAL) {
2402         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2403                 ((hdr_len + sizeof(struct eth_header)) + 8),
2404                 VIRTIO_NET_IP6_ADDR_SIZE,
2405                 hdr_len + sizeof(struct eth_header)
2406                 + sizeof(struct ip6_header));
2407     }
2408 
2409     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2410 }
2411 
2412 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2413                                                       NetClientState *nc,
2414                                                       uint16_t proto)
2415 {
2416     VirtioNetRscChain *chain;
2417 
2418     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2419         return NULL;
2420     }
2421 
2422     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2423         if (chain->proto == proto) {
2424             return chain;
2425         }
2426     }
2427 
2428     chain = g_malloc(sizeof(*chain));
2429     chain->n = n;
2430     chain->proto = proto;
2431     if (proto == (uint16_t)ETH_P_IP) {
2432         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2433         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2434     } else {
2435         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2436         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2437     }
2438     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2439                                       virtio_net_rsc_purge, chain);
2440     memset(&chain->stat, 0, sizeof(chain->stat));
2441 
2442     QTAILQ_INIT(&chain->buffers);
2443     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2444 
2445     return chain;
2446 }
2447 
2448 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2449                                       const uint8_t *buf,
2450                                       size_t size)
2451 {
2452     uint16_t proto;
2453     VirtioNetRscChain *chain;
2454     struct eth_header *eth;
2455     VirtIONet *n;
2456 
2457     n = qemu_get_nic_opaque(nc);
2458     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2459         return virtio_net_do_receive(nc, buf, size);
2460     }
2461 
2462     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2463     proto = htons(eth->h_proto);
2464 
2465     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2466     if (chain) {
2467         chain->stat.received++;
2468         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2469             return virtio_net_rsc_receive4(chain, nc, buf, size);
2470         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2471             return virtio_net_rsc_receive6(chain, nc, buf, size);
2472         }
2473     }
2474     return virtio_net_do_receive(nc, buf, size);
2475 }
2476 
2477 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2478                                   size_t size)
2479 {
2480     VirtIONet *n = qemu_get_nic_opaque(nc);
2481     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2482         return virtio_net_rsc_receive(nc, buf, size);
2483     } else {
2484         return virtio_net_do_receive(nc, buf, size);
2485     }
2486 }
2487 
2488 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2489 
2490 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2491 {
2492     VirtIONet *n = qemu_get_nic_opaque(nc);
2493     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2494     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2495 
2496     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2497     virtio_notify(vdev, q->tx_vq);
2498 
2499     g_free(q->async_tx.elem);
2500     q->async_tx.elem = NULL;
2501 
2502     virtio_queue_set_notification(q->tx_vq, 1);
2503     virtio_net_flush_tx(q);
2504 }
2505 
2506 /* TX */
2507 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2508 {
2509     VirtIONet *n = q->n;
2510     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2511     VirtQueueElement *elem;
2512     int32_t num_packets = 0;
2513     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2514     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2515         return num_packets;
2516     }
2517 
2518     if (q->async_tx.elem) {
2519         virtio_queue_set_notification(q->tx_vq, 0);
2520         return num_packets;
2521     }
2522 
2523     for (;;) {
2524         ssize_t ret;
2525         unsigned int out_num;
2526         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2527         struct virtio_net_hdr_mrg_rxbuf mhdr;
2528 
2529         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2530         if (!elem) {
2531             break;
2532         }
2533 
2534         out_num = elem->out_num;
2535         out_sg = elem->out_sg;
2536         if (out_num < 1) {
2537             virtio_error(vdev, "virtio-net header not in first element");
2538             virtqueue_detach_element(q->tx_vq, elem, 0);
2539             g_free(elem);
2540             return -EINVAL;
2541         }
2542 
2543         if (n->has_vnet_hdr) {
2544             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2545                 n->guest_hdr_len) {
2546                 virtio_error(vdev, "virtio-net header incorrect");
2547                 virtqueue_detach_element(q->tx_vq, elem, 0);
2548                 g_free(elem);
2549                 return -EINVAL;
2550             }
2551             if (n->needs_vnet_hdr_swap) {
2552                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2553                 sg2[0].iov_base = &mhdr;
2554                 sg2[0].iov_len = n->guest_hdr_len;
2555                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2556                                    out_sg, out_num,
2557                                    n->guest_hdr_len, -1);
2558                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2559                     goto drop;
2560                 }
2561                 out_num += 1;
2562                 out_sg = sg2;
2563             }
2564         }
2565         /*
2566          * If host wants to see the guest header as is, we can
2567          * pass it on unchanged. Otherwise, copy just the parts
2568          * that host is interested in.
2569          */
2570         assert(n->host_hdr_len <= n->guest_hdr_len);
2571         if (n->host_hdr_len != n->guest_hdr_len) {
2572             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2573                                        out_sg, out_num,
2574                                        0, n->host_hdr_len);
2575             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2576                              out_sg, out_num,
2577                              n->guest_hdr_len, -1);
2578             out_num = sg_num;
2579             out_sg = sg;
2580         }
2581 
2582         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2583                                       out_sg, out_num, virtio_net_tx_complete);
2584         if (ret == 0) {
2585             virtio_queue_set_notification(q->tx_vq, 0);
2586             q->async_tx.elem = elem;
2587             return -EBUSY;
2588         }
2589 
2590 drop:
2591         virtqueue_push(q->tx_vq, elem, 0);
2592         virtio_notify(vdev, q->tx_vq);
2593         g_free(elem);
2594 
2595         if (++num_packets >= n->tx_burst) {
2596             break;
2597         }
2598     }
2599     return num_packets;
2600 }
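
/*
 * A sketch of the swap path above for a cross-endian guest: the header
 * is byte-swapped in the local copy (mhdr), sg2[0] is pointed at that
 * copy, and the rest of the guest's descriptors from guest_hdr_len
 * onwards are appended with iov_copy(), so the guest-visible buffer is
 * never modified.
 */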
2601 
2602 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2603 {
2604     VirtIONet *n = VIRTIO_NET(vdev);
2605     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2606 
2607     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2608         virtio_net_drop_tx_queue_data(vdev, vq);
2609         return;
2610     }
2611 
2612     /* This happens when device was stopped but VCPU wasn't. */
2613     if (!vdev->vm_running) {
2614         q->tx_waiting = 1;
2615         return;
2616     }
2617 
2618     if (q->tx_waiting) {
2619         virtio_queue_set_notification(vq, 1);
2620         timer_del(q->tx_timer);
2621         q->tx_waiting = 0;
2622         if (virtio_net_flush_tx(q) == -EINVAL) {
2623             return;
2624         }
2625     } else {
2626         timer_mod(q->tx_timer,
2627                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2628         q->tx_waiting = 1;
2629         virtio_queue_set_notification(vq, 0);
2630     }
2631 }
2632 
2633 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2634 {
2635     VirtIONet *n = VIRTIO_NET(vdev);
2636     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2637 
2638     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2639         virtio_net_drop_tx_queue_data(vdev, vq);
2640         return;
2641     }
2642 
2643     if (unlikely(q->tx_waiting)) {
2644         return;
2645     }
2646     q->tx_waiting = 1;
2647     /* This happens when device was stopped but VCPU wasn't. */
2648     if (!vdev->vm_running) {
2649         return;
2650     }
2651     virtio_queue_set_notification(vq, 0);
2652     qemu_bh_schedule(q->tx_bh);
2653 }
2654 
2655 static void virtio_net_tx_timer(void *opaque)
2656 {
2657     VirtIONetQueue *q = opaque;
2658     VirtIONet *n = q->n;
2659     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2660     /* This happens when device was stopped but BH wasn't. */
2661     if (!vdev->vm_running) {
2662         /* Make sure tx waiting is set, so we'll run when restarted. */
2663         assert(q->tx_waiting);
2664         return;
2665     }
2666 
2667     q->tx_waiting = 0;
2668 
2669     /* Just in case the driver is not ready anymore */
2670     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2671         return;
2672     }
2673 
2674     virtio_queue_set_notification(q->tx_vq, 1);
2675     virtio_net_flush_tx(q);
2676 }
2677 
2678 static void virtio_net_tx_bh(void *opaque)
2679 {
2680     VirtIONetQueue *q = opaque;
2681     VirtIONet *n = q->n;
2682     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2683     int32_t ret;
2684 
2685     /* This happens when device was stopped but BH wasn't. */
2686     if (!vdev->vm_running) {
2687         /* Make sure tx waiting is set, so we'll run when restarted. */
2688         assert(q->tx_waiting);
2689         return;
2690     }
2691 
2692     q->tx_waiting = 0;
2693 
2694     /* Just in case the driver is not ready anymore */
2695     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2696         return;
2697     }
2698 
2699     ret = virtio_net_flush_tx(q);
2700     if (ret == -EBUSY || ret == -EINVAL) {
2701         return; /* Notification re-enable handled by tx_complete, or the
2702                  * device is broken */
2703     }
2704 
2705     /* If we flush a full burst of packets, assume there are
2706      * more coming and immediately reschedule */
2707     if (ret >= n->tx_burst) {
2708         qemu_bh_schedule(q->tx_bh);
2709         q->tx_waiting = 1;
2710         return;
2711     }
2712 
2713     /* If less than a full burst, re-enable notification and flush
2714      * anything that may have come in while we weren't looking.  If
2715      * we find something, assume the guest is still active and reschedule */
2716     virtio_queue_set_notification(q->tx_vq, 1);
2717     ret = virtio_net_flush_tx(q);
2718     if (ret == -EINVAL) {
2719         return;
2720     } else if (ret > 0) {
2721         virtio_queue_set_notification(q->tx_vq, 0);
2722         qemu_bh_schedule(q->tx_bh);
2723         q->tx_waiting = 1;
2724     }
2725 }
2726 
2727 static void virtio_net_add_queue(VirtIONet *n, int index)
2728 {
2729     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2730 
2731     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2732                                            virtio_net_handle_rx);
2733 
2734     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2735         n->vqs[index].tx_vq =
2736             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2737                              virtio_net_handle_tx_timer);
2738         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2739                                               virtio_net_tx_timer,
2740                                               &n->vqs[index]);
2741     } else {
2742         n->vqs[index].tx_vq =
2743             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2744                              virtio_net_handle_tx_bh);
2745         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2746     }
2747 
2748     n->vqs[index].tx_waiting = 0;
2749     n->vqs[index].n = n;
2750 }
2751 
2752 static void virtio_net_del_queue(VirtIONet *n, int index)
2753 {
2754     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2755     VirtIONetQueue *q = &n->vqs[index];
2756     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2757 
2758     qemu_purge_queued_packets(nc);
2759 
2760     virtio_del_queue(vdev, index * 2);
2761     if (q->tx_timer) {
2762         timer_free(q->tx_timer);
2763         q->tx_timer = NULL;
2764     } else {
2765         qemu_bh_delete(q->tx_bh);
2766         q->tx_bh = NULL;
2767     }
2768     q->tx_waiting = 0;
2769     virtio_del_queue(vdev, index * 2 + 1);
2770 }
2771 
2772 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2773 {
2774     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2775     int old_num_queues = virtio_get_num_queues(vdev);
2776     int new_num_queues = new_max_queue_pairs * 2 + 1;
2777     int i;
2778 
2779     assert(old_num_queues >= 3);
2780     assert(old_num_queues % 2 == 1);
2781 
2782     if (old_num_queues == new_num_queues) {
2783         return;
2784     }
2785 
2786     /*
2787      * We always need to remove and add ctrl vq if
2788      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2789      * and then we only enter one of the following two loops.
2790      */
2791     virtio_del_queue(vdev, old_num_queues - 1);
2792 
2793     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2794         /* new_num_queues < old_num_queues */
2795         virtio_net_del_queue(n, i / 2);
2796     }
2797 
2798     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2799         /* new_num_queues > old_num_queues */
2800         virtio_net_add_queue(n, i / 2);
2801     }
2802 
2803     /* add ctrl_vq last */
2804     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2805 }
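
/*
 * Queue layout assumed above: [rx0, tx0, rx1, tx1, ..., ctrl], i.e.
 * 2 * max_queue_pairs + 1 queues.  For example, growing from 2 to 3
 * queue pairs: old_num_queues == 5, new_num_queues == 7; ctrl (index 4)
 * is deleted, queue pair 2 is added at indices 4 and 5, and ctrl is
 * re-added at index 6.
 */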
2806 
2807 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2808 {
2809     int max = multiqueue ? n->max_queue_pairs : 1;
2810 
2811     n->multiqueue = multiqueue;
2812     virtio_net_change_num_queue_pairs(n, max);
2813 
2814     virtio_net_set_queue_pairs(n);
2815 }
2816 
2817 static int virtio_net_post_load_device(void *opaque, int version_id)
2818 {
2819     VirtIONet *n = opaque;
2820     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2821     int i, link_down;
2822 
2823     trace_virtio_net_post_load_device();
2824     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2825                                virtio_vdev_has_feature(vdev,
2826                                                        VIRTIO_F_VERSION_1),
2827                                virtio_vdev_has_feature(vdev,
2828                                                        VIRTIO_NET_F_HASH_REPORT));
2829 
2830     /* MAC_TABLE_ENTRIES may be different from the saved image */
2831     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2832         n->mac_table.in_use = 0;
2833     }
2834 
2835     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2836         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2837     }
2838 
2839     /*
2840      * curr_guest_offloads will be later overwritten by the
2841      * virtio_set_features_nocheck call done from the virtio_load.
2842      * Here we make sure it is preserved and restored accordingly
2843      * in the virtio_net_post_load_virtio callback.
2844      */
2845     n->saved_guest_offloads = n->curr_guest_offloads;
2846 
2847     virtio_net_set_queue_pairs(n);
2848 
2849     /* Find the first multicast entry in the saved MAC filter */
2850     for (i = 0; i < n->mac_table.in_use; i++) {
2851         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2852             break;
2853         }
2854     }
2855     n->mac_table.first_multi = i;
2856 
2857     /* nc.link_down can't be migrated, so infer link_down according
2858      * to link status bit in n->status */
2859     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2860     for (i = 0; i < n->max_queue_pairs; i++) {
2861         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2862     }
2863 
2864     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2865         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2866         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2867                                   QEMU_CLOCK_VIRTUAL,
2868                                   virtio_net_announce_timer, n);
2869         if (n->announce_timer.round) {
2870             timer_mod(n->announce_timer.tm,
2871                       qemu_clock_get_ms(n->announce_timer.type));
2872         } else {
2873             qemu_announce_timer_del(&n->announce_timer, false);
2874         }
2875     }
2876 
2877     if (n->rss_data.enabled) {
2878         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2879         if (!n->rss_data.populate_hash) {
2880             if (!virtio_net_attach_epbf_rss(n)) {
2881                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2882                     warn_report("Can't post-load eBPF RSS for vhost");
2883                 } else {
2884                     warn_report("Can't post-load eBPF RSS - "
2885                                 "fallback to software RSS");
2886                     n->rss_data.enabled_software_rss = true;
2887                 }
2888             }
2889         }
2890 
2891         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2892                                     n->rss_data.indirections_len,
2893                                     sizeof(n->rss_data.key));
2894     } else {
2895         trace_virtio_net_rss_disable();
2896     }
2897     return 0;
2898 }
2899 
2900 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2901 {
2902     VirtIONet *n = VIRTIO_NET(vdev);
2903     /*
2904      * The actual needed state is now in saved_guest_offloads,
2905      * see virtio_net_post_load_device for detail.
2906      * Restore it back and apply the desired offloads.
2907      */
2908     n->curr_guest_offloads = n->saved_guest_offloads;
2909     if (peer_has_vnet_hdr(n)) {
2910         virtio_net_apply_guest_offloads(n);
2911     }
2912 
2913     return 0;
2914 }
2915 
2916 /* tx_waiting field of a VirtIONetQueue */
2917 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2918     .name = "virtio-net-queue-tx_waiting",
2919     .fields = (VMStateField[]) {
2920         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2921         VMSTATE_END_OF_LIST()
2922     },
2923 };
2924 
2925 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2926 {
2927     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2928 }
2929 
2930 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2931 {
2932     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2933                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2934 }
2935 
2936 static bool mac_table_fits(void *opaque, int version_id)
2937 {
2938     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2939 }
2940 
2941 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2942 {
2943     return !mac_table_fits(opaque, version_id);
2944 }
2945 
2946 /* This temporary type is shared by all the WITH_TMP methods
2947  * although only some fields are used by each.
2948  */
2949 struct VirtIONetMigTmp {
2950     VirtIONet      *parent;
2951     VirtIONetQueue *vqs_1;
2952     uint16_t        curr_queue_pairs_1;
2953     uint8_t         has_ufo;
2954     uint32_t        has_vnet_hdr;
2955 };
2956 
2957 /* The 2nd and subsequent tx_waiting flags are loaded later than
2958  * the 1st entry in the queue_pairs and only if there's more than one
2959  * entry.  We use the tmp mechanism to calculate a temporary
2960  * pointer and count and also validate the count.
2961  */
2962 
2963 static int virtio_net_tx_waiting_pre_save(void *opaque)
2964 {
2965     struct VirtIONetMigTmp *tmp = opaque;
2966 
2967     tmp->vqs_1 = tmp->parent->vqs + 1;
2968     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2969     if (tmp->parent->curr_queue_pairs == 0) {
2970         tmp->curr_queue_pairs_1 = 0;
2971     }
2972 
2973     return 0;
2974 }
2975 
2976 static int virtio_net_tx_waiting_pre_load(void *opaque)
2977 {
2978     struct VirtIONetMigTmp *tmp = opaque;
2979 
2980     /* Reuse the pointer setup from save */
2981     virtio_net_tx_waiting_pre_save(opaque);
2982 
2983     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
2984         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
2985             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
2986 
2987         return -EINVAL;
2988     }
2989 
2990     return 0; /* all good */
2991 }
2992 
2993 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2994     .name      = "virtio-net-tx_waiting",
2995     .pre_load  = virtio_net_tx_waiting_pre_load,
2996     .pre_save  = virtio_net_tx_waiting_pre_save,
2997     .fields    = (VMStateField[]) {
2998         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2999                                      curr_queue_pairs_1,
3000                                      vmstate_virtio_net_queue_tx_waiting,
3001                                      struct VirtIONetQueue),
3002         VMSTATE_END_OF_LIST()
3003     },
3004 };
3005 
3006 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3007  * flag set we need to check that we have it
3008  */
3009 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3010 {
3011     struct VirtIONetMigTmp *tmp = opaque;
3012 
3013     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3014         error_report("virtio-net: saved image requires TUN_F_UFO support");
3015         return -EINVAL;
3016     }
3017 
3018     return 0;
3019 }
3020 
3021 static int virtio_net_ufo_pre_save(void *opaque)
3022 {
3023     struct VirtIONetMigTmp *tmp = opaque;
3024 
3025     tmp->has_ufo = tmp->parent->has_ufo;
3026 
3027     return 0;
3028 }
3029 
3030 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3031     .name      = "virtio-net-ufo",
3032     .post_load = virtio_net_ufo_post_load,
3033     .pre_save  = virtio_net_ufo_pre_save,
3034     .fields    = (VMStateField[]) {
3035         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3036         VMSTATE_END_OF_LIST()
3037     },
3038 };
3039 
3040 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3041  * flag set we need to check that we have it
3042  */
3043 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3044 {
3045     struct VirtIONetMigTmp *tmp = opaque;
3046 
3047     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3048         error_report("virtio-net: saved image requires vnet_hdr=on");
3049         return -EINVAL;
3050     }
3051 
3052     return 0;
3053 }
3054 
3055 static int virtio_net_vnet_pre_save(void *opaque)
3056 {
3057     struct VirtIONetMigTmp *tmp = opaque;
3058 
3059     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3060 
3061     return 0;
3062 }
3063 
3064 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3065     .name      = "virtio-net-vnet",
3066     .post_load = virtio_net_vnet_post_load,
3067     .pre_save  = virtio_net_vnet_pre_save,
3068     .fields    = (VMStateField[]) {
3069         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3070         VMSTATE_END_OF_LIST()
3071     },
3072 };
3073 
3074 static bool virtio_net_rss_needed(void *opaque)
3075 {
3076     return VIRTIO_NET(opaque)->rss_data.enabled;
3077 }
3078 
3079 static const VMStateDescription vmstate_virtio_net_rss = {
3080     .name      = "virtio-net-device/rss",
3081     .version_id = 1,
3082     .minimum_version_id = 1,
3083     .needed = virtio_net_rss_needed,
3084     .fields = (VMStateField[]) {
3085         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3086         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3087         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3088         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3089         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3090         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3091         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3092                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3093         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3094                                     rss_data.indirections_len, 0,
3095                                     vmstate_info_uint16, uint16_t),
3096         VMSTATE_END_OF_LIST()
3097     },
3098 };
3099 
3100 static const VMStateDescription vmstate_virtio_net_device = {
3101     .name = "virtio-net-device",
3102     .version_id = VIRTIO_NET_VM_VERSION,
3103     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3104     .post_load = virtio_net_post_load_device,
3105     .fields = (VMStateField[]) {
3106         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3107         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3108                                vmstate_virtio_net_queue_tx_waiting,
3109                                VirtIONetQueue),
3110         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3111         VMSTATE_UINT16(status, VirtIONet),
3112         VMSTATE_UINT8(promisc, VirtIONet),
3113         VMSTATE_UINT8(allmulti, VirtIONet),
3114         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3115 
3116         /* Guarded pair: If it fits we load it, else we throw it away
3117          * - can happen if the source has a larger MAC table; post-load
3118          * sets flags in this case.
3119          */
3120         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3121                                 0, mac_table_fits, mac_table.in_use,
3122                                  ETH_ALEN),
3123         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3124                                      mac_table.in_use, ETH_ALEN),
3125 
3126         /* Note: This is an array of uint32's that's always been saved as a
3127          * buffer; hold onto your endiannesses; it's actually used as a bitmap
3128          * but based on the uint.
3129          */
3130         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3131         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3132                          vmstate_virtio_net_has_vnet),
3133         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3134         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3135         VMSTATE_UINT8(alluni, VirtIONet),
3136         VMSTATE_UINT8(nomulti, VirtIONet),
3137         VMSTATE_UINT8(nouni, VirtIONet),
3138         VMSTATE_UINT8(nobcast, VirtIONet),
3139         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3140                          vmstate_virtio_net_has_ufo),
3141         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3142                             vmstate_info_uint16_equal, uint16_t),
3143         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3144         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3145                          vmstate_virtio_net_tx_waiting),
3146         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3147                             has_ctrl_guest_offloads),
3148         VMSTATE_END_OF_LIST()
3149     },
3150     .subsections = (const VMStateDescription * []) {
3151         &vmstate_virtio_net_rss,
3152         NULL
3153     }
3154 };
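
/*
 * Note that version_id == minimum_version_id here, so a stream recorded
 * with a different VIRTIO_NET_VM_VERSION is rejected outright; optional
 * state is carried in subsections (such as the RSS one above) rather than
 * via version bumps.
 */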
3155 
3156 static NetClientInfo net_virtio_info = {
3157     .type = NET_CLIENT_DRIVER_NIC,
3158     .size = sizeof(NICState),
3159     .can_receive = virtio_net_can_receive,
3160     .receive = virtio_net_receive,
3161     .link_status_changed = virtio_net_set_link_status,
3162     .query_rx_filter = virtio_net_query_rxfilter,
3163     .announce = virtio_net_announce,
3164 };
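
/*
 * These callbacks are the backend-facing half of the device: the peer
 * (tap, user, vhost-user, ...) calls .can_receive/.receive to deliver
 * packets, and .link_status_changed lets "set_link" toggle the
 * VIRTIO_NET_S_LINK_UP bit seen by the guest.
 */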
3165 
3166 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3167 {
3168     VirtIONet *n = VIRTIO_NET(vdev);
3169     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3170     assert(n->vhost_started);
3171     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3172         return vhost_net_config_pending(get_vhost_net(nc->peer));
3173     }
3174     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3175 }
3176 
3177 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3178                                            bool mask)
3179 {
3180     VirtIONet *n = VIRTIO_NET(vdev);
3181     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3182     assert(n->vhost_started);
3183     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3184         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3185         return;
3186     }
3187     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3188 }
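
/*
 * In both helpers above, idx == VIRTIO_CONFIG_IRQ_IDX (-1) refers to the
 * config-change interrupt rather than a virtqueue, so it is routed to the
 * vhost config-irq helpers instead of the per-virtqueue ones.
 */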
3189 
3190 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3191 {
3192     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3193 
3194     n->config_size = virtio_feature_get_config_size(feature_sizes,
3195                                                     host_features);
3196 }
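
/*
 * Illustrative example: feature_sizes maps feature bits to the amount of
 * config space they require, with entries along the lines of
 *
 *     { .flags = 1ULL << VIRTIO_NET_F_MQ,
 *       .end = endof(struct virtio_net_config, max_virtqueue_pairs) },
 *
 * and virtio_feature_get_config_size() returns the largest .end among the
 * offered features. VIRTIO_NET_F_MAC is forced on above so that the config
 * space always covers at least the MAC address.
 */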
3197 
3198 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3199                                    const char *type)
3200 {
3201     /*
3202      * The name can be NULL; in that case the netclient name will be type.x.
3203      */
3204     assert(type != NULL);
3205 
3206     g_free(n->netclient_name);
3207     g_free(n->netclient_type);
3208     n->netclient_name = g_strdup(name);
3209     n->netclient_type = g_strdup(type);
3210 }
3211 
3212 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3213 {
3214     HotplugHandler *hotplug_ctrl;
3215     PCIDevice *pci_dev;
3216     Error *err = NULL;
3217 
3218     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3219     if (hotplug_ctrl) {
3220         pci_dev = PCI_DEVICE(dev);
3221         pci_dev->partially_hotplugged = true;
3222         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3223         if (err) {
3224             error_report_err(err);
3225             return false;
3226         }
3227     } else {
3228         return false;
3229     }
3230     return true;
3231 }
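
/*
 * pci_dev->partially_hotplugged makes the unplug request non-destructive:
 * when the guest ejects the device, QEMU keeps the device object around so
 * that failover_replug_primary() below can re-plug it if migration fails.
 */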
3232 
3233 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3234                                     Error **errp)
3235 {
3236     Error *err = NULL;
3237     HotplugHandler *hotplug_ctrl;
3238     PCIDevice *pdev = PCI_DEVICE(dev);
3239     BusState *primary_bus;
3240 
3241     if (!pdev->partially_hotplugged) {
3242         return true;
3243     }
3244     primary_bus = dev->parent_bus;
3245     if (!primary_bus) {
3246         error_setg(errp, "virtio_net: couldn't find primary bus");
3247         return false;
3248     }
3249     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3250     qatomic_set(&n->failover_primary_hidden, false);
3251     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3252     if (hotplug_ctrl) {
3253         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3254         if (err) {
3255             goto out;
3256         }
3257         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3258     }
3259     pdev->partially_hotplugged = false;
3260 
3261 out:
3262     error_propagate(errp, err);
3263     return !err;
3264 }
3265 
3266 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3267 {
3268     bool should_be_hidden;
3269     Error *err = NULL;
3270     DeviceState *dev = failover_find_primary_device(n);
3271 
3272     if (!dev) {
3273         return;
3274     }
3275 
3276     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3277 
3278     if (migration_in_setup(s) && !should_be_hidden) {
3279         if (failover_unplug_primary(n, dev)) {
3280             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3281             qapi_event_send_unplug_primary(dev->id);
3282             qatomic_set(&n->failover_primary_hidden, true);
3283         } else {
3284             warn_report("couldn't unplug primary device");
3285         }
3286     } else if (migration_has_failed(s)) {
3287         /* We already unplugged the device; let's plug it back. */
3288         if (!failover_replug_primary(n, dev, &err)) {
3289             if (err) {
3290                 error_report_err(err);
3291             }
3292         }
3293     }
3294 }
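
/*
 * Failover flow, roughly: when migration enters setup, the primary
 * (passthrough) device is hot-unplugged from the guest and hidden; if the
 * migration fails, it is re-plugged here. On the destination a new primary
 * is plugged in once the guest re-negotiates VIRTIO_NET_F_STANDBY.
 */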
3295 
3296 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3297 {
3298     MigrationState *s = data;
3299     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3300     virtio_net_handle_migration_primary(n, s);
3301 }
3302 
3303 static bool failover_hide_primary_device(DeviceListener *listener,
3304                                          const QDict *device_opts,
3305                                          bool from_json,
3306                                          Error **errp)
3307 {
3308     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3309     const char *standby_id;
3310 
3311     if (!device_opts) {
3312         return false;
3313     }
3314 
3315     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3316         return false;
3317     }
3318 
3319     if (!qdict_haskey(device_opts, "id")) {
3320         error_setg(errp, "Device with failover_pair_id needs to have id");
3321         return false;
3322     }
3323 
3324     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3325     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3326         return false;
3327     }
3328 
3329     /*
3330      * The hide helper can be called several times for a given device.
3331      * Check that there is only one primary per virtio-net device, but
3332      * don't clone the qdict again if the helper is called for the same
3333      * device.
3334      */
3335     if (n->primary_opts) {
3336         const char *old, *new;
3337         /* devices with failover_pair_id always have an id */
3338         old = qdict_get_str(n->primary_opts, "id");
3339         new = qdict_get_str(device_opts, "id");
3340         if (strcmp(old, new) != 0) {
3341             error_setg(errp, "Cannot attach more than one primary device to "
3342                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3343             return false;
3344         }
3345     } else {
3346         n->primary_opts = qdict_clone_shallow(device_opts);
3347         n->primary_opts_from_json = from_json;
3348     }
3349 
3350     /* failover_primary_hidden is set during feature negotiation */
3351     return qatomic_read(&n->failover_primary_hidden);
3352 }
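
/*
 * Typical usage (illustrative command line; ids and the host address are
 * placeholders):
 *
 *     -device virtio-net-pci,netdev=nd0,id=standby0,failover=on
 *     -device vfio-pci,host=01:00.0,id=primary0,failover_pair_id=standby0
 *
 * The primary stays hidden until the guest acks VIRTIO_NET_F_STANDBY,
 * which clears failover_primary_hidden in virtio_net_set_features().
 */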
3353 
3354 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3355 {
3356     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3357     VirtIONet *n = VIRTIO_NET(dev);
3358     NetClientState *nc;
3359     int i;
3360 
3361     if (n->net_conf.mtu) {
3362         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3363     }
3364 
3365     if (n->net_conf.duplex_str) {
3366         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3367             n->net_conf.duplex = DUPLEX_HALF;
3368         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3369             n->net_conf.duplex = DUPLEX_FULL;
3370         } else {
3371             error_setg(errp, "'duplex' must be 'half' or 'full'");
3372             return;
3373         }
3374         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3375     } else {
3376         n->net_conf.duplex = DUPLEX_UNKNOWN;
3377     }
3378 
3379     if (n->net_conf.speed < SPEED_UNKNOWN) {
3380         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3381         return;
3382     }
3383     if (n->net_conf.speed >= 0) {
3384         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3385     }
3386 
3387     if (n->failover) {
3388         n->primary_listener.hide_device = failover_hide_primary_device;
3389         qatomic_set(&n->failover_primary_hidden, true);
3390         device_listener_register(&n->primary_listener);
3391         n->migration_state.notify = virtio_net_migration_state_notifier;
3392         add_migration_state_change_notifier(&n->migration_state);
3393         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3394     }
3395 
3396     virtio_net_set_config_size(n, n->host_features);
3397     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3398 
3399     /*
3400      * We set the lower limit on the RX queue size to its historical
3401      * default. Guests that want a smaller ring can always resize it
3402      * themselves (using virtio 1 and up), without help from us.
3403      */
3404     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3405         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3406         !is_power_of_2(n->net_conf.rx_queue_size)) {
3407         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3408                    "must be a power of 2 between %d and %d.",
3409                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3410                    VIRTQUEUE_MAX_SIZE);
3411         virtio_cleanup(vdev);
3412         return;
3413     }
3414 
3415     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3416         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3417         !is_power_of_2(n->net_conf.tx_queue_size)) {
3418         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3419                    "must be a power of 2 between %d and %d.",
3420                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3421                    VIRTQUEUE_MAX_SIZE);
3422         virtio_cleanup(vdev);
3423         return;
3424     }
3425 
3426     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3427 
3428     /*
3429      * Figure out the datapath queue pairs since the backend could
3430      * provide control queue via peers as well.
3431      */
3432     if (n->nic_conf.peers.queues) {
3433         for (i = 0; i < n->max_ncs; i++) {
3434             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3435                 ++n->max_queue_pairs;
3436             }
3437         }
3438     }
3439     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3440 
3441     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3442         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3443                    "must be a positive integer no greater than %d.",
3444                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3445         virtio_cleanup(vdev);
3446         return;
3447     }
3448     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs);
3449     n->curr_queue_pairs = 1;
3450     n->tx_timeout = n->net_conf.txtimer;
3451 
3452     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3453                        && strcmp(n->net_conf.tx, "bh")) {
3454         warn_report("virtio-net: "
3455                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3456                     n->net_conf.tx);
3457         error_printf("Defaulting to \"bh\"\n");
3458     }
3459 
3460     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3461                                     n->net_conf.tx_queue_size);
3462 
3463     for (i = 0; i < n->max_queue_pairs; i++) {
3464         virtio_net_add_queue(n, i);
3465     }
3466 
3467     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3468     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3469     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3470     n->status = VIRTIO_NET_S_LINK_UP;
3471     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3472                               QEMU_CLOCK_VIRTUAL,
3473                               virtio_net_announce_timer, n);
3474     n->announce_timer.round = 0;
3475 
3476     if (n->netclient_type) {
3477         /*
3478          * This happens when virtio_net_set_netclient_name has been called.
3479          */
3480         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3481                               n->netclient_type, n->netclient_name, n);
3482     } else {
3483         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3484                               object_get_typename(OBJECT(dev)), dev->id, n);
3485     }
3486 
3487     for (i = 0; i < n->max_queue_pairs; i++) {
3488         n->nic->ncs[i].do_not_pad = true;
3489     }
3490 
3491     peer_test_vnet_hdr(n);
3492     if (peer_has_vnet_hdr(n)) {
3493         for (i = 0; i < n->max_queue_pairs; i++) {
3494             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3495         }
3496         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3497     } else {
3498         n->host_hdr_len = 0;
3499     }
3500 
3501     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3502 
3503     n->vqs[0].tx_waiting = 0;
3504     n->tx_burst = n->net_conf.txburst;
3505     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3506     n->promisc = 1; /* for compatibility */
3507 
3508     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3509 
3510     n->vlans = g_malloc0(MAX_VLAN >> 3);
3511 
3512     nc = qemu_get_queue(n->nic);
3513     nc->rxfilter_notify_enabled = 1;
3514 
3515     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3516         struct virtio_net_config netcfg = {};
3517         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3518         vhost_net_set_config(get_vhost_net(nc->peer),
3519             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3520     }
3521     QTAILQ_INIT(&n->rsc_chains);
3522     n->qdev = dev;
3523 
3524     net_rx_pkt_init(&n->rx_pkt, false);
3525 
3526     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3527         virtio_net_load_ebpf(n);
3528     }
3529 }
3530 
3531 static void virtio_net_device_unrealize(DeviceState *dev)
3532 {
3533     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3534     VirtIONet *n = VIRTIO_NET(dev);
3535     int i, max_queue_pairs;
3536 
3537     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3538         virtio_net_unload_ebpf(n);
3539     }
3540 
3541     /* This will stop the vhost backend if appropriate. */
3542     virtio_net_set_status(vdev, 0);
3543 
3544     g_free(n->netclient_name);
3545     n->netclient_name = NULL;
3546     g_free(n->netclient_type);
3547     n->netclient_type = NULL;
3548 
3549     g_free(n->mac_table.macs);
3550     g_free(n->vlans);
3551 
3552     if (n->failover) {
3553         qobject_unref(n->primary_opts);
3554         device_listener_unregister(&n->primary_listener);
3555         remove_migration_state_change_notifier(&n->migration_state);
3556     } else {
3557         assert(n->primary_opts == NULL);
3558     }
3559 
3560     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3561     for (i = 0; i < max_queue_pairs; i++) {
3562         virtio_net_del_queue(n, i);
3563     }
3564     /* also delete the control vq */
3565     virtio_del_queue(vdev, max_queue_pairs * 2);
3566     qemu_announce_timer_del(&n->announce_timer, false);
3567     g_free(n->vqs);
3568     qemu_del_nic(n->nic);
3569     virtio_net_rsc_cleanup(n);
3570     g_free(n->rss_data.indirections_table);
3571     net_rx_pkt_uninit(n->rx_pkt);
3572     virtio_cleanup(vdev);
3573 }
3574 
3575 static void virtio_net_instance_init(Object *obj)
3576 {
3577     VirtIONet *n = VIRTIO_NET(obj);
3578 
3579     /*
3580      * The default config_size is sizeof(struct virtio_net_config).
3581      * It can be overridden with virtio_net_set_config_size.
3582      */
3583     n->config_size = sizeof(struct virtio_net_config);
3584     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3585                                   "bootindex", "/ethernet-phy@0",
3586                                   DEVICE(n));
3587 
3588     ebpf_rss_init(&n->ebpf_rss);
3589 }
3590 
3591 static int virtio_net_pre_save(void *opaque)
3592 {
3593     VirtIONet *n = opaque;
3594 
3595     /* At this point the backend must be stopped, otherwise
3596      * it might keep writing to guest memory. */
3597     assert(!n->vhost_started);
3598 
3599     return 0;
3600 }
3601 
3602 static bool primary_unplug_pending(void *opaque)
3603 {
3604     DeviceState *dev = opaque;
3605     DeviceState *primary;
3606     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3607     VirtIONet *n = VIRTIO_NET(vdev);
3608 
3609     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3610         return false;
3611     }
3612     primary = failover_find_primary_device(n);
3613     return primary ? primary->pending_deleted_event : false;
3614 }
3615 
3616 static bool dev_unplug_pending(void *opaque)
3617 {
3618     DeviceState *dev = opaque;
3619     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3620 
3621     return vdc->primary_unplug_pending(dev);
3622 }
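
/*
 * dev_unplug_pending is checked by the migration core (through the
 * .dev_unplug_pending hook of vmstate_virtio_net below) so that migration
 * waits until the guest has actually ejected the failover primary.
 */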
3623 
3624 static const VMStateDescription vmstate_virtio_net = {
3625     .name = "virtio-net",
3626     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3627     .version_id = VIRTIO_NET_VM_VERSION,
3628     .fields = (VMStateField[]) {
3629         VMSTATE_VIRTIO_DEVICE,
3630         VMSTATE_END_OF_LIST()
3631     },
3632     .pre_save = virtio_net_pre_save,
3633     .dev_unplug_pending = dev_unplug_pending,
3634 };
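
/*
 * Note the split: vmstate_virtio_net (dc->vmsd) only wraps the common
 * VMSTATE_VIRTIO_DEVICE blob, while the device-specific fields live in
 * vmstate_virtio_net_device, hooked up as vdc->vmsd in class_init below.
 */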
3635 
3636 static Property virtio_net_properties[] = {
3637     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3638                     VIRTIO_NET_F_CSUM, true),
3639     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3640                     VIRTIO_NET_F_GUEST_CSUM, true),
3641     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3642     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3643                     VIRTIO_NET_F_GUEST_TSO4, true),
3644     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3645                     VIRTIO_NET_F_GUEST_TSO6, true),
3646     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3647                     VIRTIO_NET_F_GUEST_ECN, true),
3648     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3649                     VIRTIO_NET_F_GUEST_UFO, true),
3650     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3651                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3652     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3653                     VIRTIO_NET_F_HOST_TSO4, true),
3654     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3655                     VIRTIO_NET_F_HOST_TSO6, true),
3656     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3657                     VIRTIO_NET_F_HOST_ECN, true),
3658     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3659                     VIRTIO_NET_F_HOST_UFO, true),
3660     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3661                     VIRTIO_NET_F_MRG_RXBUF, true),
3662     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3663                     VIRTIO_NET_F_STATUS, true),
3664     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3665                     VIRTIO_NET_F_CTRL_VQ, true),
3666     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3667                     VIRTIO_NET_F_CTRL_RX, true),
3668     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3669                     VIRTIO_NET_F_CTRL_VLAN, true),
3670     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3671                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3672     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3673                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3674     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3675                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3676     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3677     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3678                     VIRTIO_NET_F_RSS, false),
3679     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3680                     VIRTIO_NET_F_HASH_REPORT, false),
3681     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3682                     VIRTIO_NET_F_RSC_EXT, false),
3683     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3684                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3685     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3686     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3687                        TX_TIMER_INTERVAL),
3688     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3689     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3690     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3691                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3692     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3693                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3694     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3695     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3696                      true),
3697     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3698     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3699     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3700     DEFINE_PROP_END_OF_LIST(),
3701 };
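
/*
 * Example (illustrative): most of these properties map directly to
 * command-line options, e.g. enabling multiqueue with a larger RX ring:
 *
 *     -device virtio-net-pci,netdev=nd0,mq=on,rx_queue_size=1024
 *
 * Ring sizes must be powers of 2 within the bounds enforced in
 * virtio_net_device_realize() above; tx_queue_size is additionally clamped
 * to virtio_net_max_tx_queue_size() for the given backend.
 */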
3702 
3703 static void virtio_net_class_init(ObjectClass *klass, void *data)
3704 {
3705     DeviceClass *dc = DEVICE_CLASS(klass);
3706     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3707 
3708     device_class_set_props(dc, virtio_net_properties);
3709     dc->vmsd = &vmstate_virtio_net;
3710     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3711     vdc->realize = virtio_net_device_realize;
3712     vdc->unrealize = virtio_net_device_unrealize;
3713     vdc->get_config = virtio_net_get_config;
3714     vdc->set_config = virtio_net_set_config;
3715     vdc->get_features = virtio_net_get_features;
3716     vdc->set_features = virtio_net_set_features;
3717     vdc->bad_features = virtio_net_bad_features;
3718     vdc->reset = virtio_net_reset;
3719     vdc->set_status = virtio_net_set_status;
3720     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3721     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3722     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3723     vdc->post_load = virtio_net_post_load_virtio;
3724     vdc->vmsd = &vmstate_virtio_net_device;
3725     vdc->primary_unplug_pending = primary_unplug_pending;
3726 }
3727 
3728 static const TypeInfo virtio_net_info = {
3729     .name = TYPE_VIRTIO_NET,
3730     .parent = TYPE_VIRTIO_DEVICE,
3731     .instance_size = sizeof(VirtIONet),
3732     .instance_init = virtio_net_instance_init,
3733     .class_init = virtio_net_class_init,
3734 };
3735 
3736 static void virtio_register_types(void)
3737 {
3738     type_register_static(&virtio_net_info);
3739 }
3740 
3741 type_init(virtio_register_types)
3742