xref: /openbmc/qemu/hw/net/virtio-net.c (revision 1e458f11)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length value in ip header without option */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval, This value affects the performance
78    a lot, and should be tuned carefully, '300000'(300us) is the recommended
79    value to pass the WHQL test, '50000' can gain 2x netperf throughput with
80    tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
/*
 * Map each config-space-extending feature bit to the end offset of the
 * last virtio_net_config field it makes visible; used to size the
 * config space exposed to the guest based on negotiated features.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    /* RSS and hash-report share the same trailing config fields. */
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
108 
/*
 * Config-space sizing: never smaller than through the MAC field, never
 * larger than the full struct, feature-dependent in between.
 */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
/* Map a virtqueue index to its queue-pair index (RX/TX share a pair). */
static int vq2q(int queue_index)
{
    int pair = queue_index / 2;

    return pair;
}
126 
127 /* TODO
128  * - we could suppress RX interrupt if we were so inclined.
129  */
130 
/*
 * Serialize the device's virtio-net config space into @config.
 *
 * The QEMU-side view is written first.  If the peer is a vhost-vdpa
 * backend, the backend's config is then read and used instead, except
 * that an all-zero MAC reported by the backend is replaced with the
 * QEMU-configured address.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    /* Multi-byte fields are stored with the guest's byte order. */
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without RSS, advertise a single-entry indirection table. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    /* Only the feature-dependent prefix of the struct is exposed. */
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
177 
/*
 * Handle a guest write to config space.  The MAC is the only field
 * acted upon, and only when neither VIRTIO_NET_F_CTRL_MAC_ADDR (which
 * supersedes config writes with a control-queue command) nor
 * VIRTIO_F_VERSION_1 was negotiated.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /* Forward the whole written config to the backend. */
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}
203 
204 static bool virtio_net_started(VirtIONet *n, uint8_t status)
205 {
206     VirtIODevice *vdev = VIRTIO_DEVICE(n);
207     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
208         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
209 }
210 
211 static void virtio_net_announce_notify(VirtIONet *net)
212 {
213     VirtIODevice *vdev = VIRTIO_DEVICE(net);
214     trace_virtio_net_announce_notify();
215 
216     net->status |= VIRTIO_NET_S_ANNOUNCE;
217     virtio_notify_config(vdev);
218 }
219 
220 static void virtio_net_announce_timer(void *opaque)
221 {
222     VirtIONet *n = opaque;
223     trace_virtio_net_announce_timer(n->announce_timer.round);
224 
225     n->announce_timer.round--;
226     virtio_net_announce_notify(n);
227 }
228 
/*
 * NetClientInfo announce hook: ask the guest to send gratuitous
 * announcements, provided it negotiated GUEST_ANNOUNCE and CTRL_VQ.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            virtio_net_announce_notify(n);
    }
}
248 
/*
 * Start or stop the vhost backend so that its running state matches
 * what @status (plus link state and vm_running) implies.  No-op when
 * there is no vhost backend or when the state already matches.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* Number of control virtqueues (0 or 1), derived from the nc layout. */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the desired state? */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't byte-swap vnet headers; stay in userspace then. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Push the negotiated MTU down to the backend before starting. */
        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set the flag before starting: start may consult it. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
308 
309 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
310                                           NetClientState *peer,
311                                           bool enable)
312 {
313     if (virtio_is_big_endian(vdev)) {
314         return qemu_set_vnet_be(peer, enable);
315     } else {
316         return qemu_set_vnet_le(peer, enable);
317     }
318 }
319 
320 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
321                                        int queue_pairs, bool enable)
322 {
323     int i;
324 
325     for (i = 0; i < queue_pairs; i++) {
326         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
327             enable) {
328             while (--i >= 0) {
329                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
330             }
331 
332             return true;
333         }
334     }
335 
336     return false;
337 }
338 
/*
 * Keep the backend's vnet-header endianness in sync with the device's
 * running state; sets n->needs_vnet_hdr_swap when the backend can't
 * handle the required byte order itself.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
361 
362 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
363 {
364     unsigned int dropped = virtqueue_drop_all(vq);
365     if (dropped) {
366         virtio_notify(vdev, vq);
367     }
368 }
369 
/*
 * VirtIODevice status hook: propagate @status to the endian handling,
 * the vhost backend, and every queue pair's TX machinery (timer or
 * bottom half), flushing or dropping pending work as appropriate.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond the current count behave as if status were 0. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Userspace processing only: vhost owns the rings otherwise. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm whichever TX flush mechanism this queue uses. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            /* Link down but driver still running: drain the TX queue. */
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
426 
427 static void virtio_net_set_link_status(NetClientState *nc)
428 {
429     VirtIONet *n = qemu_get_nic_opaque(nc);
430     VirtIODevice *vdev = VIRTIO_DEVICE(n);
431     uint16_t old_status = n->status;
432 
433     if (nc->link_down)
434         n->status &= ~VIRTIO_NET_S_LINK_UP;
435     else
436         n->status |= VIRTIO_NET_S_LINK_UP;
437 
438     if (n->status != old_status)
439         virtio_notify_config(vdev);
440 
441     virtio_net_set_status(vdev, vdev->status);
442 }
443 
/*
 * Emit a NIC_RX_FILTER_CHANGED QMP event for this device, then suppress
 * further events until management re-queries the filter state.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
458 
459 static intList *get_vlan_table(VirtIONet *n)
460 {
461     intList *list;
462     int i, j;
463 
464     list = NULL;
465     for (i = 0; i < MAX_VLAN >> 5; i++) {
466         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
467             if (n->vlans[i] & (1U << j)) {
468                 QAPI_LIST_PREPEND(list, (i << 5) + j);
469             }
470         }
471     }
472 
473     return list;
474 }
475 
/*
 * query-rx-filter QMP handler: snapshot the device's RX filter state
 * (promisc/uni/multi modes, MAC tables, VLAN table) into a freshly
 * allocated RxFilterInfo, and re-enable change notifications.
 * Caller owns the returned structure.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * NOTE(review): n->nobcast is set when broadcast is *disabled*
     * (see virtio_net_handle_rx_mode), so assigning it directly to
     * broadcast_allowed looks inverted — confirm against the QAPI
     * schema and management consumers before changing.
     */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast entries occupy macs[0 .. first_multi). */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    /* Multicast entries occupy macs[first_multi .. in_use). */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
538 
/*
 * Device reset: return all RX-filter, announce, MAC/VLAN-table and
 * queue state to power-on defaults, and purge in-flight TX on every
 * peer.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* Restore the MAC configured on the command line. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
577 
578 static void peer_test_vnet_hdr(VirtIONet *n)
579 {
580     NetClientState *nc = qemu_get_queue(n->nic);
581     if (!nc->peer) {
582         return;
583     }
584 
585     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
586 }
587 
588 static int peer_has_vnet_hdr(VirtIONet *n)
589 {
590     return n->has_vnet_hdr;
591 }
592 
593 static int peer_has_ufo(VirtIONet *n)
594 {
595     if (!peer_has_vnet_hdr(n))
596         return 0;
597 
598     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
599 
600     return n->has_ufo;
601 }
602 
/*
 * Recompute the guest-visible vnet header length from the negotiated
 * header-shaping features, and push it down to each peer that can
 * accept it (in which case host and guest header lengths match).
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* virtio 1.0+: header is v1_hash when hash reporting is on. */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        /* Legacy: header size depends on mergeable RX buffers only. */
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
632 
633 static int virtio_net_max_tx_queue_size(VirtIONet *n)
634 {
635     NetClientState *peer = n->nic_conf.peers.ncs[0];
636 
637     /*
638      * Backends other than vhost-user or vhost-vdpa don't support max queue
639      * size.
640      */
641     if (!peer) {
642         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
643     }
644 
645     switch(peer->info->type) {
646     case NET_CLIENT_DRIVER_VHOST_USER:
647     case NET_CLIENT_DRIVER_VHOST_VDPA:
648         return VIRTQUEUE_MAX_SIZE;
649     default:
650         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
651     };
652 }
653 
654 static int peer_attach(VirtIONet *n, int index)
655 {
656     NetClientState *nc = qemu_get_subqueue(n->nic, index);
657 
658     if (!nc->peer) {
659         return 0;
660     }
661 
662     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
663         vhost_set_vring_enable(nc->peer, 1);
664     }
665 
666     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
667         return 0;
668     }
669 
670     if (n->max_queue_pairs == 1) {
671         return 0;
672     }
673 
674     return tap_enable(nc->peer);
675 }
676 
677 static int peer_detach(VirtIONet *n, int index)
678 {
679     NetClientState *nc = qemu_get_subqueue(n->nic, index);
680 
681     if (!nc->peer) {
682         return 0;
683     }
684 
685     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
686         vhost_set_vring_enable(nc->peer, 0);
687     }
688 
689     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
690         return 0;
691     }
692 
693     return tap_disable(nc->peer);
694 }
695 
696 static void virtio_net_set_queue_pairs(VirtIONet *n)
697 {
698     int i;
699     int r;
700 
701     if (n->nic->peer_deleted) {
702         return;
703     }
704 
705     for (i = 0; i < n->max_queue_pairs; i++) {
706         if (i < n->curr_queue_pairs) {
707             r = peer_attach(n, i);
708             assert(!r);
709         } else {
710             r = peer_detach(n, i);
711             assert(!r);
712         }
713     }
714 }
715 
716 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
717 
/*
 * VirtIODevice get_features hook: start from the device's configured
 * host features, strip everything the peer cannot support (vnet
 * headers, UFO, RSS), and let a vhost backend mask the rest.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* No vnet headers: no offloads of any kind can be advertised. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Userspace virtio: nothing more to filter. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS with vhost relies on the eBPF steering program. */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally re-offer MTU even when the backend dropped it. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
765 
766 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
767 {
768     uint64_t features = 0;
769 
770     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
771      * but also these: */
772     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
773     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
774     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
775     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
776     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
777 
778     return features;
779 }
780 
781 static void virtio_net_apply_guest_offloads(VirtIONet *n)
782 {
783     qemu_set_offload(qemu_get_queue(n->nic)->peer,
784             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
785             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
786             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
787             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
788             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
789 }
790 
791 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
792 {
793     static const uint64_t guest_offloads_mask =
794         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
795         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
796         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
797         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
798         (1ULL << VIRTIO_NET_F_GUEST_UFO);
799 
800     return guest_offloads_mask & features;
801 }
802 
803 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
804 {
805     VirtIODevice *vdev = VIRTIO_DEVICE(n);
806     return virtio_net_guest_offloads_by_features(vdev->guest_features);
807 }
808 
/* Search context for failover_set_primary()/failover_find_primary_device(). */
typedef struct {
    VirtIONet *n;       /* device whose failover primary is being sought */
    DeviceState *dev;   /* match result; NULL until a primary is found */
} FailoverDevice;
813 
/**
 * qbus_walk_children() callback: record @dev as the failover primary if
 * it is a PCI device whose failover_pair_id matches our netclient name.
 *
 * @dev: device currently being visited by the bus walk
 * @opaque: FailoverDevice search context; ->dev is filled on a match
 *
 * Returns 1 to stop the walk once a match is found, 0 to keep walking.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    /* Non-PCI devices can never be failover primaries. */
    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}
838 
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the matching DeviceState, or NULL if no primary device with
 * a matching failover_pair_id exists on the system bus.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
855 
/*
 * Hot-plug the failover primary device if it is not already present,
 * using the device options stashed in n->primary_opts.  On failure the
 * stashed options are released so a later retry starts clean.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    /* Primary already plugged in: nothing to do. */
    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        /* qdev_device_add took its own reference. */
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
884 
/*
 * VirtIODevice set_features hook: reconfigure multiqueue, header
 * layout, RSC/RSS state, guest offloads, vhost feature acks, the VLAN
 * filter, and failover according to the features the guest accepted.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Drop MTU if it was only offered via the mtu_bypass_backend path. */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC needs both the extension and the matching TSO direction. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Let every vhost backend acknowledge the negotiated features. */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN the filter must pass everything. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* qtest has no primary to add; don't warn there. */
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
948 
949 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
950                                      struct iovec *iov, unsigned int iov_cnt)
951 {
952     uint8_t on;
953     size_t s;
954     NetClientState *nc = qemu_get_queue(n->nic);
955 
956     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
957     if (s != sizeof(on)) {
958         return VIRTIO_NET_ERR;
959     }
960 
961     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
962         n->promisc = on;
963     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
964         n->allmulti = on;
965     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
966         n->alluni = on;
967     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
968         n->nomulti = on;
969     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
970         n->nouni = on;
971     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
972         n->nobcast = on;
973     } else {
974         return VIRTIO_NET_ERR;
975     }
976 
977     rxfilter_notify(nc);
978 
979     return VIRTIO_NET_OK;
980 }
981 
982 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
983                                      struct iovec *iov, unsigned int iov_cnt)
984 {
985     VirtIODevice *vdev = VIRTIO_DEVICE(n);
986     uint64_t offloads;
987     size_t s;
988 
989     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
990         return VIRTIO_NET_ERR;
991     }
992 
993     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
994     if (s != sizeof(offloads)) {
995         return VIRTIO_NET_ERR;
996     }
997 
998     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
999         uint64_t supported_offloads;
1000 
1001         offloads = virtio_ldq_p(vdev, &offloads);
1002 
1003         if (!n->has_vnet_hdr) {
1004             return VIRTIO_NET_ERR;
1005         }
1006 
1007         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1008             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1009         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1010             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1011         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1012 
1013         supported_offloads = virtio_net_supported_guest_offloads(n);
1014         if (offloads & ~supported_offloads) {
1015             return VIRTIO_NET_ERR;
1016         }
1017 
1018         n->curr_guest_offloads = offloads;
1019         virtio_net_apply_guest_offloads(n);
1020 
1021         return VIRTIO_NET_OK;
1022     } else {
1023         return VIRTIO_NET_ERR;
1024     }
1025 }
1026 
/*
 * VIRTIO_NET_CTRL_MAC: either set the primary MAC (MAC_ADDR_SET) or
 * replace the whole MAC filter table (MAC_TABLE_SET).  The table
 * payload is two virtio_net_ctrl_mac structures back to back: unicast
 * entries first, then multicast entries.  If either part overflows
 * MAC_TABLE_ENTRIES, the corresponding *_overflow flag is set instead
 * (receive_filter() then accepts all addresses of that class).
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* Payload must be exactly one MAC address. */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Build the new table in a scratch buffer; commit only on success. */
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Unicast section: entry count followed by that many MACs. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* Guest-controlled count: make sure it fits inside the buffer. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Multicast section: second count plus MACs, must consume the rest. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Parse succeeded: commit the new table atomically. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1122 
1123 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1124                                         struct iovec *iov, unsigned int iov_cnt)
1125 {
1126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1127     uint16_t vid;
1128     size_t s;
1129     NetClientState *nc = qemu_get_queue(n->nic);
1130 
1131     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1132     vid = virtio_lduw_p(vdev, &vid);
1133     if (s != sizeof(vid)) {
1134         return VIRTIO_NET_ERR;
1135     }
1136 
1137     if (vid >= MAX_VLAN)
1138         return VIRTIO_NET_ERR;
1139 
1140     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1141         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1142     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1143         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1144     else
1145         return VIRTIO_NET_ERR;
1146 
1147     rxfilter_notify(nc);
1148 
1149     return VIRTIO_NET_OK;
1150 }
1151 
1152 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1153                                       struct iovec *iov, unsigned int iov_cnt)
1154 {
1155     trace_virtio_net_handle_announce(n->announce_timer.round);
1156     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1157         n->status & VIRTIO_NET_S_ANNOUNCE) {
1158         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1159         if (n->announce_timer.round) {
1160             qemu_announce_timer_step(&n->announce_timer);
1161         }
1162         return VIRTIO_NET_OK;
1163     } else {
1164         return VIRTIO_NET_ERR;
1165     }
1166 }
1167 
1168 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1169 
1170 static void virtio_net_disable_rss(VirtIONet *n)
1171 {
1172     if (n->rss_data.enabled) {
1173         trace_virtio_net_rss_disable();
1174     }
1175     n->rss_data.enabled = false;
1176 
1177     virtio_net_detach_epbf_rss(n);
1178 }
1179 
1180 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1181 {
1182     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1183     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1184         return false;
1185     }
1186 
1187     return nc->info->set_steering_ebpf(nc, prog_fd);
1188 }
1189 
1190 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1191                                    struct EBPFRSSConfig *config)
1192 {
1193     config->redirect = data->redirect;
1194     config->populate_hash = data->populate_hash;
1195     config->hash_types = data->hash_types;
1196     config->indirections_len = data->indirections_len;
1197     config->default_queue = data->default_queue;
1198 }
1199 
1200 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1201 {
1202     struct EBPFRSSConfig config = {};
1203 
1204     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1205         return false;
1206     }
1207 
1208     rss_data_to_rss_config(&n->rss_data, &config);
1209 
1210     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1211                           n->rss_data.indirections_table, n->rss_data.key)) {
1212         return false;
1213     }
1214 
1215     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1216         return false;
1217     }
1218 
1219     return true;
1220 }
1221 
/* Detach any steering eBPF program by passing an invalid fd (-1). */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1226 
/*
 * Probe whether the backend can accept a steering eBPF program (by
 * "attaching" the invalid fd -1) and, if so, load the RSS program.
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}
1236 
/* Detach the steering program from the backend and free the eBPF object. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1242 
1243 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1244                                       struct iovec *iov,
1245                                       unsigned int iov_cnt,
1246                                       bool do_rss)
1247 {
1248     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1249     struct virtio_net_rss_config cfg;
1250     size_t s, offset = 0, size_get;
1251     uint16_t queue_pairs, i;
1252     struct {
1253         uint16_t us;
1254         uint8_t b;
1255     } QEMU_PACKED temp;
1256     const char *err_msg = "";
1257     uint32_t err_value = 0;
1258 
1259     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1260         err_msg = "RSS is not negotiated";
1261         goto error;
1262     }
1263     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1264         err_msg = "Hash report is not negotiated";
1265         goto error;
1266     }
1267     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1268     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1269     if (s != size_get) {
1270         err_msg = "Short command buffer";
1271         err_value = (uint32_t)s;
1272         goto error;
1273     }
1274     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1275     n->rss_data.indirections_len =
1276         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1277     n->rss_data.indirections_len++;
1278     if (!do_rss) {
1279         n->rss_data.indirections_len = 1;
1280     }
1281     if (!is_power_of_2(n->rss_data.indirections_len)) {
1282         err_msg = "Invalid size of indirection table";
1283         err_value = n->rss_data.indirections_len;
1284         goto error;
1285     }
1286     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1287         err_msg = "Too large indirection table";
1288         err_value = n->rss_data.indirections_len;
1289         goto error;
1290     }
1291     n->rss_data.default_queue = do_rss ?
1292         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1293     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1294         err_msg = "Invalid default queue";
1295         err_value = n->rss_data.default_queue;
1296         goto error;
1297     }
1298     offset += size_get;
1299     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1300     g_free(n->rss_data.indirections_table);
1301     n->rss_data.indirections_table = g_malloc(size_get);
1302     if (!n->rss_data.indirections_table) {
1303         err_msg = "Can't allocate indirections table";
1304         err_value = n->rss_data.indirections_len;
1305         goto error;
1306     }
1307     s = iov_to_buf(iov, iov_cnt, offset,
1308                    n->rss_data.indirections_table, size_get);
1309     if (s != size_get) {
1310         err_msg = "Short indirection table buffer";
1311         err_value = (uint32_t)s;
1312         goto error;
1313     }
1314     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1315         uint16_t val = n->rss_data.indirections_table[i];
1316         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1317     }
1318     offset += size_get;
1319     size_get = sizeof(temp);
1320     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1321     if (s != size_get) {
1322         err_msg = "Can't get queue_pairs";
1323         err_value = (uint32_t)s;
1324         goto error;
1325     }
1326     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1327     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1328         err_msg = "Invalid number of queue_pairs";
1329         err_value = queue_pairs;
1330         goto error;
1331     }
1332     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1333         err_msg = "Invalid key size";
1334         err_value = temp.b;
1335         goto error;
1336     }
1337     if (!temp.b && n->rss_data.hash_types) {
1338         err_msg = "No key provided";
1339         err_value = 0;
1340         goto error;
1341     }
1342     if (!temp.b && !n->rss_data.hash_types) {
1343         virtio_net_disable_rss(n);
1344         return queue_pairs;
1345     }
1346     offset += size_get;
1347     size_get = temp.b;
1348     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1349     if (s != size_get) {
1350         err_msg = "Can get key buffer";
1351         err_value = (uint32_t)s;
1352         goto error;
1353     }
1354     n->rss_data.enabled = true;
1355 
1356     if (!n->rss_data.populate_hash) {
1357         if (!virtio_net_attach_epbf_rss(n)) {
1358             /* EBPF must be loaded for vhost */
1359             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1360                 warn_report("Can't load eBPF RSS for vhost");
1361                 goto error;
1362             }
1363             /* fallback to software RSS */
1364             warn_report("Can't load eBPF RSS - fallback to software RSS");
1365             n->rss_data.enabled_software_rss = true;
1366         }
1367     } else {
1368         /* use software RSS for hash populating */
1369         /* and detach eBPF if was loaded before */
1370         virtio_net_detach_epbf_rss(n);
1371         n->rss_data.enabled_software_rss = true;
1372     }
1373 
1374     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1375                                 n->rss_data.indirections_len,
1376                                 temp.b);
1377     return queue_pairs;
1378 error:
1379     trace_virtio_net_rss_error(err_msg, err_value);
1380     virtio_net_disable_rss(n);
1381     return 0;
1382 }
1383 
/*
 * VIRTIO_NET_CTRL_MQ: configure multiqueue.  Handles plain
 * VQ_PAIRS_SET as well as the RSS / hash-report config variants, then
 * applies the resulting number of active queue pairs to the device
 * model (and, except for vdpa, to the backend).
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Any new MQ command invalidates a previously programmed RSS state. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash-report config doesn't change the queue pair count. */
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Validate the guest-supplied (or RSS-derived) queue pair count. */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1436 
/*
 * Process one control-queue request given as scatter/gather lists.
 * Reads the virtio_net_ctrl_hdr from the out sg, dispatches on its
 * class, and writes the one-byte ack status into the in sg.
 *
 * Returns the number of bytes written back (sizeof(status)), or 0
 * after flagging a device error when the headers don't fit.
 */
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    /* Work on a copy of the out sg: iov_discard_front modifies it. */
    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    /* Write the ack byte back to the guest. */
    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
1479 
1480 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1481 {
1482     VirtQueueElement *elem;
1483 
1484     for (;;) {
1485         size_t written;
1486         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1487         if (!elem) {
1488             break;
1489         }
1490 
1491         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1492                                              elem->out_sg, elem->out_num);
1493         if (written > 0) {
1494             virtqueue_push(vq, elem, written);
1495             virtio_notify(vdev, vq);
1496             g_free(elem);
1497         } else {
1498             virtqueue_detach_element(vq, elem, 0);
1499             g_free(elem);
1500             break;
1501         }
1502     }
1503 }
1504 
1505 /* RX */
1506 
1507 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1508 {
1509     VirtIONet *n = VIRTIO_NET(vdev);
1510     int queue_index = vq2q(virtio_get_queue_index(vq));
1511 
1512     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1513 }
1514 
1515 static bool virtio_net_can_receive(NetClientState *nc)
1516 {
1517     VirtIONet *n = qemu_get_nic_opaque(nc);
1518     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1519     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1520 
1521     if (!vdev->vm_running) {
1522         return false;
1523     }
1524 
1525     if (nc->queue_index >= n->curr_queue_pairs) {
1526         return false;
1527     }
1528 
1529     if (!virtio_queue_ready(q->rx_vq) ||
1530         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1531         return false;
1532     }
1533 
1534     return true;
1535 }
1536 
/*
 * Return 1 if the RX virtqueue can hold bufsize bytes (guest
 * notifications are then disabled), 0 otherwise (notifications are
 * re-enabled so the guest kicks us when buffers arrive).
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1559 
/* Byte-swap the multi-byte fields of a vnet header to guest endianness. */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1567 
1568 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1569  * it never finds out that the packets don't have valid checksums.  This
1570  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1571  * fix this with Xen but it hasn't appeared in an upstream release of
1572  * dhclient yet.
1573  *
1574  * To avoid breaking existing guests, we catch udp packets and add
1575  * checksums.  This is terrible but it's better than hacking the guest
1576  * kernels.
1577  *
1578  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1579  * we should provide a mechanism to disable it to avoid polluting the host
1580  * cache.
1581  */
/*
 * If an inbound frame looks like a BOOTP/DHCP reply with a deferred
 * checksum, compute the UDP checksum in place and clear NEEDS_CSUM so
 * broken dhclient builds (see comment above) accept it.  The byte
 * offsets below assume an untagged Ethernet + IPv4 header with no IP
 * options.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1594 
/*
 * Copy (or synthesize) the virtio_net_hdr at the front of the guest's
 * receive buffers.  With a vnet header present, the buffer is first
 * patched for the dhclient workaround and byte-swapped if the device
 * and guest endianness differ; without one, a zeroed GSO_NONE header
 * is written instead.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1616 
1617 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1618 {
1619     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1620     static const uint8_t vlan[] = {0x81, 0x00};
1621     uint8_t *ptr = (uint8_t *)buf;
1622     int i;
1623 
1624     if (n->promisc)
1625         return 1;
1626 
1627     ptr += n->host_hdr_len;
1628 
1629     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1630         int vid = lduw_be_p(ptr + 14) & 0xfff;
1631         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1632             return 0;
1633     }
1634 
1635     if (ptr[0] & 1) { // multicast
1636         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1637             return !n->nobcast;
1638         } else if (n->nomulti) {
1639             return 0;
1640         } else if (n->allmulti || n->mac_table.multi_overflow) {
1641             return 1;
1642         }
1643 
1644         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1645             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1646                 return 1;
1647             }
1648         }
1649     } else { // unicast
1650         if (n->nouni) {
1651             return 0;
1652         } else if (n->alluni || n->mac_table.uni_overflow) {
1653             return 1;
1654         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1655             return 1;
1656         }
1657 
1658         for (i = 0; i < n->mac_table.first_multi; i++) {
1659             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1660                 return 1;
1661             }
1662         }
1663     }
1664 
1665     return 0;
1666 }
1667 
1668 static uint8_t virtio_net_get_hash_type(bool isip4,
1669                                         bool isip6,
1670                                         bool isudp,
1671                                         bool istcp,
1672                                         uint32_t types)
1673 {
1674     if (isip4) {
1675         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1676             return NetPktRssIpV4Tcp;
1677         }
1678         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1679             return NetPktRssIpV4Udp;
1680         }
1681         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1682             return NetPktRssIpV4;
1683         }
1684     } else if (isip6) {
1685         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1686                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1687 
1688         if (istcp && (types & mask)) {
1689             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1690                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1691         }
1692         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1693         if (isudp && (types & mask)) {
1694             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1695                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1696         }
1697         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1698         if (types & mask) {
1699             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1700                 NetPktRssIpV6Ex : NetPktRssIpV6;
1701         }
1702     }
1703     return 0xff;
1704 }
1705 
/*
 * Store the computed RSS hash value and report type into the
 * virtio_net_hdr_v1_hash that prefixes the packet buffer.
 *
 * NOTE(review): the fields are written without virtio_stl_p()/
 * virtio_stw_p() conversion — presumably correct only when device and
 * guest endianness match; worth confirming for cross-endian setups.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1713 
/*
 * Software RSS: classify the packet, optionally populate the hash
 * fields in its vnet header, and pick the destination queue from the
 * indirection table.
 *
 * Returns the index of the queue the packet should be redirected to,
 * or -1 if it should stay on the current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* NetPktRss* enum value -> VIRTIO_NET_HASH_REPORT_* constant. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* IP fragments carry no usable L4 header: hash on L3 only. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No applicable hash type: report none, use the default queue. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so this is hash % len. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1766 
/*
 * Deliver one packet into the guest rx virtqueue.  Runs under the RCU
 * read lock (taken by virtio_net_do_receive()).  With software RSS
 * enabled, the packet may be re-dispatched once to the queue chosen by
 * virtio_net_process_rss(); the recursive call passes no_rss=true so a
 * packet is never redirected twice.
 *
 * Returns: number of bytes consumed (== size) on success or deliberate
 * drop, 0 when the queue has no room (caller may retry later), -1 on
 * error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /*
     * Pop as many rx descriptors as needed to hold the packet.  The
     * elements are only filled/flushed once the whole packet fits, so
     * the error path can detach everything popped so far.
     */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives; patched after the loop */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the remainder of the header (incl. hash fields) */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Tell the guest how many descriptors the packet used */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Return every popped-but-unfilled element to the queue */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
1909 
1910 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1911                                   size_t size)
1912 {
1913     RCU_READ_LOCK_GUARD();
1914 
1915     return virtio_net_receive_rcu(nc, buf, size, false);
1916 }
1917 
/*
 * Parse the ipv4 and tcp headers of the packet at @buf (which starts
 * with the guest virtio-net header) into @unit.  The caller has already
 * checked the buffer is large enough and that this is an ipv4 frame.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL is in 32-bit words; << 2 converts to bytes */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* TCP data offset: top 4 bits, in words (>> 12 then << 2 == >> 10) */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* ipv4 total length includes the IP header, so subtract both headers */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
1934 
/*
 * Parse the ipv6 and tcp headers of the packet at @buf (which starts
 * with the guest virtio-net header) into @unit.  ipv6 has a fixed-size
 * base header, so the TCP header follows immediately; extension headers
 * are ruled out earlier by the IPPROTO_TCP next-header check.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6,
       ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
1953 
/*
 * Deliver one cached segment to the guest and free it.  If the segment
 * actually coalesced data, the RSC fields of the virtio-net header are
 * filled in so the guest can see how many segments/dup-ACKs were
 * merged.  Returns virtio_net_do_receive()'s result; callers treat 0
 * as "guest could not accept the packet".
 */
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    /* Rewrite the virtio-net header in place before handing it over */
    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
1982 
1983 static void virtio_net_rsc_purge(void *opq)
1984 {
1985     VirtioNetRscSeg *seg, *rn;
1986     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1987 
1988     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1989         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1990             chain->stat.purge_failed++;
1991             continue;
1992         }
1993     }
1994 
1995     chain->stat.timer++;
1996     if (!QTAILQ_EMPTY(&chain->buffers)) {
1997         timer_mod(chain->drain_timer,
1998               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1999     }
2000 }
2001 
2002 static void virtio_net_rsc_cleanup(VirtIONet *n)
2003 {
2004     VirtioNetRscChain *chain, *rn_chain;
2005     VirtioNetRscSeg *seg, *rn_seg;
2006 
2007     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2008         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2009             QTAILQ_REMOVE(&chain->buffers, seg, next);
2010             g_free(seg->buf);
2011             g_free(seg);
2012         }
2013 
2014         timer_free(chain->drain_timer);
2015         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2016         g_free(chain);
2017     }
2018 }
2019 
/*
 * Copy @buf into a freshly allocated segment and append it to the
 * chain's cache.  The buffer is over-allocated to the maximum possible
 * coalesced size so later payloads can be appended in place by
 * virtio_net_rsc_coalesce_data().
 */
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_new(VirtioNetRscSeg, 1);
    /* worst case: headers + maximum TCP payload we are willing to merge */
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    /* seg->unit must point into the copied buffer, not the caller's */
    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
2052 
2053 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2054                                          VirtioNetRscSeg *seg,
2055                                          const uint8_t *buf,
2056                                          struct tcp_header *n_tcp,
2057                                          struct tcp_header *o_tcp)
2058 {
2059     uint32_t nack, oack;
2060     uint16_t nwin, owin;
2061 
2062     nack = htonl(n_tcp->th_ack);
2063     nwin = htons(n_tcp->th_win);
2064     oack = htonl(o_tcp->th_ack);
2065     owin = htons(o_tcp->th_win);
2066 
2067     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2068         chain->stat.ack_out_of_win++;
2069         return RSC_FINAL;
2070     } else if (nack == oack) {
2071         /* duplicated ack or window probe */
2072         if (nwin == owin) {
2073             /* duplicated ack, add dup ack count due to whql test up to 1 */
2074             chain->stat.dup_ack++;
2075             return RSC_FINAL;
2076         } else {
2077             /* Coalesce window update */
2078             o_tcp->th_win = n_tcp->th_win;
2079             chain->stat.win_update++;
2080             return RSC_COALESCE;
2081         }
2082     } else {
2083         /* pure ack, go to 'C', finalize*/
2084         chain->stat.pure_ack++;
2085         return RSC_FINAL;
2086     }
2087 }
2088 
/*
 * Try to merge the new segment described by @n_unit into the cached
 * segment @seg (same flow, already matched by the caller).  Payload
 * bytes are appended in place, the cached IP length, TCP flags, ACK
 * and window are updated from the new segment.
 *
 * Returns RSC_COALESCE when merged, RSC_FINAL when the cached segment
 * must be flushed first (out of window/order, oversize, dup/pure ack).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* append the new payload after the cached bytes */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2153 
2154 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2155                                         VirtioNetRscSeg *seg,
2156                                         const uint8_t *buf, size_t size,
2157                                         VirtioNetRscUnit *unit)
2158 {
2159     struct ip_header *ip1, *ip2;
2160 
2161     ip1 = (struct ip_header *)(unit->ip);
2162     ip2 = (struct ip_header *)(seg->unit.ip);
2163     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2164         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2165         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2166         chain->stat.no_match++;
2167         return RSC_NO_MATCH;
2168     }
2169 
2170     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2171 }
2172 
2173 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2174                                         VirtioNetRscSeg *seg,
2175                                         const uint8_t *buf, size_t size,
2176                                         VirtioNetRscUnit *unit)
2177 {
2178     struct ip6_header *ip1, *ip2;
2179 
2180     ip1 = (struct ip6_header *)(unit->ip);
2181     ip2 = (struct ip6_header *)(seg->unit.ip);
2182     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2183         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2184         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2185         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2186             chain->stat.no_match++;
2187             return RSC_NO_MATCH;
2188     }
2189 
2190     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2191 }
2192 
2193 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2194  * to prevent out of order */
2195 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2196                                          struct tcp_header *tcp)
2197 {
2198     uint16_t tcp_hdr;
2199     uint16_t tcp_flag;
2200 
2201     tcp_flag = htons(tcp->th_offset_flags);
2202     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2203     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2204     if (tcp_flag & TH_SYN) {
2205         chain->stat.tcp_syn++;
2206         return RSC_BYPASS;
2207     }
2208 
2209     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2210         chain->stat.tcp_ctrl_drain++;
2211         return RSC_FINAL;
2212     }
2213 
2214     if (tcp_hdr > sizeof(struct tcp_header)) {
2215         chain->stat.tcp_all_opt++;
2216         return RSC_FINAL;
2217     }
2218 
2219     return RSC_CANDIDATE;
2220 }
2221 
/*
 * Core coalescing step for an RSC candidate: walk the chain's cache for
 * a segment of the same flow and either merge into it, flush it and
 * deliver the new packet, or cache the new packet for later merging.
 * Returns the number of bytes consumed (0 only when a flush to the
 * guest failed).
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        /* first packet on this chain: cache it and arm the drain timer */
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* no segment of the same flow cached yet: start a new one */
    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2267 
/* Drain a connection data, this is to avoid out of order segments */
/*
 * Flush any cached segment belonging to the same flow as @buf (address
 * pair at @ip_start/@ip_size and port pair at offset @tcp_port), then
 * deliver the current packet unmodified.
 *
 * NOTE(review): the port pair is read via a uint32_t load at an
 * arbitrary packet offset; this is technically an unaligned access --
 * presumably acceptable on all supported hosts, but worth confirming.
 */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    /* source and destination port as one 32-bit comparison */
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        /* at most one cached segment per flow: stop at first match */
        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
2294 
/*
 * Sanity-check an ipv4 packet for RSC eligibility.  Returns
 * RSC_CANDIDATE only for plain, unfragmented, non-ECN TCP with no IP
 * options and a consistent length; everything else is RSC_BYPASS and
 * is delivered unmodified.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    /* NOTE(review): this path reuses the ip_option counter even though
       it is a version mismatch, not an option -- confirm intended. */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    /* DF not set means the packet may be (or become) fragmented */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* total length must cover at least the headers and fit the buffer */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2340 
/*
 * RSC receive path for ipv4: validate the packet, bypass anything not
 * eligible, drain the flow on TCP control segments, otherwise try to
 * coalesce.  Returns the number of bytes consumed.
 */
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    /* too short for eth + ipv4 + tcp headers: deliver unmodified */
    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* +12 is the offset of the ipv4 source address within the header */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
2375 
/*
 * Sanity-check an ipv6 packet for RSC eligibility: plain TCP (no
 * extension headers), no ECN, payload length consistent with the
 * buffer.  Returns RSC_CANDIDATE or RSC_BYPASS.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* NOTE(review): unlike the ipv4 check, this bypass bumps no stat
       counter -- confirm whether that asymmetry is intended. */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* ipv6 payload length excludes the ipv6 header itself */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2409 
2410 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2411                                       const uint8_t *buf, size_t size)
2412 {
2413     int32_t ret;
2414     uint16_t hdr_len;
2415     VirtioNetRscChain *chain;
2416     VirtioNetRscUnit unit;
2417 
2418     chain = (VirtioNetRscChain *)opq;
2419     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2420 
2421     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2422         + sizeof(tcp_header))) {
2423         return virtio_net_do_receive(nc, buf, size);
2424     }
2425 
2426     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2427     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2428                                                  unit.ip, buf, size)) {
2429         return virtio_net_do_receive(nc, buf, size);
2430     }
2431 
2432     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2433     if (ret == RSC_BYPASS) {
2434         return virtio_net_do_receive(nc, buf, size);
2435     } else if (ret == RSC_FINAL) {
2436         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2437                 ((hdr_len + sizeof(struct eth_header)) + 8),
2438                 VIRTIO_NET_IP6_ADDR_SIZE,
2439                 hdr_len + sizeof(struct eth_header)
2440                 + sizeof(struct ip6_header));
2441     }
2442 
2443     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2444 }
2445 
2446 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2447                                                       NetClientState *nc,
2448                                                       uint16_t proto)
2449 {
2450     VirtioNetRscChain *chain;
2451 
2452     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2453         return NULL;
2454     }
2455 
2456     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2457         if (chain->proto == proto) {
2458             return chain;
2459         }
2460     }
2461 
2462     chain = g_malloc(sizeof(*chain));
2463     chain->n = n;
2464     chain->proto = proto;
2465     if (proto == (uint16_t)ETH_P_IP) {
2466         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2467         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2468     } else {
2469         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2470         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2471     }
2472     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2473                                       virtio_net_rsc_purge, chain);
2474     memset(&chain->stat, 0, sizeof(chain->stat));
2475 
2476     QTAILQ_INIT(&chain->buffers);
2477     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2478 
2479     return chain;
2480 }
2481 
/*
 * RSC entry point: dispatch the packet to the ipv4 or ipv6 coalescing
 * path when its ethertype has a chain and the matching rsc*_enabled
 * flag is set; everything else goes straight to the guest.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /* NOTE(review): the size check uses host_hdr_len while the eth
       header below is located via guest_hdr_len -- presumably safe
       because guest_hdr_len >= host_hdr_len, but worth confirming. */
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
2510 
2511 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2512                                   size_t size)
2513 {
2514     VirtIONet *n = qemu_get_nic_opaque(nc);
2515     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2516         return virtio_net_rsc_receive(nc, buf, size);
2517     } else {
2518         return virtio_net_do_receive(nc, buf, size);
2519     }
2520 }
2521 
2522 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2523 
/*
 * Completion callback for an asynchronously-sent tx packet: retire the
 * in-flight element, re-enable notifications and flush whatever queued
 * up while the send was pending.  If the flush stops because of the
 * tx_burst limit, re-schedule the bh/timer to finish later.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* retire the element that was parked in async_tx by flush_tx */
    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret >= n->tx_burst) {
        /*
         * the flush has been stopped by tx_burst
         * we will not receive notification for the
         * remainining part, so re-schedule
         */
        virtio_queue_set_notification(q->tx_vq, 0);
        if (q->tx_bh) {
            qemu_bh_schedule(q->tx_bh);
        } else {
            timer_mod(q->tx_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        }
        q->tx_waiting = 1;
    }
}
2555 
/* TX */
/*
 * Drain up to tx_burst packets from the tx virtqueue and hand them to
 * the backend.  Returns the number of packets sent, -EINVAL on a
 * malformed descriptor (device is put into the error state), or -EBUSY
 * when the backend queued a packet asynchronously (completion resumes
 * in virtio_net_tx_complete()).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* a previous send is still in flight; wait for its completion */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* replace the header with a byte-swapped local copy */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* backend queued it asynchronously; park elem until completion */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2651 
2652 static void virtio_net_tx_timer(void *opaque);
2653 
/*
 * TX virtqueue notification handler for the "timer" transmit strategy:
 * batch guest transmissions by (re-)arming a one-shot timer, and flush
 * immediately when a previous batch is already pending.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* With link down there is nowhere to send; drop the queued buffers. */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        /* Remember that a flush is owed once the VM resumes. */
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        /* Suppress further kicks until the timer fires. */
        virtio_queue_set_notification(vq, 0);
    }
}
2682 
/*
 * TX virtqueue notification handler for the "bh" (bottom-half) transmit
 * strategy: schedule a bottom half to flush the queue outside of the
 * vCPU notification context.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* With link down there is nowhere to send; drop the queued buffers. */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A bottom half is already pending; it will pick up this work too. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    /* Suppress further kicks until the bottom half has run. */
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2704 
/*
 * Timer callback for the "timer" transmit strategy: flush up to a burst
 * of pending TX packets and decide whether to re-arm for more.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Bail out if the driver is no longer ready (e.g. being reset). */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    /* -EBUSY: async send in flight; -EINVAL: device marked broken. */
    if (ret == -EBUSY || ret == -EINVAL) {
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}
2754 
/*
 * Bottom-half callback for the "bh" transmit strategy: flush up to a
 * burst of pending TX packets and reschedule itself while the guest
 * keeps producing.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Bail out if the driver is no longer ready (e.g. being reset). */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2803 
/*
 * Create the RX/TX virtqueue pair for queue pair @index, choosing the
 * TX flush mechanism (timer vs bottom half) from the "tx" property.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        /* "timer" strategy: coalesce TX kicks with a one-shot timer. */
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        /* Default "bh" strategy: flush from a bottom half. */
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    /* Back-pointer used by the timer/bh callbacks to find the device. */
    n->vqs[index].n = n;
}
2828 
/*
 * Tear down the RX/TX virtqueue pair for queue pair @index, including
 * the TX flush mechanism (timer or bottom half) created alongside it.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* Discard packets still queued in the net layer for this subqueue. */
    qemu_purge_queued_packets(nc);

    /* RX vq is at even index, TX vq at the following odd index. */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        /* Queue was created with the "timer" TX strategy. */
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2848 
/*
 * Resize the set of datapath queue pairs to @new_max_queue_pairs.
 * The total queue count is always 2 * pairs + 1 (RX/TX per pair plus
 * the control vq, which must stay the last queue).
 */
static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    /* At least one queue pair plus ctrl vq, and always an odd total. */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2883 
2884 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2885 {
2886     int max = multiqueue ? n->max_queue_pairs : 1;
2887 
2888     n->multiqueue = multiqueue;
2889     virtio_net_change_num_queue_pairs(n, max);
2890 
2891     virtio_net_set_queue_pairs(n);
2892 }
2893 
/*
 * vmstate post_load hook for the device section: re-derive all runtime
 * state that is not (or cannot be) carried in the migration stream.
 * Returns 0 on success.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    /* Recompute header sizes from the negotiated feature bits. */
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume any in-progress guest self-announcement on the target. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Re-attach eBPF RSS; fall back to software RSS where possible. */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    /* vhost cannot use the software fallback. */
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2976 
/*
 * Late post_load hook run after virtio_load has finished setting
 * features (which clobbers curr_guest_offloads). Returns 0 on success.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
2992 
2993 /* tx_waiting field of a VirtIONetQueue */
2994 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2995     .name = "virtio-net-queue-tx_waiting",
2996     .fields = (VMStateField[]) {
2997         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2998         VMSTATE_END_OF_LIST()
2999    },
3000 };
3001 
3002 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3003 {
3004     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3005 }
3006 
3007 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3008 {
3009     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3010                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3011 }
3012 
3013 static bool mac_table_fits(void *opaque, int version_id)
3014 {
3015     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3016 }
3017 
3018 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3019 {
3020     return !mac_table_fits(opaque, version_id);
3021 }
3022 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;             /* device being saved/loaded */
    VirtIONetQueue *vqs_1;              /* vqs[1..]; entry 0 migrates with the device */
    uint16_t        curr_queue_pairs_1; /* count of entries behind vqs_1 */
    uint8_t         has_ufo;            /* wire copy of parent->has_ufo */
    uint32_t        has_vnet_hdr;       /* wire copy of parent->has_vnet_hdr */
};
3033 
3034 /* The 2nd and subsequent tx_waiting flags are loaded later than
3035  * the 1st entry in the queue_pairs and only if there's more than one
3036  * entry.  We use the tmp mechanism to calculate a temporary
3037  * pointer and count and also validate the count.
3038  */
3039 
3040 static int virtio_net_tx_waiting_pre_save(void *opaque)
3041 {
3042     struct VirtIONetMigTmp *tmp = opaque;
3043 
3044     tmp->vqs_1 = tmp->parent->vqs + 1;
3045     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3046     if (tmp->parent->curr_queue_pairs == 0) {
3047         tmp->curr_queue_pairs_1 = 0;
3048     }
3049 
3050     return 0;
3051 }
3052 
3053 static int virtio_net_tx_waiting_pre_load(void *opaque)
3054 {
3055     struct VirtIONetMigTmp *tmp = opaque;
3056 
3057     /* Reuse the pointer setup from save */
3058     virtio_net_tx_waiting_pre_save(opaque);
3059 
3060     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3061         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3062             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3063 
3064         return -EINVAL;
3065     }
3066 
3067     return 0; /* all good */
3068 }
3069 
/* Migrates the tx_waiting flags of queue pairs 1..N via VirtIONetMigTmp. */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3082 
3083 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3084  * flag set we need to check that we have it
3085  */
3086 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3087 {
3088     struct VirtIONetMigTmp *tmp = opaque;
3089 
3090     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3091         error_report("virtio-net: saved image requires TUN_F_UFO support");
3092         return -EINVAL;
3093     }
3094 
3095     return 0;
3096 }
3097 
3098 static int virtio_net_ufo_pre_save(void *opaque)
3099 {
3100     struct VirtIONetMigTmp *tmp = opaque;
3101 
3102     tmp->has_ufo = tmp->parent->has_ufo;
3103 
3104     return 0;
3105 }
3106 
/* Migrates (and cross-checks on load) the backend's UFO capability. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3116 
3117 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3118  * flag set we need to check that we have it
3119  */
3120 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3121 {
3122     struct VirtIONetMigTmp *tmp = opaque;
3123 
3124     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3125         error_report("virtio-net: saved image requires vnet_hdr=on");
3126         return -EINVAL;
3127     }
3128 
3129     return 0;
3130 }
3131 
3132 static int virtio_net_vnet_pre_save(void *opaque)
3133 {
3134     struct VirtIONetMigTmp *tmp = opaque;
3135 
3136     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3137 
3138     return 0;
3139 }
3140 
/* Migrates (and cross-checks on load) the backend's vnet-header capability. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3150 
3151 static bool virtio_net_rss_needed(void *opaque)
3152 {
3153     return VIRTIO_NET(opaque)->rss_data.enabled;
3154 }
3155 
/* Optional subsection carrying RSS/hash-report configuration. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Indirection table is allocated on load to indirections_len. */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3176 
/*
 * Main device-section vmstate. Field order defines the wire format and
 * must not change; new state goes in subsections.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* Only vqs[0].tx_waiting; the rest are in the WITH_TMP below. */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* max_queue_pairs must match between source and destination. */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3232 
/* Net-layer client callbacks for the virtio-net NIC frontend. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3242 
/*
 * Report whether vhost has a pending guest notification for virtqueue
 * @idx. Maps the vq index to the owning net client: without MQ, index 2
 * is the control vq, which lives on the subqueue after the data pairs.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3264 
/*
 * Mask or unmask vhost guest notifications for virtqueue @idx, using
 * the same vq-index-to-net-client mapping (and the same bogus-index
 * guard) as virtio_net_guest_notifier_pending().
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
3288 
/*
 * Derive the config-space size from the feature set. VIRTIO_NET_F_MAC
 * is forced into the local copy because the mac field always exists in
 * the config layout; the caller's host_features are left untouched.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}
3295 
3296 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3297                                    const char *type)
3298 {
3299     /*
3300      * The name can be NULL, the netclient name will be type.x.
3301      */
3302     assert(type != NULL);
3303 
3304     g_free(n->netclient_name);
3305     g_free(n->netclient_type);
3306     n->netclient_name = g_strdup(name);
3307     n->netclient_type = g_strdup(type);
3308 }
3309 
3310 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3311 {
3312     HotplugHandler *hotplug_ctrl;
3313     PCIDevice *pci_dev;
3314     Error *err = NULL;
3315 
3316     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3317     if (hotplug_ctrl) {
3318         pci_dev = PCI_DEVICE(dev);
3319         pci_dev->partially_hotplugged = true;
3320         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3321         if (err) {
3322             error_report_err(err);
3323             return false;
3324         }
3325     } else {
3326         return false;
3327     }
3328     return true;
3329 }
3330 
/*
 * Re-plug a failover primary that was partially unplugged for
 * migration (e.g. after migration failure). Returns true on success;
 * on failure the error is propagated through @errp.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    /* Nothing to do if the device was never partially unplugged. */
    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    /* Unhide the primary before plugging so the guest can see it. */
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3363 
/*
 * React to migration state changes for the failover primary: unplug it
 * when migration setup starts, and plug it back if migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            /* Keep the primary's state out of the migration stream. */
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3393 
3394 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3395 {
3396     MigrationState *s = data;
3397     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3398     virtio_net_handle_migration_primary(n, s);
3399 }
3400 
/*
 * DeviceListener hide_device hook: decide whether a device being
 * created should be hidden because it is this NIC's failover primary
 * (identified by a matching failover_pair_id). Also records the
 * primary's creation options so it can be re-created later.
 * Returns true to hide the device, false to let creation proceed.
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    /* Only devices declaring a failover pairing are candidates. */
    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3451 
3452 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3453 {
3454     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3455     VirtIONet *n = VIRTIO_NET(dev);
3456     NetClientState *nc;
3457     int i;
3458 
3459     if (n->net_conf.mtu) {
3460         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3461     }
3462 
3463     if (n->net_conf.duplex_str) {
3464         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3465             n->net_conf.duplex = DUPLEX_HALF;
3466         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3467             n->net_conf.duplex = DUPLEX_FULL;
3468         } else {
3469             error_setg(errp, "'duplex' must be 'half' or 'full'");
3470             return;
3471         }
3472         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3473     } else {
3474         n->net_conf.duplex = DUPLEX_UNKNOWN;
3475     }
3476 
3477     if (n->net_conf.speed < SPEED_UNKNOWN) {
3478         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3479         return;
3480     }
3481     if (n->net_conf.speed >= 0) {
3482         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3483     }
3484 
3485     if (n->failover) {
3486         n->primary_listener.hide_device = failover_hide_primary_device;
3487         qatomic_set(&n->failover_primary_hidden, true);
3488         device_listener_register(&n->primary_listener);
3489         n->migration_state.notify = virtio_net_migration_state_notifier;
3490         add_migration_state_change_notifier(&n->migration_state);
3491         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3492     }
3493 
3494     virtio_net_set_config_size(n, n->host_features);
3495     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3496 
3497     /*
3498      * We set a lower limit on RX queue size to what it always was.
3499      * Guests that want a smaller ring can always resize it without
3500      * help from us (using virtio 1 and up).
3501      */
3502     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3503         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3504         !is_power_of_2(n->net_conf.rx_queue_size)) {
3505         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3506                    "must be a power of 2 between %d and %d.",
3507                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3508                    VIRTQUEUE_MAX_SIZE);
3509         virtio_cleanup(vdev);
3510         return;
3511     }
3512 
3513     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3514         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3515         !is_power_of_2(n->net_conf.tx_queue_size)) {
3516         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3517                    "must be a power of 2 between %d and %d",
3518                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3519                    VIRTQUEUE_MAX_SIZE);
3520         virtio_cleanup(vdev);
3521         return;
3522     }
3523 
3524     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3525 
3526     /*
3527      * Figure out the datapath queue pairs since the backend could
3528      * provide control queue via peers as well.
3529      */
3530     if (n->nic_conf.peers.queues) {
3531         for (i = 0; i < n->max_ncs; i++) {
3532             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3533                 ++n->max_queue_pairs;
3534             }
3535         }
3536     }
3537     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3538 
3539     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3540         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3541                    "must be a positive integer less than %d.",
3542                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3543         virtio_cleanup(vdev);
3544         return;
3545     }
3546     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3547     n->curr_queue_pairs = 1;
3548     n->tx_timeout = n->net_conf.txtimer;
3549 
3550     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3551                        && strcmp(n->net_conf.tx, "bh")) {
3552         warn_report("virtio-net: "
3553                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3554                     n->net_conf.tx);
3555         error_printf("Defaulting to \"bh\"");
3556     }
3557 
3558     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3559                                     n->net_conf.tx_queue_size);
3560 
3561     for (i = 0; i < n->max_queue_pairs; i++) {
3562         virtio_net_add_queue(n, i);
3563     }
3564 
3565     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3566     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3567     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3568     n->status = VIRTIO_NET_S_LINK_UP;
3569     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3570                               QEMU_CLOCK_VIRTUAL,
3571                               virtio_net_announce_timer, n);
3572     n->announce_timer.round = 0;
3573 
3574     if (n->netclient_type) {
3575         /*
3576          * Happen when virtio_net_set_netclient_name has been called.
3577          */
3578         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3579                               n->netclient_type, n->netclient_name, n);
3580     } else {
3581         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3582                               object_get_typename(OBJECT(dev)), dev->id, n);
3583     }
3584 
3585     for (i = 0; i < n->max_queue_pairs; i++) {
3586         n->nic->ncs[i].do_not_pad = true;
3587     }
3588 
3589     peer_test_vnet_hdr(n);
3590     if (peer_has_vnet_hdr(n)) {
3591         for (i = 0; i < n->max_queue_pairs; i++) {
3592             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3593         }
3594         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3595     } else {
3596         n->host_hdr_len = 0;
3597     }
3598 
3599     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3600 
3601     n->vqs[0].tx_waiting = 0;
3602     n->tx_burst = n->net_conf.txburst;
3603     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3604     n->promisc = 1; /* for compatibility */
3605 
3606     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3607 
3608     n->vlans = g_malloc0(MAX_VLAN >> 3);
3609 
3610     nc = qemu_get_queue(n->nic);
3611     nc->rxfilter_notify_enabled = 1;
3612 
3613    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3614         struct virtio_net_config netcfg = {};
3615         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3616         vhost_net_set_config(get_vhost_net(nc->peer),
3617             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3618     }
3619     QTAILQ_INIT(&n->rsc_chains);
3620     n->qdev = dev;
3621 
3622     net_rx_pkt_init(&n->rx_pkt, false);
3623 
3624     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3625         virtio_net_load_ebpf(n);
3626     }
3627 }
3628 
/*
 * Tear down everything virtio_net_device_realize() set up, in reverse
 * dependency order: stop the backend first, then release per-device
 * resources, queues, the NIC and finally the generic virtio state.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    /* Release the eBPF RSS program if one was loaded at realize time. */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        /* Mirror the listener/notifier registration done at realize. */
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        /* primary_opts is only ever set on the failover path. */
        assert(n->primary_opts == NULL);
    }

    /* Only one queue pair was exposed unless multiqueue was negotiated. */
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3672 
/*
 * QOM instance init: runs on object creation, before properties are set
 * and before realize.  Only sets defaults and registers properties; no
 * resources that need undoing here are acquired.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overriden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    /* Prepare eBPF RSS state; the program itself is loaded at realize. */
    ebpf_rss_init(&n->ebpf_rss);
}
3688 
3689 static int virtio_net_pre_save(void *opaque)
3690 {
3691     VirtIONet *n = opaque;
3692 
3693     /* At this point, backend must be stopped, otherwise
3694      * it might keep writing to memory. */
3695     assert(!n->vhost_started);
3696 
3697     return 0;
3698 }
3699 
3700 static bool primary_unplug_pending(void *opaque)
3701 {
3702     DeviceState *dev = opaque;
3703     DeviceState *primary;
3704     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3705     VirtIONet *n = VIRTIO_NET(vdev);
3706 
3707     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3708         return false;
3709     }
3710     primary = failover_find_primary_device(n);
3711     return primary ? primary->pending_deleted_event : false;
3712 }
3713 
/*
 * VMState dev_unplug_pending callback: dispatch to the virtio device
 * class hook (primary_unplug_pending for virtio-net).
 */
static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}
3721 
3722 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3723 {
3724     VirtIONet *n = VIRTIO_NET(vdev);
3725     NetClientState *nc = qemu_get_queue(n->nic);
3726     struct vhost_net *net = get_vhost_net(nc->peer);
3727     return &net->dev;
3728 }
3729 
/*
 * Outer migration descriptor.  Only the generic virtio transport state
 * lives here; the device-specific fields are carried by
 * vmstate_virtio_net_device, wired up via vdc->vmsd in class_init.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    /* Lets migration wait for a failover primary's pending unplug. */
    .dev_unplug_pending = dev_unplug_pending,
};
3741 
/*
 * User-configurable properties.  The DEFINE_PROP_BIT64 entries toggle
 * individual bits of host_features; the rest configure queue sizes,
 * timers and link parameters.
 */
static Property virtio_net_properties[] = {
    /* Checksum and segmentation offload feature bits */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    /* Status / control virtqueue feature bits */
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue / RSS features — off by default */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    /* TX scheduling and queue geometry */
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Link speed/duplex reporting and failover */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3808 
3809 static void virtio_net_class_init(ObjectClass *klass, void *data)
3810 {
3811     DeviceClass *dc = DEVICE_CLASS(klass);
3812     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3813 
3814     device_class_set_props(dc, virtio_net_properties);
3815     dc->vmsd = &vmstate_virtio_net;
3816     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3817     vdc->realize = virtio_net_device_realize;
3818     vdc->unrealize = virtio_net_device_unrealize;
3819     vdc->get_config = virtio_net_get_config;
3820     vdc->set_config = virtio_net_set_config;
3821     vdc->get_features = virtio_net_get_features;
3822     vdc->set_features = virtio_net_set_features;
3823     vdc->bad_features = virtio_net_bad_features;
3824     vdc->reset = virtio_net_reset;
3825     vdc->set_status = virtio_net_set_status;
3826     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3827     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3828     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3829     vdc->post_load = virtio_net_post_load_virtio;
3830     vdc->vmsd = &vmstate_virtio_net_device;
3831     vdc->primary_unplug_pending = primary_unplug_pending;
3832     vdc->get_vhost = virtio_net_get_vhost;
3833 }
3834 
/* QOM type registration record for the virtio-net device model. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};
3842 
/* Register the virtio-net QOM type at module load time. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
3849