xref: /openbmc/qemu/hw/net/virtio-net.c (revision e0091133)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length value in ip header without option */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
 77 /* Purge coalesced packets timer interval.  This value affects performance
 78    a lot and should be tuned carefully: '300000' (300us) is the recommended
 79    value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 80    with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
/*
 * Config-space size implied by each feature bit: when one of the listed
 * features is negotiated, the device config space must extend at least up
 * to the given 'end' offset within struct virtio_net_config.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}  /* terminator */
};
108 
/*
 * Bounds for the device config space: never smaller than the classic
 * MAC-only layout, never larger than the full structure, with the
 * per-feature sizes above filling in the middle.
 */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
/* Convert a virtqueue index to its queue-pair index (RX/TX share a pair). */
static int vq2q(int queue_index)
{
    int pair = queue_index / 2;

    return pair;
}
126 
/*
 * Drop any packets still queued towards this queue's peer.  Used on
 * reset/queue-reset paths where pending traffic must not touch the rings.
 */
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        /* No backend attached: nothing can be queued. */
        return;
    }

    /* Second argument requests purging (not just flushing). */
    qemu_flush_or_purge_queued_packets(nc->peer, true);
    /* Any async TX must have completed by the time we get here. */
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
/*
 * Fill @config with the current device configuration.
 *
 * The config is first built from QEMU's own state; for a vhost-vdpa
 * backend the device's real config is then fetched and overlaid, except
 * that an all-zero MAC reported by the backend is replaced with QEMU's
 * configured MAC (zero is not a legal address).
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without RSS only a single indirection entry is advertised. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    /* Publish QEMU's view first; vdpa may overwrite it below. */
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
187 
188 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
189 {
190     VirtIONet *n = VIRTIO_NET(vdev);
191     struct virtio_net_config netcfg = {};
192     NetClientState *nc = qemu_get_queue(n->nic);
193 
194     memcpy(&netcfg, config, n->config_size);
195 
196     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
197         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
198         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
199         memcpy(n->mac, netcfg.mac, ETH_ALEN);
200         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
201     }
202 
203     /*
204      * Is this VDPA? No peer means not VDPA: there's no way to
205      * disconnect/reconnect a VDPA peer.
206      */
207     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
208         vhost_net_set_config(get_vhost_net(nc->peer),
209                              (uint8_t *)&netcfg, 0, n->config_size,
210                              VHOST_SET_CONFIG_TYPE_MASTER);
211       }
212 }
213 
214 static bool virtio_net_started(VirtIONet *n, uint8_t status)
215 {
216     VirtIODevice *vdev = VIRTIO_DEVICE(n);
217     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
218         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
219 }
220 
/*
 * Raise the ANNOUNCE status bit and interrupt the guest so it sends
 * gratuitous packets (used after migration so switches re-learn the MAC).
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
229 
/* Timer callback: consume one announce round and notify the guest again. */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
238 
239 static void virtio_net_announce(NetClientState *nc)
240 {
241     VirtIONet *n = qemu_get_nic_opaque(nc);
242     VirtIODevice *vdev = VIRTIO_DEVICE(n);
243 
244     /*
245      * Make sure the virtio migration announcement timer isn't running
246      * If it is, let it trigger announcement so that we do not cause
247      * confusion.
248      */
249     if (n->announce_timer.round) {
250         return;
251     }
252 
253     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
254         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
255             virtio_net_announce_notify(n);
256     }
257 }
258 
/*
 * Start or stop the vhost backend so that its running state matches the
 * desired device state derived from @status and the peer's link state.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* Remaining ncs beyond the data queue pairs back the control vq. */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        /* Not a vhost backend: nothing to manage here. */
        return;
    }

    /* Already in the desired state? */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't byte-swap vnet headers for us. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Propagate a negotiated MTU to the backend before starting. */
        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set the flag before starting so callbacks see a consistent state;
         * roll it back on failure. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
318 
319 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
320                                           NetClientState *peer,
321                                           bool enable)
322 {
323     if (virtio_is_big_endian(vdev)) {
324         return qemu_set_vnet_be(peer, enable);
325     } else {
326         return qemu_set_vnet_le(peer, enable);
327     }
328 }
329 
/*
 * Apply the vnet-header endianness setting to all @queue_pairs backends.
 *
 * Returns true if the backends could NOT honour an enable request (after
 * rolling back the ones already set), i.e. "needs header swap in QEMU";
 * false on success or when disabling.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* Undo the queues already switched so state stays consistent. */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
348 
/*
 * Sync the backend's vnet-header endianness with the device state
 * transition implied by @status.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
371 
372 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
373 {
374     unsigned int dropped = virtqueue_drop_all(vq);
375     if (dropped) {
376         virtio_notify(vdev, vq);
377     }
378 }
379 
/*
 * VirtIODevice set_status hook: propagate a new device status to the
 * endian/vhost machinery and to each queue pair's TX timer/bottom half.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond the current multiqueue configuration are off. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Userspace processing only; vhost owns the rings otherwise. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm whichever TX mechanism this queue uses. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
436 
437 static void virtio_net_set_link_status(NetClientState *nc)
438 {
439     VirtIONet *n = qemu_get_nic_opaque(nc);
440     VirtIODevice *vdev = VIRTIO_DEVICE(n);
441     uint16_t old_status = n->status;
442 
443     if (nc->link_down)
444         n->status &= ~VIRTIO_NET_S_LINK_UP;
445     else
446         n->status |= VIRTIO_NET_S_LINK_UP;
447 
448     if (n->status != old_status)
449         virtio_notify_config(vdev);
450 
451     virtio_net_set_status(vdev, vdev->status);
452 }
453 
/*
 * Emit the NIC_RX_FILTER_CHANGED QMP event for this device, then disable
 * further events until management queries the filter again.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
468 
469 static intList *get_vlan_table(VirtIONet *n)
470 {
471     intList *list;
472     int i, j;
473 
474     list = NULL;
475     for (i = 0; i < MAX_VLAN >> 5; i++) {
476         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
477             if (n->vlans[i] & (1U << j)) {
478                 QAPI_LIST_PREPEND(list, (i << 5) + j);
479             }
480         }
481     }
482 
483     return list;
484 }
485 
/*
 * NetClientInfo query_rx_filter hook: snapshot the current RX filter
 * state into a freshly allocated RxFilterInfo (caller owns/frees it),
 * and re-enable rx-filter-changed event notification.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * NOTE(review): 'nobcast' reads as "no broadcast", so assigning it
     * directly to 'broadcast_allowed' looks inverted — confirm against the
     * QAPI consumers before changing; kept as-is here.
     */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast MACs occupy table entries [0, first_multi). */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    /* Multicast MACs occupy entries [first_multi, in_use). */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
548 
/*
 * VirtIODevice queue_reset hook: reset the matching vhost virtqueue (for
 * vhost-over-tap backends) and drop any packets queued for this queue.
 */
static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}
565 
566 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
567 {
568     VirtIONet *n = VIRTIO_NET(vdev);
569     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
570     int r;
571 
572     if (!nc->peer || !vdev->vhost_started) {
573         return;
574     }
575 
576     if (get_vhost_net(nc->peer) &&
577         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
578         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
579         if (r < 0) {
580             error_report("unable to restart vhost net virtqueue: %d, "
581                             "when resetting the queue", queue_index);
582         }
583     }
584 }
585 
/*
 * VirtIODevice reset hook: return all RX-filter, MAC, VLAN and announce
 * state to power-on defaults and drop any pending packets.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* Restore the MAC configured on the command line. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}
619 
620 static void peer_test_vnet_hdr(VirtIONet *n)
621 {
622     NetClientState *nc = qemu_get_queue(n->nic);
623     if (!nc->peer) {
624         return;
625     }
626 
627     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
628 }
629 
/* Return the cached result of peer_test_vnet_hdr(). */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
634 
635 static int peer_has_ufo(VirtIONet *n)
636 {
637     if (!peer_has_vnet_hdr(n))
638         return 0;
639 
640     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
641 
642     return n->has_ufo;
643 }
644 
/*
 * Recompute the guest-visible header length from the negotiated feature
 * set and, where the backend supports it, make the host use the same
 * header length so no translation is needed.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* virtio-1: hash reporting implies the larger v1_hash header. */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        /* Legacy: header size depends on mergeable RX buffers only. */
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
674 
675 static int virtio_net_max_tx_queue_size(VirtIONet *n)
676 {
677     NetClientState *peer = n->nic_conf.peers.ncs[0];
678 
679     /*
680      * Backends other than vhost-user or vhost-vdpa don't support max queue
681      * size.
682      */
683     if (!peer) {
684         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
685     }
686 
687     switch(peer->info->type) {
688     case NET_CLIENT_DRIVER_VHOST_USER:
689     case NET_CLIENT_DRIVER_VHOST_VDPA:
690         return VIRTQUEUE_MAX_SIZE;
691     default:
692         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
693     };
694 }
695 
/*
 * Enable queue pair @index on the backend.  vhost-user gets its vring
 * enabled; tap backends are enabled via tap_enable() (only needed when
 * more than one queue pair exists).  Returns 0 on success.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}
718 
/*
 * Disable queue pair @index on the backend; mirror image of peer_attach().
 * Returns 0 on success.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}
737 
738 static void virtio_net_set_queue_pairs(VirtIONet *n)
739 {
740     int i;
741     int r;
742 
743     if (n->nic->peer_deleted) {
744         return;
745     }
746 
747     for (i = 0; i < n->max_queue_pairs; i++) {
748         if (i < n->curr_queue_pairs) {
749             r = peer_attach(n, i);
750             assert(!r);
751         } else {
752             r = peer_detach(n, i);
753             assert(!r);
754         }
755     }
756 }
757 
758 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
759 
/*
 * VirtIODevice get_features hook: start from the configured host features
 * and strip everything the peer backend cannot support.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* No vnet headers: no offloads in either direction, no hash report. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Non-vhost backends handle ring reset in QEMU and need no filtering. */
    if (!get_vhost_net(nc->peer)) {
        virtio_add_feature(&features, VIRTIO_F_RING_RESET);
        return features;
    }

    /* RSS via vhost requires the eBPF steering program to be loaded. */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally advertise MTU even when the vhost backend lacks it. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
808 
/*
 * VirtIODevice bad_features hook: feature set assumed by old guests that
 * predate feature negotiation fixes.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
823 
/* Push the currently negotiated guest offload flags down to the backend. */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}
833 
834 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
835 {
836     static const uint64_t guest_offloads_mask =
837         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
838         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
839         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
840         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
841         (1ULL << VIRTIO_NET_F_GUEST_UFO);
842 
843     return guest_offloads_mask & features;
844 }
845 
/* Guest-offload bits implied by the currently negotiated feature set. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
851 
/* Pairs a virtio-net device with the failover primary found for it. */
typedef struct {
    VirtIONet *n;      /* the failover-enabled virtio-net device */
    DeviceState *dev;  /* matching primary device, NULL until found */
} FailoverDevice;
856 
/**
 * Set the failover primary device
 *
 * @dev: device currently visited by the bus walk
 * @opaque: FailoverDevice to fill in when the primary is found
 *
 * Returns 1 (stop walking) when @dev is a PCI device whose
 * failover_pair_id matches our netclient name, 0 otherwise.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        /* Only PCI devices can be failover primaries. */
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}
881 
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the primary DeviceState, or NULL when no device on the system
 * bus carries a matching failover_pair_id.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
898 
/*
 * Hot-add the failover primary device if it is not already present,
 * using the device options remembered in n->primary_opts.  Sets @errp
 * when no primary can be found or created.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        /* Primary already exists: nothing to add. */
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        /* Options are single-use: drop them so we don't retry with them. */
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
927 
/*
 * Apply the guest-negotiated feature bits (VIRTIO_NET_F_*) to the device
 * model: queue layout, merged rx buffers, RSC, offloads, vhost feature
 * acks, VLAN filter initialization and failover standby handling.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Backend doesn't expose MTU: hide the feature from the guest. */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    /* Multiqueue is in effect when either RSS or MQ was negotiated. */
    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC needs both RSC_EXT and the matching guest TSO feature. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Let every vhost backend (one per queue pair) ack the features. */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* With CTRL_VLAN start with an empty filter (guest adds VIDs);
     * without it, accept every VLAN. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* qtest runs have no primary device; keep them quiet. */
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
991 
992 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
993                                      struct iovec *iov, unsigned int iov_cnt)
994 {
995     uint8_t on;
996     size_t s;
997     NetClientState *nc = qemu_get_queue(n->nic);
998 
999     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1000     if (s != sizeof(on)) {
1001         return VIRTIO_NET_ERR;
1002     }
1003 
1004     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1005         n->promisc = on;
1006     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1007         n->allmulti = on;
1008     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1009         n->alluni = on;
1010     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1011         n->nomulti = on;
1012     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1013         n->nouni = on;
1014     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1015         n->nobcast = on;
1016     } else {
1017         return VIRTIO_NET_ERR;
1018     }
1019 
1020     rxfilter_notify(nc);
1021 
1022     return VIRTIO_NET_OK;
1023 }
1024 
/*
 * Handle VIRTIO_NET_CTRL_GUEST_OFFLOADS: the guest dynamically changes
 * which receive offloads (csum/TSO/...) are active, within what was
 * negotiated.  Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        /* Payload is in guest endianness. */
        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        /* RSC state is tracked separately and its bit is stripped before
         * validating against the supported offload mask. */
        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
1069 
/*
 * Handle VIRTIO_NET_CTRL_MAC commands: either set the device MAC address
 * (MAC_ADDR_SET) or replace the whole MAC filter table (MAC_TABLE_SET).
 *
 * The table payload is two virtio_net_ctrl_mac lists back to back:
 * unicast entries first, then multicast entries.  A list larger than
 * MAC_TABLE_ENTRIES is not stored; the corresponding overflow flag is set
 * instead (the table then cannot be used for filtering that class).
 *
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* Payload must be exactly one MAC address. */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First list: unicast addresses. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* The claimed entry count must fit in the remaining payload. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    /* Multicast entries start where the unicast ones ended. */
    first_multi = in_use;

    /* Second list: multicast addresses. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume the payload exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Commit the fully-validated table in one go. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1165 
1166 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1167                                         struct iovec *iov, unsigned int iov_cnt)
1168 {
1169     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1170     uint16_t vid;
1171     size_t s;
1172     NetClientState *nc = qemu_get_queue(n->nic);
1173 
1174     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1175     vid = virtio_lduw_p(vdev, &vid);
1176     if (s != sizeof(vid)) {
1177         return VIRTIO_NET_ERR;
1178     }
1179 
1180     if (vid >= MAX_VLAN)
1181         return VIRTIO_NET_ERR;
1182 
1183     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1184         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1185     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1186         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1187     else
1188         return VIRTIO_NET_ERR;
1189 
1190     rxfilter_notify(nc);
1191 
1192     return VIRTIO_NET_OK;
1193 }
1194 
1195 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1196                                       struct iovec *iov, unsigned int iov_cnt)
1197 {
1198     trace_virtio_net_handle_announce(n->announce_timer.round);
1199     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1200         n->status & VIRTIO_NET_S_ANNOUNCE) {
1201         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1202         if (n->announce_timer.round) {
1203             qemu_announce_timer_step(&n->announce_timer);
1204         }
1205         return VIRTIO_NET_OK;
1206     } else {
1207         return VIRTIO_NET_ERR;
1208     }
1209 }
1210 
1211 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1212 
1213 static void virtio_net_disable_rss(VirtIONet *n)
1214 {
1215     if (n->rss_data.enabled) {
1216         trace_virtio_net_rss_disable();
1217     }
1218     n->rss_data.enabled = false;
1219 
1220     virtio_net_detach_epbf_rss(n);
1221 }
1222 
1223 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1224 {
1225     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1226     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1227         return false;
1228     }
1229 
1230     return nc->info->set_steering_ebpf(nc, prog_fd);
1231 }
1232 
1233 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1234                                    struct EBPFRSSConfig *config)
1235 {
1236     config->redirect = data->redirect;
1237     config->populate_hash = data->populate_hash;
1238     config->hash_types = data->hash_types;
1239     config->indirections_len = data->indirections_len;
1240     config->default_queue = data->default_queue;
1241 }
1242 
1243 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1244 {
1245     struct EBPFRSSConfig config = {};
1246 
1247     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1248         return false;
1249     }
1250 
1251     rss_data_to_rss_config(&n->rss_data, &config);
1252 
1253     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1254                           n->rss_data.indirections_table, n->rss_data.key)) {
1255         return false;
1256     }
1257 
1258     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1259         return false;
1260     }
1261 
1262     return true;
1263 }
1264 
/* Detach the steering eBPF program: fd -1 tells the backend to drop it. */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1269 
/*
 * Load the RSS eBPF program if the backend can use it.  The attach with
 * fd -1 is a cheap capability probe: it fails iff the backend lacks the
 * steering hook, in which case loading the program would be pointless.
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}
1279 
/* Detach the steering program from the backend, then unload it. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1285 
1286 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1287                                       struct iovec *iov,
1288                                       unsigned int iov_cnt,
1289                                       bool do_rss)
1290 {
1291     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1292     struct virtio_net_rss_config cfg;
1293     size_t s, offset = 0, size_get;
1294     uint16_t queue_pairs, i;
1295     struct {
1296         uint16_t us;
1297         uint8_t b;
1298     } QEMU_PACKED temp;
1299     const char *err_msg = "";
1300     uint32_t err_value = 0;
1301 
1302     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1303         err_msg = "RSS is not negotiated";
1304         goto error;
1305     }
1306     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1307         err_msg = "Hash report is not negotiated";
1308         goto error;
1309     }
1310     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1311     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1312     if (s != size_get) {
1313         err_msg = "Short command buffer";
1314         err_value = (uint32_t)s;
1315         goto error;
1316     }
1317     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1318     n->rss_data.indirections_len =
1319         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1320     n->rss_data.indirections_len++;
1321     if (!do_rss) {
1322         n->rss_data.indirections_len = 1;
1323     }
1324     if (!is_power_of_2(n->rss_data.indirections_len)) {
1325         err_msg = "Invalid size of indirection table";
1326         err_value = n->rss_data.indirections_len;
1327         goto error;
1328     }
1329     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1330         err_msg = "Too large indirection table";
1331         err_value = n->rss_data.indirections_len;
1332         goto error;
1333     }
1334     n->rss_data.default_queue = do_rss ?
1335         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1336     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1337         err_msg = "Invalid default queue";
1338         err_value = n->rss_data.default_queue;
1339         goto error;
1340     }
1341     offset += size_get;
1342     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1343     g_free(n->rss_data.indirections_table);
1344     n->rss_data.indirections_table = g_malloc(size_get);
1345     if (!n->rss_data.indirections_table) {
1346         err_msg = "Can't allocate indirections table";
1347         err_value = n->rss_data.indirections_len;
1348         goto error;
1349     }
1350     s = iov_to_buf(iov, iov_cnt, offset,
1351                    n->rss_data.indirections_table, size_get);
1352     if (s != size_get) {
1353         err_msg = "Short indirection table buffer";
1354         err_value = (uint32_t)s;
1355         goto error;
1356     }
1357     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1358         uint16_t val = n->rss_data.indirections_table[i];
1359         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1360     }
1361     offset += size_get;
1362     size_get = sizeof(temp);
1363     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1364     if (s != size_get) {
1365         err_msg = "Can't get queue_pairs";
1366         err_value = (uint32_t)s;
1367         goto error;
1368     }
1369     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1370     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1371         err_msg = "Invalid number of queue_pairs";
1372         err_value = queue_pairs;
1373         goto error;
1374     }
1375     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1376         err_msg = "Invalid key size";
1377         err_value = temp.b;
1378         goto error;
1379     }
1380     if (!temp.b && n->rss_data.hash_types) {
1381         err_msg = "No key provided";
1382         err_value = 0;
1383         goto error;
1384     }
1385     if (!temp.b && !n->rss_data.hash_types) {
1386         virtio_net_disable_rss(n);
1387         return queue_pairs;
1388     }
1389     offset += size_get;
1390     size_get = temp.b;
1391     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1392     if (s != size_get) {
1393         err_msg = "Can get key buffer";
1394         err_value = (uint32_t)s;
1395         goto error;
1396     }
1397     n->rss_data.enabled = true;
1398 
1399     if (!n->rss_data.populate_hash) {
1400         if (!virtio_net_attach_epbf_rss(n)) {
1401             /* EBPF must be loaded for vhost */
1402             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1403                 warn_report("Can't load eBPF RSS for vhost");
1404                 goto error;
1405             }
1406             /* fallback to software RSS */
1407             warn_report("Can't load eBPF RSS - fallback to software RSS");
1408             n->rss_data.enabled_software_rss = true;
1409         }
1410     } else {
1411         /* use software RSS for hash populating */
1412         /* and detach eBPF if was loaded before */
1413         virtio_net_detach_epbf_rss(n);
1414         n->rss_data.enabled_software_rss = true;
1415     }
1416 
1417     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1418                                 n->rss_data.indirections_len,
1419                                 temp.b);
1420     return queue_pairs;
1421 error:
1422     trace_virtio_net_rss_error(err_msg, err_value);
1423     virtio_net_disable_rss(n);
1424     return 0;
1425 }
1426 
/*
 * Handle VIRTIO_NET_CTRL_MQ commands: plain multiqueue (VQ_PAIRS_SET),
 * RSS configuration, or hash-report configuration.  Any of these resets
 * the previous RSS state first.  Returns VIRTIO_NET_OK / VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Every MQ command replaces any previously programmed RSS state. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash report only: queue count stays as-is; just parse config. */
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Validate against spec limits and the device's own maximum. */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1479 
/*
 * Process one control-queue request presented as scatter/gather lists.
 *
 * @in_sg/@in_num: device-writable buffers; receive the one-byte ack
 * @out_sg/@out_num: driver-written buffers; virtio_net_ctrl_hdr + payload
 *
 * Returns sizeof(status) (the bytes written to @in_sg), or 0 after
 * marking the device broken when the headers don't fit.
 */
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    /* Work on a copy: iov_discard_front() below modifies the array, and
     * the caller's sg must stay intact.  iov2 keeps the pointer to free. */
    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    /* Write the ack back to the guest. */
    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
1522 
/*
 * Control virtqueue handler: pop each element, process the command and
 * push the status byte back; on a malformed request return the element
 * to the queue and stop (the device has been marked broken).
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            /* 0 bytes written: the request was invalid; detach the
             * element without completing it and stop processing. */
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}
1547 
1548 /* RX */
1549 
1550 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1551 {
1552     VirtIONet *n = VIRTIO_NET(vdev);
1553     int queue_index = vq2q(virtio_get_queue_index(vq));
1554 
1555     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1556 }
1557 
1558 static bool virtio_net_can_receive(NetClientState *nc)
1559 {
1560     VirtIONet *n = qemu_get_nic_opaque(nc);
1561     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1562     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1563 
1564     if (!vdev->vm_running) {
1565         return false;
1566     }
1567 
1568     if (nc->queue_index >= n->curr_queue_pairs) {
1569         return false;
1570     }
1571 
1572     if (!virtio_queue_ready(q->rx_vq) ||
1573         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1574         return false;
1575     }
1576 
1577     return true;
1578 }
1579 
/*
 * Check whether the rx virtqueue can hold a packet of @bufsize bytes.
 * Returns 1 and disables queue notifications when buffers are available;
 * returns 0 (leaving notifications enabled so we learn when the guest
 * refills) otherwise.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1602 
/*
 * Byte-swap the multi-byte fields of a virtio-net header to/from guest
 * endianness (a no-op when host and guest endianness match).
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1610 
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    /* NOTE(review): the byte offsets assume an untagged Ethernet frame
     * with a 20-byte (option-less) IPv4 header — confirm that's the
     * intended match scope. */
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1637 
/*
 * Write the virtio-net header for an incoming packet into the guest's
 * buffers.  When the backend supplies a vnet header it is fixed up
 * (dhclient workaround, optional byte swap) and copied; otherwise a
 * zeroed no-offload header is synthesized.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        /* No backend header: present a plain header with no offloads. */
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1659 
1660 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1661 {
1662     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1663     static const uint8_t vlan[] = {0x81, 0x00};
1664     uint8_t *ptr = (uint8_t *)buf;
1665     int i;
1666 
1667     if (n->promisc)
1668         return 1;
1669 
1670     ptr += n->host_hdr_len;
1671 
1672     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1673         int vid = lduw_be_p(ptr + 14) & 0xfff;
1674         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1675             return 0;
1676     }
1677 
1678     if (ptr[0] & 1) { // multicast
1679         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1680             return !n->nobcast;
1681         } else if (n->nomulti) {
1682             return 0;
1683         } else if (n->allmulti || n->mac_table.multi_overflow) {
1684             return 1;
1685         }
1686 
1687         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1688             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1689                 return 1;
1690             }
1691         }
1692     } else { // unicast
1693         if (n->nouni) {
1694             return 0;
1695         } else if (n->alluni || n->mac_table.uni_overflow) {
1696             return 1;
1697         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1698             return 1;
1699         }
1700 
1701         for (i = 0; i < n->mac_table.first_multi; i++) {
1702             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1703                 return 1;
1704             }
1705         }
1706     }
1707 
1708     return 0;
1709 }
1710 
1711 static uint8_t virtio_net_get_hash_type(bool isip4,
1712                                         bool isip6,
1713                                         bool isudp,
1714                                         bool istcp,
1715                                         uint32_t types)
1716 {
1717     if (isip4) {
1718         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1719             return NetPktRssIpV4Tcp;
1720         }
1721         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1722             return NetPktRssIpV4Udp;
1723         }
1724         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1725             return NetPktRssIpV4;
1726         }
1727     } else if (isip6) {
1728         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1729                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1730 
1731         if (istcp && (types & mask)) {
1732             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1733                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1734         }
1735         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1736         if (isudp && (types & mask)) {
1737             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1738                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1739         }
1740         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1741         if (types & mask) {
1742             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1743                 NetPktRssIpV6Ex : NetPktRssIpV6;
1744         }
1745     }
1746     return 0xff;
1747 }
1748 
/*
 * Fill the hash-report fields of the packet's virtio_net_hdr_v1_hash
 * header in place.
 * NOTE(review): the values are stored without virtio_stl_p()-style
 * endianness conversion — confirm callers only use this on paths where
 * that is correct.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1756 
/*
 * Software RSS: hash the packet, optionally populate the hash report in
 * the header, and pick the destination queue via the indirection table.
 *
 * Returns the queue index to redirect to, or -1 when the packet should
 * stay on the current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Maps NetPktRss* values to VIRTIO_NET_HASH_REPORT_* codes. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* Fragments carry no L4 header, so only L3 hashing applies. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No applicable hash type: report none; use the default queue. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so masking works here. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1809 
/*
 * Deliver one packet to the guest RX virtqueue.
 *
 * Pops as many descriptor chains as needed (mergeable RX buffers), copies
 * in the guest-visible virtio header followed by the packet payload, then
 * patches num_buffers once the chain count is known and pushes everything
 * to the guest in one batch.
 *
 * Returns the number of bytes consumed, 0 when there is currently not
 * enough buffer space (caller may retry later), or -1 on unrecoverable
 * error.  Caller holds the RCU read lock (see virtio_net_do_receive).
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* 'no_rss' breaks the recursion after the packet has been steered. */
    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered-out packets are reported as fully consumed. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry mid-packet indicates a guest bug; report it. */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives inside the guest
                 * buffers; it is patched after the loop once the number
                 * of descriptor chains used is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the hash fields that follow the mergeable header. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Patch num_buffers now that the chain count is known. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Give back every chain popped so far; nothing reaches the guest. */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
1952 
/* Plain receive entry point: takes the RCU read lock around the real work. */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
1960 
/*
 * Parse the IPv4 and TCP headers of a packet into 'unit'.
 * Callers validate the buffer size before getting here (at least
 * guest_hdr_len + eth + ip + tcp headers must be present).
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL is in 32-bit words; << 2 converts to bytes. */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* TCP data offset: top 4 bits, in 32-bit words; >> 10 == (>> 12) * 4.
     * htons() is used as a pure 16-bit byte swap (same as ntohs()). */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* IPv4 total length includes the IP header, so subtract both headers. */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
1977 
/*
 * Parse the IPv6 and TCP headers of a packet into 'unit'.
 * Extension headers are not handled; sanity_check6 only admits packets
 * whose next-header field is directly TCP.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    /* TCP data offset: top 4 bits, in 32-bit words; >> 10 == (>> 12) * 4. */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header itself is excluded in ipv6. */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
1996 
1997 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1998                                        VirtioNetRscSeg *seg)
1999 {
2000     int ret;
2001     struct virtio_net_hdr_v1 *h;
2002 
2003     h = (struct virtio_net_hdr_v1 *)seg->buf;
2004     h->flags = 0;
2005     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2006 
2007     if (seg->is_coalesced) {
2008         h->rsc.segments = seg->packets;
2009         h->rsc.dup_acks = seg->dup_ack;
2010         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2011         if (chain->proto == ETH_P_IP) {
2012             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2013         } else {
2014             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2015         }
2016     }
2017 
2018     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2019     QTAILQ_REMOVE(&chain->buffers, seg, next);
2020     g_free(seg->buf);
2021     g_free(seg);
2022 
2023     return ret;
2024 }
2025 
2026 static void virtio_net_rsc_purge(void *opq)
2027 {
2028     VirtioNetRscSeg *seg, *rn;
2029     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2030 
2031     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2032         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2033             chain->stat.purge_failed++;
2034             continue;
2035         }
2036     }
2037 
2038     chain->stat.timer++;
2039     if (!QTAILQ_EMPTY(&chain->buffers)) {
2040         timer_mod(chain->drain_timer,
2041               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2042     }
2043 }
2044 
2045 static void virtio_net_rsc_cleanup(VirtIONet *n)
2046 {
2047     VirtioNetRscChain *chain, *rn_chain;
2048     VirtioNetRscSeg *seg, *rn_seg;
2049 
2050     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2051         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2052             QTAILQ_REMOVE(&chain->buffers, seg, next);
2053             g_free(seg->buf);
2054             g_free(seg);
2055         }
2056 
2057         timer_free(chain->drain_timer);
2058         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2059         g_free(chain);
2060     }
2061 }
2062 
2063 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2064                                      NetClientState *nc,
2065                                      const uint8_t *buf, size_t size)
2066 {
2067     uint16_t hdr_len;
2068     VirtioNetRscSeg *seg;
2069 
2070     hdr_len = chain->n->guest_hdr_len;
2071     seg = g_new(VirtioNetRscSeg, 1);
2072     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2073         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2074     memcpy(seg->buf, buf, size);
2075     seg->size = size;
2076     seg->packets = 1;
2077     seg->dup_ack = 0;
2078     seg->is_coalesced = 0;
2079     seg->nc = nc;
2080 
2081     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2082     chain->stat.cache++;
2083 
2084     switch (chain->proto) {
2085     case ETH_P_IP:
2086         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2087         break;
2088     case ETH_P_IPV6:
2089         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2090         break;
2091     default:
2092         g_assert_not_reached();
2093     }
2094 }
2095 
2096 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2097                                          VirtioNetRscSeg *seg,
2098                                          const uint8_t *buf,
2099                                          struct tcp_header *n_tcp,
2100                                          struct tcp_header *o_tcp)
2101 {
2102     uint32_t nack, oack;
2103     uint16_t nwin, owin;
2104 
2105     nack = htonl(n_tcp->th_ack);
2106     nwin = htons(n_tcp->th_win);
2107     oack = htonl(o_tcp->th_ack);
2108     owin = htons(o_tcp->th_win);
2109 
2110     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2111         chain->stat.ack_out_of_win++;
2112         return RSC_FINAL;
2113     } else if (nack == oack) {
2114         /* duplicated ack or window probe */
2115         if (nwin == owin) {
2116             /* duplicated ack, add dup ack count due to whql test up to 1 */
2117             chain->stat.dup_ack++;
2118             return RSC_FINAL;
2119         } else {
2120             /* Coalesce window update */
2121             o_tcp->th_win = n_tcp->th_win;
2122             chain->stat.win_update++;
2123             return RSC_COALESCE;
2124         }
2125     } else {
2126         /* pure ack, go to 'C', finalize*/
2127         chain->stat.pure_ack++;
2128         return RSC_FINAL;
2129     }
2130 }
2131 
/*
 * Try to merge the new packet's TCP payload into the cached segment.
 *
 * Returns RSC_COALESCE when merged (payload data or a window update),
 * or RSC_FINAL when the cached segment must be drained first (out of
 * window, out of order, dup ACK, oversize result, ...).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            /* Same sequence number and either both carry data or both
             * are empty: treat as an ACK/window-update case. */
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* The latest ACK and window advertisement win. */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the new payload after the cached bytes; the cache buffer
         * was sized for max_payload at allocation time. */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2196 
2197 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2198                                         VirtioNetRscSeg *seg,
2199                                         const uint8_t *buf, size_t size,
2200                                         VirtioNetRscUnit *unit)
2201 {
2202     struct ip_header *ip1, *ip2;
2203 
2204     ip1 = (struct ip_header *)(unit->ip);
2205     ip2 = (struct ip_header *)(seg->unit.ip);
2206     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2207         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2208         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2209         chain->stat.no_match++;
2210         return RSC_NO_MATCH;
2211     }
2212 
2213     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2214 }
2215 
2216 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2217                                         VirtioNetRscSeg *seg,
2218                                         const uint8_t *buf, size_t size,
2219                                         VirtioNetRscUnit *unit)
2220 {
2221     struct ip6_header *ip1, *ip2;
2222 
2223     ip1 = (struct ip6_header *)(unit->ip);
2224     ip2 = (struct ip6_header *)(seg->unit.ip);
2225     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2226         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2227         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2228         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2229             chain->stat.no_match++;
2230             return RSC_NO_MATCH;
2231     }
2232 
2233     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2234 }
2235 
2236 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2237  * to prevent out of order */
2238 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2239                                          struct tcp_header *tcp)
2240 {
2241     uint16_t tcp_hdr;
2242     uint16_t tcp_flag;
2243 
2244     tcp_flag = htons(tcp->th_offset_flags);
2245     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2246     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2247     if (tcp_flag & TH_SYN) {
2248         chain->stat.tcp_syn++;
2249         return RSC_BYPASS;
2250     }
2251 
2252     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2253         chain->stat.tcp_ctrl_drain++;
2254         return RSC_FINAL;
2255     }
2256 
2257     if (tcp_hdr > sizeof(struct tcp_header)) {
2258         chain->stat.tcp_all_opt++;
2259         return RSC_FINAL;
2260     }
2261 
2262     return RSC_CANDIDATE;
2263 }
2264 
/*
 * Core coalescing step: match the packet against the cached segments on
 * this chain and either merge it, drain the matching flow, or cache it
 * as the head of a new flow.  Returns the bytes accepted, or 0 when a
 * drain send failed.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        /* First packet on this chain: cache it and arm the drain timer. */
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            /* Flush the matched cached segment first ... */
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* No flow matched: start a new cached segment for this flow. */
    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2310 
/* Drain a connection's cached data; this is to avoid out of order segments.
 * 'ip_start'/'ip_size' locate the IP address pair within the packet and
 * 'tcp_port' locates the TCP source+dest port pair.  At most one cached
 * segment (the matching flow) is drained, then the triggering packet is
 * delivered via the normal receive path.
 */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    /* Both 16-bit TCP ports are compared with a single 32-bit load.
     * NOTE(review): this dereferences a potentially unaligned uint32_t
     * inside the packet buffer -- confirm target platforms tolerate
     * unaligned access or that the buffers are suitably aligned. */
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
2337 
/*
 * Check whether an IPv4 packet is a candidate for coalescing.
 * Anything unusual (wrong version, IP options, non-TCP, fragments, ECN,
 * bogus lengths) is bypassed to the normal receive path.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    /* NOTE(review): this case bumps stat.ip_option although the reason is
     * a wrong IP version; there appears to be no dedicated counter. */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* Total length must cover the ip+tcp headers and fit in the frame. */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2383 
2384 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2385                                       NetClientState *nc,
2386                                       const uint8_t *buf, size_t size)
2387 {
2388     int32_t ret;
2389     uint16_t hdr_len;
2390     VirtioNetRscUnit unit;
2391 
2392     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2393 
2394     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2395         + sizeof(struct tcp_header))) {
2396         chain->stat.bypass_not_tcp++;
2397         return virtio_net_do_receive(nc, buf, size);
2398     }
2399 
2400     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2401     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2402         != RSC_CANDIDATE) {
2403         return virtio_net_do_receive(nc, buf, size);
2404     }
2405 
2406     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2407     if (ret == RSC_BYPASS) {
2408         return virtio_net_do_receive(nc, buf, size);
2409     } else if (ret == RSC_FINAL) {
2410         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2411                 ((hdr_len + sizeof(struct eth_header)) + 12),
2412                 VIRTIO_NET_IP4_ADDR_SIZE,
2413                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2414     }
2415 
2416     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2417 }
2418 
/*
 * Check whether an IPv6 packet is a candidate for coalescing.
 * Extension headers, ECN, non-TCP and bogus payload lengths are bypassed.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv6 packet (note: no stat counter is bumped in this case). */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* IPv6 payload length excludes the IP header itself. */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2452 
2453 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2454                                       const uint8_t *buf, size_t size)
2455 {
2456     int32_t ret;
2457     uint16_t hdr_len;
2458     VirtioNetRscChain *chain;
2459     VirtioNetRscUnit unit;
2460 
2461     chain = (VirtioNetRscChain *)opq;
2462     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2463 
2464     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2465         + sizeof(tcp_header))) {
2466         return virtio_net_do_receive(nc, buf, size);
2467     }
2468 
2469     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2470     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2471                                                  unit.ip, buf, size)) {
2472         return virtio_net_do_receive(nc, buf, size);
2473     }
2474 
2475     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2476     if (ret == RSC_BYPASS) {
2477         return virtio_net_do_receive(nc, buf, size);
2478     } else if (ret == RSC_FINAL) {
2479         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2480                 ((hdr_len + sizeof(struct eth_header)) + 8),
2481                 VIRTIO_NET_IP6_ADDR_SIZE,
2482                 hdr_len + sizeof(struct eth_header)
2483                 + sizeof(struct ip6_header));
2484     }
2485 
2486     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2487 }
2488 
2489 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2490                                                       NetClientState *nc,
2491                                                       uint16_t proto)
2492 {
2493     VirtioNetRscChain *chain;
2494 
2495     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2496         return NULL;
2497     }
2498 
2499     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2500         if (chain->proto == proto) {
2501             return chain;
2502         }
2503     }
2504 
2505     chain = g_malloc(sizeof(*chain));
2506     chain->n = n;
2507     chain->proto = proto;
2508     if (proto == (uint16_t)ETH_P_IP) {
2509         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2510         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2511     } else {
2512         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2513         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2514     }
2515     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2516                                       virtio_net_rsc_purge, chain);
2517     memset(&chain->stat, 0, sizeof(chain->stat));
2518 
2519     QTAILQ_INIT(&chain->buffers);
2520     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2521 
2522     return chain;
2523 }
2524 
/*
 * Top-level RSC receive: look at the ethertype and hand the packet to
 * the IPv4/IPv6 coalescing path when the corresponding feature is
 * enabled, otherwise fall back to the plain receive path.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /* NOTE(review): the size check uses host_hdr_len while the ethernet
     * header below is read at guest_hdr_len -- confirm the two are equal
     * whenever RSC is negotiated. */
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    /* htons() used as a byte swap: the ethertype is in network order. */
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
2553 
2554 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2555                                   size_t size)
2556 {
2557     VirtIONet *n = qemu_get_nic_opaque(nc);
2558     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2559         return virtio_net_rsc_receive(nc, buf, size);
2560     } else {
2561         return virtio_net_do_receive(nc, buf, size);
2562     }
2563 }
2564 
2565 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2566 
/*
 * Completion callback for an asynchronous TX: retire the in-flight
 * element, re-enable queue notifications and flush whatever queued up
 * in the meantime.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret >= n->tx_burst) {
        /*
         * the flush has been stopped by tx_burst
         * we will not receive notification for the
         * remainining part, so re-schedule
         */
        virtio_queue_set_notification(q->tx_vq, 0);
        if (q->tx_bh) {
            qemu_bh_schedule(q->tx_bh);
        } else {
            timer_mod(q->tx_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        }
        q->tx_waiting = 1;
    }
}
2598 
/* TX */
/*
 * Drain up to tx_burst packets from the TX virtqueue and send them out.
 *
 * Returns the number of packets flushed, -EINVAL on malformed
 * descriptors (the device is put into the error state), or -EBUSY when
 * the backend could not accept a packet and an async completion is
 * pending (virtio_net_tx_complete resumes the flush).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* An async send is still in flight; wait for its completion. */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* Byte-swap our header copy and splice it in front of
                 * the remaining payload iovecs via sg2. */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Backend is busy: park the element and stop flushing until
             * the completion callback fires. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2694 
2695 static void virtio_net_tx_timer(void *opaque);
2696 
/*
 * TX virtqueue kick handler for the "timer" mitigation mode: rather than
 * flushing on every guest kick, arm q->tx_timer so kicks arriving within
 * n->tx_timeout ns are coalesced into a single flush.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* Link is down: drain the queue so the guest doesn't stall on a full vq */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        /* Record the kick; the flush will happen once the VM resumes. */
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        /* Suppress further kick notifications until the timer fires */
        virtio_queue_set_notification(vq, 0);
    }
}
2725 
/*
 * TX virtqueue kick handler for the "bh" (bottom-half) mode: defer the
 * actual flush to q->tx_bh so it runs outside the vmexit path.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* Link is down: drain the queue so the guest doesn't stall on a full vq */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A flush is already pending; nothing more to do for this kick. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    /* Suppress further kicks until the BH has flushed the queue */
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2747 
/*
 * Timer callback (and direct helper) for the "timer" TX mitigation mode:
 * flush pending TX buffers, then decide whether to re-arm the timer or
 * re-enable guest kick notifications based on how much was flushed.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        /* -EBUSY: async send in flight (tx_complete resumes); -EINVAL: broken */
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}
2797 
/*
 * Bottom-half callback for the "bh" TX mode: flush pending TX buffers and
 * either reschedule ourselves (full burst flushed, likely more coming) or
 * re-enable guest kick notifications.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        /* Device became broken during the second flush; give up quietly. */
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2846 
/*
 * Create the rx/tx virtqueue pair for queue pair 'index' and its TX
 * mitigation machinery (either a timer or a bottom half, chosen by the
 * "tx" property).  RX and TX vqs are added in that order, giving the
 * index*2 / index*2+1 layout that virtio_net_del_queue() relies on.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        /* Default ("bh" or unset): flush from a bottom half */
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}
2871 
/*
 * Tear down the rx/tx virtqueue pair for queue pair 'index', releasing
 * whichever TX mitigation resource (timer or BH) virtio_net_add_queue()
 * created.  Mirrors the index*2 (rx) / index*2+1 (tx) vq layout.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* Drop packets queued in the net layer for this subqueue */
    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    /* Exactly one of tx_timer/tx_bh is set, depending on the tx mode */
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2891 
/*
 * Resize the device to 'new_max_queue_pairs' rx/tx pairs.  The total vq
 * count is always 2 * pairs + 1 (the trailing ctrl vq), so the ctrl vq
 * has to be removed first and re-added last to keep it at the end.
 */
static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    /* At least one rx/tx pair plus the ctrl vq, and always an odd count */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2926 
2927 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2928 {
2929     int max = multiqueue ? n->max_queue_pairs : 1;
2930 
2931     n->multiqueue = multiqueue;
2932     virtio_net_change_num_queue_pairs(n, max);
2933 
2934     virtio_net_set_queue_pairs(n);
2935 }
2936 
/*
 * Device-level post_load hook: reconstruct all runtime state that is not
 * (or cannot be) migrated directly — header layout, MAC filter bounds,
 * offloads, per-subqueue link state, announce timer and RSS programming.
 * Returns 0 on success (this hook never fails).
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    /* Re-derive header size/merging from the negotiated features */
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        /* Multicast MACs have the low bit of the first octet set */
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume any in-progress GARP/announce sequence from the source */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Hash population forces the software RSS path */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                /* eBPF steering unavailable; vhost cannot fall back */
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
3019 
3020 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3021 {
3022     VirtIONet *n = VIRTIO_NET(vdev);
3023     /*
3024      * The actual needed state is now in saved_guest_offloads,
3025      * see virtio_net_post_load_device for detail.
3026      * Restore it back and apply the desired offloads.
3027      */
3028     n->curr_guest_offloads = n->saved_guest_offloads;
3029     if (peer_has_vnet_hdr(n)) {
3030         virtio_net_apply_guest_offloads(n);
3031     }
3032 
3033     return 0;
3034 }
3035 
/* tx_waiting field of a VirtIONetQueue; the only per-queue state that
 * needs to survive migration (pending-flush flag for the TX path). */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
3044 
3045 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3046 {
3047     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3048 }
3049 
3050 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3051 {
3052     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3053                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3054 }
3055 
3056 static bool mac_table_fits(void *opaque, int version_id)
3057 {
3058     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3059 }
3060 
3061 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3062 {
3063     return !mac_table_fits(opaque, version_id);
3064 }
3065 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;              /* device being saved/loaded */
    VirtIONetQueue *vqs_1;               /* &parent->vqs[1]: queues after the first */
    uint16_t        curr_queue_pairs_1;  /* count of those extra queue pairs */
    uint8_t         has_ufo;             /* mirrored parent->has_ufo for the stream */
    uint32_t        has_vnet_hdr;        /* mirrored parent->has_vnet_hdr */
};
3076 
3077 /* The 2nd and subsequent tx_waiting flags are loaded later than
3078  * the 1st entry in the queue_pairs and only if there's more than one
3079  * entry.  We use the tmp mechanism to calculate a temporary
3080  * pointer and count and also validate the count.
3081  */
3082 
3083 static int virtio_net_tx_waiting_pre_save(void *opaque)
3084 {
3085     struct VirtIONetMigTmp *tmp = opaque;
3086 
3087     tmp->vqs_1 = tmp->parent->vqs + 1;
3088     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3089     if (tmp->parent->curr_queue_pairs == 0) {
3090         tmp->curr_queue_pairs_1 = 0;
3091     }
3092 
3093     return 0;
3094 }
3095 
3096 static int virtio_net_tx_waiting_pre_load(void *opaque)
3097 {
3098     struct VirtIONetMigTmp *tmp = opaque;
3099 
3100     /* Reuse the pointer setup from save */
3101     virtio_net_tx_waiting_pre_save(opaque);
3102 
3103     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3104         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3105             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3106 
3107         return -EINVAL;
3108     }
3109 
3110     return 0; /* all good */
3111 }
3112 
/* WITH_TMP subsection carrying the tx_waiting flags of queue pairs 1..N-1
 * as a variable-length array sized by curr_queue_pairs_1. */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3125 
3126 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3127  * flag set we need to check that we have it
3128  */
3129 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3130 {
3131     struct VirtIONetMigTmp *tmp = opaque;
3132 
3133     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3134         error_report("virtio-net: saved image requires TUN_F_UFO support");
3135         return -EINVAL;
3136     }
3137 
3138     return 0;
3139 }
3140 
3141 static int virtio_net_ufo_pre_save(void *opaque)
3142 {
3143     struct VirtIONetMigTmp *tmp = opaque;
3144 
3145     tmp->has_ufo = tmp->parent->has_ufo;
3146 
3147     return 0;
3148 }
3149 
/* WITH_TMP subsection carrying the source's UFO capability flag so the
 * destination can refuse an incompatible migration. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3159 
3160 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3161  * flag set we need to check that we have it
3162  */
3163 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3164 {
3165     struct VirtIONetMigTmp *tmp = opaque;
3166 
3167     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3168         error_report("virtio-net: saved image requires vnet_hdr=on");
3169         return -EINVAL;
3170     }
3171 
3172     return 0;
3173 }
3174 
3175 static int virtio_net_vnet_pre_save(void *opaque)
3176 {
3177     struct VirtIONetMigTmp *tmp = opaque;
3178 
3179     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3180 
3181     return 0;
3182 }
3183 
/* WITH_TMP subsection carrying the source's vnet_hdr capability flag so
 * the destination can refuse an incompatible migration. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3193 
3194 static bool virtio_net_rss_needed(void *opaque)
3195 {
3196     return VIRTIO_NET(opaque)->rss_data.enabled;
3197 }
3198 
/* Optional subsection with the full RSS configuration (hash types, key,
 * indirection table); present in the stream only when RSS is enabled. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* indirections_table is heap-allocated; ALLOC lets load create it */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3219 
/* Main device section of the migration stream; field order is ABI and
 * must never change (VIRTIO_NET_VM_VERSION pins the format). */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue pair 0; the rest come via the WITH_TMP below */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* max_queue_pairs must match exactly between source and destination */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3275 
/* Net-layer client callbacks registered for each virtio-net subqueue. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3285 
/*
 * Guest-notifier "pending" query for vhost: map a virtqueue index to the
 * corresponding net subqueue, validating the ctrl-vq special case (idx 2
 * when MQ is off) against the negotiated features first.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        /* ctrl vq state lives in the extra subqueue past the data pairs */
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3307 
/*
 * Guest-notifier mask/unmask for vhost: same index mapping and ctrl-vq
 * validation as virtio_net_guest_notifier_pending(), then forward the
 * mask request to the vhost backend.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        /* ctrl vq state lives in the extra subqueue past the data pairs */
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
3331 
/*
 * Compute n->config_size from the host feature set.  host_features is
 * passed by value, so forcing VIRTIO_NET_F_MAC here only affects the
 * size calculation (the mac field is always present in the config space),
 * not the features actually offered to the guest.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}
3338 
3339 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3340                                    const char *type)
3341 {
3342     /*
3343      * The name can be NULL, the netclient name will be type.x.
3344      */
3345     assert(type != NULL);
3346 
3347     g_free(n->netclient_name);
3348     g_free(n->netclient_type);
3349     n->netclient_name = g_strdup(name);
3350     n->netclient_type = g_strdup(type);
3351 }
3352 
3353 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3354 {
3355     HotplugHandler *hotplug_ctrl;
3356     PCIDevice *pci_dev;
3357     Error *err = NULL;
3358 
3359     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3360     if (hotplug_ctrl) {
3361         pci_dev = PCI_DEVICE(dev);
3362         pci_dev->partially_hotplugged = true;
3363         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3364         if (err) {
3365             error_report_err(err);
3366             return false;
3367         }
3368     } else {
3369         return false;
3370     }
3371     return true;
3372 }
3373 
/*
 * Re-plug a failover primary that was partially unplugged for migration
 * (e.g. after migration failed).  Re-parents the device, unhides it, and
 * replays the hotplug pre_plug/plug sequence.  Returns true on success;
 * on failure propagates the error into *errp and returns false.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    /* Nothing to do unless failover_unplug_primary() ran on this device */
    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    /* Make the primary visible to the guest again */
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3406 
/*
 * React to migration state changes for the failover primary: unplug it
 * when migration setup starts (the guest falls back to virtio-net), and
 * plug it back if the migration ends up failing.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            /* Keep the primary out of the migration stream */
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3436 
3437 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3438 {
3439     MigrationState *s = data;
3440     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3441     virtio_net_handle_migration_primary(n, s);
3442 }
3443 
/*
 * DeviceListener hide_device hook: decide whether a device being created
 * is this virtio-net's failover primary (its failover_pair_id matches
 * our netclient name) and, if so, whether it should currently be hidden
 * from the guest.  Also stashes the primary's creation options so the
 * device can be re-created/plugged later.
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    /* Only devices declaring a failover pairing are of interest */
    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    /* Ignore primaries paired with some other standby device */
    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3494 
3495 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3496 {
3497     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3498     VirtIONet *n = VIRTIO_NET(dev);
3499     NetClientState *nc;
3500     int i;
3501 
3502     if (n->net_conf.mtu) {
3503         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3504     }
3505 
3506     if (n->net_conf.duplex_str) {
3507         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3508             n->net_conf.duplex = DUPLEX_HALF;
3509         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3510             n->net_conf.duplex = DUPLEX_FULL;
3511         } else {
3512             error_setg(errp, "'duplex' must be 'half' or 'full'");
3513             return;
3514         }
3515         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3516     } else {
3517         n->net_conf.duplex = DUPLEX_UNKNOWN;
3518     }
3519 
3520     if (n->net_conf.speed < SPEED_UNKNOWN) {
3521         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3522         return;
3523     }
3524     if (n->net_conf.speed >= 0) {
3525         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3526     }
3527 
3528     if (n->failover) {
3529         n->primary_listener.hide_device = failover_hide_primary_device;
3530         qatomic_set(&n->failover_primary_hidden, true);
3531         device_listener_register(&n->primary_listener);
3532         n->migration_state.notify = virtio_net_migration_state_notifier;
3533         add_migration_state_change_notifier(&n->migration_state);
3534         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3535     }
3536 
3537     virtio_net_set_config_size(n, n->host_features);
3538     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3539 
3540     /*
3541      * We set a lower limit on RX queue size to what it always was.
3542      * Guests that want a smaller ring can always resize it without
3543      * help from us (using virtio 1 and up).
3544      */
3545     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3546         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3547         !is_power_of_2(n->net_conf.rx_queue_size)) {
3548         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3549                    "must be a power of 2 between %d and %d.",
3550                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3551                    VIRTQUEUE_MAX_SIZE);
3552         virtio_cleanup(vdev);
3553         return;
3554     }
3555 
3556     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3557         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3558         !is_power_of_2(n->net_conf.tx_queue_size)) {
3559         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3560                    "must be a power of 2 between %d and %d",
3561                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3562                    VIRTQUEUE_MAX_SIZE);
3563         virtio_cleanup(vdev);
3564         return;
3565     }
3566 
3567     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3568 
3569     /*
3570      * Figure out the datapath queue pairs since the backend could
3571      * provide control queue via peers as well.
3572      */
3573     if (n->nic_conf.peers.queues) {
3574         for (i = 0; i < n->max_ncs; i++) {
3575             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3576                 ++n->max_queue_pairs;
3577             }
3578         }
3579     }
3580     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3581 
3582     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3583         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3584                    "must be a positive integer less than %d.",
3585                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3586         virtio_cleanup(vdev);
3587         return;
3588     }
3589     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3590     n->curr_queue_pairs = 1;
3591     n->tx_timeout = n->net_conf.txtimer;
3592 
3593     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3594                        && strcmp(n->net_conf.tx, "bh")) {
3595         warn_report("virtio-net: "
3596                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3597                     n->net_conf.tx);
3598         error_printf("Defaulting to \"bh\"");
3599     }
3600 
3601     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3602                                     n->net_conf.tx_queue_size);
3603 
3604     for (i = 0; i < n->max_queue_pairs; i++) {
3605         virtio_net_add_queue(n, i);
3606     }
3607 
3608     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3609     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3610     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3611     n->status = VIRTIO_NET_S_LINK_UP;
3612     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3613                               QEMU_CLOCK_VIRTUAL,
3614                               virtio_net_announce_timer, n);
3615     n->announce_timer.round = 0;
3616 
3617     if (n->netclient_type) {
3618         /*
3619          * Happen when virtio_net_set_netclient_name has been called.
3620          */
3621         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3622                               n->netclient_type, n->netclient_name, n);
3623     } else {
3624         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3625                               object_get_typename(OBJECT(dev)), dev->id, n);
3626     }
3627 
3628     for (i = 0; i < n->max_queue_pairs; i++) {
3629         n->nic->ncs[i].do_not_pad = true;
3630     }
3631 
3632     peer_test_vnet_hdr(n);
3633     if (peer_has_vnet_hdr(n)) {
3634         for (i = 0; i < n->max_queue_pairs; i++) {
3635             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3636         }
3637         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3638     } else {
3639         n->host_hdr_len = 0;
3640     }
3641 
3642     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3643 
3644     n->vqs[0].tx_waiting = 0;
3645     n->tx_burst = n->net_conf.txburst;
3646     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3647     n->promisc = 1; /* for compatibility */
3648 
3649     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3650 
3651     n->vlans = g_malloc0(MAX_VLAN >> 3);
3652 
3653     nc = qemu_get_queue(n->nic);
3654     nc->rxfilter_notify_enabled = 1;
3655 
3656    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3657         struct virtio_net_config netcfg = {};
3658         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3659         vhost_net_set_config(get_vhost_net(nc->peer),
3660             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3661     }
3662     QTAILQ_INIT(&n->rsc_chains);
3663     n->qdev = dev;
3664 
3665     net_rx_pkt_init(&n->rx_pkt, false);
3666 
3667     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3668         virtio_net_load_ebpf(n);
3669     }
3670 }
3671 
/*
 * Tear down everything virtio_net_device_realize() set up: stop the
 * backend, unregister failover hooks, delete virtqueues and timers,
 * and release all heap allocations.  The ordering below is load-bearing
 * (backend stopped first, virtio_cleanup() last).
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* Failover state (opts, listener, notifier) only exists with failover=on. */
    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    /*
     * NOTE(review): only one pair is deleted unless multiqueue is set —
     * presumably the remaining pairs are torn down elsewhere in that
     * case; verify against virtio_net_set_multiqueue.
     */
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3715 
3716 static void virtio_net_instance_init(Object *obj)
3717 {
3718     VirtIONet *n = VIRTIO_NET(obj);
3719 
3720     /*
3721      * The default config_size is sizeof(struct virtio_net_config).
3722      * Can be overriden with virtio_net_set_config_size.
3723      */
3724     n->config_size = sizeof(struct virtio_net_config);
3725     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3726                                   "bootindex", "/ethernet-phy@0",
3727                                   DEVICE(n));
3728 
3729     ebpf_rss_init(&n->ebpf_rss);
3730 }
3731 
3732 static int virtio_net_pre_save(void *opaque)
3733 {
3734     VirtIONet *n = opaque;
3735 
3736     /* At this point, backend must be stopped, otherwise
3737      * it might keep writing to memory. */
3738     assert(!n->vhost_started);
3739 
3740     return 0;
3741 }
3742 
3743 static bool primary_unplug_pending(void *opaque)
3744 {
3745     DeviceState *dev = opaque;
3746     DeviceState *primary;
3747     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3748     VirtIONet *n = VIRTIO_NET(vdev);
3749 
3750     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3751         return false;
3752     }
3753     primary = failover_find_primary_device(n);
3754     return primary ? primary->pending_deleted_event : false;
3755 }
3756 
3757 static bool dev_unplug_pending(void *opaque)
3758 {
3759     DeviceState *dev = opaque;
3760     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3761 
3762     return vdc->primary_unplug_pending(dev);
3763 }
3764 
/*
 * Return the vhost_dev backing the primary queue's peer.
 *
 * NOTE(review): get_vhost_net() may return NULL when the peer has no
 * vhost backend, in which case &net->dev is computed from a NULL
 * pointer — TODO confirm callers only invoke this hook when a vhost
 * backend is known to be present.
 */
static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct vhost_net *net = get_vhost_net(nc->peer);
    return &net->dev;
}
3772 
/*
 * Outer migration descriptor for the virtio-net device.  The actual
 * device payload is carried by the device-level vmsd assigned to
 * vdc->vmsd in virtio_net_class_init; this wrapper adds the pre-save
 * sanity check and the failover unplug-pending hook.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3784 
/* qdev properties: feature-bit toggles plus tunables for virtio-net. */
static Property virtio_net_properties[] = {
    /* Checksum and segmentation offload feature bits (default on). */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    /* Link status and control virtqueue features (default on). */
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue, RSS/hash reporting and RSC are opt-in (default off). */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    /* TX batching/timing knobs and queue sizing, validated in realize. */
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Link speed/duplex reporting and failover support. */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3851 
/*
 * Class init: register the qdev properties and wire the virtio-net
 * implementation hooks into the generic virtio device class.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    /* Outer vmsd; the device-level payload vmsd is set on vdc->vmsd below. */
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    /* GSO is exposed through legacy_features rather than host_features. */
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    /* Failover hooks used by the migration unplug-pending machinery. */
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
}
3879 
/* QOM type registration boilerplate for TYPE_VIRTIO_NET. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

/* Called once at startup via type_init() to register the type. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
3894