xref: /openbmc/qemu/hw/net/virtio-net.c (revision 587d82fa)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length value in ip header without option */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval, This value affects the performance
78    a lot, and should be tuned carefully, '300000'(300us) is the recommended
79    value to pass the WHQL test, '50000' can gain 2x netperf throughput with
80    tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
/*
 * Per-feature config space sizing: each entry maps a feature bit (or
 * set of bits) to the last config field a guest negotiating that
 * feature must be able to access.  Terminated by an empty entry.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
108 
/*
 * Bounds for the device config size: at minimum up to the MAC field,
 * at most the whole struct, with feature_sizes refining the actual
 * size from the negotiated feature bits.
 */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
/*
 * Map a virtqueue index to its queue-pair index: virtqueues come in
 * RX/TX pairs, so pair i owns virtqueues 2*i and 2*i + 1.
 */
static int vq2q(int vq_index)
{
    return vq_index / 2;
}
126 
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
/*
 * Fill @config with the current device configuration.
 *
 * The config is first built from QEMU's own state; for a vhost-vdpa
 * peer it is then overridden by the backend's view, except that an
 * all-zero MAC reported by the backend is ignored in favour of the
 * MAC from the QEMU command line.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without RSS the indirection table degenerates to a single entry */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
187 
/*
 * Apply a guest write to the device config.
 *
 * Only the MAC may legitimately change this way, and only for legacy
 * guests (no VERSION_1, no CTRL_MAC_ADDR — modern guests use the
 * control queue instead).  For a vhost-vdpa peer the new config is
 * also pushed down to the backend.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
      }
}
213 
214 static bool virtio_net_started(VirtIONet *n, uint8_t status)
215 {
216     VirtIODevice *vdev = VIRTIO_DEVICE(n);
217     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
218         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
219 }
220 
/*
 * Raise the ANNOUNCE status bit and kick a config interrupt so the
 * guest sends a gratuitous self-announcement (e.g. after migration).
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
229 
/*
 * Announce-timer callback: consume one round and prod the guest to
 * announce itself again.
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
238 
239 static void virtio_net_announce(NetClientState *nc)
240 {
241     VirtIONet *n = qemu_get_nic_opaque(nc);
242     VirtIODevice *vdev = VIRTIO_DEVICE(n);
243 
244     /*
245      * Make sure the virtio migration announcement timer isn't running
246      * If it is, let it trigger announcement so that we do not cause
247      * confusion.
248      */
249     if (n->announce_timer.round) {
250         return;
251     }
252 
253     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
254         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
255             virtio_net_announce_notify(n);
256     }
257 }
258 
/*
 * Bring vhost acceleration in line with the desired device status:
 * start vhost-net when the device should be running and it is not,
 * stop it in the opposite case, and do nothing when already in sync.
 *
 * Falls back to userspace virtio (returns without starting) when the
 * backend cannot handle the guest's vnet header endianness or the
 * configured MTU.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* The control virtqueue, if any, lives after the data queue pairs */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the desired state (started/stopped)?  Nothing to do. */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set the flag before starting: vhost_net_start may consult it */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
318 
319 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
320                                           NetClientState *peer,
321                                           bool enable)
322 {
323     if (virtio_is_big_endian(vdev)) {
324         return qemu_set_vnet_be(peer, enable);
325     } else {
326         return qemu_set_vnet_le(peer, enable);
327     }
328 }
329 
/*
 * Configure vnet header endianness on all queue-pair backends.
 *
 * Returns true if a backend refused while enabling — in that case all
 * previously configured backends are rolled back, and the caller must
 * fall back to swapping headers in software.  Failures while disabling
 * are ignored.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* Undo the queues already configured, in reverse order */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
348 
/*
 * Track device start/stop transitions and (re)program the backends'
 * vnet header endianness accordingly.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
371 
372 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
373 {
374     unsigned int dropped = virtqueue_drop_all(vq);
375     if (dropped) {
376         virtio_notify(vdev, vq);
377     }
378 }
379 
/*
 * Device status change handler: propagate the new status to the
 * endianness and vhost machinery, then walk every queue pair and
 * start/stop its userspace TX processing (timer or bottom half) to
 * match whether that particular queue should be active.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond the current count are treated as stopped */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Userspace processing only; vhost handles the rings itself */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm whichever TX mechanism this queue uses */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
436 
437 static void virtio_net_set_link_status(NetClientState *nc)
438 {
439     VirtIONet *n = qemu_get_nic_opaque(nc);
440     VirtIODevice *vdev = VIRTIO_DEVICE(n);
441     uint16_t old_status = n->status;
442 
443     if (nc->link_down)
444         n->status &= ~VIRTIO_NET_S_LINK_UP;
445     else
446         n->status |= VIRTIO_NET_S_LINK_UP;
447 
448     if (n->status != old_status)
449         virtio_notify_config(vdev);
450 
451     virtio_net_set_status(vdev, vdev->status);
452 }
453 
/*
 * Emit a NIC_RX_FILTER_CHANGED QMP event for this NIC, then disable
 * further events until management re-queries the filter (prevents
 * event flooding from rapid guest filter updates).
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
467 
468 static intList *get_vlan_table(VirtIONet *n)
469 {
470     intList *list;
471     int i, j;
472 
473     list = NULL;
474     for (i = 0; i < MAX_VLAN >> 5; i++) {
475         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
476             if (n->vlans[i] & (1U << j)) {
477                 QAPI_LIST_PREPEND(list, (i << 5) + j);
478             }
479         }
480     }
481 
482     return list;
483 }
484 
485 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
486 {
487     VirtIONet *n = qemu_get_nic_opaque(nc);
488     VirtIODevice *vdev = VIRTIO_DEVICE(n);
489     RxFilterInfo *info;
490     strList *str_list;
491     int i;
492 
493     info = g_malloc0(sizeof(*info));
494     info->name = g_strdup(nc->name);
495     info->promiscuous = n->promisc;
496 
497     if (n->nouni) {
498         info->unicast = RX_STATE_NONE;
499     } else if (n->alluni) {
500         info->unicast = RX_STATE_ALL;
501     } else {
502         info->unicast = RX_STATE_NORMAL;
503     }
504 
505     if (n->nomulti) {
506         info->multicast = RX_STATE_NONE;
507     } else if (n->allmulti) {
508         info->multicast = RX_STATE_ALL;
509     } else {
510         info->multicast = RX_STATE_NORMAL;
511     }
512 
513     info->broadcast_allowed = n->nobcast;
514     info->multicast_overflow = n->mac_table.multi_overflow;
515     info->unicast_overflow = n->mac_table.uni_overflow;
516 
517     info->main_mac = qemu_mac_strdup_printf(n->mac);
518 
519     str_list = NULL;
520     for (i = 0; i < n->mac_table.first_multi; i++) {
521         QAPI_LIST_PREPEND(str_list,
522                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
523     }
524     info->unicast_table = str_list;
525 
526     str_list = NULL;
527     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
528         QAPI_LIST_PREPEND(str_list,
529                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
530     }
531     info->multicast_table = str_list;
532     info->vlan_table = get_vlan_table(n);
533 
534     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
535         info->vlan = RX_STATE_ALL;
536     } else if (!info->vlan_table) {
537         info->vlan = RX_STATE_NONE;
538     } else {
539         info->vlan = RX_STATE_NORMAL;
540     }
541 
542     /* enable event notification after query */
543     nc->rxfilter_notify_enabled = 1;
544 
545     return info;
546 }
547 
/*
 * Per-virtqueue reset handler (VIRTIO_F_RING_RESET): stop the vhost
 * virtqueue for tap backends and drop any packets still queued toward
 * this queue's peer.  The control virtqueue (index beyond the data
 * queues) is ignored.
 */
static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}
571 
572 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
573 {
574     VirtIONet *n = VIRTIO_NET(vdev);
575     NetClientState *nc;
576     int r;
577 
578     /* validate queue_index and skip for cvq */
579     if (queue_index >= n->max_queue_pairs * 2) {
580         return;
581     }
582 
583     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
584 
585     if (!nc->peer || !vdev->vhost_started) {
586         return;
587     }
588 
589     if (get_vhost_net(nc->peer) &&
590         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
591         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
592         if (r < 0) {
593             error_report("unable to restart vhost net virtqueue: %d, "
594                             "when resetting the queue", queue_index);
595         }
596     }
597 }
598 
/*
 * Full device reset: return rx-mode flags to their legacy-compatible
 * defaults, stop any pending announcement, clear the MAC/VLAN filter
 * tables, restore the configured MAC, and purge all queued packets.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}
632 
633 static void peer_test_vnet_hdr(VirtIONet *n)
634 {
635     NetClientState *nc = qemu_get_queue(n->nic);
636     if (!nc->peer) {
637         return;
638     }
639 
640     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
641 }
642 
/* Cached result of peer_test_vnet_hdr(). */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
647 
648 static int peer_has_ufo(VirtIONet *n)
649 {
650     if (!peer_has_vnet_hdr(n))
651         return 0;
652 
653     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
654 
655     return n->has_ufo;
656 }
657 
/*
 * Select the guest-visible header layout from the negotiated features
 * (hash-report > mergeable-rxbuf > legacy) and, where the backends can
 * handle that header length natively, make the host header match so no
 * header rewriting is needed on the data path.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* virtio 1.0+: header is always the mergeable layout; the hash
         * variant additionally carries the RSS hash report */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
687 
688 static int virtio_net_max_tx_queue_size(VirtIONet *n)
689 {
690     NetClientState *peer = n->nic_conf.peers.ncs[0];
691 
692     /*
693      * Backends other than vhost-user or vhost-vdpa don't support max queue
694      * size.
695      */
696     if (!peer) {
697         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
698     }
699 
700     switch(peer->info->type) {
701     case NET_CLIENT_DRIVER_VHOST_USER:
702     case NET_CLIENT_DRIVER_VHOST_VDPA:
703         return VIRTQUEUE_MAX_SIZE;
704     default:
705         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
706     };
707 }
708 
/*
 * Enable the backend side of queue pair @index: re-enable the vring
 * for vhost-user peers and the tap queue for multiqueue tap peers.
 * Returns 0 on success (or when there is nothing to do).
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    /* Single-queue tap has no per-queue enable */
    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}
731 
/*
 * Disable the backend side of queue pair @index: mirror image of
 * peer_attach().  Returns 0 on success (or when there is nothing
 * to do).
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}
750 
/*
 * Attach the first curr_queue_pairs backend queues and detach the
 * rest, so the backend matches the guest's active queue-pair count.
 * Skipped when the peer is already gone (hot-unplug in progress).
 */
static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}
770 
771 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
772 
/*
 * Compute the feature set offered to the guest: start from the host
 * features, strip offloads the backend cannot support (no vnet header,
 * no UFO), and intersect with the vhost backend's capabilities when
 * one is present.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        /* All offloads depend on the backend understanding vnet headers */
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* No vhost backend: userspace virtio handles everything, including
     * ring reset */
    if (!get_vhost_net(nc->peer)) {
        virtio_add_feature(&features, VIRTIO_F_RING_RESET);
        return features;
    }

    /* RSS with vhost requires the eBPF steering program to be loaded */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally offer MTU even if the vhost backend doesn't support it */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
821 
/*
 * Feature set assumed by pre-negotiation guests (Linux 2.6.25 era):
 * returned when a legacy guest uses features without acknowledging
 * them first.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
836 
/*
 * Push the currently negotiated guest RX offloads (csum/TSO/ECN/UFO)
 * down to the peer backend.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}
846 
847 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
848 {
849     static const uint64_t guest_offloads_mask =
850         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
851         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
852         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
853         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
854         (1ULL << VIRTIO_NET_F_GUEST_UFO);
855 
856     return guest_offloads_mask & features;
857 }
858 
/* Guest offload bits actually negotiated by the driver. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
864 
/* Search context for locating a failover primary device. */
typedef struct {
    VirtIONet *n;     /* the failover virtio-net we search on behalf of */
    DeviceState *dev; /* result: the matching primary, or NULL */
} FailoverDevice;
869 
/**
 * qbus walk callback: check whether @dev is the failover primary for
 * the virtio-net in @opaque.
 *
 * @dev: candidate device being visited by the bus walk
 * @opaque: FailoverDevice search context; its .dev field receives the
 *          match
 *
 * Returns 1 to stop the walk when a PCI device whose failover_pair_id
 * matches our netclient name is found, 0 to continue.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    /* Only PCI devices can be failover primaries */
    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}
894 
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the primary DeviceState, or NULL if none is present yet.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    /* Walk the whole machine bus tree; failover_set_primary stops the
     * walk on the first match */
    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
911 
/*
 * Hotplug the failover primary device if it is not already present,
 * using the device options stashed at virtio-net realize time.  Sets
 * @errp (and drops the stashed options) on failure.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    /* Primary already exists: nothing to do */
    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        /* Options are consumed on failure so we don't retry with them */
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        /* qdev keeps its own reference; drop ours */
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
940 
/*
 * VirtIODeviceClass::set_features callback: the guest has acknowledged
 * @features.  Propagate the negotiated feature set through the device
 * model and to any vhost backend instances.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Hide MTU from the guest when the backend did not offer it. */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    /* Multiqueue is needed by both VIRTIO_NET_F_MQ and VIRTIO_NET_F_RSS. */
    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC is only effective together with the matching guest TSO flavour. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Let every vhost backend instance acknowledge the negotiated set. */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN the VLAN filter must pass everything (all ones). */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* Presumably no primary device exists under qtest; discard the
             * error there instead of warning. */
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
1004 
1005 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1006                                      struct iovec *iov, unsigned int iov_cnt)
1007 {
1008     uint8_t on;
1009     size_t s;
1010     NetClientState *nc = qemu_get_queue(n->nic);
1011 
1012     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1013     if (s != sizeof(on)) {
1014         return VIRTIO_NET_ERR;
1015     }
1016 
1017     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1018         n->promisc = on;
1019     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1020         n->allmulti = on;
1021     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1022         n->alluni = on;
1023     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1024         n->nomulti = on;
1025     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1026         n->nouni = on;
1027     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1028         n->nobcast = on;
1029     } else {
1030         return VIRTIO_NET_ERR;
1031     }
1032 
1033     rxfilter_notify(nc);
1034 
1035     return VIRTIO_NET_OK;
1036 }
1037 
/*
 * Handle VIRTIO_NET_CTRL_GUEST_OFFLOADS commands: the guest dynamically
 * enables/disables receive offloads it previously negotiated.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    /* The command is only legal if the feature was negotiated. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        /* Payload is guest-endian; convert before inspecting bits. */
        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        /* RSC state is tracked in dedicated flags; strip RSC_EXT before
         * validating against the backend-supported offload mask below. */
        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        /* Reject requests containing bits the device cannot honour. */
        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
1082 
/*
 * Handle VIRTIO_NET_CTRL_MAC commands: either set the primary MAC address
 * (MAC_ADDR_SET) or replace the whole MAC filter table (MAC_TABLE_SET).
 *
 * The MAC_TABLE_SET payload is two virtio_net_ctrl_mac structures back to
 * back: first the unicast list, then the multicast list.  The table is
 * parsed into a scratch buffer and committed only on full success.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* The payload must be exactly one 6-byte MAC address. */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Unicast section: read the guest-endian entry count. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* The announced entries must fit in the remaining payload. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Table too large: flag overflow so receive_filter() accepts all
         * unicast instead of filtering. */
        uni_overflow = 1;
    }

    /* Skip past the unicast addresses to reach the multicast section. */
    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Multicast section: same layout, appended after the unicast entries. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume the payload exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Overflowed multicast table: accept all multicast instead. */
        multi_overflow = 1;
    }

    /* Commit the fully-parsed table to the device state. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1178 
1179 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1180                                         struct iovec *iov, unsigned int iov_cnt)
1181 {
1182     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1183     uint16_t vid;
1184     size_t s;
1185     NetClientState *nc = qemu_get_queue(n->nic);
1186 
1187     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1188     vid = virtio_lduw_p(vdev, &vid);
1189     if (s != sizeof(vid)) {
1190         return VIRTIO_NET_ERR;
1191     }
1192 
1193     if (vid >= MAX_VLAN)
1194         return VIRTIO_NET_ERR;
1195 
1196     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1197         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1198     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1199         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1200     else
1201         return VIRTIO_NET_ERR;
1202 
1203     rxfilter_notify(nc);
1204 
1205     return VIRTIO_NET_OK;
1206 }
1207 
1208 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1209                                       struct iovec *iov, unsigned int iov_cnt)
1210 {
1211     trace_virtio_net_handle_announce(n->announce_timer.round);
1212     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1213         n->status & VIRTIO_NET_S_ANNOUNCE) {
1214         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1215         if (n->announce_timer.round) {
1216             qemu_announce_timer_step(&n->announce_timer);
1217         }
1218         return VIRTIO_NET_OK;
1219     } else {
1220         return VIRTIO_NET_ERR;
1221     }
1222 }
1223 
1224 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1225 
/*
 * Turn RSS off and detach any steering eBPF program from the backend.
 * Traces only on an actual enabled -> disabled transition.
 */
static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}
1235 
1236 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1237 {
1238     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1239     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1240         return false;
1241     }
1242 
1243     return nc->info->set_steering_ebpf(nc, prog_fd);
1244 }
1245 
1246 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1247                                    struct EBPFRSSConfig *config)
1248 {
1249     config->redirect = data->redirect;
1250     config->populate_hash = data->populate_hash;
1251     config->hash_types = data->hash_types;
1252     config->indirections_len = data->indirections_len;
1253     config->default_queue = data->default_queue;
1254 }
1255 
1256 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1257 {
1258     struct EBPFRSSConfig config = {};
1259 
1260     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1261         return false;
1262     }
1263 
1264     rss_data_to_rss_config(&n->rss_data, &config);
1265 
1266     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1267                           n->rss_data.indirections_table, n->rss_data.key)) {
1268         return false;
1269     }
1270 
1271     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1272         return false;
1273     }
1274 
1275     return true;
1276 }
1277 
/* Detach any steering eBPF program by passing -1 as the program fd. */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1282 
/*
 * Load the RSS eBPF program if the backend can accept one.  The probe
 * passes fd -1, which attaches nothing but exercises the backend hook.
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}
1292 
/* Detach the steering program from the backend and free the eBPF object. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1298 
1299 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1300                                       struct iovec *iov,
1301                                       unsigned int iov_cnt,
1302                                       bool do_rss)
1303 {
1304     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1305     struct virtio_net_rss_config cfg;
1306     size_t s, offset = 0, size_get;
1307     uint16_t queue_pairs, i;
1308     struct {
1309         uint16_t us;
1310         uint8_t b;
1311     } QEMU_PACKED temp;
1312     const char *err_msg = "";
1313     uint32_t err_value = 0;
1314 
1315     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1316         err_msg = "RSS is not negotiated";
1317         goto error;
1318     }
1319     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1320         err_msg = "Hash report is not negotiated";
1321         goto error;
1322     }
1323     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1324     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1325     if (s != size_get) {
1326         err_msg = "Short command buffer";
1327         err_value = (uint32_t)s;
1328         goto error;
1329     }
1330     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1331     n->rss_data.indirections_len =
1332         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1333     n->rss_data.indirections_len++;
1334     if (!do_rss) {
1335         n->rss_data.indirections_len = 1;
1336     }
1337     if (!is_power_of_2(n->rss_data.indirections_len)) {
1338         err_msg = "Invalid size of indirection table";
1339         err_value = n->rss_data.indirections_len;
1340         goto error;
1341     }
1342     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1343         err_msg = "Too large indirection table";
1344         err_value = n->rss_data.indirections_len;
1345         goto error;
1346     }
1347     n->rss_data.default_queue = do_rss ?
1348         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1349     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1350         err_msg = "Invalid default queue";
1351         err_value = n->rss_data.default_queue;
1352         goto error;
1353     }
1354     offset += size_get;
1355     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1356     g_free(n->rss_data.indirections_table);
1357     n->rss_data.indirections_table = g_malloc(size_get);
1358     if (!n->rss_data.indirections_table) {
1359         err_msg = "Can't allocate indirections table";
1360         err_value = n->rss_data.indirections_len;
1361         goto error;
1362     }
1363     s = iov_to_buf(iov, iov_cnt, offset,
1364                    n->rss_data.indirections_table, size_get);
1365     if (s != size_get) {
1366         err_msg = "Short indirection table buffer";
1367         err_value = (uint32_t)s;
1368         goto error;
1369     }
1370     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1371         uint16_t val = n->rss_data.indirections_table[i];
1372         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1373     }
1374     offset += size_get;
1375     size_get = sizeof(temp);
1376     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1377     if (s != size_get) {
1378         err_msg = "Can't get queue_pairs";
1379         err_value = (uint32_t)s;
1380         goto error;
1381     }
1382     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1383     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1384         err_msg = "Invalid number of queue_pairs";
1385         err_value = queue_pairs;
1386         goto error;
1387     }
1388     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1389         err_msg = "Invalid key size";
1390         err_value = temp.b;
1391         goto error;
1392     }
1393     if (!temp.b && n->rss_data.hash_types) {
1394         err_msg = "No key provided";
1395         err_value = 0;
1396         goto error;
1397     }
1398     if (!temp.b && !n->rss_data.hash_types) {
1399         virtio_net_disable_rss(n);
1400         return queue_pairs;
1401     }
1402     offset += size_get;
1403     size_get = temp.b;
1404     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1405     if (s != size_get) {
1406         err_msg = "Can get key buffer";
1407         err_value = (uint32_t)s;
1408         goto error;
1409     }
1410     n->rss_data.enabled = true;
1411 
1412     if (!n->rss_data.populate_hash) {
1413         if (!virtio_net_attach_epbf_rss(n)) {
1414             /* EBPF must be loaded for vhost */
1415             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1416                 warn_report("Can't load eBPF RSS for vhost");
1417                 goto error;
1418             }
1419             /* fallback to software RSS */
1420             warn_report("Can't load eBPF RSS - fallback to software RSS");
1421             n->rss_data.enabled_software_rss = true;
1422         }
1423     } else {
1424         /* use software RSS for hash populating */
1425         /* and detach eBPF if was loaded before */
1426         virtio_net_detach_epbf_rss(n);
1427         n->rss_data.enabled_software_rss = true;
1428     }
1429 
1430     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1431                                 n->rss_data.indirections_len,
1432                                 temp.b);
1433     return queue_pairs;
1434 error:
1435     trace_virtio_net_rss_error(err_msg, err_value);
1436     virtio_net_disable_rss(n);
1437     return 0;
1438 }
1439 
/*
 * Handle VIRTIO_NET_CTRL_MQ commands: set the number of active queue pairs,
 * either directly (VQ_PAIRS_SET) or via the RSS / hash-report config.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Every MQ command supersedes any previously programmed RSS state. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash report only: queue pair count is unchanged on success. */
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Validate the requested count against spec limits and device config. */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1492 
/*
 * Execute one control-virtqueue request supplied as scatter/gather lists.
 *
 * @in_sg/@in_num: device-writable buffers; receives the one-byte status.
 * @out_sg/@out_num: driver-written buffers; ctrl header plus payload.
 *
 * Returns the number of bytes written back (sizeof(status)), or 0 when the
 * request lacks the mandatory headers (the device is marked broken via
 * virtio_error in that case).
 */
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    /* Work on a copy: iov_discard_front() modifies the vector in place. */
    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    /* Dispatch on the command class; unknown classes keep VIRTIO_NET_ERR. */
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    /* Write the one-byte status back for the guest. */
    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
1535 
1536 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1537 {
1538     VirtQueueElement *elem;
1539 
1540     for (;;) {
1541         size_t written;
1542         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1543         if (!elem) {
1544             break;
1545         }
1546 
1547         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1548                                              elem->out_sg, elem->out_num);
1549         if (written > 0) {
1550             virtqueue_push(vq, elem, written);
1551             virtio_notify(vdev, vq);
1552             g_free(elem);
1553         } else {
1554             virtqueue_detach_element(vq, elem, 0);
1555             g_free(elem);
1556             break;
1557         }
1558     }
1559 }
1560 
1561 /* RX */
1562 
1563 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1564 {
1565     VirtIONet *n = VIRTIO_NET(vdev);
1566     int queue_index = vq2q(virtio_get_queue_index(vq));
1567 
1568     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1569 }
1570 
1571 static bool virtio_net_can_receive(NetClientState *nc)
1572 {
1573     VirtIONet *n = qemu_get_nic_opaque(nc);
1574     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1575     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1576 
1577     if (!vdev->vm_running) {
1578         return false;
1579     }
1580 
1581     if (nc->queue_index >= n->curr_queue_pairs) {
1582         return false;
1583     }
1584 
1585     if (!virtio_queue_ready(q->rx_vq) ||
1586         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1587         return false;
1588     }
1589 
1590     return true;
1591 }
1592 
/*
 * Check whether the RX virtqueue can take a packet of @bufsize bytes.
 * Returns 1 with guest notifications disabled when buffers are available,
 * 0 with notifications enabled (so the guest's next refill kicks us)
 * otherwise.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    /* Buffers exist; suppress notifications while we consume them. */
    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1615 
/*
 * Byte-swap the 16-bit fields of a virtio-net header to the device's
 * target endianness (flags and gso_type are single bytes and need none).
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1623 
1624 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1625  * it never finds out that the packets don't have valid checksums.  This
1626  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1627  * fix this with Xen but it hasn't appeared in an upstream release of
1628  * dhclient yet.
1629  *
1630  * To avoid breaking existing guests, we catch udp packets and add
1631  * checksums.  This is terrible but it's better than hacking the guest
1632  * kernels.
1633  *
1634  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1635  * we should provide a mechanism to disable it to avoid polluting the host
1636  * cache.
1637  */
/*
 * If @buf (an Ethernet frame) is a DHCP server reply whose checksum was
 * deferred (NEEDS_CSUM), compute the UDP checksum on the host and clear
 * the flag.  Byte offsets assume an untagged frame with a 20-byte IPv4
 * header (no IP options): 12/13 = ethertype, 23 = IP protocol,
 * 34/35 = UDP source port.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1650 
/*
 * Write the virtio-net header the guest will see to the front of @iov.
 * With a backend vnet header, fix up DHCP checksums in place and swap the
 * header to guest endianness if required; otherwise synthesize an empty
 * header (no flags, no GSO).
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        /* Backend provided no vnet header; give the guest an empty one. */
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1672 
1673 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1674 {
1675     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1676     static const uint8_t vlan[] = {0x81, 0x00};
1677     uint8_t *ptr = (uint8_t *)buf;
1678     int i;
1679 
1680     if (n->promisc)
1681         return 1;
1682 
1683     ptr += n->host_hdr_len;
1684 
1685     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1686         int vid = lduw_be_p(ptr + 14) & 0xfff;
1687         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1688             return 0;
1689     }
1690 
1691     if (ptr[0] & 1) { // multicast
1692         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1693             return !n->nobcast;
1694         } else if (n->nomulti) {
1695             return 0;
1696         } else if (n->allmulti || n->mac_table.multi_overflow) {
1697             return 1;
1698         }
1699 
1700         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1701             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1702                 return 1;
1703             }
1704         }
1705     } else { // unicast
1706         if (n->nouni) {
1707             return 0;
1708         } else if (n->alluni || n->mac_table.uni_overflow) {
1709             return 1;
1710         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1711             return 1;
1712         }
1713 
1714         for (i = 0; i < n->mac_table.first_multi; i++) {
1715             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1716                 return 1;
1717             }
1718         }
1719     }
1720 
1721     return 0;
1722 }
1723 
/*
 * Map the packet's protocol flags plus the enabled VIRTIO_NET_RSS_HASH_TYPE
 * bits to a NetPktRss* hash kind, preferring L4 (TCP, then UDP) over plain
 * IP hashing.  Returns 0xff when no enabled hash type matches the packet.
 */
static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        /* The _EX variants additionally hash over IPv6 extension headers. */
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    /* No applicable hash type enabled for this packet. */
    return 0xff;
}
1761 
/*
 * Store the computed hash value and report type into the
 * virtio_net_hdr_v1_hash header at the head of the packet buffer.
 * NOTE(review): the fields are written in host endianness with no
 * virtio_stl_p/virtio_stw_p conversion -- confirm this is correct for
 * cross-endian guests.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1769 
/*
 * Compute the RSS hash of an incoming packet, optionally populate the hash
 * report in its header, and select the target RX queue through the
 * indirection table.
 *
 * Returns the queue index to redirect to, or -1 to keep the packet on the
 * queue it arrived on.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Maps NetPktRss* hash kinds to VIRTIO_NET_HASH_REPORT_* codes. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    /* Parse protocols starting at the Ethernet frame (skip vnet header). */
    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* IP fragments carry no usable L4 header; hash on L3 only. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No applicable hash type: report none, route to default queue. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so masking selects a slot. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1822 
/*
 * Deliver one packet to the guest RX virtqueue (caller holds the RCU
 * read lock).
 *
 * When software RSS is enabled and @no_rss is false, the packet may be
 * re-dispatched once to another subqueue (the recursive call passes
 * no_rss=true to prevent further redirection).  The packet is then
 * split across as many virtqueue elements as needed when mergeable RX
 * buffers are negotiated; otherwise it must fit into a single element.
 *
 * Returns @size on success or when the packet is filtered/dropped,
 * 0 when the guest has no buffers (caller should retry later),
 * and -1 on queue errors (the device is marked broken via
 * virtio_error()).
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* Elements are filled first and only published to the guest at the
     * end, so a mid-packet failure can detach all of them cleanly. */
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* Software RSS: possibly redirect the packet to another subqueue */
    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* MAC/VLAN/promisc filtering; a filtered packet counts as consumed */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry mid-packet is a guest bug; running dry before
             * the first element just means "no buffers right now". */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives; it can only be
                 * written once the total element count is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the hash-bearing tail of the header that
                 * receive_header did not populate. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Patch num_buffers in place now that the count is known */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Return every popped-but-unpublished element to the queue */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
1965 
1966 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1967                                   size_t size)
1968 {
1969     RCU_READ_LOCK_GUARD();
1970 
1971     return virtio_net_receive_rcu(nc, buf, size, false);
1972 }
1973 
1974 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1975                                          const uint8_t *buf,
1976                                          VirtioNetRscUnit *unit)
1977 {
1978     uint16_t ip_hdrlen;
1979     struct ip_header *ip;
1980 
1981     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1982                               + sizeof(struct eth_header));
1983     unit->ip = (void *)ip;
1984     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1985     unit->ip_plen = &ip->ip_len;
1986     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1987     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1988     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1989 }
1990 
1991 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1992                                          const uint8_t *buf,
1993                                          VirtioNetRscUnit *unit)
1994 {
1995     struct ip6_header *ip6;
1996 
1997     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1998                                  + sizeof(struct eth_header));
1999     unit->ip = ip6;
2000     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2001     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2002                                         + sizeof(struct ip6_header));
2003     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2004 
2005     /* There is a difference between payload lenght in ipv4 and v6,
2006        ip header is excluded in ipv6 */
2007     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2008 }
2009 
2010 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2011                                        VirtioNetRscSeg *seg)
2012 {
2013     int ret;
2014     struct virtio_net_hdr_v1 *h;
2015 
2016     h = (struct virtio_net_hdr_v1 *)seg->buf;
2017     h->flags = 0;
2018     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2019 
2020     if (seg->is_coalesced) {
2021         h->rsc.segments = seg->packets;
2022         h->rsc.dup_acks = seg->dup_ack;
2023         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2024         if (chain->proto == ETH_P_IP) {
2025             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2026         } else {
2027             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2028         }
2029     }
2030 
2031     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2032     QTAILQ_REMOVE(&chain->buffers, seg, next);
2033     g_free(seg->buf);
2034     g_free(seg);
2035 
2036     return ret;
2037 }
2038 
2039 static void virtio_net_rsc_purge(void *opq)
2040 {
2041     VirtioNetRscSeg *seg, *rn;
2042     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2043 
2044     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2045         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2046             chain->stat.purge_failed++;
2047             continue;
2048         }
2049     }
2050 
2051     chain->stat.timer++;
2052     if (!QTAILQ_EMPTY(&chain->buffers)) {
2053         timer_mod(chain->drain_timer,
2054               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2055     }
2056 }
2057 
2058 static void virtio_net_rsc_cleanup(VirtIONet *n)
2059 {
2060     VirtioNetRscChain *chain, *rn_chain;
2061     VirtioNetRscSeg *seg, *rn_seg;
2062 
2063     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2064         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2065             QTAILQ_REMOVE(&chain->buffers, seg, next);
2066             g_free(seg->buf);
2067             g_free(seg);
2068         }
2069 
2070         timer_free(chain->drain_timer);
2071         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2072         g_free(chain);
2073     }
2074 }
2075 
2076 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2077                                      NetClientState *nc,
2078                                      const uint8_t *buf, size_t size)
2079 {
2080     uint16_t hdr_len;
2081     VirtioNetRscSeg *seg;
2082 
2083     hdr_len = chain->n->guest_hdr_len;
2084     seg = g_new(VirtioNetRscSeg, 1);
2085     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2086         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2087     memcpy(seg->buf, buf, size);
2088     seg->size = size;
2089     seg->packets = 1;
2090     seg->dup_ack = 0;
2091     seg->is_coalesced = 0;
2092     seg->nc = nc;
2093 
2094     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2095     chain->stat.cache++;
2096 
2097     switch (chain->proto) {
2098     case ETH_P_IP:
2099         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2100         break;
2101     case ETH_P_IPV6:
2102         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2103         break;
2104     default:
2105         g_assert_not_reached();
2106     }
2107 }
2108 
2109 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2110                                          VirtioNetRscSeg *seg,
2111                                          const uint8_t *buf,
2112                                          struct tcp_header *n_tcp,
2113                                          struct tcp_header *o_tcp)
2114 {
2115     uint32_t nack, oack;
2116     uint16_t nwin, owin;
2117 
2118     nack = htonl(n_tcp->th_ack);
2119     nwin = htons(n_tcp->th_win);
2120     oack = htonl(o_tcp->th_ack);
2121     owin = htons(o_tcp->th_win);
2122 
2123     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2124         chain->stat.ack_out_of_win++;
2125         return RSC_FINAL;
2126     } else if (nack == oack) {
2127         /* duplicated ack or window probe */
2128         if (nwin == owin) {
2129             /* duplicated ack, add dup ack count due to whql test up to 1 */
2130             chain->stat.dup_ack++;
2131             return RSC_FINAL;
2132         } else {
2133             /* Coalesce window update */
2134             o_tcp->th_win = n_tcp->th_win;
2135             chain->stat.win_update++;
2136             return RSC_COALESCE;
2137         }
2138     } else {
2139         /* pure ack, go to 'C', finalize*/
2140         chain->stat.pure_ack++;
2141         return RSC_FINAL;
2142     }
2143 }
2144 
/*
 * Try to merge an incoming TCP segment into a cached one belonging to
 * the same flow (the caller has already matched addresses and ports).
 *
 * Returns RSC_COALESCE when the payload (or a window update) was
 * folded into the cached segment, or RSC_FINAL when the cache must be
 * drained (out-of-window/out-of-order data, duplicate or pure ACK, or
 * the merged frame would exceed chain->max_payload).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* Payload of the new segment starts right after its TCP header */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            /* No new data: treat as ACK / window update */
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            /* Merged frame would be too large: drain instead */
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* Carry the newest ACK number and window forward */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the new payload to the cached frame */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2209 
2210 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2211                                         VirtioNetRscSeg *seg,
2212                                         const uint8_t *buf, size_t size,
2213                                         VirtioNetRscUnit *unit)
2214 {
2215     struct ip_header *ip1, *ip2;
2216 
2217     ip1 = (struct ip_header *)(unit->ip);
2218     ip2 = (struct ip_header *)(seg->unit.ip);
2219     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2220         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2221         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2222         chain->stat.no_match++;
2223         return RSC_NO_MATCH;
2224     }
2225 
2226     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2227 }
2228 
2229 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2230                                         VirtioNetRscSeg *seg,
2231                                         const uint8_t *buf, size_t size,
2232                                         VirtioNetRscUnit *unit)
2233 {
2234     struct ip6_header *ip1, *ip2;
2235 
2236     ip1 = (struct ip6_header *)(unit->ip);
2237     ip2 = (struct ip6_header *)(seg->unit.ip);
2238     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2239         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2240         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2241         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2242             chain->stat.no_match++;
2243             return RSC_NO_MATCH;
2244     }
2245 
2246     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2247 }
2248 
2249 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2250  * to prevent out of order */
2251 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2252                                          struct tcp_header *tcp)
2253 {
2254     uint16_t tcp_hdr;
2255     uint16_t tcp_flag;
2256 
2257     tcp_flag = htons(tcp->th_offset_flags);
2258     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2259     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2260     if (tcp_flag & TH_SYN) {
2261         chain->stat.tcp_syn++;
2262         return RSC_BYPASS;
2263     }
2264 
2265     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2266         chain->stat.tcp_ctrl_drain++;
2267         return RSC_FINAL;
2268     }
2269 
2270     if (tcp_hdr > sizeof(struct tcp_header)) {
2271         chain->stat.tcp_all_opt++;
2272         return RSC_FINAL;
2273     }
2274 
2275     return RSC_CANDIDATE;
2276 }
2277 
2278 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2279                                          NetClientState *nc,
2280                                          const uint8_t *buf, size_t size,
2281                                          VirtioNetRscUnit *unit)
2282 {
2283     int ret;
2284     VirtioNetRscSeg *seg, *nseg;
2285 
2286     if (QTAILQ_EMPTY(&chain->buffers)) {
2287         chain->stat.empty_cache++;
2288         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2289         timer_mod(chain->drain_timer,
2290               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2291         return size;
2292     }
2293 
2294     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2295         if (chain->proto == ETH_P_IP) {
2296             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2297         } else {
2298             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2299         }
2300 
2301         if (ret == RSC_FINAL) {
2302             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2303                 /* Send failed */
2304                 chain->stat.final_failed++;
2305                 return 0;
2306             }
2307 
2308             /* Send current packet */
2309             return virtio_net_do_receive(nc, buf, size);
2310         } else if (ret == RSC_NO_MATCH) {
2311             continue;
2312         } else {
2313             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2314             seg->is_coalesced = 1;
2315             return size;
2316         }
2317     }
2318 
2319     chain->stat.no_match_cache++;
2320     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2321     return size;
2322 }
2323 
2324 /* Drain a connection data, this is to avoid out of order segments */
2325 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2326                                         NetClientState *nc,
2327                                         const uint8_t *buf, size_t size,
2328                                         uint16_t ip_start, uint16_t ip_size,
2329                                         uint16_t tcp_port)
2330 {
2331     VirtioNetRscSeg *seg, *nseg;
2332     uint32_t ppair1, ppair2;
2333 
2334     ppair1 = *(uint32_t *)(buf + tcp_port);
2335     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2336         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2337         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2338             || (ppair1 != ppair2)) {
2339             continue;
2340         }
2341         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2342             chain->stat.drain_failed++;
2343         }
2344 
2345         break;
2346     }
2347 
2348     return virtio_net_do_receive(nc, buf, size);
2349 }
2350 
/*
 * Check whether an IPv4 packet is eligible for RSC: plain version-4
 * header without options, TCP payload, not a fragment, no ECN marks,
 * and a total length that both covers the IP+TCP headers and fits in
 * the received frame.  Returns RSC_CANDIDATE or RSC_BYPASS.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        /* NOTE(review): this bumps ip_option, same counter as the
         * options check below — looks intentional but confirm. */
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* Total length must cover the headers and fit within the frame */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2396 
2397 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2398                                       NetClientState *nc,
2399                                       const uint8_t *buf, size_t size)
2400 {
2401     int32_t ret;
2402     uint16_t hdr_len;
2403     VirtioNetRscUnit unit;
2404 
2405     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2406 
2407     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2408         + sizeof(struct tcp_header))) {
2409         chain->stat.bypass_not_tcp++;
2410         return virtio_net_do_receive(nc, buf, size);
2411     }
2412 
2413     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2414     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2415         != RSC_CANDIDATE) {
2416         return virtio_net_do_receive(nc, buf, size);
2417     }
2418 
2419     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2420     if (ret == RSC_BYPASS) {
2421         return virtio_net_do_receive(nc, buf, size);
2422     } else if (ret == RSC_FINAL) {
2423         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2424                 ((hdr_len + sizeof(struct eth_header)) + 12),
2425                 VIRTIO_NET_IP4_ADDR_SIZE,
2426                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2427     }
2428 
2429     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2430 }
2431 
/*
 * Check whether an IPv6 packet is eligible for RSC: version 6, TCP as
 * the next header (any extension header makes ip6_un1_nxt non-TCP and
 * is thus rejected too), a sane payload length, and no ECN marks.
 * Returns RSC_CANDIDATE or RSC_BYPASS.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* NOTE(review): masks the low byte of the host-order flow word to
     * find the version nibble — presumably correct for the supported
     * host byte orders; confirm on big-endian. */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Payload must hold at least a TCP header and fit in the frame
     * (IPv6 payload length excludes the IPv6 header itself) */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2465 
2466 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2467                                       const uint8_t *buf, size_t size)
2468 {
2469     int32_t ret;
2470     uint16_t hdr_len;
2471     VirtioNetRscChain *chain;
2472     VirtioNetRscUnit unit;
2473 
2474     chain = opq;
2475     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2476 
2477     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2478         + sizeof(tcp_header))) {
2479         return virtio_net_do_receive(nc, buf, size);
2480     }
2481 
2482     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2483     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2484                                                  unit.ip, buf, size)) {
2485         return virtio_net_do_receive(nc, buf, size);
2486     }
2487 
2488     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2489     if (ret == RSC_BYPASS) {
2490         return virtio_net_do_receive(nc, buf, size);
2491     } else if (ret == RSC_FINAL) {
2492         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2493                 ((hdr_len + sizeof(struct eth_header)) + 8),
2494                 VIRTIO_NET_IP6_ADDR_SIZE,
2495                 hdr_len + sizeof(struct eth_header)
2496                 + sizeof(struct ip6_header));
2497     }
2498 
2499     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2500 }
2501 
2502 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2503                                                       NetClientState *nc,
2504                                                       uint16_t proto)
2505 {
2506     VirtioNetRscChain *chain;
2507 
2508     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2509         return NULL;
2510     }
2511 
2512     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2513         if (chain->proto == proto) {
2514             return chain;
2515         }
2516     }
2517 
2518     chain = g_malloc(sizeof(*chain));
2519     chain->n = n;
2520     chain->proto = proto;
2521     if (proto == (uint16_t)ETH_P_IP) {
2522         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2523         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2524     } else {
2525         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2526         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2527     }
2528     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2529                                       virtio_net_rsc_purge, chain);
2530     memset(&chain->stat, 0, sizeof(chain->stat));
2531 
2532     QTAILQ_INIT(&chain->buffers);
2533     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2534 
2535     return chain;
2536 }
2537 
/*
 * RSC dispatcher: read the ethertype of the incoming frame, look up
 * (or create) the matching coalescing chain, and route the packet to
 * the IPv4 or IPv6 RSC handler when that protocol's coalescing is
 * enabled.  Anything else is delivered to the guest directly.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /* NOTE(review): the size check uses host_hdr_len but the ethernet
     * header below is read past guest_hdr_len — presumably the two are
     * equal whenever RSC is negotiated; confirm against the feature
     * negotiation path. */
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
2566 
2567 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2568                                   size_t size)
2569 {
2570     VirtIONet *n = qemu_get_nic_opaque(nc);
2571     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2572         return virtio_net_rsc_receive(nc, buf, size);
2573     } else {
2574         return virtio_net_do_receive(nc, buf, size);
2575     }
2576 }
2577 
2578 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2579 
2580 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2581 {
2582     VirtIONet *n = qemu_get_nic_opaque(nc);
2583     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2584     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2585     int ret;
2586 
2587     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2588     virtio_notify(vdev, q->tx_vq);
2589 
2590     g_free(q->async_tx.elem);
2591     q->async_tx.elem = NULL;
2592 
2593     virtio_queue_set_notification(q->tx_vq, 1);
2594     ret = virtio_net_flush_tx(q);
2595     if (ret >= n->tx_burst) {
2596         /*
2597          * the flush has been stopped by tx_burst
2598          * we will not receive notification for the
2599          * remainining part, so re-schedule
2600          */
2601         virtio_queue_set_notification(q->tx_vq, 0);
2602         if (q->tx_bh) {
2603             qemu_bh_schedule(q->tx_bh);
2604         } else {
2605             timer_mod(q->tx_timer,
2606                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2607         }
2608         q->tx_waiting = 1;
2609     }
2610 }
2611 
2612 /* TX */
/*
 * Pop elements from the TX virtqueue and transmit them, up to
 * n->tx_burst packets per call.
 *
 * Returns the number of packets sent, -EINVAL on a malformed element
 * (the device is marked broken), or -EBUSY when the backend queued a
 * packet asynchronously — in that case the element is parked in
 * q->async_tx and completed later by virtio_net_tx_complete().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous async send is still in flight: wait for completion */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        /* sg: header-stripping copy; sg2: header-swap copy (one extra
         * slot for the byte-swapped header prepended at sg2[0]) */
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* Replace the guest header with a byte-swapped copy:
                 * sg2[0] points at the local mhdr, the rest of the
                 * element follows from guest_hdr_len onwards. */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* Element too fragmented to fit after the header */
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Backend queued the packet; park the element until the
             * completion callback fires. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2707 
2708 static void virtio_net_tx_timer(void *opaque);
2709 
2710 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2711 {
2712     VirtIONet *n = VIRTIO_NET(vdev);
2713     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2714 
2715     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2716         virtio_net_drop_tx_queue_data(vdev, vq);
2717         return;
2718     }
2719 
2720     /* This happens when device was stopped but VCPU wasn't. */
2721     if (!vdev->vm_running) {
2722         q->tx_waiting = 1;
2723         return;
2724     }
2725 
2726     if (q->tx_waiting) {
2727         /* We already have queued packets, immediately flush */
2728         timer_del(q->tx_timer);
2729         virtio_net_tx_timer(q);
2730     } else {
2731         /* re-arm timer to flush it (and more) on next tick */
2732         timer_mod(q->tx_timer,
2733                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2734         q->tx_waiting = 1;
2735         virtio_queue_set_notification(vq, 0);
2736     }
2737 }
2738 
2739 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2740 {
2741     VirtIONet *n = VIRTIO_NET(vdev);
2742     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2743 
2744     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2745         virtio_net_drop_tx_queue_data(vdev, vq);
2746         return;
2747     }
2748 
2749     if (unlikely(q->tx_waiting)) {
2750         return;
2751     }
2752     q->tx_waiting = 1;
2753     /* This happens when device was stopped but VCPU wasn't. */
2754     if (!vdev->vm_running) {
2755         return;
2756     }
2757     virtio_queue_set_notification(vq, 0);
2758     qemu_bh_schedule(q->tx_bh);
2759 }
2760 
/*
 * Timer callback for the "timer" TX mitigation policy: flush pending
 * packets, then either re-arm the timer (guest still busy) or re-enable
 * queue notifications (guest idle).
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        /* -EBUSY: tx_complete will resume; -EINVAL: device is broken */
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}
2810 
/*
 * Bottom-half callback for the "bh" TX mitigation policy: flush pending
 * packets, then either reschedule itself (guest still busy) or re-enable
 * queue notifications (guest idle).
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2859 
2860 static void virtio_net_add_queue(VirtIONet *n, int index)
2861 {
2862     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2863 
2864     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2865                                            virtio_net_handle_rx);
2866 
2867     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2868         n->vqs[index].tx_vq =
2869             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2870                              virtio_net_handle_tx_timer);
2871         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2872                                               virtio_net_tx_timer,
2873                                               &n->vqs[index]);
2874     } else {
2875         n->vqs[index].tx_vq =
2876             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2877                              virtio_net_handle_tx_bh);
2878         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2879     }
2880 
2881     n->vqs[index].tx_waiting = 0;
2882     n->vqs[index].n = n;
2883 }
2884 
/*
 * Tear down the RX/TX virtqueue pair at @index: drop packets still
 * queued toward the guest and free the per-queue TX mitigation state.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    /* rx_vq sits at the even vq index, tx_vq at the following odd one */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        /* "timer" TX policy */
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        /* "bh" TX policy */
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2904 
/*
 * Grow or shrink the virtqueue set to @new_max_queue_pairs RX/TX pairs.
 * The ctrl vq always occupies the highest index, so it is removed first
 * and re-added last so it stays after all the pairs.
 */
static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    /* at least one RX/TX pair plus the ctrl vq; total is always odd */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2939 
2940 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2941 {
2942     int max = multiqueue ? n->max_queue_pairs : 1;
2943 
2944     n->multiqueue = multiqueue;
2945     virtio_net_change_num_queue_pairs(n, max);
2946 
2947     virtio_net_set_queue_pairs(n);
2948 }
2949 
/*
 * Post-load hook for the device vmstate section: re-derive all state
 * that is not migrated directly (header layout, offloads, link state,
 * announce timer, RSS programming) from the loaded fields.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume a guest self-announce that was interrupted by migration */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Re-attach eBPF RSS if possible, else fall back to software RSS */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
3032 
3033 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3034 {
3035     VirtIONet *n = VIRTIO_NET(vdev);
3036     /*
3037      * The actual needed state is now in saved_guest_offloads,
3038      * see virtio_net_post_load_device for detail.
3039      * Restore it back and apply the desired offloads.
3040      */
3041     n->curr_guest_offloads = n->saved_guest_offloads;
3042     if (peer_has_vnet_hdr(n)) {
3043         virtio_net_apply_guest_offloads(n);
3044     }
3045 
3046     return 0;
3047 }
3048 
/* tx_waiting field of a VirtIONetQueue; used for queue 0 directly and
 * for queues 1..n-1 via vmstate_virtio_net_tx_waiting */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
3057 
3058 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3059 {
3060     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3061 }
3062 
3063 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3064 {
3065     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3066                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3067 }
3068 
3069 static bool mac_table_fits(void *opaque, int version_id)
3070 {
3071     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3072 }
3073 
3074 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3075 {
3076     return !mac_table_fits(opaque, version_id);
3077 }
3078 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;              /* owning device */
    VirtIONetQueue *vqs_1;               /* &parent->vqs[1]: queues after the first */
    uint16_t        curr_queue_pairs_1;  /* number of entries behind vqs_1 */
    uint8_t         has_ufo;             /* on the wire: source's UFO capability */
    uint32_t        has_vnet_hdr;        /* on the wire: source's vnet_hdr capability */
};
3089 
3090 /* The 2nd and subsequent tx_waiting flags are loaded later than
3091  * the 1st entry in the queue_pairs and only if there's more than one
3092  * entry.  We use the tmp mechanism to calculate a temporary
3093  * pointer and count and also validate the count.
3094  */
3095 
3096 static int virtio_net_tx_waiting_pre_save(void *opaque)
3097 {
3098     struct VirtIONetMigTmp *tmp = opaque;
3099 
3100     tmp->vqs_1 = tmp->parent->vqs + 1;
3101     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3102     if (tmp->parent->curr_queue_pairs == 0) {
3103         tmp->curr_queue_pairs_1 = 0;
3104     }
3105 
3106     return 0;
3107 }
3108 
3109 static int virtio_net_tx_waiting_pre_load(void *opaque)
3110 {
3111     struct VirtIONetMigTmp *tmp = opaque;
3112 
3113     /* Reuse the pointer setup from save */
3114     virtio_net_tx_waiting_pre_save(opaque);
3115 
3116     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3117         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3118             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3119 
3120         return -EINVAL;
3121     }
3122 
3123     return 0; /* all good */
3124 }
3125 
/* Migrates tx_waiting for queues 1..curr_queue_pairs-1 via the tmp area */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3138 
3139 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3140  * flag set we need to check that we have it
3141  */
3142 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3143 {
3144     struct VirtIONetMigTmp *tmp = opaque;
3145 
3146     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3147         error_report("virtio-net: saved image requires TUN_F_UFO support");
3148         return -EINVAL;
3149     }
3150 
3151     return 0;
3152 }
3153 
3154 static int virtio_net_ufo_pre_save(void *opaque)
3155 {
3156     struct VirtIONetMigTmp *tmp = opaque;
3157 
3158     tmp->has_ufo = tmp->parent->has_ufo;
3159 
3160     return 0;
3161 }
3162 
/* Carries the source's UFO capability so the destination can verify it */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3172 
3173 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3174  * flag set we need to check that we have it
3175  */
3176 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3177 {
3178     struct VirtIONetMigTmp *tmp = opaque;
3179 
3180     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3181         error_report("virtio-net: saved image requires vnet_hdr=on");
3182         return -EINVAL;
3183     }
3184 
3185     return 0;
3186 }
3187 
3188 static int virtio_net_vnet_pre_save(void *opaque)
3189 {
3190     struct VirtIONetMigTmp *tmp = opaque;
3191 
3192     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3193 
3194     return 0;
3195 }
3196 
/* Carries the source's vnet_hdr capability so the destination can verify it */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3206 
3207 static bool virtio_net_rss_needed(void *opaque)
3208 {
3209     return VIRTIO_NET(opaque)->rss_data.enabled;
3210 }
3211 
/* RSS configuration subsection, sent only when RSS is enabled */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* indirections_table is allocated on load, sized by indirections_len */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3232 
/* Main device vmstate; field order and encoding are ABI, do not reorder */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue 0 only; the rest follow via the tmp struct */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* max_queue_pairs must match exactly between source and destination */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3288 
/* Callbacks through which the net core drives this NIC frontend */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3298 
/*
 * vhost support: report whether the guest notifier for virtqueue @idx
 * is pending.  Without MQ, idx 2 can only legitimately be the ctrl vq,
 * so the index is validated before mapping it to a subqueue.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3320 
/*
 * vhost support: mask or unmask the guest notifier for virtqueue @idx.
 * Mirrors the index validation in virtio_net_guest_notifier_pending.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
3344 
3345 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3346 {
3347     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3348 
3349     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3350 }
3351 
3352 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3353                                    const char *type)
3354 {
3355     /*
3356      * The name can be NULL, the netclient name will be type.x.
3357      */
3358     assert(type != NULL);
3359 
3360     g_free(n->netclient_name);
3361     g_free(n->netclient_type);
3362     n->netclient_name = g_strdup(name);
3363     n->netclient_type = g_strdup(type);
3364 }
3365 
3366 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3367 {
3368     HotplugHandler *hotplug_ctrl;
3369     PCIDevice *pci_dev;
3370     Error *err = NULL;
3371 
3372     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3373     if (hotplug_ctrl) {
3374         pci_dev = PCI_DEVICE(dev);
3375         pci_dev->partially_hotplugged = true;
3376         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3377         if (err) {
3378             error_report_err(err);
3379             return false;
3380         }
3381     } else {
3382         return false;
3383     }
3384     return true;
3385 }
3386 
/*
 * Re-plug a primary device that was partially unplugged for failover
 * (e.g. after a failed migration).  Returns true on success; on failure
 * an error is propagated to @errp.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    /* Nothing to do if the device was never (partially) unplugged */
    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    /* Make the device visible to the guest again */
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3419 
/*
 * React to a migration state change for the failover primary device:
 * unplug it when migration starts, re-plug it if migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            /* Keep the primary's state out of the migration stream */
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3449 
3450 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3451 {
3452     MigrationState *s = data;
3453     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3454     virtio_net_handle_migration_primary(n, s);
3455 }
3456 
/*
 * Device listener hook: decide whether a device being created should be
 * hidden because it is the failover primary paired with this NIC.
 * Returns true to hide the device (it is plugged in later, once the
 * guest acks VIRTIO_NET_F_STANDBY).
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    /* Only devices declaring a failover pairing are candidates */
    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    /* Is this device paired with *this* virtio-net instance? */
    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3507 
/*
 * Realize hook: validate the device properties, derive host feature bits
 * from them, create the virtqueues and the NIC backend, and wire up the
 * failover/announce/vDPA plumbing.  On error, reports through @errp and
 * (after virtio_init()) undoes initialization with virtio_cleanup().
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* A non-zero "host_mtu" property is advertised via VIRTIO_NET_F_MTU. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Parse the "duplex" property.  The compare length of 5 covers the
     * NUL terminator, so these strncmp() calls are exact-match compares.
     */
    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* "speed" defaults to SPEED_UNKNOWN (negative); lower values are bogus. */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    /*
     * Failover: start with the primary (passthrough) device hidden until
     * feature negotiation flips failover_primary_hidden, and watch
     * migration state changes so the primary can be handled around
     * migration.
     */
    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    /* config_size depends on the final host feature set computed above. */
    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* Same power-of-2 / range validation for the TX ring. */
    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    /* Each pair needs an RX and a TX vq, plus one slot for the ctrl vq. */
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* Unknown "tx" values only warn; the message says "bh" is the default. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    /* Clamp the TX ring to the maximum the backend can support. */
    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The control virtqueue is added last, after all data queue pairs. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* host_hdr_len reflects whether the peer carries a virtio_net_hdr. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id (4096 ids / 8 bits per byte). */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

   /* Push the configured MAC down to a vhost-vdpa backend, if present. */
   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);

    /* RSS steering is implemented via an eBPF program; load it up front. */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
3684 
/*
 * Unrealize hook: tear down everything virtio_net_device_realize() set
 * up — eBPF program, vhost backend, failover listeners, virtqueues, NIC
 * backend and per-device allocations.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* Failover state only exists when the "failover" property was set. */
    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq (it sits right after the data queue pairs) */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3728 
3729 static void virtio_net_instance_init(Object *obj)
3730 {
3731     VirtIONet *n = VIRTIO_NET(obj);
3732 
3733     /*
3734      * The default config_size is sizeof(struct virtio_net_config).
3735      * Can be overriden with virtio_net_set_config_size.
3736      */
3737     n->config_size = sizeof(struct virtio_net_config);
3738     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3739                                   "bootindex", "/ethernet-phy@0",
3740                                   DEVICE(n));
3741 
3742     ebpf_rss_init(&n->ebpf_rss);
3743 }
3744 
3745 static int virtio_net_pre_save(void *opaque)
3746 {
3747     VirtIONet *n = opaque;
3748 
3749     /* At this point, backend must be stopped, otherwise
3750      * it might keep writing to memory. */
3751     assert(!n->vhost_started);
3752 
3753     return 0;
3754 }
3755 
3756 static bool primary_unplug_pending(void *opaque)
3757 {
3758     DeviceState *dev = opaque;
3759     DeviceState *primary;
3760     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3761     VirtIONet *n = VIRTIO_NET(vdev);
3762 
3763     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3764         return false;
3765     }
3766     primary = failover_find_primary_device(n);
3767     return primary ? primary->pending_deleted_event : false;
3768 }
3769 
3770 static bool dev_unplug_pending(void *opaque)
3771 {
3772     DeviceState *dev = opaque;
3773     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3774 
3775     return vdc->primary_unplug_pending(dev);
3776 }
3777 
3778 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3779 {
3780     VirtIONet *n = VIRTIO_NET(vdev);
3781     NetClientState *nc = qemu_get_queue(n->nic);
3782     struct vhost_net *net = get_vhost_net(nc->peer);
3783     return &net->dev;
3784 }
3785 
/*
 * Outer migration section for the device.  The actual device payload is
 * provided by vmstate_virtio_net_device (installed as vdc->vmsd in
 * virtio_net_class_init) and pulled in via VMSTATE_VIRTIO_DEVICE.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,    /* asserts vhost is stopped */
    .dev_unplug_pending = dev_unplug_pending, /* failover primary check */
};
3797 
/*
 * User-visible properties.  The BIT64 entries toggle bits in
 * host_features; the rest configure rings, timers and link parameters.
 */
static Property virtio_net_properties[] = {
    /* Checksum and segmentation offload feature bits (default on). */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    /* Control virtqueue and the commands it carries (default on). */
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue, RSS and receive coalescing features (default off). */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    /* TX scheduling ("timer" vs "bh") and ring-size tuning. */
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Link speed/duplex reporting and failover (standby) support. */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3864 
/*
 * Class init: install properties, migration descriptions and the
 * VirtioDeviceClass callbacks that implement the device model.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net; /* outer migration wrapper */
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    /* GSO is a legacy (pre-virtio-1.0) feature bit. */
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device; /* inner device payload */
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
}
3892 
/* QOM type registration: virtio-net is a subtype of virtio-device. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};
3900 
/* Register the virtio-net type with QOM at module-init time. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
3907