xref: /openbmc/qemu/hw/net/virtio-net.c (revision e69b2c67)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "sysemu/replay.h"
44 #include "trace.h"
45 #include "monitor/qdev.h"
46 #include "monitor/monitor.h"
47 #include "hw/pci/pci_device.h"
48 #include "net_rx_pkt.h"
49 #include "hw/virtio/vhost.h"
50 #include "sysemu/qtest.h"
51 
52 #define VIRTIO_NET_VM_VERSION    11
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue sizes; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length field value of an IPv4 header without options (20 bytes) */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval. This value affects performance
78    significantly and should be tuned carefully: '300000' (300us) is the
79    recommended value for passing the WHQL test, while '50000' can gain 2x
80    netperf throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static const VirtIOConfigSizeParams cfg_size_params = {
110     .min_size = endof(struct virtio_net_config, mac),
111     .max_size = sizeof(struct virtio_net_config),
112     .feature_sizes = feature_sizes
113 };
114 
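/*
 * A minimal sketch, not used by the device model, of how the tables above
 * are consumed: virtio_get_config_size() grows the visible config space up
 * to .end for each offered feature bit and clamps the result to
 * [min_size, max_size]. The function name below is illustrative only.
 */
static size_t G_GNUC_UNUSED example_net_config_size(uint64_t host_features)
{
    const VirtIOFeature *f;
    size_t sz = cfg_size_params.min_size;

    for (f = feature_sizes; f->flags != 0; f++) {
        if (host_features & f->flags) {
            sz = MAX(sz, f->end);   /* feature offered: expose up to .end */
        }
    }
    return MIN(sz, cfg_size_params.max_size);
}
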
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
122 static int vq2q(int queue_index)
123 {
124     return queue_index / 2;
125 }
126 
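/*
 * Virtqueue layout sketch: RX/TX virtqueues come in pairs, so virtqueue
 * 2n is the RX queue of pair n and 2n + 1 its TX queue; the control queue,
 * when negotiated, sits after all pairs. A hypothetical inverse of vq2q(),
 * for illustration only:
 */
static int G_GNUC_UNUSED example_q2vq(int queue_pair_index, bool is_tx)
{
    return queue_pair_index * 2 + (is_tx ? 1 : 0);
}
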
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143     VirtIONet *n = VIRTIO_NET(vdev);
144     struct virtio_net_config netcfg;
145     NetClientState *nc = qemu_get_queue(n->nic);
146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147 
148     int ret = 0;
149     memset(&netcfg, 0, sizeof(struct virtio_net_config));
150     virtio_stw_p(vdev, &netcfg.status, n->status);
151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155     netcfg.duplex = n->net_conf.duplex;
156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
162     memcpy(config, &netcfg, n->config_size);
163 
164     /*
165      * Is this VDPA? No peer means not VDPA: there's no way to
166      * disconnect/reconnect a VDPA peer.
167      */
168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170                                    n->config_size);
171         if (ret == -1) {
172             return;
173         }
174 
175         /*
176          * Some NIC/kernel combinations present 0 as the mac address.  As that
177          * is not a legal address, try to proceed with the address from the
178          * QEMU command line in the hope that the address has been configured
179          * correctly elsewhere - just not reported by the device.
180          */
181         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182             info_report("Zero hardware mac address detected. Ignoring.");
183             memcpy(netcfg.mac, n->mac, ETH_ALEN);
184         }
185 
186         netcfg.status |= virtio_tswap16(vdev,
187                                         n->status & VIRTIO_NET_S_ANNOUNCE);
188         memcpy(config, &netcfg, n->config_size);
189     }
190 }
191 
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194     VirtIONet *n = VIRTIO_NET(vdev);
195     struct virtio_net_config netcfg = {};
196     NetClientState *nc = qemu_get_queue(n->nic);
197 
198     memcpy(&netcfg, config, n->config_size);
199 
200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205     }
206 
207     /*
208      * Is this VDPA? No peer means not VDPA: there's no way to
209      * disconnect/reconnect a VDPA peer.
210      */
211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212         vhost_net_set_config(get_vhost_net(nc->peer),
213                              (uint8_t *)&netcfg, 0, n->config_size,
214                              VHOST_SET_CONFIG_TYPE_FRONTEND);
215     }
216 }
217 
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224 
225 static void virtio_net_announce_notify(VirtIONet *net)
226 {
227     VirtIODevice *vdev = VIRTIO_DEVICE(net);
228     trace_virtio_net_announce_notify();
229 
230     net->status |= VIRTIO_NET_S_ANNOUNCE;
231     virtio_notify_config(vdev);
232 }
233 
234 static void virtio_net_announce_timer(void *opaque)
235 {
236     VirtIONet *n = opaque;
237     trace_virtio_net_announce_timer(n->announce_timer.round);
238 
239     n->announce_timer.round--;
240     virtio_net_announce_notify(n);
241 }
242 
243 static void virtio_net_announce(NetClientState *nc)
244 {
245     VirtIONet *n = qemu_get_nic_opaque(nc);
246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
247 
248     /*
249      * Make sure the virtio migration announcement timer isn't running.
250      * If it is, let it trigger the announcement so that we do not cause
251      * confusion.
252      */
253     if (n->announce_timer.round) {
254         return;
255     }
256 
257     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259         virtio_net_announce_notify(n);
260     }
261 }
262 
263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
264 {
265     VirtIODevice *vdev = VIRTIO_DEVICE(n);
266     NetClientState *nc = qemu_get_queue(n->nic);
267     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
268     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
269               n->max_ncs - n->max_queue_pairs : 0;
270 
271     if (!get_vhost_net(nc->peer)) {
272         return;
273     }
274 
275     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
276         !!n->vhost_started) {
277         return;
278     }
279     if (!n->vhost_started) {
280         int r, i;
281 
282         if (n->needs_vnet_hdr_swap) {
283             error_report("backend does not support %s vnet headers; "
284                          "falling back on userspace virtio",
285                          virtio_is_big_endian(vdev) ? "BE" : "LE");
286             return;
287         }
288 
289         /* Any packets outstanding? Purge them to avoid touching rings
290          * when vhost is running.
291          */
292         for (i = 0;  i < queue_pairs; i++) {
293             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
294 
295             /* Purge both directions: TX and RX. */
296             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
297             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298         }
299 
300         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
301             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
302             if (r < 0) {
303                 error_report("%uBytes MTU not supported by the backend",
304                              n->net_conf.mtu);
305 
306                 return;
307             }
308         }
309 
310         n->vhost_started = 1;
311         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
312         if (r < 0) {
313             error_report("unable to start vhost net: %d: "
314                          "falling back on userspace virtio", -r);
315             n->vhost_started = 0;
316         }
317     } else {
318         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
319         n->vhost_started = 0;
320     }
321 }
322 
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324                                           NetClientState *peer,
325                                           bool enable)
326 {
327     if (virtio_is_big_endian(vdev)) {
328         return qemu_set_vnet_be(peer, enable);
329     } else {
330         return qemu_set_vnet_le(peer, enable);
331     }
332 }
333 
334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
335                                        int queue_pairs, bool enable)
336 {
337     int i;
338 
339     for (i = 0; i < queue_pairs; i++) {
340         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341             enable) {
342             while (--i >= 0) {
343                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344             }
345 
346             return true;
347         }
348     }
349 
350     return false;
351 }
352 
353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354 {
355     VirtIODevice *vdev = VIRTIO_DEVICE(n);
356     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
357 
358     if (virtio_net_started(n, status)) {
359         /* Before using the device, we tell the network backend about the
360          * endianness to use when parsing vnet headers. If the backend
361          * can't do it, we fall back to fixing the headers in the core
362          * virtio-net code.
363          */
364         n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
365                                  virtio_net_set_vnet_endian(vdev, n->nic->ncs,
366                                                             queue_pairs, true);
367     } else if (virtio_net_started(n, vdev->status)) {
368         /* After using the device, we need to reset the network backend to
369          * the default (guest native endianness), otherwise the guest may
370          * lose network connectivity if it is rebooted into a different
371          * endianness.
372          */
373         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
374     }
375 }
376 
377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
378 {
379     unsigned int dropped = virtqueue_drop_all(vq);
380     if (dropped) {
381         virtio_notify(vdev, vq);
382     }
383 }
384 
385 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
386 {
387     VirtIONet *n = VIRTIO_NET(vdev);
388     VirtIONetQueue *q;
389     int i;
390     uint8_t queue_status;
391 
392     virtio_net_vnet_endian_status(n, status);
393     virtio_net_vhost_status(n, status);
394 
395     for (i = 0; i < n->max_queue_pairs; i++) {
396         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
397         bool queue_started;
398         q = &n->vqs[i];
399 
400         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
401             queue_status = 0;
402         } else {
403             queue_status = status;
404         }
405         queue_started =
406             virtio_net_started(n, queue_status) && !n->vhost_started;
407 
408         if (queue_started) {
409             qemu_flush_queued_packets(ncs);
410         }
411 
412         if (!q->tx_waiting) {
413             continue;
414         }
415 
416         if (queue_started) {
417             if (q->tx_timer) {
418                 timer_mod(q->tx_timer,
419                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
420             } else {
421                 replay_bh_schedule_event(q->tx_bh);
422             }
423         } else {
424             if (q->tx_timer) {
425                 timer_del(q->tx_timer);
426             } else {
427                 qemu_bh_cancel(q->tx_bh);
428             }
429             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
430                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
431                 vdev->vm_running) {
432                 /* If tx is waiting, we likely have some packets in the
433                  * tx queue and have disabled notification */
434                 q->tx_waiting = 0;
435                 virtio_queue_set_notification(q->tx_vq, 1);
436                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
437             }
438         }
439     }
440 }
441 
442 static void virtio_net_set_link_status(NetClientState *nc)
443 {
444     VirtIONet *n = qemu_get_nic_opaque(nc);
445     VirtIODevice *vdev = VIRTIO_DEVICE(n);
446     uint16_t old_status = n->status;
447 
448     if (nc->link_down)
449         n->status &= ~VIRTIO_NET_S_LINK_UP;
450     else
451         n->status |= VIRTIO_NET_S_LINK_UP;
452 
453     if (n->status != old_status)
454         virtio_notify_config(vdev);
455 
456     virtio_net_set_status(vdev, vdev->status);
457 }
458 
459 static void rxfilter_notify(NetClientState *nc)
460 {
461     VirtIONet *n = qemu_get_nic_opaque(nc);
462 
463     if (nc->rxfilter_notify_enabled) {
464         char *path = object_get_canonical_path(OBJECT(n->qdev));
465         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
466         g_free(path);
467 
468         /* disable event notification to avoid event flooding */
469         nc->rxfilter_notify_enabled = 0;
470     }
471 }
472 
473 static intList *get_vlan_table(VirtIONet *n)
474 {
475     intList *list;
476     int i, j;
477 
478     list = NULL;
479     for (i = 0; i < MAX_VLAN >> 5; i++) {
480         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
481             if (n->vlans[i] & (1U << j)) {
482                 QAPI_LIST_PREPEND(list, (i << 5) + j);
483             }
484         }
485     }
486 
487     return list;
488 }
489 
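/*
 * Illustrative sketch (not called anywhere): the VLAN filter is a
 * MAX_VLAN-bit bitmap held in 32-bit words, so vid >> 5 selects the word
 * and vid & 0x1f the bit, mirroring the scan in get_vlan_table() above.
 */
static bool G_GNUC_UNUSED example_vlan_is_allowed(VirtIONet *n, uint16_t vid)
{
    return vid < MAX_VLAN && (n->vlans[vid >> 5] & (1U << (vid & 0x1f)));
}
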
490 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
491 {
492     VirtIONet *n = qemu_get_nic_opaque(nc);
493     VirtIODevice *vdev = VIRTIO_DEVICE(n);
494     RxFilterInfo *info;
495     strList *str_list;
496     int i;
497 
498     info = g_malloc0(sizeof(*info));
499     info->name = g_strdup(nc->name);
500     info->promiscuous = n->promisc;
501 
502     if (n->nouni) {
503         info->unicast = RX_STATE_NONE;
504     } else if (n->alluni) {
505         info->unicast = RX_STATE_ALL;
506     } else {
507         info->unicast = RX_STATE_NORMAL;
508     }
509 
510     if (n->nomulti) {
511         info->multicast = RX_STATE_NONE;
512     } else if (n->allmulti) {
513         info->multicast = RX_STATE_ALL;
514     } else {
515         info->multicast = RX_STATE_NORMAL;
516     }
517 
518     info->broadcast_allowed = !n->nobcast;
519     info->multicast_overflow = n->mac_table.multi_overflow;
520     info->unicast_overflow = n->mac_table.uni_overflow;
521 
522     info->main_mac = qemu_mac_strdup_printf(n->mac);
523 
524     str_list = NULL;
525     for (i = 0; i < n->mac_table.first_multi; i++) {
526         QAPI_LIST_PREPEND(str_list,
527                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
528     }
529     info->unicast_table = str_list;
530 
531     str_list = NULL;
532     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
533         QAPI_LIST_PREPEND(str_list,
534                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
535     }
536     info->multicast_table = str_list;
537     info->vlan_table = get_vlan_table(n);
538 
539     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
540         info->vlan = RX_STATE_ALL;
541     } else if (!info->vlan_table) {
542         info->vlan = RX_STATE_NONE;
543     } else {
544         info->vlan = RX_STATE_NORMAL;
545     }
546 
547     /* enable event notification after query */
548     nc->rxfilter_notify_enabled = 1;
549 
550     return info;
551 }
552 
553 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
554 {
555     VirtIONet *n = VIRTIO_NET(vdev);
556     NetClientState *nc;
557 
558     /* validate queue_index and skip for cvq */
559     if (queue_index >= n->max_queue_pairs * 2) {
560         return;
561     }
562 
563     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
564 
565     if (!nc->peer) {
566         return;
567     }
568 
569     if (get_vhost_net(nc->peer) &&
570         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
571         vhost_net_virtqueue_reset(vdev, nc, queue_index);
572     }
573 
574     flush_or_purge_queued_packets(nc);
575 }
576 
577 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
578 {
579     VirtIONet *n = VIRTIO_NET(vdev);
580     NetClientState *nc;
581     int r;
582 
583     /* validate queue_index and skip for cvq */
584     if (queue_index >= n->max_queue_pairs * 2) {
585         return;
586     }
587 
588     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
589 
590     if (!nc->peer || !vdev->vhost_started) {
591         return;
592     }
593 
594     if (get_vhost_net(nc->peer) &&
595         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
596         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
597         if (r < 0) {
598             error_report("unable to restart vhost net virtqueue: %d, "
599                          "when resetting the queue", queue_index);
600         }
601     }
602 }
603 
604 static void peer_test_vnet_hdr(VirtIONet *n)
605 {
606     NetClientState *nc = qemu_get_queue(n->nic);
607     if (!nc->peer) {
608         return;
609     }
610 
611     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
612 }
613 
614 static int peer_has_vnet_hdr(VirtIONet *n)
615 {
616     return n->has_vnet_hdr;
617 }
618 
619 static int peer_has_ufo(VirtIONet *n)
620 {
621     if (!peer_has_vnet_hdr(n))
622         return 0;
623 
624     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
625 
626     return n->has_ufo;
627 }
628 
629 static int peer_has_uso(VirtIONet *n)
630 {
631     if (!peer_has_vnet_hdr(n)) {
632         return 0;
633     }
634 
635     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
636 }
637 
638 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
639                                        int version_1, int hash_report)
640 {
641     int i;
642     NetClientState *nc;
643 
644     n->mergeable_rx_bufs = mergeable_rx_bufs;
645 
646     if (version_1) {
647         n->guest_hdr_len = hash_report ?
648             sizeof(struct virtio_net_hdr_v1_hash) :
649             sizeof(struct virtio_net_hdr_mrg_rxbuf);
650         n->rss_data.populate_hash = !!hash_report;
651     } else {
652         n->guest_hdr_len = n->mergeable_rx_bufs ?
653             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
654             sizeof(struct virtio_net_hdr);
655         n->rss_data.populate_hash = false;
656     }
657 
658     for (i = 0; i < n->max_queue_pairs; i++) {
659         nc = qemu_get_subqueue(n->nic, i);
660 
661         if (peer_has_vnet_hdr(n) &&
662             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
663             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
664             n->host_hdr_len = n->guest_hdr_len;
665         }
666     }
667 }
668 
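/*
 * Resulting guest header sizes for reference, assuming the standard
 * virtio-net header layouts:
 *   legacy:                  sizeof(struct virtio_net_hdr)           = 10
 *   legacy + MRG_RXBUF:      sizeof(struct virtio_net_hdr_mrg_rxbuf) = 12
 *   VERSION_1:               sizeof(struct virtio_net_hdr_mrg_rxbuf) = 12
 *   VERSION_1 + HASH_REPORT: sizeof(struct virtio_net_hdr_v1_hash)   = 20
 */
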
669 static int virtio_net_max_tx_queue_size(VirtIONet *n)
670 {
671     NetClientState *peer = n->nic_conf.peers.ncs[0];
672 
673     /*
674      * Backends other than vhost-user or vhost-vdpa don't support changing
675      * the max queue size.
676      */
677     if (!peer) {
678         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
679     }
680 
681     switch (peer->info->type) {
682     case NET_CLIENT_DRIVER_VHOST_USER:
683     case NET_CLIENT_DRIVER_VHOST_VDPA:
684         return VIRTQUEUE_MAX_SIZE;
685     default:
686         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
687     }
688 }
689 
690 static int peer_attach(VirtIONet *n, int index)
691 {
692     NetClientState *nc = qemu_get_subqueue(n->nic, index);
693 
694     if (!nc->peer) {
695         return 0;
696     }
697 
698     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
699         vhost_set_vring_enable(nc->peer, 1);
700     }
701 
702     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
703         return 0;
704     }
705 
706     if (n->max_queue_pairs == 1) {
707         return 0;
708     }
709 
710     return tap_enable(nc->peer);
711 }
712 
713 static int peer_detach(VirtIONet *n, int index)
714 {
715     NetClientState *nc = qemu_get_subqueue(n->nic, index);
716 
717     if (!nc->peer) {
718         return 0;
719     }
720 
721     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
722         vhost_set_vring_enable(nc->peer, 0);
723     }
724 
725     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
726         return 0;
727     }
728 
729     return tap_disable(nc->peer);
730 }
731 
732 static void virtio_net_set_queue_pairs(VirtIONet *n)
733 {
734     int i;
735     int r;
736 
737     if (n->nic->peer_deleted) {
738         return;
739     }
740 
741     for (i = 0; i < n->max_queue_pairs; i++) {
742         if (i < n->curr_queue_pairs) {
743             r = peer_attach(n, i);
744             assert(!r);
745         } else {
746             r = peer_detach(n, i);
747             assert(!r);
748         }
749     }
750 }
751 
752 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
753 
754 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
755                                         Error **errp)
756 {
757     VirtIONet *n = VIRTIO_NET(vdev);
758     NetClientState *nc = qemu_get_queue(n->nic);
759 
760     /* First, sync all possible supported virtio-net features */
761     features |= n->host_features;
762 
763     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
764 
765     if (!peer_has_vnet_hdr(n)) {
766         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
767         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
768         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
769         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
770 
771         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
772         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
773         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
774         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
775 
776         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
777         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
778         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
779 
780         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
781     }
782 
783     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
784         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
785         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
786     }
787 
788     if (!peer_has_uso(n)) {
789         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
790         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
791         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
792     }
793 
794     if (!get_vhost_net(nc->peer)) {
795         return features;
796     }
797 
798     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
799         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
800     }
801     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
802     vdev->backend_features = features;
803 
804     if (n->mtu_bypass_backend &&
805             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
806         features |= (1ULL << VIRTIO_NET_F_MTU);
807     }
808 
809     /*
810      * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
811      * being enabled. This happens in the vDPA case.
812      *
813      * Make sure the feature set is not incoherent, as the driver could refuse
814      * to start.
815      *
816      * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
817      * helping the guest notify its new location with vDPA devices that do not
818      * support it.
819      */
820     if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
821         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
822     }
823 
824     return features;
825 }
826 
827 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
828 {
829     uint64_t features = 0;
830 
831     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
832      * but also these: */
833     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
834     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
835     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
836     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
837     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
838 
839     return features;
840 }
841 
842 static void virtio_net_apply_guest_offloads(VirtIONet *n)
843 {
844     qemu_set_offload(qemu_get_queue(n->nic)->peer,
845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
846             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
847             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
848             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
849             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
850             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
851             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
852 }
853 
854 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
855 {
856     static const uint64_t guest_offloads_mask =
857         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
858         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
859         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
860         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
861         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
862         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
863         (1ULL << VIRTIO_NET_F_GUEST_USO6);
864 
865     return guest_offloads_mask & features;
866 }
867 
868 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
869 {
870     VirtIODevice *vdev = VIRTIO_DEVICE(n);
871     return virtio_net_guest_offloads_by_features(vdev->guest_features);
872 }
873 
874 typedef struct {
875     VirtIONet *n;
876     DeviceState *dev;
877 } FailoverDevice;
878 
879 /**
880  * Set the failover primary device
881  *
882  * @dev: the device being walked over by qbus_walk_children()
883  * @opaque: FailoverDevice to fill in when the primary device is found
884  * Returns: 1 to stop the walk once the primary device is found, 0 otherwise
885  */
886 static int failover_set_primary(DeviceState *dev, void *opaque)
887 {
888     FailoverDevice *fdev = opaque;
889     PCIDevice *pci_dev = (PCIDevice *)
890         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
891 
892     if (!pci_dev) {
893         return 0;
894     }
895 
896     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
897         fdev->dev = dev;
898         return 1;
899     }
900 
901     return 0;
902 }
903 
904 /**
905  * Find the primary device for this failover virtio-net
906  *
907  * @n: VirtIONet device
908  * Returns: the primary DeviceState, or NULL if it was not found
909  */
910 static DeviceState *failover_find_primary_device(VirtIONet *n)
911 {
912     FailoverDevice fdev = {
913         .n = n,
914     };
915 
916     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
917                        NULL, NULL, &fdev);
918     return fdev.dev;
919 }
920 
921 static void failover_add_primary(VirtIONet *n, Error **errp)
922 {
923     Error *err = NULL;
924     DeviceState *dev = failover_find_primary_device(n);
925 
926     if (dev) {
927         return;
928     }
929 
930     if (!n->primary_opts) {
931         error_setg(errp, "Primary device not found");
932         error_append_hint(errp, "Virtio-net failover will not work. Make "
933                           "sure the primary device has the parameter"
934                           " failover_pair_id=%s\n", n->netclient_name);
935         return;
936     }
937 
938     dev = qdev_device_add_from_qdict(n->primary_opts,
939                                      n->primary_opts_from_json,
940                                      &err);
941     if (err) {
942         qobject_unref(n->primary_opts);
943         n->primary_opts = NULL;
944     } else {
945         object_unref(OBJECT(dev));
946     }
947     error_propagate(errp, err);
948 }
949 
950 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
951 {
952     VirtIONet *n = VIRTIO_NET(vdev);
953     Error *err = NULL;
954     int i;
955 
956     if (n->mtu_bypass_backend &&
957             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
958         features &= ~(1ULL << VIRTIO_NET_F_MTU);
959     }
960 
961     virtio_net_set_multiqueue(n,
962                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
963                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
964 
965     virtio_net_set_mrg_rx_bufs(n,
966                                virtio_has_feature(features,
967                                                   VIRTIO_NET_F_MRG_RXBUF),
968                                virtio_has_feature(features,
969                                                   VIRTIO_F_VERSION_1),
970                                virtio_has_feature(features,
971                                                   VIRTIO_NET_F_HASH_REPORT));
972 
973     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
974         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
975     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
976         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
977     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
978 
979     if (n->has_vnet_hdr) {
980         n->curr_guest_offloads =
981             virtio_net_guest_offloads_by_features(features);
982         virtio_net_apply_guest_offloads(n);
983     }
984 
985     for (i = 0;  i < n->max_queue_pairs; i++) {
986         NetClientState *nc = qemu_get_subqueue(n->nic, i);
987 
988         if (!get_vhost_net(nc->peer)) {
989             continue;
990         }
991         vhost_net_ack_features(get_vhost_net(nc->peer), features);
992 
993         /*
994          * Keep acked_features in NetVhostUserState up-to-date so it
995          * doesn't miss any features configured by the guest virtio driver.
996          */
997         vhost_net_save_acked_features(nc->peer);
998     }
999 
1000     if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1001         memset(n->vlans, 0xff, MAX_VLAN >> 3);
1002     }
1003 
1004     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1005         qapi_event_send_failover_negotiated(n->netclient_name);
1006         qatomic_set(&n->failover_primary_hidden, false);
1007         failover_add_primary(n, &err);
1008         if (err) {
1009             if (!qtest_enabled()) {
1010                 warn_report_err(err);
1011             } else {
1012                 error_free(err);
1013             }
1014         }
1015     }
1016 }
1017 
1018 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1019                                      struct iovec *iov, unsigned int iov_cnt)
1020 {
1021     uint8_t on;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024 
1025     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1026     if (s != sizeof(on)) {
1027         return VIRTIO_NET_ERR;
1028     }
1029 
1030     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1031         n->promisc = on;
1032     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1033         n->allmulti = on;
1034     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1035         n->alluni = on;
1036     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1037         n->nomulti = on;
1038     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1039         n->nouni = on;
1040     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1041         n->nobcast = on;
1042     } else {
1043         return VIRTIO_NET_ERR;
1044     }
1045 
1046     rxfilter_notify(nc);
1047 
1048     return VIRTIO_NET_OK;
1049 }
1050 
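/*
 * For reference, an RX-mode request arrives on the control virtqueue as
 * (per the virtio spec; the header is stripped by the caller before the
 * handler above sees the "on" byte):
 *
 *   struct virtio_net_ctrl_hdr { u8 class; u8 cmd; };  (out buffer)
 *   u8 on;                                             (out buffer)
 *   virtio_net_ctrl_ack status;                        (in buffer, 1 byte)
 */
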
1051 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1052                                      struct iovec *iov, unsigned int iov_cnt)
1053 {
1054     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1055     uint64_t offloads;
1056     size_t s;
1057 
1058     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1059         return VIRTIO_NET_ERR;
1060     }
1061 
1062     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1063     if (s != sizeof(offloads)) {
1064         return VIRTIO_NET_ERR;
1065     }
1066 
1067     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1068         uint64_t supported_offloads;
1069 
1070         offloads = virtio_ldq_p(vdev, &offloads);
1071 
1072         if (!n->has_vnet_hdr) {
1073             return VIRTIO_NET_ERR;
1074         }
1075 
1076         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1077             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1078         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1079             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1080         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1081 
1082         supported_offloads = virtio_net_supported_guest_offloads(n);
1083         if (offloads & ~supported_offloads) {
1084             return VIRTIO_NET_ERR;
1085         }
1086 
1087         n->curr_guest_offloads = offloads;
1088         virtio_net_apply_guest_offloads(n);
1089 
1090         return VIRTIO_NET_OK;
1091     } else {
1092         return VIRTIO_NET_ERR;
1093     }
1094 }
1095 
1096 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1097                                  struct iovec *iov, unsigned int iov_cnt)
1098 {
1099     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1100     struct virtio_net_ctrl_mac mac_data;
1101     size_t s;
1102     NetClientState *nc = qemu_get_queue(n->nic);
1103 
1104     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1105         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1106             return VIRTIO_NET_ERR;
1107         }
1108         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1109         assert(s == sizeof(n->mac));
1110         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1111         rxfilter_notify(nc);
1112 
1113         return VIRTIO_NET_OK;
1114     }
1115 
1116     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1117         return VIRTIO_NET_ERR;
1118     }
1119 
1120     int in_use = 0;
1121     int first_multi = 0;
1122     uint8_t uni_overflow = 0;
1123     uint8_t multi_overflow = 0;
1124     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1125 
1126     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1127                    sizeof(mac_data.entries));
1128     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1129     if (s != sizeof(mac_data.entries)) {
1130         goto error;
1131     }
1132     iov_discard_front(&iov, &iov_cnt, s);
1133 
1134     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1135         goto error;
1136     }
1137 
1138     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1139         s = iov_to_buf(iov, iov_cnt, 0, macs,
1140                        mac_data.entries * ETH_ALEN);
1141         if (s != mac_data.entries * ETH_ALEN) {
1142             goto error;
1143         }
1144         in_use += mac_data.entries;
1145     } else {
1146         uni_overflow = 1;
1147     }
1148 
1149     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1150 
1151     first_multi = in_use;
1152 
1153     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1154                    sizeof(mac_data.entries));
1155     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1156     if (s != sizeof(mac_data.entries)) {
1157         goto error;
1158     }
1159 
1160     iov_discard_front(&iov, &iov_cnt, s);
1161 
1162     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1163         goto error;
1164     }
1165 
1166     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1167         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1168                        mac_data.entries * ETH_ALEN);
1169         if (s != mac_data.entries * ETH_ALEN) {
1170             goto error;
1171         }
1172         in_use += mac_data.entries;
1173     } else {
1174         multi_overflow = 1;
1175     }
1176 
1177     n->mac_table.in_use = in_use;
1178     n->mac_table.first_multi = first_multi;
1179     n->mac_table.uni_overflow = uni_overflow;
1180     n->mac_table.multi_overflow = multi_overflow;
1181     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1182     g_free(macs);
1183     rxfilter_notify(nc);
1184 
1185     return VIRTIO_NET_OK;
1186 
1187 error:
1188     g_free(macs);
1189     return VIRTIO_NET_ERR;
1190 }
1191 
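/*
 * Sketch of the VIRTIO_NET_CTRL_MAC_TABLE_SET payload parsed above, per the
 * virtio spec: two struct virtio_net_ctrl_mac blocks back to back, unicast
 * addresses first, then multicast:
 *
 *   le32 entries; u8 macs[entries][ETH_ALEN];   (unicast)
 *   le32 entries; u8 macs[entries][ETH_ALEN];   (multicast)
 */
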
1192 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1193                                         struct iovec *iov, unsigned int iov_cnt)
1194 {
1195     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1196     uint16_t vid;
1197     size_t s;
1198     NetClientState *nc = qemu_get_queue(n->nic);
1199 
1200     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1201     vid = virtio_lduw_p(vdev, &vid);
1202     if (s != sizeof(vid)) {
1203         return VIRTIO_NET_ERR;
1204     }
1205 
1206     if (vid >= MAX_VLAN)
1207         return VIRTIO_NET_ERR;
1208 
1209     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1210         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1211     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1212         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1213     else
1214         return VIRTIO_NET_ERR;
1215 
1216     rxfilter_notify(nc);
1217 
1218     return VIRTIO_NET_OK;
1219 }
1220 
1221 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1222                                       struct iovec *iov, unsigned int iov_cnt)
1223 {
1224     trace_virtio_net_handle_announce(n->announce_timer.round);
1225     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1226         n->status & VIRTIO_NET_S_ANNOUNCE) {
1227         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1228         if (n->announce_timer.round) {
1229             qemu_announce_timer_step(&n->announce_timer);
1230         }
1231         return VIRTIO_NET_OK;
1232     } else {
1233         return VIRTIO_NET_ERR;
1234     }
1235 }
1236 
1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1238 {
1239     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1240     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1241         return false;
1242     }
1243 
1244     return nc->info->set_steering_ebpf(nc, prog_fd);
1245 }
1246 
1247 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1248                                    struct EBPFRSSConfig *config)
1249 {
1250     config->redirect = data->redirect;
1251     config->populate_hash = data->populate_hash;
1252     config->hash_types = data->hash_types;
1253     config->indirections_len = data->indirections_len;
1254     config->default_queue = data->default_queue;
1255 }
1256 
1257 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1258 {
1259     struct EBPFRSSConfig config = {};
1260 
1261     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1262         return false;
1263     }
1264 
1265     rss_data_to_rss_config(&n->rss_data, &config);
1266 
1267     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1268                           n->rss_data.indirections_table, n->rss_data.key)) {
1269         return false;
1270     }
1271 
1272     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1273         return false;
1274     }
1275 
1276     return true;
1277 }
1278 
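/*
 * Passing a program fd of -1 asks the backend to detach any previously
 * attached steering eBPF program (for TAP this should map to the
 * TUNSETSTEERINGEBPF ioctl); that is how RSS is torn down here.
 */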
1279 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1280 {
1281     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1282 }
1283 
1284 static void virtio_net_commit_rss_config(VirtIONet *n)
1285 {
1286     if (n->rss_data.enabled) {
1287         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
1288         if (n->rss_data.populate_hash) {
1289             virtio_net_detach_epbf_rss(n);
1290         } else if (!virtio_net_attach_epbf_rss(n)) {
1291             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1292                 warn_report("Can't load eBPF RSS for vhost");
1293             } else {
1294                 warn_report("Can't load eBPF RSS - falling back to software RSS");
1295                 n->rss_data.enabled_software_rss = true;
1296             }
1297         }
1298 
1299         trace_virtio_net_rss_enable(n->rss_data.hash_types,
1300                                     n->rss_data.indirections_len,
1301                                     sizeof(n->rss_data.key));
1302     } else {
1303         virtio_net_detach_epbf_rss(n);
1304         trace_virtio_net_rss_disable();
1305     }
1306 }
1307 
1308 static void virtio_net_disable_rss(VirtIONet *n)
1309 {
1310     if (!n->rss_data.enabled) {
1311         return;
1312     }
1313 
1314     n->rss_data.enabled = false;
1315     virtio_net_commit_rss_config(n);
1316 }
1317 
1318 static bool virtio_net_load_ebpf_fds(VirtIONet *n)
1319 {
1320     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1321     bool ret = true;
1322     int i = 0;
1323 
1324     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1325         warn_report("Expected %d file descriptors but got %d",
1326                     EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1327         return false;
1328     }
1329 
1330     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1331         fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i],
1332                                   &error_warn);
1333         if (fds[i] < 0) {
1334             ret = false;
1335             goto exit;
1336         }
1337     }
1338 
1339     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]);
1340 
1341 exit:
1342     if (!ret) {
1343         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1344             close(fds[i]);
1345         }
1346     }
1347 
1348     return ret;
1349 }
1350 
1351 static bool virtio_net_load_ebpf(VirtIONet *n)
1352 {
1353     bool ret = false;
1354 
1355     if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1356         if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) {
1357             ret = ebpf_rss_load(&n->ebpf_rss);
1358         }
1359     }
1360 
1361     return ret;
1362 }
1363 
1364 static void virtio_net_unload_ebpf(VirtIONet *n)
1365 {
1366     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1367     ebpf_rss_unload(&n->ebpf_rss);
1368 }
1369 
1370 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1371                                       struct iovec *iov,
1372                                       unsigned int iov_cnt,
1373                                       bool do_rss)
1374 {
1375     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1376     struct virtio_net_rss_config cfg;
1377     size_t s, offset = 0, size_get;
1378     uint16_t queue_pairs, i;
1379     struct {
1380         uint16_t us;
1381         uint8_t b;
1382     } QEMU_PACKED temp;
1383     const char *err_msg = "";
1384     uint32_t err_value = 0;
1385 
1386     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1387         err_msg = "RSS is not negotiated";
1388         goto error;
1389     }
1390     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1391         err_msg = "Hash report is not negotiated";
1392         goto error;
1393     }
1394     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1395     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1396     if (s != size_get) {
1397         err_msg = "Short command buffer";
1398         err_value = (uint32_t)s;
1399         goto error;
1400     }
1401     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1402     n->rss_data.indirections_len =
1403         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1404     n->rss_data.indirections_len++;
1405     if (!do_rss) {
1406         n->rss_data.indirections_len = 1;
1407     }
1408     if (!is_power_of_2(n->rss_data.indirections_len)) {
1409         err_msg = "Invalid size of indirection table";
1410         err_value = n->rss_data.indirections_len;
1411         goto error;
1412     }
1413     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1414         err_msg = "Too large indirection table";
1415         err_value = n->rss_data.indirections_len;
1416         goto error;
1417     }
1418     n->rss_data.default_queue = do_rss ?
1419         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1420     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1421         err_msg = "Invalid default queue";
1422         err_value = n->rss_data.default_queue;
1423         goto error;
1424     }
1425     offset += size_get;
1426     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1427     g_free(n->rss_data.indirections_table);
1428     n->rss_data.indirections_table = g_malloc(size_get);
1429     if (!n->rss_data.indirections_table) {
1430         err_msg = "Can't allocate indirections table";
1431         err_value = n->rss_data.indirections_len;
1432         goto error;
1433     }
1434     s = iov_to_buf(iov, iov_cnt, offset,
1435                    n->rss_data.indirections_table, size_get);
1436     if (s != size_get) {
1437         err_msg = "Short indirection table buffer";
1438         err_value = (uint32_t)s;
1439         goto error;
1440     }
1441     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1442         uint16_t val = n->rss_data.indirections_table[i];
1443         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1444     }
1445     offset += size_get;
1446     size_get = sizeof(temp);
1447     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1448     if (s != size_get) {
1449         err_msg = "Can't get queue_pairs";
1450         err_value = (uint32_t)s;
1451         goto error;
1452     }
1453     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1454     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1455         err_msg = "Invalid number of queue_pairs";
1456         err_value = queue_pairs;
1457         goto error;
1458     }
1459     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1460         err_msg = "Invalid key size";
1461         err_value = temp.b;
1462         goto error;
1463     }
1464     if (!temp.b && n->rss_data.hash_types) {
1465         err_msg = "No key provided";
1466         err_value = 0;
1467         goto error;
1468     }
1469     if (!temp.b && !n->rss_data.hash_types) {
1470         virtio_net_disable_rss(n);
1471         return queue_pairs;
1472     }
1473     offset += size_get;
1474     size_get = temp.b;
1475     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1476     if (s != size_get) {
1477         err_msg = "Can get key buffer";
1478         err_value = (uint32_t)s;
1479         goto error;
1480     }
1481     n->rss_data.enabled = true;
1482     virtio_net_commit_rss_config(n);
1483     return queue_pairs;
1484 error:
1485     trace_virtio_net_rss_error(err_msg, err_value);
1486     virtio_net_disable_rss(n);
1487     return 0;
1488 }
1489 
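/*
 * Layout of struct virtio_net_rss_config parsed above (little-endian for
 * VERSION_1 devices, per the virtio spec):
 *
 *   le32 hash_types;
 *   le16 indirection_table_mask;         (table length - 1)
 *   le16 unclassified_queue;             (default queue for RSS)
 *   le16 indirection_table[len];
 *   le16 max_tx_vq;                      (read into temp.us above)
 *   u8   hash_key_length;                (temp.b)
 *   u8   hash_key_data[hash_key_length];
 */
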
1490 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1491                                 struct iovec *iov, unsigned int iov_cnt)
1492 {
1493     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1494     uint16_t queue_pairs;
1495     NetClientState *nc = qemu_get_queue(n->nic);
1496 
1497     virtio_net_disable_rss(n);
1498     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1499         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1500         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1501     }
1502     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1503         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1504     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1505         struct virtio_net_ctrl_mq mq;
1506         size_t s;
1507         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1508             return VIRTIO_NET_ERR;
1509         }
1510         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1511         if (s != sizeof(mq)) {
1512             return VIRTIO_NET_ERR;
1513         }
1514         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1515 
1516     } else {
1517         return VIRTIO_NET_ERR;
1518     }
1519 
1520     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1521         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1522         queue_pairs > n->max_queue_pairs ||
1523         !n->multiqueue) {
1524         return VIRTIO_NET_ERR;
1525     }
1526 
1527     n->curr_queue_pairs = queue_pairs;
1528     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1529         /*
1530          * Avoid updating the backend for a vdpa device: We're only interested
1531          * in updating the device model queues.
1532          */
1533         return VIRTIO_NET_OK;
1534     }
1535     /* Stop the backend before changing the number of queue_pairs to
1536      * avoid handling a disabled queue */
1537     virtio_net_set_status(vdev, vdev->status);
1538     virtio_net_set_queue_pairs(n);
1539 
1540     return VIRTIO_NET_OK;
1541 }
1542 
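/*
 * For reference, the VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET payload handled above
 * is simply:
 *
 *   struct virtio_net_ctrl_mq { le16 virtqueue_pairs; };
 *
 * with virtqueue_pairs constrained to
 * [VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX],
 * i.e. 1 to 0x8000 per the spec.
 */
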
1543 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1544                                   const struct iovec *in_sg, unsigned in_num,
1545                                   const struct iovec *out_sg,
1546                                   unsigned out_num)
1547 {
1548     VirtIONet *n = VIRTIO_NET(vdev);
1549     struct virtio_net_ctrl_hdr ctrl;
1550     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1551     size_t s;
1552     struct iovec *iov, *iov2;
1553 
1554     if (iov_size(in_sg, in_num) < sizeof(status) ||
1555         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1556         virtio_error(vdev, "virtio-net ctrl missing headers");
1557         return 0;
1558     }
1559 
1560     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1561     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1562     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1563     if (s != sizeof(ctrl)) {
1564         status = VIRTIO_NET_ERR;
1565     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1566         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1567     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1568         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1569     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1570         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1571     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1572         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1573     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1574         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1575     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1576         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1577     }
1578 
1579     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1580     assert(s == sizeof(status));
1581 
1582     g_free(iov2);
1583     return sizeof(status);
1584 }
1585 
1586 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1587 {
1588     VirtQueueElement *elem;
1589 
1590     for (;;) {
1591         size_t written;
1592         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1593         if (!elem) {
1594             break;
1595         }
1596 
1597         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1598                                              elem->out_sg, elem->out_num);
1599         if (written > 0) {
1600             virtqueue_push(vq, elem, written);
1601             virtio_notify(vdev, vq);
1602             g_free(elem);
1603         } else {
1604             virtqueue_detach_element(vq, elem, 0);
1605             g_free(elem);
1606             break;
1607         }
1608     }
1609 }
1610 
1611 /* RX */
1612 
1613 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1614 {
1615     VirtIONet *n = VIRTIO_NET(vdev);
1616     int queue_index = vq2q(virtio_get_queue_index(vq));
1617 
1618     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1619 }
1620 
1621 static bool virtio_net_can_receive(NetClientState *nc)
1622 {
1623     VirtIONet *n = qemu_get_nic_opaque(nc);
1624     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1625     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1626 
1627     if (!vdev->vm_running) {
1628         return false;
1629     }
1630 
1631     if (nc->queue_index >= n->curr_queue_pairs) {
1632         return false;
1633     }
1634 
1635     if (!virtio_queue_ready(q->rx_vq) ||
1636         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1637         return false;
1638     }
1639 
1640     return true;
1641 }
1642 
1643 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1644 {
1645     int opaque;
1646     unsigned int in_bytes;
1647     VirtIONet *n = q->n;
1648 
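         /*
          * If the available buffers look insufficient, re-enable
          * notification and check again; this closes the race where the
          * guest adds buffers between the check and the notification enable.
          */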
1649     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1650         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1651                                            bufsize, 0);
1652         /* Enough buffer space, disable notification */
1653         if (bufsize <= in_bytes) {
1654             break;
1655         }
1656 
1657         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1658             /* Guest has added some buffers, try again */
1659             continue;
1660         } else {
1661             return 0;
1662         }
1663     }
1664 
1665     virtio_queue_set_notification(q->rx_vq, 0);
1666 
1667     return 1;
1668 }
1669 
1670 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1671 {
1672     virtio_tswap16s(vdev, &hdr->hdr_len);
1673     virtio_tswap16s(vdev, &hdr->gso_size);
1674     virtio_tswap16s(vdev, &hdr->csum_start);
1675     virtio_tswap16s(vdev, &hdr->csum_offset);
1676 }
1677 
1678 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1679  * it never finds out that the packets don't have valid checksums.  This
1680  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1681  * fix this with Xen but it hasn't appeared in an upstream release of
1682  * dhclient yet.
1683  *
1684  * To avoid breaking existing guests, we catch udp packets and add
1685  * checksums.  This is terrible but it's better than hacking the guest
1686  * kernels.
1687  *
1688  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1689  * we should provide a mechanism to disable it to avoid polluting the host
1690  * cache.
1691  */
1692 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1693                                         uint8_t *buf, size_t size)
1694 {
1695     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1696         (size > 27 && size < 1500) && /* normal sized MTU */
1697         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1698         (buf[23] == 17) && /* ip.protocol == UDP */
1699         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1700         net_checksum_calculate(buf, size, CSUM_UDP);
1701         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1702     }
1703 }
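     /*
      * The hard-coded offsets above assume an untagged Ethernet frame:
      * bytes 12-13 are the ethertype, byte 23 is the IPv4 protocol field
      * (14-byte Ethernet header + 9-byte offset) and bytes 34-35 are the
      * UDP source port (14 + 20-byte IPv4 header without options).
      */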
1704 
1705 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1706                            const void *buf, size_t size)
1707 {
1708     if (n->has_vnet_hdr) {
1709         /* FIXME this cast is evil */
1710         void *wbuf = (void *)buf;
1711         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1712                                     size - n->host_hdr_len);
1713 
1714         if (n->needs_vnet_hdr_swap) {
1715             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1716         }
1717         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1718     } else {
1719         struct virtio_net_hdr hdr = {
1720             .flags = 0,
1721             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1722         };
1723         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1724     }
1725 }
1726 
1727 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1728 {
1729     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1730     static const uint8_t vlan[] = {0x81, 0x00};
1731     uint8_t *ptr = (uint8_t *)buf;
1732     int i;
1733 
1734     if (n->promisc)
1735         return 1;
1736 
1737     ptr += n->host_hdr_len;
1738 
1739     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1740         int vid = lduw_be_p(ptr + 14) & 0xfff;
1741         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1742             return 0;
1743     }
1744 
1745     if (ptr[0] & 1) { /* multicast */
1746         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1747             return !n->nobcast;
1748         } else if (n->nomulti) {
1749             return 0;
1750         } else if (n->allmulti || n->mac_table.multi_overflow) {
1751             return 1;
1752         }
1753 
1754         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1755             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1756                 return 1;
1757             }
1758         }
1759     } else { /* unicast */
1760         if (n->nouni) {
1761             return 0;
1762         } else if (n->alluni || n->mac_table.uni_overflow) {
1763             return 1;
1764         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1765             return 1;
1766         }
1767 
1768         for (i = 0; i < n->mac_table.first_multi; i++) {
1769             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1770                 return 1;
1771             }
1772         }
1773     }
1774 
1775     return 0;
1776 }
1777 
1778 static uint8_t virtio_net_get_hash_type(bool hasip4,
1779                                         bool hasip6,
1780                                         EthL4HdrProto l4hdr_proto,
1781                                         uint32_t types)
1782 {
1783     if (hasip4) {
1784         switch (l4hdr_proto) {
1785         case ETH_L4_HDR_PROTO_TCP:
1786             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1787                 return NetPktRssIpV4Tcp;
1788             }
1789             break;
1790 
1791         case ETH_L4_HDR_PROTO_UDP:
1792             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1793                 return NetPktRssIpV4Udp;
1794             }
1795             break;
1796 
1797         default:
1798             break;
1799         }
1800 
1801         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1802             return NetPktRssIpV4;
1803         }
1804     } else if (hasip6) {
1805         switch (l4hdr_proto) {
1806         case ETH_L4_HDR_PROTO_TCP:
1807             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1808                 return NetPktRssIpV6TcpEx;
1809             }
1810             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1811                 return NetPktRssIpV6Tcp;
1812             }
1813             break;
1814 
1815         case ETH_L4_HDR_PROTO_UDP:
1816             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1817                 return NetPktRssIpV6UdpEx;
1818             }
1819             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1820                 return NetPktRssIpV6Udp;
1821             }
1822             break;
1823 
1824         default:
1825             break;
1826         }
1827 
1828         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1829             return NetPktRssIpV6Ex;
1830         }
1831         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1832             return NetPktRssIpV6;
1833         }
1834     }
1835     return 0xff;
1836 }
1837 
1838 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1839                                   size_t size,
1840                                   struct virtio_net_hdr_v1_hash *hdr)
1841 {
1842     VirtIONet *n = qemu_get_nic_opaque(nc);
1843     unsigned int index = nc->queue_index, new_index = index;
1844     struct NetRxPkt *pkt = n->rx_pkt;
1845     uint8_t net_hash_type;
1846     uint32_t hash;
1847     bool hasip4, hasip6;
1848     EthL4HdrProto l4hdr_proto;
1849     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1850         VIRTIO_NET_HASH_REPORT_IPv4,
1851         VIRTIO_NET_HASH_REPORT_TCPv4,
1852         VIRTIO_NET_HASH_REPORT_TCPv6,
1853         VIRTIO_NET_HASH_REPORT_IPv6,
1854         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1855         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1856         VIRTIO_NET_HASH_REPORT_UDPv4,
1857         VIRTIO_NET_HASH_REPORT_UDPv6,
1858         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1859     };
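         /*
          * reports[] is indexed by the NetPktRss* values returned by
          * virtio_net_get_hash_type(); the order must match that enum.
          */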
1860     struct iovec iov = {
1861         .iov_base = (void *)buf,
1862         .iov_len = size
1863     };
1864 
1865     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1866     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1867     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1868                                              n->rss_data.hash_types);
1869     if (net_hash_type > NetPktRssIpV6UdpEx) {
1870         if (n->rss_data.populate_hash) {
1871             hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1872             hdr->hash_report = 0;
1873         }
1874         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1875     }
1876 
1877     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1878 
1879     if (n->rss_data.populate_hash) {
1880         hdr->hash_value = hash;
1881         hdr->hash_report = reports[net_hash_type];
1882     }
1883 
1884     if (n->rss_data.redirect) {
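         /*
          * indirections_len is validated to be a power of two when the RSS
          * command is accepted, so masking with (len - 1) indexes the table
          * uniformly.
          */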
1885         new_index = hash & (n->rss_data.indirections_len - 1);
1886         new_index = n->rss_data.indirections_table[new_index];
1887     }
1888 
1889     return (index == new_index) ? -1 : new_index;
1890 }
1891 
1892 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1893                                       size_t size, bool no_rss)
1894 {
1895     VirtIONet *n = qemu_get_nic_opaque(nc);
1896     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1897     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1898     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1899     size_t lens[VIRTQUEUE_MAX_SIZE];
1900     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1901     struct virtio_net_hdr_v1_hash extra_hdr;
1902     unsigned mhdr_cnt = 0;
1903     size_t offset, i, guest_offset, j;
1904     ssize_t err;
1905 
1906     if (!virtio_net_can_receive(nc)) {
1907         return -1;
1908     }
1909 
1910     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1911         int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1912         if (index >= 0) {
1913             NetClientState *nc2 =
1914                 qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1915             return virtio_net_receive_rcu(nc2, buf, size, true);
1916         }
1917     }
1918 
1919     /* hdr_len refers to the header we supply to the guest */
1920     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1921         return 0;
1922     }
1923 
1924     if (!receive_filter(n, buf, size))
1925         return size;
1926 
1927     offset = i = 0;
1928 
1929     while (offset < size) {
1930         VirtQueueElement *elem;
1931         int len, total;
1932         const struct iovec *sg;
1933 
1934         total = 0;
1935 
1936         if (i == VIRTQUEUE_MAX_SIZE) {
1937             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1938             err = size;
1939             goto err;
1940         }
1941 
1942         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1943         if (!elem) {
1944             if (i) {
1945                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1946                              "i %zd mergeable %d offset %zd, size %zd, "
1947                              "guest hdr len %zd, host hdr len %zd "
1948                              "guest features 0x%" PRIx64,
1949                              i, n->mergeable_rx_bufs, offset, size,
1950                              n->guest_hdr_len, n->host_hdr_len,
1951                              vdev->guest_features);
1952             }
1953             err = -1;
1954             goto err;
1955         }
1956 
1957         if (elem->in_num < 1) {
1958             virtio_error(vdev,
1959                          "virtio-net receive queue contains no in buffers");
1960             virtqueue_detach_element(q->rx_vq, elem, 0);
1961             g_free(elem);
1962             err = -1;
1963             goto err;
1964         }
1965 
1966         sg = elem->in_sg;
1967         if (i == 0) {
1968             assert(offset == 0);
1969             if (n->mergeable_rx_bufs) {
1970                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1971                                     sg, elem->in_num,
1972                                     offsetof(typeof(extra_hdr), hdr.num_buffers),
1973                                     sizeof(extra_hdr.hdr.num_buffers));
1974             }
1975 
1976             receive_header(n, sg, elem->in_num, buf, size);
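                 /*
                  * receive_header() wrote only the basic header; the hash
                  * fields of struct virtio_net_hdr_v1_hash sit past it and
                  * are copied in separately here.
                  */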
1977             if (n->rss_data.populate_hash) {
1978                 offset = offsetof(typeof(extra_hdr), hash_value);
1979                 iov_from_buf(sg, elem->in_num, offset,
1980                              (char *)&extra_hdr + offset,
1981                              sizeof(extra_hdr.hash_value) +
1982                              sizeof(extra_hdr.hash_report));
1983             }
1984             offset = n->host_hdr_len;
1985             total += n->guest_hdr_len;
1986             guest_offset = n->guest_hdr_len;
1987         } else {
1988             guest_offset = 0;
1989         }
1990 
1991         /* copy in packet.  ugh */
1992         len = iov_from_buf(sg, elem->in_num, guest_offset,
1993                            buf + offset, size - offset);
1994         total += len;
1995         offset += len;
1996         /* If buffers can't be merged, at this point we
1997          * must have consumed the complete packet.
1998          * Otherwise, drop it. */
1999         if (!n->mergeable_rx_bufs && offset < size) {
2000             virtqueue_unpop(q->rx_vq, elem, total);
2001             g_free(elem);
2002             err = size;
2003             goto err;
2004         }
2005 
2006         elems[i] = elem;
2007         lens[i] = total;
2008         i++;
2009     }
2010 
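         /*
          * With mergeable buffers the header's num_buffers field could not
          * be filled until the descriptor count was known; patch it now
          * through the iovec fragments saved in mhdr_sg.
          */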
2011     if (mhdr_cnt) {
2012         virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
2013         iov_from_buf(mhdr_sg, mhdr_cnt,
2014                      0,
2015                      &extra_hdr.hdr.num_buffers,
2016                      sizeof extra_hdr.hdr.num_buffers);
2017     }
2018 
2019     for (j = 0; j < i; j++) {
2020         /* signal other side */
2021         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2022         g_free(elems[j]);
2023     }
2024 
2025     virtqueue_flush(q->rx_vq, i);
2026     virtio_notify(vdev, q->rx_vq);
2027 
2028     return size;
2029 
2030 err:
2031     for (j = 0; j < i; j++) {
2032         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2033         g_free(elems[j]);
2034     }
2035 
2036     return err;
2037 }
2038 
2039 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2040                                   size_t size)
2041 {
2042     RCU_READ_LOCK_GUARD();
2043 
2044     return virtio_net_receive_rcu(nc, buf, size, false);
2045 }
2046 
2047 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2048                                          const uint8_t *buf,
2049                                          VirtioNetRscUnit *unit)
2050 {
2051     uint16_t ip_hdrlen;
2052     struct ip_header *ip;
2053 
2054     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2055                               + sizeof(struct eth_header));
2056     unit->ip = (void *)ip;
2057     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2058     unit->ip_plen = &ip->ip_len;
2059     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
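         /* TCP data offset is the top 4 bits, counted in 32-bit words:
            ((x & 0xF000) >> 12) * 4 == (x & 0xF000) >> 10 */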
2060     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2061     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2062 }
2063 
2064 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2065                                          const uint8_t *buf,
2066                                          VirtioNetRscUnit *unit)
2067 {
2068     struct ip6_header *ip6;
2069 
2070     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2071                                  + sizeof(struct eth_header));
2072     unit->ip = ip6;
2073     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2074     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2075                                         + sizeof(struct ip6_header));
2076     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2077 
2078     /* Payload length differs between IPv4 and IPv6: the IP header is
2079        not included in the IPv6 payload length */
2080     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2081 }
2082 
2083 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2084                                        VirtioNetRscSeg *seg)
2085 {
2086     int ret;
2087     struct virtio_net_hdr_v1 *h;
2088 
2089     h = (struct virtio_net_hdr_v1 *)seg->buf;
2090     h->flags = 0;
2091     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2092 
2093     if (seg->is_coalesced) {
2094         h->rsc.segments = seg->packets;
2095         h->rsc.dup_acks = seg->dup_ack;
2096         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2097         if (chain->proto == ETH_P_IP) {
2098             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2099         } else {
2100             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2101         }
2102     }
2103 
2104     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2105     QTAILQ_REMOVE(&chain->buffers, seg, next);
2106     g_free(seg->buf);
2107     g_free(seg);
2108 
2109     return ret;
2110 }
2111 
2112 static void virtio_net_rsc_purge(void *opq)
2113 {
2114     VirtioNetRscSeg *seg, *rn;
2115     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2116 
2117     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2118         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2119             chain->stat.purge_failed++;
2120             continue;
2121         }
2122     }
2123 
2124     chain->stat.timer++;
2125     if (!QTAILQ_EMPTY(&chain->buffers)) {
2126         timer_mod(chain->drain_timer,
2127               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2128     }
2129 }
2130 
2131 static void virtio_net_rsc_cleanup(VirtIONet *n)
2132 {
2133     VirtioNetRscChain *chain, *rn_chain;
2134     VirtioNetRscSeg *seg, *rn_seg;
2135 
2136     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2137         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2138             QTAILQ_REMOVE(&chain->buffers, seg, next);
2139             g_free(seg->buf);
2140             g_free(seg);
2141         }
2142 
2143         timer_free(chain->drain_timer);
2144         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2145         g_free(chain);
2146     }
2147 }
2148 
2149 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2150                                      NetClientState *nc,
2151                                      const uint8_t *buf, size_t size)
2152 {
2153     uint16_t hdr_len;
2154     VirtioNetRscSeg *seg;
2155 
2156     hdr_len = chain->n->guest_hdr_len;
2157     seg = g_new(VirtioNetRscSeg, 1);
2158     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2159         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2160     memcpy(seg->buf, buf, size);
2161     seg->size = size;
2162     seg->packets = 1;
2163     seg->dup_ack = 0;
2164     seg->is_coalesced = 0;
2165     seg->nc = nc;
2166 
2167     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2168     chain->stat.cache++;
2169 
2170     switch (chain->proto) {
2171     case ETH_P_IP:
2172         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2173         break;
2174     case ETH_P_IPV6:
2175         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2176         break;
2177     default:
2178         g_assert_not_reached();
2179     }
2180 }
2181 
2182 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2183                                          VirtioNetRscSeg *seg,
2184                                          const uint8_t *buf,
2185                                          struct tcp_header *n_tcp,
2186                                          struct tcp_header *o_tcp)
2187 {
2188     uint32_t nack, oack;
2189     uint16_t nwin, owin;
2190 
2191     nack = htonl(n_tcp->th_ack);
2192     nwin = htons(n_tcp->th_win);
2193     oack = htonl(o_tcp->th_ack);
2194     owin = htons(o_tcp->th_win);
2195 
2196     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2197         chain->stat.ack_out_of_win++;
2198         return RSC_FINAL;
2199     } else if (nack == oack) {
2200         /* duplicated ack or window probe */
2201         if (nwin == owin) {
2202             /* duplicated ack; count it (the WHQL test allows up to 1 dup ack) */
2203             chain->stat.dup_ack++;
2204             return RSC_FINAL;
2205         } else {
2206             /* Coalesce window update */
2207             o_tcp->th_win = n_tcp->th_win;
2208             chain->stat.win_update++;
2209             return RSC_COALESCE;
2210         }
2211     } else {
2212         /* pure ack, go to 'C', finalize */
2213         chain->stat.pure_ack++;
2214         return RSC_FINAL;
2215     }
2216 }
2217 
2218 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2219                                             VirtioNetRscSeg *seg,
2220                                             const uint8_t *buf,
2221                                             VirtioNetRscUnit *n_unit)
2222 {
2223     void *data;
2224     uint16_t o_ip_len;
2225     uint32_t nseq, oseq;
2226     VirtioNetRscUnit *o_unit;
2227 
2228     o_unit = &seg->unit;
2229     o_ip_len = htons(*o_unit->ip_plen);
2230     nseq = htonl(n_unit->tcp->th_seq);
2231     oseq = htonl(o_unit->tcp->th_seq);
2232 
2233     /* out of order or retransmitted. */
2234     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2235         chain->stat.data_out_of_win++;
2236         return RSC_FINAL;
2237     }
2238 
2239     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2240     if (nseq == oseq) {
2241         if ((o_unit->payload == 0) && n_unit->payload) {
2242             /* From no payload to payload: the normal case, not a dup ack etc. */
2243             chain->stat.data_after_pure_ack++;
2244             goto coalesce;
2245         } else {
2246             return virtio_net_rsc_handle_ack(chain, seg, buf,
2247                                              n_unit->tcp, o_unit->tcp);
2248         }
2249     } else if ((nseq - oseq) != o_unit->payload) {
2250         /* Not a consistent packet, out of order */
2251         chain->stat.data_out_of_order++;
2252         return RSC_FINAL;
2253     } else {
2254 coalesce:
2255         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2256             chain->stat.over_size++;
2257             return RSC_FINAL;
2258         }
2259 
2260         /* In-sequence data; the payload length field differs between v4
2261            and v6, so use the field value to update the recorded data length */
2262         o_unit->payload += n_unit->payload; /* update new data len */
2263 
2264         /* update field in ip header */
2265         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2266 
2267         /* Carry the 'PUSH' flag over: the WHQL test guide says 'PUSH' can
2268            be coalesced for Windows guests, though this may change the
2269            behavior for Linux guests (only if they use the RSC feature). */
2270         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2271 
2272         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2273         o_unit->tcp->th_win = n_unit->tcp->th_win;
2274 
2275         memmove(seg->buf + seg->size, data, n_unit->payload);
2276         seg->size += n_unit->payload;
2277         seg->packets++;
2278         chain->stat.coalesced++;
2279         return RSC_COALESCE;
2280     }
2281 }
2282 
2283 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2284                                         VirtioNetRscSeg *seg,
2285                                         const uint8_t *buf, size_t size,
2286                                         VirtioNetRscUnit *unit)
2287 {
2288     struct ip_header *ip1, *ip2;
2289 
2290     ip1 = (struct ip_header *)(unit->ip);
2291     ip2 = (struct ip_header *)(seg->unit.ip);
2292     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2293         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2294         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2295         chain->stat.no_match++;
2296         return RSC_NO_MATCH;
2297     }
2298 
2299     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2300 }
2301 
2302 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2303                                         VirtioNetRscSeg *seg,
2304                                         const uint8_t *buf, size_t size,
2305                                         VirtioNetRscUnit *unit)
2306 {
2307     struct ip6_header *ip1, *ip2;
2308 
2309     ip1 = (struct ip6_header *)(unit->ip);
2310     ip2 = (struct ip6_header *)(seg->unit.ip);
2311     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2312         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2313         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2314         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2315         chain->stat.no_match++;
2316         return RSC_NO_MATCH;
2317     }
2318 
2319     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2320 }
2321 
2322 /* Packets with 'SYN' should bypass coalescing; packets with any other
2323  * control flag should be sent after a drain, to prevent reordering */
2324 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2325                                          struct tcp_header *tcp)
2326 {
2327     uint16_t tcp_hdr;
2328     uint16_t tcp_flag;
2329 
2330     tcp_flag = htons(tcp->th_offset_flags);
2331     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2332     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2333     if (tcp_flag & TH_SYN) {
2334         chain->stat.tcp_syn++;
2335         return RSC_BYPASS;
2336     }
2337 
2338     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2339         chain->stat.tcp_ctrl_drain++;
2340         return RSC_FINAL;
2341     }
2342 
2343     if (tcp_hdr > sizeof(struct tcp_header)) {
2344         chain->stat.tcp_all_opt++;
2345         return RSC_FINAL;
2346     }
2347 
2348     return RSC_CANDIDATE;
2349 }
2350 
2351 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2352                                          NetClientState *nc,
2353                                          const uint8_t *buf, size_t size,
2354                                          VirtioNetRscUnit *unit)
2355 {
2356     int ret;
2357     VirtioNetRscSeg *seg, *nseg;
2358 
2359     if (QTAILQ_EMPTY(&chain->buffers)) {
2360         chain->stat.empty_cache++;
2361         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2362         timer_mod(chain->drain_timer,
2363               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2364         return size;
2365     }
2366 
2367     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2368         if (chain->proto == ETH_P_IP) {
2369             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2370         } else {
2371             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2372         }
2373 
2374         if (ret == RSC_FINAL) {
2375             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2376                 /* Send failed */
2377                 chain->stat.final_failed++;
2378                 return 0;
2379             }
2380 
2381             /* Send current packet */
2382             return virtio_net_do_receive(nc, buf, size);
2383         } else if (ret == RSC_NO_MATCH) {
2384             continue;
2385         } else {
2386             /* Coalesced; set the flag so the IPv4 checksum is recalculated */
2387             seg->is_coalesced = 1;
2388             return size;
2389         }
2390     }
2391 
2392     chain->stat.no_match_cache++;
2393     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2394     return size;
2395 }
2396 
2397 /* Drain a connection's data, to avoid out-of-order segments */
2398 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2399                                         NetClientState *nc,
2400                                         const uint8_t *buf, size_t size,
2401                                         uint16_t ip_start, uint16_t ip_size,
2402                                         uint16_t tcp_port)
2403 {
2404     VirtioNetRscSeg *seg, *nseg;
2405     uint32_t ppair1, ppair2;
2406 
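         /*
          * The TCP source and destination ports are adjacent 16-bit fields,
          * so a single 32-bit load at tcp_port compares both at once.
          */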
2407     ppair1 = *(uint32_t *)(buf + tcp_port);
2408     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2409         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2410         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2411             || (ppair1 != ppair2)) {
2412             continue;
2413         }
2414         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2415             chain->stat.drain_failed++;
2416         }
2417 
2418         break;
2419     }
2420 
2421     return virtio_net_do_receive(nc, buf, size);
2422 }
2423 
2424 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2425                                             struct ip_header *ip,
2426                                             const uint8_t *buf, size_t size)
2427 {
2428     uint16_t ip_len;
2429 
2430     /* Not an ipv4 packet */
2431     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2432         chain->stat.ip_option++;
2433         return RSC_BYPASS;
2434     }
2435 
2436     /* Don't handle packets with ip option */
2437     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2438         chain->stat.ip_option++;
2439         return RSC_BYPASS;
2440     }
2441 
2442     if (ip->ip_p != IPPROTO_TCP) {
2443         chain->stat.bypass_not_tcp++;
2444         return RSC_BYPASS;
2445     }
2446 
2447     /* Don't handle packets with ip fragment */
2448     if (!(htons(ip->ip_off) & IP_DF)) {
2449         chain->stat.ip_frag++;
2450         return RSC_BYPASS;
2451     }
2452 
2453     /* Don't handle packets with ecn flag */
2454     if (IPTOS_ECN(ip->ip_tos)) {
2455         chain->stat.ip_ecn++;
2456         return RSC_BYPASS;
2457     }
2458 
2459     ip_len = htons(ip->ip_len);
2460     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2461         || ip_len > (size - chain->n->guest_hdr_len -
2462                      sizeof(struct eth_header))) {
2463         chain->stat.ip_hacked++;
2464         return RSC_BYPASS;
2465     }
2466 
2467     return RSC_CANDIDATE;
2468 }
2469 
2470 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2471                                       NetClientState *nc,
2472                                       const uint8_t *buf, size_t size)
2473 {
2474     int32_t ret;
2475     uint16_t hdr_len;
2476     VirtioNetRscUnit unit;
2477 
2478     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2479 
2480     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2481         + sizeof(struct tcp_header))) {
2482         chain->stat.bypass_not_tcp++;
2483         return virtio_net_do_receive(nc, buf, size);
2484     }
2485 
2486     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2487     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2488         != RSC_CANDIDATE) {
2489         return virtio_net_do_receive(nc, buf, size);
2490     }
2491 
2492     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2493     if (ret == RSC_BYPASS) {
2494         return virtio_net_do_receive(nc, buf, size);
2495     } else if (ret == RSC_FINAL) {
2496         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2497                 ((hdr_len + sizeof(struct eth_header)) + 12),
2498                 VIRTIO_NET_IP4_ADDR_SIZE,
2499                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2500     }
2501 
2502     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2503 }
2504 
2505 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2506                                             struct ip6_header *ip6,
2507                                             const uint8_t *buf, size_t size)
2508 {
2509     uint16_t ip_len;
2510 
2511     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2512         != IP_HEADER_VERSION_6) {
2513         return RSC_BYPASS;
2514     }
2515 
2516     /* Both options (extension headers) and the protocol are checked by this */
2517     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2518         chain->stat.bypass_not_tcp++;
2519         return RSC_BYPASS;
2520     }
2521 
2522     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2523     if (ip_len < sizeof(struct tcp_header) ||
2524         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2525                   - sizeof(struct ip6_header))) {
2526         chain->stat.ip_hacked++;
2527         return RSC_BYPASS;
2528     }
2529 
2530     /* Don't handle packets with ecn flag */
2531     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2532         chain->stat.ip_ecn++;
2533         return RSC_BYPASS;
2534     }
2535 
2536     return RSC_CANDIDATE;
2537 }
2538 
2539 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2540                                       const uint8_t *buf, size_t size)
2541 {
2542     int32_t ret;
2543     uint16_t hdr_len;
2544     VirtioNetRscChain *chain;
2545     VirtioNetRscUnit unit;
2546 
2547     chain = opq;
2548     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2549 
2550     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2551         + sizeof(struct tcp_header))) {
2552         return virtio_net_do_receive(nc, buf, size);
2553     }
2554 
2555     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2556     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2557                                                  unit.ip, buf, size)) {
2558         return virtio_net_do_receive(nc, buf, size);
2559     }
2560 
2561     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2562     if (ret == RSC_BYPASS) {
2563         return virtio_net_do_receive(nc, buf, size);
2564     } else if (ret == RSC_FINAL) {
2565         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2566                 ((hdr_len + sizeof(struct eth_header)) + 8),
2567                 VIRTIO_NET_IP6_ADDR_SIZE,
2568                 hdr_len + sizeof(struct eth_header)
2569                 + sizeof(struct ip6_header));
2570     }
2571 
2572     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2573 }
2574 
2575 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2576                                                       NetClientState *nc,
2577                                                       uint16_t proto)
2578 {
2579     VirtioNetRscChain *chain;
2580 
2581     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2582         return NULL;
2583     }
2584 
2585     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2586         if (chain->proto == proto) {
2587             return chain;
2588         }
2589     }
2590 
2591     chain = g_malloc(sizeof(*chain));
2592     chain->n = n;
2593     chain->proto = proto;
2594     if (proto == (uint16_t)ETH_P_IP) {
2595         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2596         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2597     } else {
2598         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2599         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2600     }
2601     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2602                                       virtio_net_rsc_purge, chain);
2603     memset(&chain->stat, 0, sizeof(chain->stat));
2604 
2605     QTAILQ_INIT(&chain->buffers);
2606     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2607 
2608     return chain;
2609 }
2610 
2611 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2612                                       const uint8_t *buf,
2613                                       size_t size)
2614 {
2615     uint16_t proto;
2616     VirtioNetRscChain *chain;
2617     struct eth_header *eth;
2618     VirtIONet *n;
2619 
2620     n = qemu_get_nic_opaque(nc);
2621     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2622         return virtio_net_do_receive(nc, buf, size);
2623     }
2624 
2625     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2626     proto = htons(eth->h_proto);
2627 
2628     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2629     if (chain) {
2630         chain->stat.received++;
2631         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2632             return virtio_net_rsc_receive4(chain, nc, buf, size);
2633         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2634             return virtio_net_rsc_receive6(chain, nc, buf, size);
2635         }
2636     }
2637     return virtio_net_do_receive(nc, buf, size);
2638 }
2639 
2640 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2641                                   size_t size)
2642 {
2643     VirtIONet *n = qemu_get_nic_opaque(nc);
2644     if (n->rsc4_enabled || n->rsc6_enabled) {
2645         return virtio_net_rsc_receive(nc, buf, size);
2646     } else {
2647         return virtio_net_do_receive(nc, buf, size);
2648     }
2649 }
2650 
2651 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2652 
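     /*
      * Completion callback for qemu_sendv_packet_async(): complete the
      * element parked in q->async_tx by virtio_net_flush_tx() and resume
      * flushing.
      */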
2653 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2654 {
2655     VirtIONet *n = qemu_get_nic_opaque(nc);
2656     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2657     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2658     int ret;
2659 
2660     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2661     virtio_notify(vdev, q->tx_vq);
2662 
2663     g_free(q->async_tx.elem);
2664     q->async_tx.elem = NULL;
2665 
2666     virtio_queue_set_notification(q->tx_vq, 1);
2667     ret = virtio_net_flush_tx(q);
2668     if (ret >= n->tx_burst) {
2669         /*
2670          * the flush has been stopped by tx_burst;
2671          * we will not receive a notification for the
2672          * remaining part, so re-schedule
2673          */
2674         virtio_queue_set_notification(q->tx_vq, 0);
2675         if (q->tx_bh) {
2676             replay_bh_schedule_event(q->tx_bh);
2677         } else {
2678             timer_mod(q->tx_timer,
2679                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2680         }
2681         q->tx_waiting = 1;
2682     }
2683 }
2684 
2685 /* TX */
2686 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2687 {
2688     VirtIONet *n = q->n;
2689     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2690     VirtQueueElement *elem;
2691     int32_t num_packets = 0;
2692     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2693     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2694         return num_packets;
2695     }
2696 
2697     if (q->async_tx.elem) {
2698         virtio_queue_set_notification(q->tx_vq, 0);
2699         return num_packets;
2700     }
2701 
2702     for (;;) {
2703         ssize_t ret;
2704         unsigned int out_num;
2705         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2706         struct virtio_net_hdr vhdr;
2707 
2708         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2709         if (!elem) {
2710             break;
2711         }
2712 
2713         out_num = elem->out_num;
2714         out_sg = elem->out_sg;
2715         if (out_num < 1) {
2716             virtio_error(vdev, "virtio-net header not in first element");
2717             goto detach;
2718         }
2719 
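             /*
              * When the device byte order differs from the host's, the vnet
              * header must be swapped before it reaches the peer: copy it
              * out, swap it and rebuild the iovec array with the swapped
              * copy as element 0.
              */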
2720         if (n->needs_vnet_hdr_swap) {
2721             if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2722                 sizeof(vhdr)) {
2723                 virtio_error(vdev, "virtio-net header incorrect");
2724                 goto detach;
2725             }
2726             virtio_net_hdr_swap(vdev, &vhdr);
2727             sg2[0].iov_base = &vhdr;
2728             sg2[0].iov_len = sizeof(vhdr);
2729             out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2730                                sizeof(vhdr), -1);
2731             if (out_num == VIRTQUEUE_MAX_SIZE) {
2732                 goto drop;
2733             }
2734             out_num += 1;
2735             out_sg = sg2;
2736         }
2737         /*
2738          * If host wants to see the guest header as is, we can
2739          * pass it on unchanged. Otherwise, copy just the parts
2740          * that host is interested in.
2741          */
2742         assert(n->host_hdr_len <= n->guest_hdr_len);
2743         if (n->host_hdr_len != n->guest_hdr_len) {
2744             if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2745                 virtio_error(vdev, "virtio-net header is invalid");
2746                 goto detach;
2747             }
2748             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2749                                        out_sg, out_num,
2750                                        0, n->host_hdr_len);
2751             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2752                              out_sg, out_num,
2753                              n->guest_hdr_len, -1);
2754             out_num = sg_num;
2755             out_sg = sg;
2756 
2757             if (out_num < 1) {
2758                 virtio_error(vdev, "virtio-net nothing to send");
2759                 goto detach;
2760             }
2761         }
2762 
2763         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2764                                       out_sg, out_num, virtio_net_tx_complete);
2765         if (ret == 0) {
2766             virtio_queue_set_notification(q->tx_vq, 0);
2767             q->async_tx.elem = elem;
2768             return -EBUSY;
2769         }
2770 
2771 drop:
2772         virtqueue_push(q->tx_vq, elem, 0);
2773         virtio_notify(vdev, q->tx_vq);
2774         g_free(elem);
2775 
2776         if (++num_packets >= n->tx_burst) {
2777             break;
2778         }
2779     }
2780     return num_packets;
2781 
2782 detach:
2783     virtqueue_detach_element(q->tx_vq, elem, 0);
2784     g_free(elem);
2785     return -EINVAL;
2786 }
2787 
2788 static void virtio_net_tx_timer(void *opaque);
2789 
2790 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2791 {
2792     VirtIONet *n = VIRTIO_NET(vdev);
2793     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2794 
2795     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2796         virtio_net_drop_tx_queue_data(vdev, vq);
2797         return;
2798     }
2799 
2800     /* This happens when device was stopped but VCPU wasn't. */
2801     if (!vdev->vm_running) {
2802         q->tx_waiting = 1;
2803         return;
2804     }
2805 
2806     if (q->tx_waiting) {
2807         /* We already have queued packets, immediately flush */
2808         timer_del(q->tx_timer);
2809         virtio_net_tx_timer(q);
2810     } else {
2811         /* re-arm timer to flush it (and more) on next tick */
2812         timer_mod(q->tx_timer,
2813                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2814         q->tx_waiting = 1;
2815         virtio_queue_set_notification(vq, 0);
2816     }
2817 }
2818 
2819 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2820 {
2821     VirtIONet *n = VIRTIO_NET(vdev);
2822     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2823 
2824     if (unlikely(n->vhost_started)) {
2825         return;
2826     }
2827 
2828     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2829         virtio_net_drop_tx_queue_data(vdev, vq);
2830         return;
2831     }
2832 
2833     if (unlikely(q->tx_waiting)) {
2834         return;
2835     }
2836     q->tx_waiting = 1;
2837     /* This happens when device was stopped but VCPU wasn't. */
2838     if (!vdev->vm_running) {
2839         return;
2840     }
2841     virtio_queue_set_notification(vq, 0);
2842     replay_bh_schedule_event(q->tx_bh);
2843 }
2844 
2845 static void virtio_net_tx_timer(void *opaque)
2846 {
2847     VirtIONetQueue *q = opaque;
2848     VirtIONet *n = q->n;
2849     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2850     int ret;
2851 
2852     /* This happens when the device was stopped but the timer wasn't. */
2853     if (!vdev->vm_running) {
2854         /* Make sure tx waiting is set, so we'll run when restarted. */
2855         assert(q->tx_waiting);
2856         return;
2857     }
2858 
2859     q->tx_waiting = 0;
2860 
2861     /* Just in case the driver is not ready any more */
2862     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2863         return;
2864     }
2865 
2866     ret = virtio_net_flush_tx(q);
2867     if (ret == -EBUSY || ret == -EINVAL) {
2868         return;
2869     }
2870     /*
2871      * If we flush a full burst of packets, assume there are
2872      * more coming and immediately rearm
2873      */
2874     if (ret >= n->tx_burst) {
2875         q->tx_waiting = 1;
2876         timer_mod(q->tx_timer,
2877                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2878         return;
2879     }
2880     /*
2881      * If less than a full burst, re-enable notification and flush
2882      * anything that may have come in while we weren't looking.  If
2883      * we find something, assume the guest is still active and rearm
2884      */
2885     virtio_queue_set_notification(q->tx_vq, 1);
2886     ret = virtio_net_flush_tx(q);
2887     if (ret > 0) {
2888         virtio_queue_set_notification(q->tx_vq, 0);
2889         q->tx_waiting = 1;
2890         timer_mod(q->tx_timer,
2891                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2892     }
2893 }
2894 
2895 static void virtio_net_tx_bh(void *opaque)
2896 {
2897     VirtIONetQueue *q = opaque;
2898     VirtIONet *n = q->n;
2899     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2900     int32_t ret;
2901 
2902     /* This happens when device was stopped but BH wasn't. */
2903     if (!vdev->vm_running) {
2904         /* Make sure tx waiting is set, so we'll run when restarted. */
2905         assert(q->tx_waiting);
2906         return;
2907     }
2908 
2909     q->tx_waiting = 0;
2910 
2911     /* Just in case the driver is not ready any more */
2912     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2913         return;
2914     }
2915 
2916     ret = virtio_net_flush_tx(q);
2917     if (ret == -EBUSY || ret == -EINVAL) {
2918         return; /* Notification re-enable handled by tx_complete or device
2919                  * broken */
2920     }
2921 
2922     /* If we flush a full burst of packets, assume there are
2923      * more coming and immediately reschedule */
2924     if (ret >= n->tx_burst) {
2925         replay_bh_schedule_event(q->tx_bh);
2926         q->tx_waiting = 1;
2927         return;
2928     }
2929 
2930     /* If less than a full burst, re-enable notification and flush
2931      * anything that may have come in while we weren't looking.  If
2932      * we find something, assume the guest is still active and reschedule */
2933     virtio_queue_set_notification(q->tx_vq, 1);
2934     ret = virtio_net_flush_tx(q);
2935     if (ret == -EINVAL) {
2936         return;
2937     } else if (ret > 0) {
2938         virtio_queue_set_notification(q->tx_vq, 0);
2939         replay_bh_schedule_event(q->tx_bh);
2940         q->tx_waiting = 1;
2941     }
2942 }
2943 
2944 static void virtio_net_add_queue(VirtIONet *n, int index)
2945 {
2946     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2947 
2948     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2949                                            virtio_net_handle_rx);
2950 
2951     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2952         n->vqs[index].tx_vq =
2953             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2954                              virtio_net_handle_tx_timer);
2955         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2956                                               virtio_net_tx_timer,
2957                                               &n->vqs[index]);
2958     } else {
2959         n->vqs[index].tx_vq =
2960             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2961                              virtio_net_handle_tx_bh);
2962         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2963                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2964     }
2965 
2966     n->vqs[index].tx_waiting = 0;
2967     n->vqs[index].n = n;
2968 }
2969 
2970 static void virtio_net_del_queue(VirtIONet *n, int index)
2971 {
2972     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2973     VirtIONetQueue *q = &n->vqs[index];
2974     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2975 
2976     qemu_purge_queued_packets(nc);
2977 
2978     virtio_del_queue(vdev, index * 2);
2979     if (q->tx_timer) {
2980         timer_free(q->tx_timer);
2981         q->tx_timer = NULL;
2982     } else {
2983         qemu_bh_delete(q->tx_bh);
2984         q->tx_bh = NULL;
2985     }
2986     q->tx_waiting = 0;
2987     virtio_del_queue(vdev, index * 2 + 1);
2988 }
2989 
2990 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
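     /*
      * Virtqueues are laid out as rx/tx pairs: queue 2 * i is the rx queue
      * of pair i and queue 2 * i + 1 its tx queue, with the ctrl vq always
      * last.
      */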
2991 {
2992     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2993     int old_num_queues = virtio_get_num_queues(vdev);
2994     int new_num_queues = new_max_queue_pairs * 2 + 1;
2995     int i;
2996 
2997     assert(old_num_queues >= 3);
2998     assert(old_num_queues % 2 == 1);
2999 
3000     if (old_num_queues == new_num_queues) {
3001         return;
3002     }
3003 
3004     /*
3005      * We always need to remove and add ctrl vq if
3006      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3007      * and then we only enter one of the following two loops.
3008      */
3009     virtio_del_queue(vdev, old_num_queues - 1);
3010 
3011     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3012         /* new_num_queues < old_num_queues */
3013         virtio_net_del_queue(n, i / 2);
3014     }
3015 
3016     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3017         /* new_num_queues > old_num_queues */
3018         virtio_net_add_queue(n, i / 2);
3019     }
3020 
3021     /* add ctrl_vq last */
3022     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3023 }
3024 
3025 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3026 {
3027     int max = multiqueue ? n->max_queue_pairs : 1;
3028 
3029     n->multiqueue = multiqueue;
3030     virtio_net_change_num_queue_pairs(n, max);
3031 
3032     virtio_net_set_queue_pairs(n);
3033 }
3034 
3035 static int virtio_net_post_load_device(void *opaque, int version_id)
3036 {
3037     VirtIONet *n = opaque;
3038     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3039     int i, link_down;
3040 
3041     trace_virtio_net_post_load_device();
3042     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3043                                virtio_vdev_has_feature(vdev,
3044                                                        VIRTIO_F_VERSION_1),
3045                                virtio_vdev_has_feature(vdev,
3046                                                        VIRTIO_NET_F_HASH_REPORT));
3047 
3048     /* MAC_TABLE_ENTRIES may be different from the saved image */
3049     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3050         n->mac_table.in_use = 0;
3051     }
3052 
3053     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3054         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3055     }
3056 
3057     /*
3058      * curr_guest_offloads will be later overwritten by the
3059      * virtio_set_features_nocheck call done from the virtio_load.
3060      * Here we make sure it is preserved and restored accordingly
3061      * in the virtio_net_post_load_virtio callback.
3062      */
3063     n->saved_guest_offloads = n->curr_guest_offloads;
3064 
3065     virtio_net_set_queue_pairs(n);
3066 
3067     /* Find the first multicast entry in the saved MAC filter */
3068     for (i = 0; i < n->mac_table.in_use; i++) {
3069         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3070             break;
3071         }
3072     }
3073     n->mac_table.first_multi = i;
3074 
3075     /* nc.link_down can't be migrated, so infer link_down according
3076      * to the link status bit in n->status */
3077     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3078     for (i = 0; i < n->max_queue_pairs; i++) {
3079         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3080     }
3081 
3082     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3083         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3084         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3085                                   QEMU_CLOCK_VIRTUAL,
3086                                   virtio_net_announce_timer, n);
3087         if (n->announce_timer.round) {
3088             timer_mod(n->announce_timer.tm,
3089                       qemu_clock_get_ms(n->announce_timer.type));
3090         } else {
3091             qemu_announce_timer_del(&n->announce_timer, false);
3092         }
3093     }
3094 
3095     virtio_net_commit_rss_config(n);
3096     return 0;
3097 }
3098 
3099 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3100 {
3101     VirtIONet *n = VIRTIO_NET(vdev);
3102     /*
3103      * The actual needed state is now in saved_guest_offloads,
3104      * see virtio_net_post_load_device for detail.
3105      * Restore it back and apply the desired offloads.
3106      */
3107     n->curr_guest_offloads = n->saved_guest_offloads;
3108     if (peer_has_vnet_hdr(n)) {
3109         virtio_net_apply_guest_offloads(n);
3110     }
3111 
3112     return 0;
3113 }
3114 
3115 /* tx_waiting field of a VirtIONetQueue */
3116 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3117     .name = "virtio-net-queue-tx_waiting",
3118     .fields = (const VMStateField[]) {
3119         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3120         VMSTATE_END_OF_LIST()
3121     },
3122 };
3123 
3124 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3125 {
3126     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3127 }
3128 
3129 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3130 {
3131     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3132                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3133 }
3134 
3135 static bool mac_table_fits(void *opaque, int version_id)
3136 {
3137     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3138 }
3139 
3140 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3141 {
3142     return !mac_table_fits(opaque, version_id);
3143 }
3144 
3145 /* This temporary type is shared by all the WITH_TMP methods
3146  * although only some fields are used by each.
3147  */
3148 struct VirtIONetMigTmp {
3149     VirtIONet      *parent;
3150     VirtIONetQueue *vqs_1;
3151     uint16_t        curr_queue_pairs_1;
3152     uint8_t         has_ufo;
3153     uint32_t        has_vnet_hdr;
3154 };
3155 
3156 /* The 2nd and subsequent tx_waiting flags are loaded later than
3157  * the 1st entry in the queue_pairs and only if there's more than one
3158  * entry.  We use the tmp mechanism to calculate a temporary
3159  * pointer and count and also validate the count.
3160  */
3161 
3162 static int virtio_net_tx_waiting_pre_save(void *opaque)
3163 {
3164     struct VirtIONetMigTmp *tmp = opaque;
3165 
3166     tmp->vqs_1 = tmp->parent->vqs + 1;
3167     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3168     if (tmp->parent->curr_queue_pairs == 0) {
3169         tmp->curr_queue_pairs_1 = 0;
3170     }
3171 
3172     return 0;
3173 }
3174 
3175 static int virtio_net_tx_waiting_pre_load(void *opaque)
3176 {
3177     struct VirtIONetMigTmp *tmp = opaque;
3178 
3179     /* Reuse the pointer setup from save */
3180     virtio_net_tx_waiting_pre_save(opaque);
3181 
3182     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3183         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3184             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3185 
3186         return -EINVAL;
3187     }
3188 
3189     return 0; /* all good */
3190 }
3191 
3192 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3193     .name      = "virtio-net-tx_waiting",
3194     .pre_load  = virtio_net_tx_waiting_pre_load,
3195     .pre_save  = virtio_net_tx_waiting_pre_save,
3196     .fields    = (const VMStateField[]) {
3197         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3198                                      curr_queue_pairs_1,
3199                                      vmstate_virtio_net_queue_tx_waiting,
3200                                      struct VirtIONetQueue),
3201         VMSTATE_END_OF_LIST()
3202     },
3203 };
3204 
3205 /* The 'has_ufo' flag is only tested; if the incoming stream has the
3206  * flag set we need to check that the peer supports UFO.
3207  */
3208 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3209 {
3210     struct VirtIONetMigTmp *tmp = opaque;
3211 
3212     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3213         error_report("virtio-net: saved image requires TUN_F_UFO support");
3214         return -EINVAL;
3215     }
3216 
3217     return 0;
3218 }
3219 
3220 static int virtio_net_ufo_pre_save(void *opaque)
3221 {
3222     struct VirtIONetMigTmp *tmp = opaque;
3223 
3224     tmp->has_ufo = tmp->parent->has_ufo;
3225 
3226     return 0;
3227 }
3228 
3229 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3230     .name      = "virtio-net-ufo",
3231     .post_load = virtio_net_ufo_post_load,
3232     .pre_save  = virtio_net_ufo_pre_save,
3233     .fields    = (const VMStateField[]) {
3234         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3235         VMSTATE_END_OF_LIST()
3236     },
3237 };
3238 
3239 /* The 'has_vnet_hdr' flag is only tested; if the incoming stream has the
3240  * flag set we need to check that the peer supports the vnet header.
3241  */
3242 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3243 {
3244     struct VirtIONetMigTmp *tmp = opaque;
3245 
3246     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3247         error_report("virtio-net: saved image requires vnet_hdr=on");
3248         return -EINVAL;
3249     }
3250 
3251     return 0;
3252 }
3253 
3254 static int virtio_net_vnet_pre_save(void *opaque)
3255 {
3256     struct VirtIONetMigTmp *tmp = opaque;
3257 
3258     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3259 
3260     return 0;
3261 }
3262 
3263 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3264     .name      = "virtio-net-vnet",
3265     .post_load = virtio_net_vnet_post_load,
3266     .pre_save  = virtio_net_vnet_pre_save,
3267     .fields    = (const VMStateField[]) {
3268         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3269         VMSTATE_END_OF_LIST()
3270     },
3271 };
3272 
3273 static bool virtio_net_rss_needed(void *opaque)
3274 {
3275     return VIRTIO_NET(opaque)->rss_data.enabled;
3276 }
3277 
3278 static const VMStateDescription vmstate_virtio_net_rss = {
3279     .name      = "virtio-net-device/rss",
3280     .version_id = 1,
3281     .minimum_version_id = 1,
3282     .needed = virtio_net_rss_needed,
3283     .fields = (const VMStateField[]) {
3284         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3285         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3286         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3287         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3288         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3289         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3290         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3291                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3292         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3293                                     rss_data.indirections_len, 0,
3294                                     vmstate_info_uint16, uint16_t),
3295         VMSTATE_END_OF_LIST()
3296     },
3297 };
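
     /*
      * Being a subsection, this RSS state is only put on the wire when
      * virtio_net_rss_needed() returns true, so a stream from a guest
      * that never enabled RSS stays loadable by a destination that does
      * not know about this subsection.
      */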
3298 
3299 static const VMStateDescription vmstate_virtio_net_device = {
3300     .name = "virtio-net-device",
3301     .version_id = VIRTIO_NET_VM_VERSION,
3302     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3303     .post_load = virtio_net_post_load_device,
3304     .fields = (const VMStateField[]) {
3305         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3306         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3307                                vmstate_virtio_net_queue_tx_waiting,
3308                                VirtIONetQueue),
3309         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3310         VMSTATE_UINT16(status, VirtIONet),
3311         VMSTATE_UINT8(promisc, VirtIONet),
3312         VMSTATE_UINT8(allmulti, VirtIONet),
3313         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3314 
3315         /* Guarded pair: if it fits we load it, else we throw it away
3316          * - this can happen if the source has a larger MAC table; post-load
3317          * sets the overflow flags in this case.
3318          */
3319         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3320                                  0, mac_table_fits, mac_table.in_use,
3321                                  ETH_ALEN),
3322         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3323                                      mac_table.in_use, ETH_ALEN),
3324 
3325         /* Note: this is an array of uint32's that has always been saved as a
3326          * buffer, so its endianness is preserved as-is; it's actually used as
3327          * a bitmap built out of those uint32 words.
3328          */
3329         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3330         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3331                          vmstate_virtio_net_has_vnet),
3332         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3333         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3334         VMSTATE_UINT8(alluni, VirtIONet),
3335         VMSTATE_UINT8(nomulti, VirtIONet),
3336         VMSTATE_UINT8(nouni, VirtIONet),
3337         VMSTATE_UINT8(nobcast, VirtIONet),
3338         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3339                          vmstate_virtio_net_has_ufo),
3340         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3341                             vmstate_info_uint16_equal, uint16_t),
3342         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3343         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3344                          vmstate_virtio_net_tx_waiting),
3345         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3346                             has_ctrl_guest_offloads),
3347         VMSTATE_END_OF_LIST()
3348     },
3349     .subsections = (const VMStateDescription * const []) {
3350         &vmstate_virtio_net_rss,
3351         NULL
3352     }
3353 };
3354 
3355 static NetClientInfo net_virtio_info = {
3356     .type = NET_CLIENT_DRIVER_NIC,
3357     .size = sizeof(NICState),
3358     .can_receive = virtio_net_can_receive,
3359     .receive = virtio_net_receive,
3360     .link_status_changed = virtio_net_set_link_status,
3361     .query_rx_filter = virtio_net_query_rxfilter,
3362     .announce = virtio_net_announce,
3363 };
3364 
3365 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3366 {
3367     VirtIONet *n = VIRTIO_NET(vdev);
3368     NetClientState *nc;
3369     assert(n->vhost_started);
3370     if (!n->multiqueue && idx == 2) {
3371         /* Must guard against invalid features and a bogus queue index
3372          * being set by a malicious guest, or slipping through a buggy
3373          * migration stream.
3374          */
3375         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3376             qemu_log_mask(LOG_GUEST_ERROR,
3377                           "%s: bogus vq index ignored\n", __func__);
3378             return false;
3379         }
3380         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3381     } else {
3382         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3383     }
3384     /*
3385      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
3386      * as the index of the config interrupt.  If the backend does not
3387      * support it, this function returns false.
3388      */
3389 
3390     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3391         return vhost_net_config_pending(get_vhost_net(nc->peer));
3392     }
3393     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3394 }
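
     /*
      * Index mapping assumed above: data virtqueues come in rx/tx pairs,
      * so vq2q() maps vq index 0,1 -> pair 0, 2,3 -> pair 1, and so on;
      * with !multiqueue the ctrl vq sits at index 2, which is why that
      * index is special-cased before the vq2q() conversion.
      */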
3395 
3396 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3397                                            bool mask)
3398 {
3399     VirtIONet *n = VIRTIO_NET(vdev);
3400     NetClientState *nc;
3401     assert(n->vhost_started);
3402     if (!n->multiqueue && idx == 2) {
3403         /* Must guard against invalid features and a bogus queue index
3404          * being set by a malicious guest, or slipping through a buggy
3405          * migration stream.
3406          */
3407         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3408             qemu_log_mask(LOG_GUEST_ERROR,
3409                           "%s: bogus vq index ignored\n", __func__);
3410             return;
3411         }
3412         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3413     } else {
3414         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3415     }
3416     /*
3417      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
3418      * as the index of the config interrupt.  If the backend does not
3419      * support it, this function simply returns.
3420      */
3421 
3422     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3423         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3424         return;
3425     }
3426     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3427 }
3428 
3429 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3430 {
3431     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3432 
3433     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3434 }
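
     /*
      * VIRTIO_NET_F_MAC is force-set on the local copy of host_features
      * so that the computed config size always covers the mac field,
      * whatever features the user selected.
      */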
3435 
3436 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3437                                    const char *type)
3438 {
3439     /*
3440      * The name can be NULL; in that case the netclient name will be type.x.
3441      */
3442     assert(type != NULL);
3443 
3444     g_free(n->netclient_name);
3445     g_free(n->netclient_type);
3446     n->netclient_name = g_strdup(name);
3447     n->netclient_type = g_strdup(type);
3448 }
3449 
3450 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3451 {
3452     HotplugHandler *hotplug_ctrl;
3453     PCIDevice *pci_dev;
3454     Error *err = NULL;
3455 
3456     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3457     if (hotplug_ctrl) {
3458         pci_dev = PCI_DEVICE(dev);
3459         pci_dev->partially_hotplugged = true;
3460         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3461         if (err) {
3462             error_report_err(err);
3463             return false;
3464         }
3465     } else {
3466         return false;
3467     }
3468     return true;
3469 }
3470 
3471 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3472                                     Error **errp)
3473 {
3474     Error *err = NULL;
3475     HotplugHandler *hotplug_ctrl;
3476     PCIDevice *pdev = PCI_DEVICE(dev);
3477     BusState *primary_bus;
3478 
3479     if (!pdev->partially_hotplugged) {
3480         return true;
3481     }
3482     primary_bus = dev->parent_bus;
3483     if (!primary_bus) {
3484         error_setg(errp, "virtio_net: couldn't find primary bus");
3485         return false;
3486     }
3487     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3488     qatomic_set(&n->failover_primary_hidden, false);
3489     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3490     if (hotplug_ctrl) {
3491         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3492         if (err) {
3493             goto out;
3494         }
3495         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3496     }
3497     pdev->partially_hotplugged = false;
3498 
3499 out:
3500     error_propagate(errp, err);
3501     return !err;
3502 }
3503 
3504 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3505 {
3506     bool should_be_hidden;
3507     Error *err = NULL;
3508     DeviceState *dev = failover_find_primary_device(n);
3509 
3510     if (!dev) {
3511         return;
3512     }
3513 
3514     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3515 
3516     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3517         if (failover_unplug_primary(n, dev)) {
3518             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3519             qapi_event_send_unplug_primary(dev->id);
3520             qatomic_set(&n->failover_primary_hidden, true);
3521         } else {
3522             warn_report("couldn't unplug primary device");
3523         }
3524     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3525         /* We already unplugged the device; let's plug it back */
3526         if (!failover_replug_primary(n, dev, &err)) {
3527             if (err) {
3528                 error_report_err(err);
3529             }
3530         }
3531     }
3532 }
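
     /*
      * Net effect of the handler above: when precopy setup starts, the
      * primary (typically a VFIO device) gets an unplug request plus an
      * UNPLUG_PRIMARY QMP event so management can follow along; if the
      * migration then fails, the primary is plugged back in.
      */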
3533 
3534 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3535                                                MigrationEvent *e, Error **errp)
3536 {
3537     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3538     virtio_net_handle_migration_primary(n, e);
3539     return 0;
3540 }
3541 
3542 static bool failover_hide_primary_device(DeviceListener *listener,
3543                                          const QDict *device_opts,
3544                                          bool from_json,
3545                                          Error **errp)
3546 {
3547     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3548     const char *standby_id;
3549 
3550     if (!device_opts) {
3551         return false;
3552     }
3553 
3554     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3555         return false;
3556     }
3557 
3558     if (!qdict_haskey(device_opts, "id")) {
3559         error_setg(errp, "Device with failover_pair_id needs to have id");
3560         return false;
3561     }
3562 
3563     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3564     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3565         return false;
3566     }
3567 
3568     /*
3569      * The hide helper can be called several times for a given device.
3570      * Check there is only one primary for a virtio-net device but
3571      * don't duplicate the qdict several times if it's called for the same
3572      * device.
3573      */
3574     if (n->primary_opts) {
3575         const char *old, *new;
3576         /* devices with failover_pair_id always have an id */
3577         old = qdict_get_str(n->primary_opts, "id");
3578         new = qdict_get_str(device_opts, "id");
3579         if (strcmp(old, new) != 0) {
3580             error_setg(errp, "Cannot attach more than one primary device to "
3581                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3582             return false;
3583         }
3584     } else {
3585         n->primary_opts = qdict_clone_shallow(device_opts);
3586         n->primary_opts_from_json = from_json;
3587     }
3588 
3589     /* failover_primary_hidden is set during feature negotiation */
3590     return qatomic_read(&n->failover_primary_hidden);
3591 }
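
     /*
      * Illustrative command-line shape this listener matches (ids and
      * host address are examples only):
      *
      *   -device virtio-net-pci,netdev=nd0,id=net0,failover=on
      *   -device vfio-pci,host=02:00.0,id=hostdev0,failover_pair_id=net0
      *
      * The second device is the "primary"; it stays hidden until the
      * guest acks VIRTIO_NET_F_STANDBY.
      */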
3592 
3593 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3594 {
3595     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3596     VirtIONet *n = VIRTIO_NET(dev);
3597     NetClientState *nc;
3598     int i;
3599 
3600     if (n->net_conf.mtu) {
3601         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3602     }
3603 
3604     if (n->net_conf.duplex_str) {
3605         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3606             n->net_conf.duplex = DUPLEX_HALF;
3607         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3608             n->net_conf.duplex = DUPLEX_FULL;
3609         } else {
3610             error_setg(errp, "'duplex' must be 'half' or 'full'");
3611             return;
3612         }
3613         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3614     } else {
3615         n->net_conf.duplex = DUPLEX_UNKNOWN;
3616     }
3617 
3618     if (n->net_conf.speed < SPEED_UNKNOWN) {
3619         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3620         return;
3621     }
3622     if (n->net_conf.speed >= 0) {
3623         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3624     }
3625 
3626     if (n->failover) {
3627         n->primary_listener.hide_device = failover_hide_primary_device;
3628         qatomic_set(&n->failover_primary_hidden, true);
3629         device_listener_register(&n->primary_listener);
3630         migration_add_notifier(&n->migration_state,
3631                                virtio_net_migration_state_notifier);
3632         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3633     }
3634 
3635     virtio_net_set_config_size(n, n->host_features);
3636     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3637 
3638     /*
3639      * We set a lower limit on RX queue size to what it always was.
3640      * Guests that want a smaller ring can always resize it without
3641      * help from us (using virtio 1 and up).
3642      */
3643     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3644         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3645         !is_power_of_2(n->net_conf.rx_queue_size)) {
3646         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3647                    "must be a power of 2 between %d and %d.",
3648                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3649                    VIRTQUEUE_MAX_SIZE);
3650         virtio_cleanup(vdev);
3651         return;
3652     }
3653 
3654     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3655         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3656         !is_power_of_2(n->net_conf.tx_queue_size)) {
3657         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3658                    "must be a power of 2 between %d and %d",
3659                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3660                    virtio_net_max_tx_queue_size(n));
3661         virtio_cleanup(vdev);
3662         return;
3663     }
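
         /*
          * With the defaults above, e.g., the valid rx_queue_size values
          * are the powers of two from 256 up to VIRTQUEUE_MAX_SIZE (1024):
          * 256, 512 or 1024.
          */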
3664 
3665     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3666 
3667     /*
3668      * Figure out the datapath queue pairs since the backend could
3669      * provide control queue via peers as well.
3670      */
3671     if (n->nic_conf.peers.queues) {
3672         for (i = 0; i < n->max_ncs; i++) {
3673             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3674                 ++n->max_queue_pairs;
3675             }
3676         }
3677     }
3678     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3679 
3680     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3681         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3682                    "must be a positive integer no greater than %d.",
3683                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3684         virtio_cleanup(vdev);
3685         return;
3686     }
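         /*
          * Each queue pair consumes two virtqueues (rx + tx), and one
          * more is reserved for the control vq, hence the bound of
          * 2 * max_queue_pairs + 1 checked above.
          */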
3687     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3688     n->curr_queue_pairs = 1;
3689     n->tx_timeout = n->net_conf.txtimer;
3690 
3691     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3692                        && strcmp(n->net_conf.tx, "bh")) {
3693         warn_report("virtio-net: "
3694                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3695                     n->net_conf.tx);
3696         error_printf("Defaulting to \"bh\"");
3697     }
3698 
3699     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3700                                     n->net_conf.tx_queue_size);
3701 
3702     virtio_net_add_queue(n, 0);
3703 
3704     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3705     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3706     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3707     n->status = VIRTIO_NET_S_LINK_UP;
3708     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3709                               QEMU_CLOCK_VIRTUAL,
3710                               virtio_net_announce_timer, n);
3711     n->announce_timer.round = 0;
3712 
3713     if (n->netclient_type) {
3714         /*
3715          * This happens when virtio_net_set_netclient_name() has been called.
3716          */
3717         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3718                               n->netclient_type, n->netclient_name,
3719                               &dev->mem_reentrancy_guard, n);
3720     } else {
3721         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3722                               object_get_typename(OBJECT(dev)), dev->id,
3723                               &dev->mem_reentrancy_guard, n);
3724     }
3725 
3726     for (i = 0; i < n->max_queue_pairs; i++) {
3727         n->nic->ncs[i].do_not_pad = true;
3728     }
3729 
3730     peer_test_vnet_hdr(n);
3731     if (peer_has_vnet_hdr(n)) {
3732         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3733     } else {
3734         n->host_hdr_len = 0;
3735     }
3736 
3737     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3738 
3739     n->vqs[0].tx_waiting = 0;
3740     n->tx_burst = n->net_conf.txburst;
3741     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3742     n->promisc = 1; /* for compatibility */
3743 
3744     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3745 
3746     n->vlans = g_malloc0(MAX_VLAN >> 3);
3747 
3748     nc = qemu_get_queue(n->nic);
3749     nc->rxfilter_notify_enabled = 1;
3750 
3751     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3752         struct virtio_net_config netcfg = {};
3753         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3754         vhost_net_set_config(get_vhost_net(nc->peer),
3755             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3756     }
3757     QTAILQ_INIT(&n->rsc_chains);
3758     n->qdev = dev;
3759 
3760     net_rx_pkt_init(&n->rx_pkt);
3761 
3762     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3763         virtio_net_load_ebpf(n);
3764     }
3765 }
3766 
3767 static void virtio_net_device_unrealize(DeviceState *dev)
3768 {
3769     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3770     VirtIONet *n = VIRTIO_NET(dev);
3771     int i, max_queue_pairs;
3772 
3773     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3774         virtio_net_unload_ebpf(n);
3775     }
3776 
3777     /* This will stop vhost backend if appropriate. */
3778     virtio_net_set_status(vdev, 0);
3779 
3780     g_free(n->netclient_name);
3781     n->netclient_name = NULL;
3782     g_free(n->netclient_type);
3783     n->netclient_type = NULL;
3784 
3785     g_free(n->mac_table.macs);
3786     g_free(n->vlans);
3787 
3788     if (n->failover) {
3789         qobject_unref(n->primary_opts);
3790         device_listener_unregister(&n->primary_listener);
3791         migration_remove_notifier(&n->migration_state);
3792     } else {
3793         assert(n->primary_opts == NULL);
3794     }
3795 
3796     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3797     for (i = 0; i < max_queue_pairs; i++) {
3798         virtio_net_del_queue(n, i);
3799     }
3800     /* delete also control vq */
3801     virtio_del_queue(vdev, max_queue_pairs * 2);
3802     qemu_announce_timer_del(&n->announce_timer, false);
3803     g_free(n->vqs);
3804     qemu_del_nic(n->nic);
3805     virtio_net_rsc_cleanup(n);
3806     g_free(n->rss_data.indirections_table);
3807     net_rx_pkt_uninit(n->rx_pkt);
3808     virtio_cleanup(vdev);
3809 }
3810 
3811 static void virtio_net_reset(VirtIODevice *vdev)
3812 {
3813     VirtIONet *n = VIRTIO_NET(vdev);
3814     int i;
3815 
3816     /* Reset back to compatibility mode */
3817     n->promisc = 1;
3818     n->allmulti = 0;
3819     n->alluni = 0;
3820     n->nomulti = 0;
3821     n->nouni = 0;
3822     n->nobcast = 0;
3823     /* multiqueue is disabled by default */
3824     n->curr_queue_pairs = 1;
3825     timer_del(n->announce_timer.tm);
3826     n->announce_timer.round = 0;
3827     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3828 
3829     /* Flush any MAC and VLAN filter table state */
3830     n->mac_table.in_use = 0;
3831     n->mac_table.first_multi = 0;
3832     n->mac_table.multi_overflow = 0;
3833     n->mac_table.uni_overflow = 0;
3834     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3835     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3836     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3837     memset(n->vlans, 0, MAX_VLAN >> 3);
3838 
3839     /* Flush any async TX */
3840     for (i = 0; i < n->max_queue_pairs; i++) {
3841         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
3842     }
3843 
3844     virtio_net_disable_rss(n);
3845 }
3846 
3847 static void virtio_net_instance_init(Object *obj)
3848 {
3849     VirtIONet *n = VIRTIO_NET(obj);
3850 
3851     /*
3852      * The default config_size is sizeof(struct virtio_net_config).
3853      * Can be overridden with virtio_net_set_config_size.
3854      */
3855     n->config_size = sizeof(struct virtio_net_config);
3856     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3857                                   "bootindex", "/ethernet-phy@0",
3858                                   DEVICE(n));
3859 
3860     ebpf_rss_init(&n->ebpf_rss);
3861 }
3862 
3863 static int virtio_net_pre_save(void *opaque)
3864 {
3865     VirtIONet *n = opaque;
3866 
3867     /* At this point, the backend must be stopped; otherwise
3868      * it might keep writing to memory. */
3869     assert(!n->vhost_started);
3870 
3871     return 0;
3872 }
3873 
3874 static bool primary_unplug_pending(void *opaque)
3875 {
3876     DeviceState *dev = opaque;
3877     DeviceState *primary;
3878     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3879     VirtIONet *n = VIRTIO_NET(vdev);
3880 
3881     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3882         return false;
3883     }
3884     primary = failover_find_primary_device(n);
3885     return primary ? primary->pending_deleted_event : false;
3886 }
3887 
3888 static bool dev_unplug_pending(void *opaque)
3889 {
3890     DeviceState *dev = opaque;
3891     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3892 
3893     return vdc->primary_unplug_pending(dev);
3894 }
3895 
3896 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3897 {
3898     VirtIONet *n = VIRTIO_NET(vdev);
3899     NetClientState *nc = qemu_get_queue(n->nic);
3900     struct vhost_net *net = get_vhost_net(nc->peer);
3901     return &net->dev;
3902 }
3903 
3904 static const VMStateDescription vmstate_virtio_net = {
3905     .name = "virtio-net",
3906     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3907     .version_id = VIRTIO_NET_VM_VERSION,
3908     .fields = (const VMStateField[]) {
3909         VMSTATE_VIRTIO_DEVICE,
3910         VMSTATE_END_OF_LIST()
3911     },
3912     .pre_save = virtio_net_pre_save,
3913     .dev_unplug_pending = dev_unplug_pending,
3914 };
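
     /*
      * VMSTATE_VIRTIO_DEVICE hands off to the virtio core, which saves
      * the common virtio state and then the device-specific state via
      * vdc->vmsd (vmstate_virtio_net_device above), so the two
      * descriptions work as a pair.
      */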
3915 
3916 static Property virtio_net_properties[] = {
3917     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3918                     VIRTIO_NET_F_CSUM, true),
3919     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3920                     VIRTIO_NET_F_GUEST_CSUM, true),
3921     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3922     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3923                     VIRTIO_NET_F_GUEST_TSO4, true),
3924     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3925                     VIRTIO_NET_F_GUEST_TSO6, true),
3926     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3927                     VIRTIO_NET_F_GUEST_ECN, true),
3928     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3929                     VIRTIO_NET_F_GUEST_UFO, true),
3930     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3931                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3932     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3933                     VIRTIO_NET_F_HOST_TSO4, true),
3934     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3935                     VIRTIO_NET_F_HOST_TSO6, true),
3936     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3937                     VIRTIO_NET_F_HOST_ECN, true),
3938     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3939                     VIRTIO_NET_F_HOST_UFO, true),
3940     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3941                     VIRTIO_NET_F_MRG_RXBUF, true),
3942     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3943                     VIRTIO_NET_F_STATUS, true),
3944     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3945                     VIRTIO_NET_F_CTRL_VQ, true),
3946     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3947                     VIRTIO_NET_F_CTRL_RX, true),
3948     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3949                     VIRTIO_NET_F_CTRL_VLAN, true),
3950     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3951                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3952     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3953                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3954     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3955                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3956     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3957     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3958                     VIRTIO_NET_F_RSS, false),
3959     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3960                     VIRTIO_NET_F_HASH_REPORT, false),
3961     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
3962                       ebpf_rss_fds, qdev_prop_string, char*),
3963     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3964                     VIRTIO_NET_F_RSC_EXT, false),
3965     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3966                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3967     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3968     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3969                        TX_TIMER_INTERVAL),
3970     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3971     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3972     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3973                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3974     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3975                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3976     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3977     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3978                      true),
3979     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3980     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3981     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3982     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
3983                       VIRTIO_NET_F_GUEST_USO4, true),
3984     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
3985                       VIRTIO_NET_F_GUEST_USO6, true),
3986     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
3987                       VIRTIO_NET_F_HOST_USO, true),
3988     DEFINE_PROP_END_OF_LIST(),
3989 };
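
     /*
      * A few of these properties as they might appear on a command line
      * (illustrative values only):
      *
      *   -device virtio-net-pci,netdev=nd0,mq=on,rx_queue_size=1024,host_mtu=9000
      */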
3990 
3991 static void virtio_net_class_init(ObjectClass *klass, void *data)
3992 {
3993     DeviceClass *dc = DEVICE_CLASS(klass);
3994     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3995 
3996     device_class_set_props(dc, virtio_net_properties);
3997     dc->vmsd = &vmstate_virtio_net;
3998     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3999     vdc->realize = virtio_net_device_realize;
4000     vdc->unrealize = virtio_net_device_unrealize;
4001     vdc->get_config = virtio_net_get_config;
4002     vdc->set_config = virtio_net_set_config;
4003     vdc->get_features = virtio_net_get_features;
4004     vdc->set_features = virtio_net_set_features;
4005     vdc->bad_features = virtio_net_bad_features;
4006     vdc->reset = virtio_net_reset;
4007     vdc->queue_reset = virtio_net_queue_reset;
4008     vdc->queue_enable = virtio_net_queue_enable;
4009     vdc->set_status = virtio_net_set_status;
4010     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4011     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4012     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4013     vdc->post_load = virtio_net_post_load_virtio;
4014     vdc->vmsd = &vmstate_virtio_net_device;
4015     vdc->primary_unplug_pending = primary_unplug_pending;
4016     vdc->get_vhost = virtio_net_get_vhost;
4017     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4018 }
4019 
4020 static const TypeInfo virtio_net_info = {
4021     .name = TYPE_VIRTIO_NET,
4022     .parent = TYPE_VIRTIO_DEVICE,
4023     .instance_size = sizeof(VirtIONet),
4024     .instance_init = virtio_net_instance_init,
4025     .class_init = virtio_net_class_init,
4026 };
4027 
4028 static void virtio_register_types(void)
4029 {
4030     type_register_static(&virtio_net_info);
4031 }
4032 
4033 type_init(virtio_register_types)
4034