xref: /openbmc/qemu/hw/net/virtio-net.c (revision e50a24ea)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "sysemu/replay.h"
44 #include "trace.h"
45 #include "monitor/qdev.h"
46 #include "monitor/monitor.h"
47 #include "hw/pci/pci_device.h"
48 #include "net_rx_pkt.h"
49 #include "hw/virtio/vhost.h"
50 #include "sysemu/qtest.h"
51 
#define VIRTIO_NET_VM_VERSION    11

/* Default RX/TX virtqueue sizes (previously a fixed value). */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/*
 * Minimum sizes equal the defaults: for now, only larger queues are allowed;
 * with virtio-1, the guest can downsize.
 */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Hash types advertised in the config space's supported_hash_types field. */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
/*
 * Table pairing feature bits with the end offset of a field of
 * struct virtio_net_config.  The list is terminated by an empty entry and is
 * referenced from cfg_size_params.
 * NOTE(review): presumably used to derive the config space size from the
 * offered features - confirm against the consumer of VirtIOConfigSizeParams.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    /* Either RSS or hash reporting maps to the supported_hash_types field. */
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
108 
/*
 * Config space sizing parameters: the smallest size ends at the mac field,
 * the largest is the whole struct virtio_net_config, and feature_sizes
 * supplies the per-feature end offsets.
 */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
/*
 * Convert a virtqueue index into a queue index by halving it
 * (integer division, truncating toward zero).
 */
static int vq2q(int queue_index)
{
    const int pair_index = queue_index / 2;

    return pair_index;
}
126 
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143     VirtIONet *n = VIRTIO_NET(vdev);
144     struct virtio_net_config netcfg;
145     NetClientState *nc = qemu_get_queue(n->nic);
146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147 
148     int ret = 0;
149     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
150     virtio_stw_p(vdev, &netcfg.status, n->status);
151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155     netcfg.duplex = n->net_conf.duplex;
156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
162     memcpy(config, &netcfg, n->config_size);
163 
164     /*
165      * Is this VDPA? No peer means not VDPA: there's no way to
166      * disconnect/reconnect a VDPA peer.
167      */
168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170                                    n->config_size);
171         if (ret == -1) {
172             return;
173         }
174 
175         /*
176          * Some NIC/kernel combinations present 0 as the mac address.  As that
177          * is not a legal address, try to proceed with the address from the
178          * QEMU command line in the hope that the address has been configured
179          * correctly elsewhere - just not reported by the device.
180          */
181         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182             info_report("Zero hardware mac address detected. Ignoring.");
183             memcpy(netcfg.mac, n->mac, ETH_ALEN);
184         }
185 
186         netcfg.status |= virtio_tswap16(vdev,
187                                         n->status & VIRTIO_NET_S_ANNOUNCE);
188         memcpy(config, &netcfg, n->config_size);
189     }
190 }
191 
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194     VirtIONet *n = VIRTIO_NET(vdev);
195     struct virtio_net_config netcfg = {};
196     NetClientState *nc = qemu_get_queue(n->nic);
197 
198     memcpy(&netcfg, config, n->config_size);
199 
200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205     }
206 
207     /*
208      * Is this VDPA? No peer means not VDPA: there's no way to
209      * disconnect/reconnect a VDPA peer.
210      */
211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212         vhost_net_set_config(get_vhost_net(nc->peer),
213                              (uint8_t *)&netcfg, 0, n->config_size,
214                              VHOST_SET_CONFIG_TYPE_FRONTEND);
215       }
216 }
217 
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224 
225 static void virtio_net_announce_notify(VirtIONet *net)
226 {
227     VirtIODevice *vdev = VIRTIO_DEVICE(net);
228     trace_virtio_net_announce_notify();
229 
230     net->status |= VIRTIO_NET_S_ANNOUNCE;
231     virtio_notify_config(vdev);
232 }
233 
234 static void virtio_net_announce_timer(void *opaque)
235 {
236     VirtIONet *n = opaque;
237     trace_virtio_net_announce_timer(n->announce_timer.round);
238 
239     n->announce_timer.round--;
240     virtio_net_announce_notify(n);
241 }
242 
243 static void virtio_net_announce(NetClientState *nc)
244 {
245     VirtIONet *n = qemu_get_nic_opaque(nc);
246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
247 
248     /*
249      * Make sure the virtio migration announcement timer isn't running
250      * If it is, let it trigger announcement so that we do not cause
251      * confusion.
252      */
253     if (n->announce_timer.round) {
254         return;
255     }
256 
257     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259             virtio_net_announce_notify(n);
260     }
261 }
262 
/*
 * Start or stop the vhost backend so that it matches the device state.
 *
 * @n: the virtio-net device
 * @status: virtio device status to evaluate
 *
 * vhost should be running exactly when the device is started for @status and
 * the peer link is not down.  If that already matches n->vhost_started, the
 * function returns without doing anything.  A failed start is reported and
 * leaves n->vhost_started at 0.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    /* Without multiqueue only the first queue pair is handed to vhost. */
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* Number of control-queue net clients, or 0 if CTRL_VQ isn't negotiated. */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    /* No vhost_net behind the peer: nothing to start or stop. */
    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Desired state already equals the current vhost state. */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Push the MTU to the backend only if the guest accepted F_MTU. */
        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* The flag is set before vhost_net_start() and cleared on failure. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
322 
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324                                           NetClientState *peer,
325                                           bool enable)
326 {
327     if (virtio_is_big_endian(vdev)) {
328         return qemu_set_vnet_be(peer, enable);
329     } else {
330         return qemu_set_vnet_le(peer, enable);
331     }
332 }
333 
334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
335                                        int queue_pairs, bool enable)
336 {
337     int i;
338 
339     for (i = 0; i < queue_pairs; i++) {
340         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341             enable) {
342             while (--i >= 0) {
343                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344             }
345 
346             return true;
347         }
348     }
349 
350     return false;
351 }
352 
353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354 {
355     VirtIODevice *vdev = VIRTIO_DEVICE(n);
356     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
357 
358     if (virtio_net_started(n, status)) {
359         /* Before using the device, we tell the network backend about the
360          * endianness to use when parsing vnet headers. If the backend
361          * can't do it, we fallback onto fixing the headers in the core
362          * virtio-net code.
363          */
364         n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
365                                  virtio_net_set_vnet_endian(vdev, n->nic->ncs,
366                                                             queue_pairs, true);
367     } else if (virtio_net_started(n, vdev->status)) {
368         /* After using the device, we need to reset the network backend to
369          * the default (guest native endianness), otherwise the guest may
370          * lose network connectivity if it is rebooted into a different
371          * endianness.
372          */
373         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
374     }
375 }
376 
377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
378 {
379     unsigned int dropped = virtqueue_drop_all(vq);
380     if (dropped) {
381         virtio_notify(vdev, vq);
382     }
383 }
384 
/*
 * Apply a new virtio device @status to the network device.
 *
 * The backend endianness and vhost state are updated first.  Then, for every
 * queue pair, queued packets are flushed if the queue is started in userspace,
 * and a pending TX (tx_waiting) is either re-armed or cancelled.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /*
         * Queues beyond the first (without multiqueue) or beyond
         * curr_queue_pairs are treated as having status 0.
         */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Only queues handled in userspace (no vhost) count as started here. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        /* The rest only concerns queues with a TX pending. */
        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm the pending TX using the queue's timer or bottom half. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            /* Cancel the pending TX timer or bottom half. */
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            /* Link down while the driver is ready and the VM is running. */
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
441 
442 static void virtio_net_set_link_status(NetClientState *nc)
443 {
444     VirtIONet *n = qemu_get_nic_opaque(nc);
445     VirtIODevice *vdev = VIRTIO_DEVICE(n);
446     uint16_t old_status = n->status;
447 
448     if (nc->link_down)
449         n->status &= ~VIRTIO_NET_S_LINK_UP;
450     else
451         n->status |= VIRTIO_NET_S_LINK_UP;
452 
453     if (n->status != old_status)
454         virtio_notify_config(vdev);
455 
456     virtio_net_set_status(vdev, vdev->status);
457 }
458 
459 static void rxfilter_notify(NetClientState *nc)
460 {
461     VirtIONet *n = qemu_get_nic_opaque(nc);
462 
463     if (nc->rxfilter_notify_enabled) {
464         char *path = object_get_canonical_path(OBJECT(n->qdev));
465         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
466         g_free(path);
467 
468         /* disable event notification to avoid events flooding */
469         nc->rxfilter_notify_enabled = 0;
470     }
471 }
472 
473 static intList *get_vlan_table(VirtIONet *n)
474 {
475     intList *list;
476     int i, j;
477 
478     list = NULL;
479     for (i = 0; i < MAX_VLAN >> 5; i++) {
480         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
481             if (n->vlans[i] & (1U << j)) {
482                 QAPI_LIST_PREPEND(list, (i << 5) + j);
483             }
484         }
485     }
486 
487     return list;
488 }
489 
490 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
491 {
492     VirtIONet *n = qemu_get_nic_opaque(nc);
493     VirtIODevice *vdev = VIRTIO_DEVICE(n);
494     RxFilterInfo *info;
495     strList *str_list;
496     int i;
497 
498     info = g_malloc0(sizeof(*info));
499     info->name = g_strdup(nc->name);
500     info->promiscuous = n->promisc;
501 
502     if (n->nouni) {
503         info->unicast = RX_STATE_NONE;
504     } else if (n->alluni) {
505         info->unicast = RX_STATE_ALL;
506     } else {
507         info->unicast = RX_STATE_NORMAL;
508     }
509 
510     if (n->nomulti) {
511         info->multicast = RX_STATE_NONE;
512     } else if (n->allmulti) {
513         info->multicast = RX_STATE_ALL;
514     } else {
515         info->multicast = RX_STATE_NORMAL;
516     }
517 
518     info->broadcast_allowed = n->nobcast;
519     info->multicast_overflow = n->mac_table.multi_overflow;
520     info->unicast_overflow = n->mac_table.uni_overflow;
521 
522     info->main_mac = qemu_mac_strdup_printf(n->mac);
523 
524     str_list = NULL;
525     for (i = 0; i < n->mac_table.first_multi; i++) {
526         QAPI_LIST_PREPEND(str_list,
527                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
528     }
529     info->unicast_table = str_list;
530 
531     str_list = NULL;
532     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
533         QAPI_LIST_PREPEND(str_list,
534                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
535     }
536     info->multicast_table = str_list;
537     info->vlan_table = get_vlan_table(n);
538 
539     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
540         info->vlan = RX_STATE_ALL;
541     } else if (!info->vlan_table) {
542         info->vlan = RX_STATE_NONE;
543     } else {
544         info->vlan = RX_STATE_NORMAL;
545     }
546 
547     /* enable event notification after query */
548     nc->rxfilter_notify_enabled = 1;
549 
550     return info;
551 }
552 
553 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
554 {
555     VirtIONet *n = VIRTIO_NET(vdev);
556     NetClientState *nc;
557 
558     /* validate queue_index and skip for cvq */
559     if (queue_index >= n->max_queue_pairs * 2) {
560         return;
561     }
562 
563     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
564 
565     if (!nc->peer) {
566         return;
567     }
568 
569     if (get_vhost_net(nc->peer) &&
570         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
571         vhost_net_virtqueue_reset(vdev, nc, queue_index);
572     }
573 
574     flush_or_purge_queued_packets(nc);
575 }
576 
577 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
578 {
579     VirtIONet *n = VIRTIO_NET(vdev);
580     NetClientState *nc;
581     int r;
582 
583     /* validate queue_index and skip for cvq */
584     if (queue_index >= n->max_queue_pairs * 2) {
585         return;
586     }
587 
588     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
589 
590     if (!nc->peer || !vdev->vhost_started) {
591         return;
592     }
593 
594     if (get_vhost_net(nc->peer) &&
595         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
596         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
597         if (r < 0) {
598             error_report("unable to restart vhost net virtqueue: %d, "
599                             "when resetting the queue", queue_index);
600         }
601     }
602 }
603 
604 static void peer_test_vnet_hdr(VirtIONet *n)
605 {
606     NetClientState *nc = qemu_get_queue(n->nic);
607     if (!nc->peer) {
608         return;
609     }
610 
611     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
612 }
613 
614 static int peer_has_vnet_hdr(VirtIONet *n)
615 {
616     return n->has_vnet_hdr;
617 }
618 
619 static int peer_has_ufo(VirtIONet *n)
620 {
621     if (!peer_has_vnet_hdr(n))
622         return 0;
623 
624     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
625 
626     return n->has_ufo;
627 }
628 
629 static int peer_has_uso(VirtIONet *n)
630 {
631     if (!peer_has_vnet_hdr(n)) {
632         return 0;
633     }
634 
635     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
636 }
637 
638 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
639                                        int version_1, int hash_report)
640 {
641     int i;
642     NetClientState *nc;
643 
644     n->mergeable_rx_bufs = mergeable_rx_bufs;
645 
646     if (version_1) {
647         n->guest_hdr_len = hash_report ?
648             sizeof(struct virtio_net_hdr_v1_hash) :
649             sizeof(struct virtio_net_hdr_mrg_rxbuf);
650         n->rss_data.populate_hash = !!hash_report;
651     } else {
652         n->guest_hdr_len = n->mergeable_rx_bufs ?
653             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
654             sizeof(struct virtio_net_hdr);
655         n->rss_data.populate_hash = false;
656     }
657 
658     for (i = 0; i < n->max_queue_pairs; i++) {
659         nc = qemu_get_subqueue(n->nic, i);
660 
661         if (peer_has_vnet_hdr(n) &&
662             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
663             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
664             n->host_hdr_len = n->guest_hdr_len;
665         }
666     }
667 }
668 
669 static int virtio_net_max_tx_queue_size(VirtIONet *n)
670 {
671     NetClientState *peer = n->nic_conf.peers.ncs[0];
672 
673     /*
674      * Backends other than vhost-user or vhost-vdpa don't support max queue
675      * size.
676      */
677     if (!peer) {
678         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
679     }
680 
681     switch(peer->info->type) {
682     case NET_CLIENT_DRIVER_VHOST_USER:
683     case NET_CLIENT_DRIVER_VHOST_VDPA:
684         return VIRTQUEUE_MAX_SIZE;
685     default:
686         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
687     };
688 }
689 
690 static int peer_attach(VirtIONet *n, int index)
691 {
692     NetClientState *nc = qemu_get_subqueue(n->nic, index);
693 
694     if (!nc->peer) {
695         return 0;
696     }
697 
698     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
699         vhost_set_vring_enable(nc->peer, 1);
700     }
701 
702     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
703         return 0;
704     }
705 
706     if (n->max_queue_pairs == 1) {
707         return 0;
708     }
709 
710     return tap_enable(nc->peer);
711 }
712 
713 static int peer_detach(VirtIONet *n, int index)
714 {
715     NetClientState *nc = qemu_get_subqueue(n->nic, index);
716 
717     if (!nc->peer) {
718         return 0;
719     }
720 
721     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
722         vhost_set_vring_enable(nc->peer, 0);
723     }
724 
725     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
726         return 0;
727     }
728 
729     return tap_disable(nc->peer);
730 }
731 
732 static void virtio_net_set_queue_pairs(VirtIONet *n)
733 {
734     int i;
735     int r;
736 
737     if (n->nic->peer_deleted) {
738         return;
739     }
740 
741     for (i = 0; i < n->max_queue_pairs; i++) {
742         if (i < n->curr_queue_pairs) {
743             r = peer_attach(n, i);
744             assert(!r);
745         } else {
746             r = peer_detach(n, i);
747             assert(!r);
748         }
749     }
750 }
751 
752 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
753 
/*
 * Compute the feature bits the device offers.
 *
 * @vdev: the virtio device
 * @features: starting feature set
 * @errp: not used by this function
 *
 * Starts from @features plus n->host_features, always adds MAC, and clears
 * the offloads the peer cannot support.  When the peer has a vhost_net, the
 * set is further filtered through it and recorded in vdev->backend_features.
 *
 * Returns the resulting feature set.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* These features are all dropped when the peer has no vnet header. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    /* peer_has_ufo() also caches its answer in n->has_ufo. */
    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    /* Without a vhost_net on the peer there is nothing more to filter. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* With vhost, RSS is only offered when the eBPF RSS program is loaded. */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /*
     * With mtu_bypass_backend, MTU is re-added after the backend filter when
     * it is part of host_features; backend_features keeps the filtered value.
     */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
     * enabled. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping guest to notify the new location with vDPA devices that does not
     * support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}
826 
827 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
828 {
829     uint64_t features = 0;
830 
831     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
832      * but also these: */
833     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
834     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
835     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
836     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
837     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
838 
839     return features;
840 }
841 
842 static void virtio_net_apply_guest_offloads(VirtIONet *n)
843 {
844     qemu_set_offload(qemu_get_queue(n->nic)->peer,
845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
846             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
847             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
848             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
849             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
850             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
851             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
852 }
853 
854 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
855 {
856     static const uint64_t guest_offloads_mask =
857         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
858         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
859         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
860         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
861         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
862         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
863         (1ULL << VIRTIO_NET_F_GUEST_USO6);
864 
865     return guest_offloads_mask & features;
866 }
867 
868 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
869 {
870     VirtIODevice *vdev = VIRTIO_DEVICE(n);
871     return virtio_net_guest_offloads_by_features(vdev->guest_features);
872 }
873 
/* Search context used by failover_set_primary(). */
typedef struct {
    VirtIONet *n;       /* device whose netclient_name is matched against */
    DeviceState *dev;   /* set to the matching device once one is found */
} FailoverDevice;
878 
879 /**
880  * Set the failover primary device
881  *
882  * @opaque: FailoverId to setup
883  * @opts: opts for device we are handling
884  * @errp: returns an error if this function fails
885  */
886 static int failover_set_primary(DeviceState *dev, void *opaque)
887 {
888     FailoverDevice *fdev = opaque;
889     PCIDevice *pci_dev = (PCIDevice *)
890         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
891 
892     if (!pci_dev) {
893         return 0;
894     }
895 
896     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
897         fdev->dev = dev;
898         return 1;
899     }
900 
901     return 0;
902 }
903 
904 /**
905  * Find the primary device for this failover virtio-net
906  *
907  * @n: VirtIONet device
908  * @errp: returns an error if this function fails
909  */
910 static DeviceState *failover_find_primary_device(VirtIONet *n)
911 {
912     FailoverDevice fdev = {
913         .n = n,
914     };
915 
916     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
917                        NULL, NULL, &fdev);
918     return fdev.dev;
919 }
920 
921 static void failover_add_primary(VirtIONet *n, Error **errp)
922 {
923     Error *err = NULL;
924     DeviceState *dev = failover_find_primary_device(n);
925 
926     if (dev) {
927         return;
928     }
929 
930     if (!n->primary_opts) {
931         error_setg(errp, "Primary device not found");
932         error_append_hint(errp, "Virtio-net failover will not work. Make "
933                           "sure primary device has parameter"
934                           " failover_pair_id=%s\n", n->netclient_name);
935         return;
936     }
937 
938     dev = qdev_device_add_from_qdict(n->primary_opts,
939                                      n->primary_opts_from_json,
940                                      &err);
941     if (err) {
942         qobject_unref(n->primary_opts);
943         n->primary_opts = NULL;
944     } else {
945         object_unref(OBJECT(dev));
946     }
947     error_propagate(errp, err);
948 }
949 
950 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
951 {
952     VirtIONet *n = VIRTIO_NET(vdev);
953     Error *err = NULL;
954     int i;
955 
956     if (n->mtu_bypass_backend &&
957             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
958         features &= ~(1ULL << VIRTIO_NET_F_MTU);
959     }
960 
961     virtio_net_set_multiqueue(n,
962                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
963                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
964 
965     virtio_net_set_mrg_rx_bufs(n,
966                                virtio_has_feature(features,
967                                                   VIRTIO_NET_F_MRG_RXBUF),
968                                virtio_has_feature(features,
969                                                   VIRTIO_F_VERSION_1),
970                                virtio_has_feature(features,
971                                                   VIRTIO_NET_F_HASH_REPORT));
972 
973     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
974         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
975     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
976         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
977     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
978 
979     if (n->has_vnet_hdr) {
980         n->curr_guest_offloads =
981             virtio_net_guest_offloads_by_features(features);
982         virtio_net_apply_guest_offloads(n);
983     }
984 
985     for (i = 0;  i < n->max_queue_pairs; i++) {
986         NetClientState *nc = qemu_get_subqueue(n->nic, i);
987 
988         if (!get_vhost_net(nc->peer)) {
989             continue;
990         }
991         vhost_net_ack_features(get_vhost_net(nc->peer), features);
992 
993         /*
994          * keep acked_features in NetVhostUserState up-to-date so it
995          * can't miss any features configured by guest virtio driver.
996          */
997         vhost_net_save_acked_features(nc->peer);
998     }
999 
1000     if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1001         memset(n->vlans, 0xff, MAX_VLAN >> 3);
1002     }
1003 
1004     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1005         qapi_event_send_failover_negotiated(n->netclient_name);
1006         qatomic_set(&n->failover_primary_hidden, false);
1007         failover_add_primary(n, &err);
1008         if (err) {
1009             if (!qtest_enabled()) {
1010                 warn_report_err(err);
1011             } else {
1012                 error_free(err);
1013             }
1014         }
1015     }
1016 }
1017 
1018 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1019                                      struct iovec *iov, unsigned int iov_cnt)
1020 {
1021     uint8_t on;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024 
1025     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1026     if (s != sizeof(on)) {
1027         return VIRTIO_NET_ERR;
1028     }
1029 
1030     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1031         n->promisc = on;
1032     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1033         n->allmulti = on;
1034     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1035         n->alluni = on;
1036     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1037         n->nomulti = on;
1038     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1039         n->nouni = on;
1040     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1041         n->nobcast = on;
1042     } else {
1043         return VIRTIO_NET_ERR;
1044     }
1045 
1046     rxfilter_notify(nc);
1047 
1048     return VIRTIO_NET_OK;
1049 }
1050 
1051 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1052                                      struct iovec *iov, unsigned int iov_cnt)
1053 {
1054     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1055     uint64_t offloads;
1056     size_t s;
1057 
1058     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1059         return VIRTIO_NET_ERR;
1060     }
1061 
1062     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1063     if (s != sizeof(offloads)) {
1064         return VIRTIO_NET_ERR;
1065     }
1066 
1067     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1068         uint64_t supported_offloads;
1069 
1070         offloads = virtio_ldq_p(vdev, &offloads);
1071 
1072         if (!n->has_vnet_hdr) {
1073             return VIRTIO_NET_ERR;
1074         }
1075 
1076         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1077             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1078         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1079             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1080         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1081 
1082         supported_offloads = virtio_net_supported_guest_offloads(n);
1083         if (offloads & ~supported_offloads) {
1084             return VIRTIO_NET_ERR;
1085         }
1086 
1087         n->curr_guest_offloads = offloads;
1088         virtio_net_apply_guest_offloads(n);
1089 
1090         return VIRTIO_NET_OK;
1091     } else {
1092         return VIRTIO_NET_ERR;
1093     }
1094 }
1095 
1096 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1097                                  struct iovec *iov, unsigned int iov_cnt)
1098 {
1099     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1100     struct virtio_net_ctrl_mac mac_data;
1101     size_t s;
1102     NetClientState *nc = qemu_get_queue(n->nic);
1103 
1104     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1105         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1106             return VIRTIO_NET_ERR;
1107         }
1108         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1109         assert(s == sizeof(n->mac));
1110         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1111         rxfilter_notify(nc);
1112 
1113         return VIRTIO_NET_OK;
1114     }
1115 
1116     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1117         return VIRTIO_NET_ERR;
1118     }
1119 
1120     int in_use = 0;
1121     int first_multi = 0;
1122     uint8_t uni_overflow = 0;
1123     uint8_t multi_overflow = 0;
1124     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1125 
1126     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1127                    sizeof(mac_data.entries));
1128     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1129     if (s != sizeof(mac_data.entries)) {
1130         goto error;
1131     }
1132     iov_discard_front(&iov, &iov_cnt, s);
1133 
1134     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1135         goto error;
1136     }
1137 
1138     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1139         s = iov_to_buf(iov, iov_cnt, 0, macs,
1140                        mac_data.entries * ETH_ALEN);
1141         if (s != mac_data.entries * ETH_ALEN) {
1142             goto error;
1143         }
1144         in_use += mac_data.entries;
1145     } else {
1146         uni_overflow = 1;
1147     }
1148 
1149     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1150 
1151     first_multi = in_use;
1152 
1153     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1154                    sizeof(mac_data.entries));
1155     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1156     if (s != sizeof(mac_data.entries)) {
1157         goto error;
1158     }
1159 
1160     iov_discard_front(&iov, &iov_cnt, s);
1161 
1162     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1163         goto error;
1164     }
1165 
1166     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1167         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1168                        mac_data.entries * ETH_ALEN);
1169         if (s != mac_data.entries * ETH_ALEN) {
1170             goto error;
1171         }
1172         in_use += mac_data.entries;
1173     } else {
1174         multi_overflow = 1;
1175     }
1176 
1177     n->mac_table.in_use = in_use;
1178     n->mac_table.first_multi = first_multi;
1179     n->mac_table.uni_overflow = uni_overflow;
1180     n->mac_table.multi_overflow = multi_overflow;
1181     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1182     g_free(macs);
1183     rxfilter_notify(nc);
1184 
1185     return VIRTIO_NET_OK;
1186 
1187 error:
1188     g_free(macs);
1189     return VIRTIO_NET_ERR;
1190 }
1191 
1192 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1193                                         struct iovec *iov, unsigned int iov_cnt)
1194 {
1195     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1196     uint16_t vid;
1197     size_t s;
1198     NetClientState *nc = qemu_get_queue(n->nic);
1199 
1200     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1201     vid = virtio_lduw_p(vdev, &vid);
1202     if (s != sizeof(vid)) {
1203         return VIRTIO_NET_ERR;
1204     }
1205 
1206     if (vid >= MAX_VLAN)
1207         return VIRTIO_NET_ERR;
1208 
1209     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1210         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1211     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1212         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1213     else
1214         return VIRTIO_NET_ERR;
1215 
1216     rxfilter_notify(nc);
1217 
1218     return VIRTIO_NET_OK;
1219 }
1220 
1221 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1222                                       struct iovec *iov, unsigned int iov_cnt)
1223 {
1224     trace_virtio_net_handle_announce(n->announce_timer.round);
1225     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1226         n->status & VIRTIO_NET_S_ANNOUNCE) {
1227         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1228         if (n->announce_timer.round) {
1229             qemu_announce_timer_step(&n->announce_timer);
1230         }
1231         return VIRTIO_NET_OK;
1232     } else {
1233         return VIRTIO_NET_ERR;
1234     }
1235 }
1236 
1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1238 {
1239     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1240     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1241         return false;
1242     }
1243 
1244     trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
1245     return nc->info->set_steering_ebpf(nc, prog_fd);
1246 }
1247 
1248 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1249                                    struct EBPFRSSConfig *config)
1250 {
1251     config->redirect = data->redirect;
1252     config->populate_hash = data->populate_hash;
1253     config->hash_types = data->hash_types;
1254     config->indirections_len = data->indirections_len;
1255     config->default_queue = data->default_queue;
1256 }
1257 
1258 static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
1259 {
1260     struct EBPFRSSConfig config = {};
1261 
1262     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1263         return false;
1264     }
1265 
1266     rss_data_to_rss_config(&n->rss_data, &config);
1267 
1268     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1269                           n->rss_data.indirections_table, n->rss_data.key,
1270                           NULL)) {
1271         return false;
1272     }
1273 
1274     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1275         return false;
1276     }
1277 
1278     return true;
1279 }
1280 
1281 static void virtio_net_detach_ebpf_rss(VirtIONet *n)
1282 {
1283     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1284 }
1285 
1286 static void virtio_net_commit_rss_config(VirtIONet *n)
1287 {
1288     if (n->rss_data.enabled) {
1289         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
1290         if (n->rss_data.populate_hash) {
1291             virtio_net_detach_ebpf_rss(n);
1292         } else if (!virtio_net_attach_ebpf_rss(n)) {
1293             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1294                 warn_report("Can't load eBPF RSS for vhost");
1295             } else {
1296                 warn_report("Can't load eBPF RSS - fallback to software RSS");
1297                 n->rss_data.enabled_software_rss = true;
1298             }
1299         }
1300 
1301         trace_virtio_net_rss_enable(n,
1302                                     n->rss_data.hash_types,
1303                                     n->rss_data.indirections_len,
1304                                     sizeof(n->rss_data.key));
1305     } else {
1306         virtio_net_detach_ebpf_rss(n);
1307         trace_virtio_net_rss_disable(n);
1308     }
1309 }
1310 
1311 static void virtio_net_disable_rss(VirtIONet *n)
1312 {
1313     if (!n->rss_data.enabled) {
1314         return;
1315     }
1316 
1317     n->rss_data.enabled = false;
1318     virtio_net_commit_rss_config(n);
1319 }
1320 
1321 static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
1322 {
1323     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1324     int ret = true;
1325     int i = 0;
1326 
1327     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1328         error_setg(errp, "Expected %d file descriptors but got %d",
1329                    EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1330         return false;
1331     }
1332 
1333     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1334         fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
1335         if (fds[i] < 0) {
1336             ret = false;
1337             goto exit;
1338         }
1339     }
1340 
1341     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);
1342 
1343 exit:
1344     if (!ret) {
1345         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1346             close(fds[i]);
1347         }
1348     }
1349 
1350     return ret;
1351 }
1352 
1353 static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
1354 {
1355     bool ret = false;
1356 
1357     if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1358         trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
1359         if (n->ebpf_rss_fds) {
1360             ret = virtio_net_load_ebpf_fds(n, errp);
1361         } else {
1362             ret = ebpf_rss_load(&n->ebpf_rss, errp);
1363         }
1364     }
1365 
1366     return ret;
1367 }
1368 
1369 static void virtio_net_unload_ebpf(VirtIONet *n)
1370 {
1371     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1372     ebpf_rss_unload(&n->ebpf_rss);
1373 }
1374 
1375 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1376                                       struct iovec *iov,
1377                                       unsigned int iov_cnt,
1378                                       bool do_rss)
1379 {
1380     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1381     struct virtio_net_rss_config cfg;
1382     size_t s, offset = 0, size_get;
1383     uint16_t queue_pairs, i;
1384     struct {
1385         uint16_t us;
1386         uint8_t b;
1387     } QEMU_PACKED temp;
1388     const char *err_msg = "";
1389     uint32_t err_value = 0;
1390 
1391     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1392         err_msg = "RSS is not negotiated";
1393         goto error;
1394     }
1395     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1396         err_msg = "Hash report is not negotiated";
1397         goto error;
1398     }
1399     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1400     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1401     if (s != size_get) {
1402         err_msg = "Short command buffer";
1403         err_value = (uint32_t)s;
1404         goto error;
1405     }
1406     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1407     n->rss_data.indirections_len =
1408         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1409     if (!do_rss) {
1410         n->rss_data.indirections_len = 0;
1411     }
1412     if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1413         err_msg = "Too large indirection table";
1414         err_value = n->rss_data.indirections_len;
1415         goto error;
1416     }
1417     n->rss_data.indirections_len++;
1418     if (!is_power_of_2(n->rss_data.indirections_len)) {
1419         err_msg = "Invalid size of indirection table";
1420         err_value = n->rss_data.indirections_len;
1421         goto error;
1422     }
1423     n->rss_data.default_queue = do_rss ?
1424         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1425     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1426         err_msg = "Invalid default queue";
1427         err_value = n->rss_data.default_queue;
1428         goto error;
1429     }
1430     offset += size_get;
1431     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1432     g_free(n->rss_data.indirections_table);
1433     n->rss_data.indirections_table = g_malloc(size_get);
1434     if (!n->rss_data.indirections_table) {
1435         err_msg = "Can't allocate indirections table";
1436         err_value = n->rss_data.indirections_len;
1437         goto error;
1438     }
1439     s = iov_to_buf(iov, iov_cnt, offset,
1440                    n->rss_data.indirections_table, size_get);
1441     if (s != size_get) {
1442         err_msg = "Short indirection table buffer";
1443         err_value = (uint32_t)s;
1444         goto error;
1445     }
1446     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1447         uint16_t val = n->rss_data.indirections_table[i];
1448         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1449     }
1450     offset += size_get;
1451     size_get = sizeof(temp);
1452     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1453     if (s != size_get) {
1454         err_msg = "Can't get queue_pairs";
1455         err_value = (uint32_t)s;
1456         goto error;
1457     }
1458     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1459     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1460         err_msg = "Invalid number of queue_pairs";
1461         err_value = queue_pairs;
1462         goto error;
1463     }
1464     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1465         err_msg = "Invalid key size";
1466         err_value = temp.b;
1467         goto error;
1468     }
1469     if (!temp.b && n->rss_data.hash_types) {
1470         err_msg = "No key provided";
1471         err_value = 0;
1472         goto error;
1473     }
1474     if (!temp.b && !n->rss_data.hash_types) {
1475         virtio_net_disable_rss(n);
1476         return queue_pairs;
1477     }
1478     offset += size_get;
1479     size_get = temp.b;
1480     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1481     if (s != size_get) {
1482         err_msg = "Can get key buffer";
1483         err_value = (uint32_t)s;
1484         goto error;
1485     }
1486     n->rss_data.enabled = true;
1487     virtio_net_commit_rss_config(n);
1488     return queue_pairs;
1489 error:
1490     trace_virtio_net_rss_error(n, err_msg, err_value);
1491     virtio_net_disable_rss(n);
1492     return 0;
1493 }
1494 
1495 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1496                                 struct iovec *iov, unsigned int iov_cnt)
1497 {
1498     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1499     uint16_t queue_pairs;
1500     NetClientState *nc = qemu_get_queue(n->nic);
1501 
1502     virtio_net_disable_rss(n);
1503     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1504         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1505         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1506     }
1507     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1508         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1509     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1510         struct virtio_net_ctrl_mq mq;
1511         size_t s;
1512         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1513             return VIRTIO_NET_ERR;
1514         }
1515         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1516         if (s != sizeof(mq)) {
1517             return VIRTIO_NET_ERR;
1518         }
1519         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1520 
1521     } else {
1522         return VIRTIO_NET_ERR;
1523     }
1524 
1525     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1526         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1527         queue_pairs > n->max_queue_pairs ||
1528         !n->multiqueue) {
1529         return VIRTIO_NET_ERR;
1530     }
1531 
1532     n->curr_queue_pairs = queue_pairs;
1533     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1534         /*
1535          * Avoid updating the backend for a vdpa device: We're only interested
1536          * in updating the device model queues.
1537          */
1538         return VIRTIO_NET_OK;
1539     }
1540     /* stop the backend before changing the number of queue_pairs to avoid handling a
1541      * disabled queue */
1542     virtio_net_set_status(vdev, vdev->status);
1543     virtio_net_set_queue_pairs(n);
1544 
1545     return VIRTIO_NET_OK;
1546 }
1547 
1548 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1549                                   const struct iovec *in_sg, unsigned in_num,
1550                                   const struct iovec *out_sg,
1551                                   unsigned out_num)
1552 {
1553     VirtIONet *n = VIRTIO_NET(vdev);
1554     struct virtio_net_ctrl_hdr ctrl;
1555     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1556     size_t s;
1557     struct iovec *iov, *iov2;
1558 
1559     if (iov_size(in_sg, in_num) < sizeof(status) ||
1560         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1561         virtio_error(vdev, "virtio-net ctrl missing headers");
1562         return 0;
1563     }
1564 
1565     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1566     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1567     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1568     if (s != sizeof(ctrl)) {
1569         status = VIRTIO_NET_ERR;
1570     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1571         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1572     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1573         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1574     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1575         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1576     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1577         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1578     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1579         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1580     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1581         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1582     }
1583 
1584     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1585     assert(s == sizeof(status));
1586 
1587     g_free(iov2);
1588     return sizeof(status);
1589 }
1590 
1591 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1592 {
1593     VirtQueueElement *elem;
1594 
1595     for (;;) {
1596         size_t written;
1597         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1598         if (!elem) {
1599             break;
1600         }
1601 
1602         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1603                                              elem->out_sg, elem->out_num);
1604         if (written > 0) {
1605             virtqueue_push(vq, elem, written);
1606             virtio_notify(vdev, vq);
1607             g_free(elem);
1608         } else {
1609             virtqueue_detach_element(vq, elem, 0);
1610             g_free(elem);
1611             break;
1612         }
1613     }
1614 }
1615 
1616 /* RX */
1617 
1618 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1619 {
1620     VirtIONet *n = VIRTIO_NET(vdev);
1621     int queue_index = vq2q(virtio_get_queue_index(vq));
1622 
1623     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1624 }
1625 
1626 static bool virtio_net_can_receive(NetClientState *nc)
1627 {
1628     VirtIONet *n = qemu_get_nic_opaque(nc);
1629     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1630     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1631 
1632     if (!vdev->vm_running) {
1633         return false;
1634     }
1635 
1636     if (nc->queue_index >= n->curr_queue_pairs) {
1637         return false;
1638     }
1639 
1640     if (!virtio_queue_ready(q->rx_vq) ||
1641         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1642         return false;
1643     }
1644 
1645     return true;
1646 }
1647 
1648 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1649 {
1650     int opaque;
1651     unsigned int in_bytes;
1652     VirtIONet *n = q->n;
1653 
1654     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1655         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1656                                            bufsize, 0);
1657         /* Buffer is enough, disable notifiaction */
1658         if (bufsize <= in_bytes) {
1659             break;
1660         }
1661 
1662         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1663             /* Guest has added some buffers, try again */
1664             continue;
1665         } else {
1666             return 0;
1667         }
1668     }
1669 
1670     virtio_queue_set_notification(q->rx_vq, 0);
1671 
1672     return 1;
1673 }
1674 
1675 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1676 {
1677     virtio_tswap16s(vdev, &hdr->hdr_len);
1678     virtio_tswap16s(vdev, &hdr->gso_size);
1679     virtio_tswap16s(vdev, &hdr->csum_start);
1680     virtio_tswap16s(vdev, &hdr->csum_offset);
1681 }
1682 
1683 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1684  * it never finds out that the packets don't have valid checksums.  This
1685  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1686  * fix this with Xen but it hasn't appeared in an upstream release of
1687  * dhclient yet.
1688  *
1689  * To avoid breaking existing guests, we catch udp packets and add
1690  * checksums.  This is terrible but it's better than hacking the guest
1691  * kernels.
1692  *
1693  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1694  * we should provide a mechanism to disable it to avoid polluting the host
1695  * cache.
1696  */
1697 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1698                                         uint8_t *buf, size_t size)
1699 {
1700     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1701         (size > 27 && size < 1500) && /* normal sized MTU */
1702         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1703         (buf[23] == 17) && /* ip.protocol == UDP */
1704         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1705         net_checksum_calculate(buf, size, CSUM_UDP);
1706         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1707     }
1708 }
1709 
1710 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1711                            const void *buf, size_t size)
1712 {
1713     if (n->has_vnet_hdr) {
1714         /* FIXME this cast is evil */
1715         void *wbuf = (void *)buf;
1716         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1717                                     size - n->host_hdr_len);
1718 
1719         if (n->needs_vnet_hdr_swap) {
1720             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1721         }
1722         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1723     } else {
1724         struct virtio_net_hdr hdr = {
1725             .flags = 0,
1726             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1727         };
1728         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1729     }
1730 }
1731 
1732 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1733 {
1734     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1735     static const uint8_t vlan[] = {0x81, 0x00};
1736     uint8_t *ptr = (uint8_t *)buf;
1737     int i;
1738 
1739     if (n->promisc)
1740         return 1;
1741 
1742     ptr += n->host_hdr_len;
1743 
1744     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1745         int vid = lduw_be_p(ptr + 14) & 0xfff;
1746         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1747             return 0;
1748     }
1749 
1750     if (ptr[0] & 1) { // multicast
1751         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1752             return !n->nobcast;
1753         } else if (n->nomulti) {
1754             return 0;
1755         } else if (n->allmulti || n->mac_table.multi_overflow) {
1756             return 1;
1757         }
1758 
1759         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1760             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1761                 return 1;
1762             }
1763         }
1764     } else { // unicast
1765         if (n->nouni) {
1766             return 0;
1767         } else if (n->alluni || n->mac_table.uni_overflow) {
1768             return 1;
1769         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1770             return 1;
1771         }
1772 
1773         for (i = 0; i < n->mac_table.first_multi; i++) {
1774             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1775                 return 1;
1776             }
1777         }
1778     }
1779 
1780     return 0;
1781 }
1782 
1783 static uint8_t virtio_net_get_hash_type(bool hasip4,
1784                                         bool hasip6,
1785                                         EthL4HdrProto l4hdr_proto,
1786                                         uint32_t types)
1787 {
1788     if (hasip4) {
1789         switch (l4hdr_proto) {
1790         case ETH_L4_HDR_PROTO_TCP:
1791             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1792                 return NetPktRssIpV4Tcp;
1793             }
1794             break;
1795 
1796         case ETH_L4_HDR_PROTO_UDP:
1797             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1798                 return NetPktRssIpV4Udp;
1799             }
1800             break;
1801 
1802         default:
1803             break;
1804         }
1805 
1806         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1807             return NetPktRssIpV4;
1808         }
1809     } else if (hasip6) {
1810         switch (l4hdr_proto) {
1811         case ETH_L4_HDR_PROTO_TCP:
1812             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1813                 return NetPktRssIpV6TcpEx;
1814             }
1815             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1816                 return NetPktRssIpV6Tcp;
1817             }
1818             break;
1819 
1820         case ETH_L4_HDR_PROTO_UDP:
1821             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1822                 return NetPktRssIpV6UdpEx;
1823             }
1824             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1825                 return NetPktRssIpV6Udp;
1826             }
1827             break;
1828 
1829         default:
1830             break;
1831         }
1832 
1833         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1834             return NetPktRssIpV6Ex;
1835         }
1836         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1837             return NetPktRssIpV6;
1838         }
1839     }
1840     return 0xff;
1841 }
1842 
1843 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1844                                   size_t size,
1845                                   struct virtio_net_hdr_v1_hash *hdr)
1846 {
1847     VirtIONet *n = qemu_get_nic_opaque(nc);
1848     unsigned int index = nc->queue_index, new_index = index;
1849     struct NetRxPkt *pkt = n->rx_pkt;
1850     uint8_t net_hash_type;
1851     uint32_t hash;
1852     bool hasip4, hasip6;
1853     EthL4HdrProto l4hdr_proto;
1854     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1855         VIRTIO_NET_HASH_REPORT_IPv4,
1856         VIRTIO_NET_HASH_REPORT_TCPv4,
1857         VIRTIO_NET_HASH_REPORT_TCPv6,
1858         VIRTIO_NET_HASH_REPORT_IPv6,
1859         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1860         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1861         VIRTIO_NET_HASH_REPORT_UDPv4,
1862         VIRTIO_NET_HASH_REPORT_UDPv6,
1863         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1864     };
1865     struct iovec iov = {
1866         .iov_base = (void *)buf,
1867         .iov_len = size
1868     };
1869 
1870     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1871     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1872     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1873                                              n->rss_data.hash_types);
1874     if (net_hash_type > NetPktRssIpV6UdpEx) {
1875         if (n->rss_data.populate_hash) {
1876             hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1877             hdr->hash_report = 0;
1878         }
1879         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1880     }
1881 
1882     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1883 
1884     if (n->rss_data.populate_hash) {
1885         hdr->hash_value = hash;
1886         hdr->hash_report = reports[net_hash_type];
1887     }
1888 
1889     if (n->rss_data.redirect) {
1890         new_index = hash & (n->rss_data.indirections_len - 1);
1891         new_index = n->rss_data.indirections_table[new_index];
1892     }
1893 
1894     return (index == new_index) ? -1 : new_index;
1895 }
1896 
1897 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1898                                       size_t size, bool no_rss)
1899 {
1900     VirtIONet *n = qemu_get_nic_opaque(nc);
1901     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1902     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1903     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1904     size_t lens[VIRTQUEUE_MAX_SIZE];
1905     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1906     struct virtio_net_hdr_v1_hash extra_hdr;
1907     unsigned mhdr_cnt = 0;
1908     size_t offset, i, guest_offset, j;
1909     ssize_t err;
1910 
1911     if (!virtio_net_can_receive(nc)) {
1912         return -1;
1913     }
1914 
1915     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1916         int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1917         if (index >= 0) {
1918             NetClientState *nc2 =
1919                 qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1920             return virtio_net_receive_rcu(nc2, buf, size, true);
1921         }
1922     }
1923 
1924     /* hdr_len refers to the header we supply to the guest */
1925     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1926         return 0;
1927     }
1928 
1929     if (!receive_filter(n, buf, size))
1930         return size;
1931 
1932     offset = i = 0;
1933 
1934     while (offset < size) {
1935         VirtQueueElement *elem;
1936         int len, total;
1937         const struct iovec *sg;
1938 
1939         total = 0;
1940 
1941         if (i == VIRTQUEUE_MAX_SIZE) {
1942             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1943             err = size;
1944             goto err;
1945         }
1946 
1947         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1948         if (!elem) {
1949             if (i) {
1950                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1951                              "i %zd mergeable %d offset %zd, size %zd, "
1952                              "guest hdr len %zd, host hdr len %zd "
1953                              "guest features 0x%" PRIx64,
1954                              i, n->mergeable_rx_bufs, offset, size,
1955                              n->guest_hdr_len, n->host_hdr_len,
1956                              vdev->guest_features);
1957             }
1958             err = -1;
1959             goto err;
1960         }
1961 
1962         if (elem->in_num < 1) {
1963             virtio_error(vdev,
1964                          "virtio-net receive queue contains no in buffers");
1965             virtqueue_detach_element(q->rx_vq, elem, 0);
1966             g_free(elem);
1967             err = -1;
1968             goto err;
1969         }
1970 
1971         sg = elem->in_sg;
1972         if (i == 0) {
1973             assert(offset == 0);
1974             if (n->mergeable_rx_bufs) {
1975                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1976                                     sg, elem->in_num,
1977                                     offsetof(typeof(extra_hdr), hdr.num_buffers),
1978                                     sizeof(extra_hdr.hdr.num_buffers));
1979             }
1980 
1981             receive_header(n, sg, elem->in_num, buf, size);
1982             if (n->rss_data.populate_hash) {
1983                 offset = offsetof(typeof(extra_hdr), hash_value);
1984                 iov_from_buf(sg, elem->in_num, offset,
1985                              (char *)&extra_hdr + offset,
1986                              sizeof(extra_hdr.hash_value) +
1987                              sizeof(extra_hdr.hash_report));
1988             }
1989             offset = n->host_hdr_len;
1990             total += n->guest_hdr_len;
1991             guest_offset = n->guest_hdr_len;
1992         } else {
1993             guest_offset = 0;
1994         }
1995 
1996         /* copy in packet.  ugh */
1997         len = iov_from_buf(sg, elem->in_num, guest_offset,
1998                            buf + offset, size - offset);
1999         total += len;
2000         offset += len;
2001         /* If buffers can't be merged, at this point we
2002          * must have consumed the complete packet.
2003          * Otherwise, drop it. */
2004         if (!n->mergeable_rx_bufs && offset < size) {
2005             virtqueue_unpop(q->rx_vq, elem, total);
2006             g_free(elem);
2007             err = size;
2008             goto err;
2009         }
2010 
2011         elems[i] = elem;
2012         lens[i] = total;
2013         i++;
2014     }
2015 
2016     if (mhdr_cnt) {
2017         virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
2018         iov_from_buf(mhdr_sg, mhdr_cnt,
2019                      0,
2020                      &extra_hdr.hdr.num_buffers,
2021                      sizeof extra_hdr.hdr.num_buffers);
2022     }
2023 
2024     for (j = 0; j < i; j++) {
2025         /* signal other side */
2026         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2027         g_free(elems[j]);
2028     }
2029 
2030     virtqueue_flush(q->rx_vq, i);
2031     virtio_notify(vdev, q->rx_vq);
2032 
2033     return size;
2034 
2035 err:
2036     for (j = 0; j < i; j++) {
2037         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2038         g_free(elems[j]);
2039     }
2040 
2041     return err;
2042 }
2043 
2044 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2045                                   size_t size)
2046 {
2047     RCU_READ_LOCK_GUARD();
2048 
2049     return virtio_net_receive_rcu(nc, buf, size, false);
2050 }
2051 
2052 /*
2053  * Accessors to read and write the IP packet data length field. This
2054  * is a potentially unaligned network-byte-order 16 bit unsigned integer
2055  * pointed to by unit->ip_len.
2056  */
2057 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2058 {
2059     return lduw_be_p(unit->ip_plen);
2060 }
2061 
2062 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2063 {
2064     stw_be_p(unit->ip_plen, l);
2065 }
2066 
2067 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2068                                          const uint8_t *buf,
2069                                          VirtioNetRscUnit *unit)
2070 {
2071     uint16_t ip_hdrlen;
2072     struct ip_header *ip;
2073 
2074     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2075                               + sizeof(struct eth_header));
2076     unit->ip = (void *)ip;
2077     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2078     unit->ip_plen = &ip->ip_len;
2079     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2080     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2081     unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
2082 }
2083 
2084 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2085                                          const uint8_t *buf,
2086                                          VirtioNetRscUnit *unit)
2087 {
2088     struct ip6_header *ip6;
2089 
2090     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2091                                  + sizeof(struct eth_header));
2092     unit->ip = ip6;
2093     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2094     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2095                                         + sizeof(struct ip6_header));
2096     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2097 
2098     /* There is a difference between payload length in ipv4 and v6,
2099        ip header is excluded in ipv6 */
2100     unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
2101 }
2102 
2103 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2104                                        VirtioNetRscSeg *seg)
2105 {
2106     int ret;
2107     struct virtio_net_hdr_v1 *h;
2108 
2109     h = (struct virtio_net_hdr_v1 *)seg->buf;
2110     h->flags = 0;
2111     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2112 
2113     if (seg->is_coalesced) {
2114         h->rsc.segments = seg->packets;
2115         h->rsc.dup_acks = seg->dup_ack;
2116         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2117         if (chain->proto == ETH_P_IP) {
2118             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2119         } else {
2120             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2121         }
2122     }
2123 
2124     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2125     QTAILQ_REMOVE(&chain->buffers, seg, next);
2126     g_free(seg->buf);
2127     g_free(seg);
2128 
2129     return ret;
2130 }
2131 
2132 static void virtio_net_rsc_purge(void *opq)
2133 {
2134     VirtioNetRscSeg *seg, *rn;
2135     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2136 
2137     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2138         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2139             chain->stat.purge_failed++;
2140             continue;
2141         }
2142     }
2143 
2144     chain->stat.timer++;
2145     if (!QTAILQ_EMPTY(&chain->buffers)) {
2146         timer_mod(chain->drain_timer,
2147               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2148     }
2149 }
2150 
2151 static void virtio_net_rsc_cleanup(VirtIONet *n)
2152 {
2153     VirtioNetRscChain *chain, *rn_chain;
2154     VirtioNetRscSeg *seg, *rn_seg;
2155 
2156     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2157         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2158             QTAILQ_REMOVE(&chain->buffers, seg, next);
2159             g_free(seg->buf);
2160             g_free(seg);
2161         }
2162 
2163         timer_free(chain->drain_timer);
2164         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2165         g_free(chain);
2166     }
2167 }
2168 
2169 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2170                                      NetClientState *nc,
2171                                      const uint8_t *buf, size_t size)
2172 {
2173     uint16_t hdr_len;
2174     VirtioNetRscSeg *seg;
2175 
2176     hdr_len = chain->n->guest_hdr_len;
2177     seg = g_new(VirtioNetRscSeg, 1);
2178     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2179         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2180     memcpy(seg->buf, buf, size);
2181     seg->size = size;
2182     seg->packets = 1;
2183     seg->dup_ack = 0;
2184     seg->is_coalesced = 0;
2185     seg->nc = nc;
2186 
2187     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2188     chain->stat.cache++;
2189 
2190     switch (chain->proto) {
2191     case ETH_P_IP:
2192         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2193         break;
2194     case ETH_P_IPV6:
2195         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2196         break;
2197     default:
2198         g_assert_not_reached();
2199     }
2200 }
2201 
2202 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2203                                          VirtioNetRscSeg *seg,
2204                                          const uint8_t *buf,
2205                                          struct tcp_header *n_tcp,
2206                                          struct tcp_header *o_tcp)
2207 {
2208     uint32_t nack, oack;
2209     uint16_t nwin, owin;
2210 
2211     nack = htonl(n_tcp->th_ack);
2212     nwin = htons(n_tcp->th_win);
2213     oack = htonl(o_tcp->th_ack);
2214     owin = htons(o_tcp->th_win);
2215 
2216     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2217         chain->stat.ack_out_of_win++;
2218         return RSC_FINAL;
2219     } else if (nack == oack) {
2220         /* duplicated ack or window probe */
2221         if (nwin == owin) {
2222             /* duplicated ack, add dup ack count due to whql test up to 1 */
2223             chain->stat.dup_ack++;
2224             return RSC_FINAL;
2225         } else {
2226             /* Coalesce window update */
2227             o_tcp->th_win = n_tcp->th_win;
2228             chain->stat.win_update++;
2229             return RSC_COALESCE;
2230         }
2231     } else {
2232         /* pure ack, go to 'C', finalize*/
2233         chain->stat.pure_ack++;
2234         return RSC_FINAL;
2235     }
2236 }
2237 
2238 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2239                                             VirtioNetRscSeg *seg,
2240                                             const uint8_t *buf,
2241                                             VirtioNetRscUnit *n_unit)
2242 {
2243     void *data;
2244     uint16_t o_ip_len;
2245     uint32_t nseq, oseq;
2246     VirtioNetRscUnit *o_unit;
2247 
2248     o_unit = &seg->unit;
2249     o_ip_len = read_unit_ip_len(o_unit);
2250     nseq = htonl(n_unit->tcp->th_seq);
2251     oseq = htonl(o_unit->tcp->th_seq);
2252 
2253     /* out of order or retransmitted. */
2254     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2255         chain->stat.data_out_of_win++;
2256         return RSC_FINAL;
2257     }
2258 
2259     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2260     if (nseq == oseq) {
2261         if ((o_unit->payload == 0) && n_unit->payload) {
2262             /* From no payload to payload, normal case, not a dup ack or etc */
2263             chain->stat.data_after_pure_ack++;
2264             goto coalesce;
2265         } else {
2266             return virtio_net_rsc_handle_ack(chain, seg, buf,
2267                                              n_unit->tcp, o_unit->tcp);
2268         }
2269     } else if ((nseq - oseq) != o_unit->payload) {
2270         /* Not a consistent packet, out of order */
2271         chain->stat.data_out_of_order++;
2272         return RSC_FINAL;
2273     } else {
2274 coalesce:
2275         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2276             chain->stat.over_size++;
2277             return RSC_FINAL;
2278         }
2279 
2280         /* Here comes the right data, the payload length in v4/v6 is different,
2281            so use the field value to update and record the new data len */
2282         o_unit->payload += n_unit->payload; /* update new data len */
2283 
2284         /* update field in ip header */
2285         write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);
2286 
2287         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2288            for windows guest, while this may change the behavior for linux
2289            guest (only if it uses RSC feature). */
2290         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2291 
2292         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2293         o_unit->tcp->th_win = n_unit->tcp->th_win;
2294 
2295         memmove(seg->buf + seg->size, data, n_unit->payload);
2296         seg->size += n_unit->payload;
2297         seg->packets++;
2298         chain->stat.coalesced++;
2299         return RSC_COALESCE;
2300     }
2301 }
2302 
2303 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2304                                         VirtioNetRscSeg *seg,
2305                                         const uint8_t *buf, size_t size,
2306                                         VirtioNetRscUnit *unit)
2307 {
2308     struct ip_header *ip1, *ip2;
2309 
2310     ip1 = (struct ip_header *)(unit->ip);
2311     ip2 = (struct ip_header *)(seg->unit.ip);
2312     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2313         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2314         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2315         chain->stat.no_match++;
2316         return RSC_NO_MATCH;
2317     }
2318 
2319     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2320 }
2321 
2322 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2323                                         VirtioNetRscSeg *seg,
2324                                         const uint8_t *buf, size_t size,
2325                                         VirtioNetRscUnit *unit)
2326 {
2327     struct ip6_header *ip1, *ip2;
2328 
2329     ip1 = (struct ip6_header *)(unit->ip);
2330     ip2 = (struct ip6_header *)(seg->unit.ip);
2331     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2332         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2333         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2334         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2335             chain->stat.no_match++;
2336             return RSC_NO_MATCH;
2337     }
2338 
2339     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2340 }
2341 
2342 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2343  * to prevent out of order */
2344 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2345                                          struct tcp_header *tcp)
2346 {
2347     uint16_t tcp_hdr;
2348     uint16_t tcp_flag;
2349 
2350     tcp_flag = htons(tcp->th_offset_flags);
2351     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2352     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2353     if (tcp_flag & TH_SYN) {
2354         chain->stat.tcp_syn++;
2355         return RSC_BYPASS;
2356     }
2357 
2358     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2359         chain->stat.tcp_ctrl_drain++;
2360         return RSC_FINAL;
2361     }
2362 
2363     if (tcp_hdr > sizeof(struct tcp_header)) {
2364         chain->stat.tcp_all_opt++;
2365         return RSC_FINAL;
2366     }
2367 
2368     return RSC_CANDIDATE;
2369 }
2370 
2371 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2372                                          NetClientState *nc,
2373                                          const uint8_t *buf, size_t size,
2374                                          VirtioNetRscUnit *unit)
2375 {
2376     int ret;
2377     VirtioNetRscSeg *seg, *nseg;
2378 
2379     if (QTAILQ_EMPTY(&chain->buffers)) {
2380         chain->stat.empty_cache++;
2381         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2382         timer_mod(chain->drain_timer,
2383               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2384         return size;
2385     }
2386 
2387     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2388         if (chain->proto == ETH_P_IP) {
2389             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2390         } else {
2391             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2392         }
2393 
2394         if (ret == RSC_FINAL) {
2395             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2396                 /* Send failed */
2397                 chain->stat.final_failed++;
2398                 return 0;
2399             }
2400 
2401             /* Send current packet */
2402             return virtio_net_do_receive(nc, buf, size);
2403         } else if (ret == RSC_NO_MATCH) {
2404             continue;
2405         } else {
2406             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2407             seg->is_coalesced = 1;
2408             return size;
2409         }
2410     }
2411 
2412     chain->stat.no_match_cache++;
2413     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2414     return size;
2415 }
2416 
2417 /* Drain a connection data, this is to avoid out of order segments */
2418 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2419                                         NetClientState *nc,
2420                                         const uint8_t *buf, size_t size,
2421                                         uint16_t ip_start, uint16_t ip_size,
2422                                         uint16_t tcp_port)
2423 {
2424     VirtioNetRscSeg *seg, *nseg;
2425     uint32_t ppair1, ppair2;
2426 
2427     ppair1 = *(uint32_t *)(buf + tcp_port);
2428     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2429         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2430         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2431             || (ppair1 != ppair2)) {
2432             continue;
2433         }
2434         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2435             chain->stat.drain_failed++;
2436         }
2437 
2438         break;
2439     }
2440 
2441     return virtio_net_do_receive(nc, buf, size);
2442 }
2443 
2444 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2445                                             struct ip_header *ip,
2446                                             const uint8_t *buf, size_t size)
2447 {
2448     uint16_t ip_len;
2449 
2450     /* Not an ipv4 packet */
2451     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2452         chain->stat.ip_option++;
2453         return RSC_BYPASS;
2454     }
2455 
2456     /* Don't handle packets with ip option */
2457     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2458         chain->stat.ip_option++;
2459         return RSC_BYPASS;
2460     }
2461 
2462     if (ip->ip_p != IPPROTO_TCP) {
2463         chain->stat.bypass_not_tcp++;
2464         return RSC_BYPASS;
2465     }
2466 
2467     /* Don't handle packets with ip fragment */
2468     if (!(htons(ip->ip_off) & IP_DF)) {
2469         chain->stat.ip_frag++;
2470         return RSC_BYPASS;
2471     }
2472 
2473     /* Don't handle packets with ecn flag */
2474     if (IPTOS_ECN(ip->ip_tos)) {
2475         chain->stat.ip_ecn++;
2476         return RSC_BYPASS;
2477     }
2478 
2479     ip_len = htons(ip->ip_len);
2480     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2481         || ip_len > (size - chain->n->guest_hdr_len -
2482                      sizeof(struct eth_header))) {
2483         chain->stat.ip_hacked++;
2484         return RSC_BYPASS;
2485     }
2486 
2487     return RSC_CANDIDATE;
2488 }
2489 
2490 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2491                                       NetClientState *nc,
2492                                       const uint8_t *buf, size_t size)
2493 {
2494     int32_t ret;
2495     uint16_t hdr_len;
2496     VirtioNetRscUnit unit;
2497 
2498     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2499 
2500     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2501         + sizeof(struct tcp_header))) {
2502         chain->stat.bypass_not_tcp++;
2503         return virtio_net_do_receive(nc, buf, size);
2504     }
2505 
2506     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2507     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2508         != RSC_CANDIDATE) {
2509         return virtio_net_do_receive(nc, buf, size);
2510     }
2511 
2512     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2513     if (ret == RSC_BYPASS) {
2514         return virtio_net_do_receive(nc, buf, size);
2515     } else if (ret == RSC_FINAL) {
2516         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2517                 ((hdr_len + sizeof(struct eth_header)) + 12),
2518                 VIRTIO_NET_IP4_ADDR_SIZE,
2519                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2520     }
2521 
2522     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2523 }
2524 
2525 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2526                                             struct ip6_header *ip6,
2527                                             const uint8_t *buf, size_t size)
2528 {
2529     uint16_t ip_len;
2530 
2531     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2532         != IP_HEADER_VERSION_6) {
2533         return RSC_BYPASS;
2534     }
2535 
2536     /* Both option and protocol is checked in this */
2537     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2538         chain->stat.bypass_not_tcp++;
2539         return RSC_BYPASS;
2540     }
2541 
2542     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2543     if (ip_len < sizeof(struct tcp_header) ||
2544         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2545                   - sizeof(struct ip6_header))) {
2546         chain->stat.ip_hacked++;
2547         return RSC_BYPASS;
2548     }
2549 
2550     /* Don't handle packets with ecn flag */
2551     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2552         chain->stat.ip_ecn++;
2553         return RSC_BYPASS;
2554     }
2555 
2556     return RSC_CANDIDATE;
2557 }
2558 
2559 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2560                                       const uint8_t *buf, size_t size)
2561 {
2562     int32_t ret;
2563     uint16_t hdr_len;
2564     VirtioNetRscChain *chain;
2565     VirtioNetRscUnit unit;
2566 
2567     chain = opq;
2568     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2569 
2570     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2571         + sizeof(tcp_header))) {
2572         return virtio_net_do_receive(nc, buf, size);
2573     }
2574 
2575     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2576     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2577                                                  unit.ip, buf, size)) {
2578         return virtio_net_do_receive(nc, buf, size);
2579     }
2580 
2581     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2582     if (ret == RSC_BYPASS) {
2583         return virtio_net_do_receive(nc, buf, size);
2584     } else if (ret == RSC_FINAL) {
2585         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2586                 ((hdr_len + sizeof(struct eth_header)) + 8),
2587                 VIRTIO_NET_IP6_ADDR_SIZE,
2588                 hdr_len + sizeof(struct eth_header)
2589                 + sizeof(struct ip6_header));
2590     }
2591 
2592     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2593 }
2594 
2595 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2596                                                       NetClientState *nc,
2597                                                       uint16_t proto)
2598 {
2599     VirtioNetRscChain *chain;
2600 
2601     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2602         return NULL;
2603     }
2604 
2605     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2606         if (chain->proto == proto) {
2607             return chain;
2608         }
2609     }
2610 
2611     chain = g_malloc(sizeof(*chain));
2612     chain->n = n;
2613     chain->proto = proto;
2614     if (proto == (uint16_t)ETH_P_IP) {
2615         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2616         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2617     } else {
2618         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2619         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2620     }
2621     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2622                                       virtio_net_rsc_purge, chain);
2623     memset(&chain->stat, 0, sizeof(chain->stat));
2624 
2625     QTAILQ_INIT(&chain->buffers);
2626     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2627 
2628     return chain;
2629 }
2630 
2631 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2632                                       const uint8_t *buf,
2633                                       size_t size)
2634 {
2635     uint16_t proto;
2636     VirtioNetRscChain *chain;
2637     struct eth_header *eth;
2638     VirtIONet *n;
2639 
2640     n = qemu_get_nic_opaque(nc);
2641     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2642         return virtio_net_do_receive(nc, buf, size);
2643     }
2644 
2645     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2646     proto = htons(eth->h_proto);
2647 
2648     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2649     if (chain) {
2650         chain->stat.received++;
2651         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2652             return virtio_net_rsc_receive4(chain, nc, buf, size);
2653         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2654             return virtio_net_rsc_receive6(chain, nc, buf, size);
2655         }
2656     }
2657     return virtio_net_do_receive(nc, buf, size);
2658 }
2659 
2660 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2661                                   size_t size)
2662 {
2663     VirtIONet *n = qemu_get_nic_opaque(nc);
2664     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2665         return virtio_net_rsc_receive(nc, buf, size);
2666     } else {
2667         return virtio_net_do_receive(nc, buf, size);
2668     }
2669 }
2670 
2671 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2672 
2673 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2674 {
2675     VirtIONet *n = qemu_get_nic_opaque(nc);
2676     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2677     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2678     int ret;
2679 
2680     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2681     virtio_notify(vdev, q->tx_vq);
2682 
2683     g_free(q->async_tx.elem);
2684     q->async_tx.elem = NULL;
2685 
2686     virtio_queue_set_notification(q->tx_vq, 1);
2687     ret = virtio_net_flush_tx(q);
2688     if (ret >= n->tx_burst) {
2689         /*
2690          * the flush has been stopped by tx_burst
2691          * we will not receive notification for the
2692          * remainining part, so re-schedule
2693          */
2694         virtio_queue_set_notification(q->tx_vq, 0);
2695         if (q->tx_bh) {
2696             replay_bh_schedule_event(q->tx_bh);
2697         } else {
2698             timer_mod(q->tx_timer,
2699                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2700         }
2701         q->tx_waiting = 1;
2702     }
2703 }
2704 
2705 /* TX */
2706 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2707 {
2708     VirtIONet *n = q->n;
2709     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2710     VirtQueueElement *elem;
2711     int32_t num_packets = 0;
2712     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2713     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2714         return num_packets;
2715     }
2716 
2717     if (q->async_tx.elem) {
2718         virtio_queue_set_notification(q->tx_vq, 0);
2719         return num_packets;
2720     }
2721 
2722     for (;;) {
2723         ssize_t ret;
2724         unsigned int out_num;
2725         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2726         struct virtio_net_hdr vhdr;
2727 
2728         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2729         if (!elem) {
2730             break;
2731         }
2732 
2733         out_num = elem->out_num;
2734         out_sg = elem->out_sg;
2735         if (out_num < 1) {
2736             virtio_error(vdev, "virtio-net header not in first element");
2737             goto detach;
2738         }
2739 
2740         if (n->needs_vnet_hdr_swap) {
2741             if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2742                 sizeof(vhdr)) {
2743                 virtio_error(vdev, "virtio-net header incorrect");
2744                 goto detach;
2745             }
2746             virtio_net_hdr_swap(vdev, &vhdr);
2747             sg2[0].iov_base = &vhdr;
2748             sg2[0].iov_len = sizeof(vhdr);
2749             out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2750                                sizeof(vhdr), -1);
2751             if (out_num == VIRTQUEUE_MAX_SIZE) {
2752                 goto drop;
2753             }
2754             out_num += 1;
2755             out_sg = sg2;
2756         }
2757         /*
2758          * If host wants to see the guest header as is, we can
2759          * pass it on unchanged. Otherwise, copy just the parts
2760          * that host is interested in.
2761          */
2762         assert(n->host_hdr_len <= n->guest_hdr_len);
2763         if (n->host_hdr_len != n->guest_hdr_len) {
2764             if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2765                 virtio_error(vdev, "virtio-net header is invalid");
2766                 goto detach;
2767             }
2768             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2769                                        out_sg, out_num,
2770                                        0, n->host_hdr_len);
2771             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2772                              out_sg, out_num,
2773                              n->guest_hdr_len, -1);
2774             out_num = sg_num;
2775             out_sg = sg;
2776 
2777             if (out_num < 1) {
2778                 virtio_error(vdev, "virtio-net nothing to send");
2779                 goto detach;
2780             }
2781         }
2782 
2783         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2784                                       out_sg, out_num, virtio_net_tx_complete);
2785         if (ret == 0) {
2786             virtio_queue_set_notification(q->tx_vq, 0);
2787             q->async_tx.elem = elem;
2788             return -EBUSY;
2789         }
2790 
2791 drop:
2792         virtqueue_push(q->tx_vq, elem, 0);
2793         virtio_notify(vdev, q->tx_vq);
2794         g_free(elem);
2795 
2796         if (++num_packets >= n->tx_burst) {
2797             break;
2798         }
2799     }
2800     return num_packets;
2801 
2802 detach:
2803     virtqueue_detach_element(q->tx_vq, elem, 0);
2804     g_free(elem);
2805     return -EINVAL;
2806 }
2807 
2808 static void virtio_net_tx_timer(void *opaque);
2809 
2810 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2811 {
2812     VirtIONet *n = VIRTIO_NET(vdev);
2813     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2814 
2815     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2816         virtio_net_drop_tx_queue_data(vdev, vq);
2817         return;
2818     }
2819 
2820     /* This happens when device was stopped but VCPU wasn't. */
2821     if (!vdev->vm_running) {
2822         q->tx_waiting = 1;
2823         return;
2824     }
2825 
2826     if (q->tx_waiting) {
2827         /* We already have queued packets, immediately flush */
2828         timer_del(q->tx_timer);
2829         virtio_net_tx_timer(q);
2830     } else {
2831         /* re-arm timer to flush it (and more) on next tick */
2832         timer_mod(q->tx_timer,
2833                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2834         q->tx_waiting = 1;
2835         virtio_queue_set_notification(vq, 0);
2836     }
2837 }
2838 
2839 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2840 {
2841     VirtIONet *n = VIRTIO_NET(vdev);
2842     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2843 
2844     if (unlikely(n->vhost_started)) {
2845         return;
2846     }
2847 
2848     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2849         virtio_net_drop_tx_queue_data(vdev, vq);
2850         return;
2851     }
2852 
2853     if (unlikely(q->tx_waiting)) {
2854         return;
2855     }
2856     q->tx_waiting = 1;
2857     /* This happens when device was stopped but VCPU wasn't. */
2858     if (!vdev->vm_running) {
2859         return;
2860     }
2861     virtio_queue_set_notification(vq, 0);
2862     replay_bh_schedule_event(q->tx_bh);
2863 }
2864 
2865 static void virtio_net_tx_timer(void *opaque)
2866 {
2867     VirtIONetQueue *q = opaque;
2868     VirtIONet *n = q->n;
2869     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2870     int ret;
2871 
2872     /* This happens when device was stopped but BH wasn't. */
2873     if (!vdev->vm_running) {
2874         /* Make sure tx waiting is set, so we'll run when restarted. */
2875         assert(q->tx_waiting);
2876         return;
2877     }
2878 
2879     q->tx_waiting = 0;
2880 
2881     /* Just in case the driver is not ready on more */
2882     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2883         return;
2884     }
2885 
2886     ret = virtio_net_flush_tx(q);
2887     if (ret == -EBUSY || ret == -EINVAL) {
2888         return;
2889     }
2890     /*
2891      * If we flush a full burst of packets, assume there are
2892      * more coming and immediately rearm
2893      */
2894     if (ret >= n->tx_burst) {
2895         q->tx_waiting = 1;
2896         timer_mod(q->tx_timer,
2897                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2898         return;
2899     }
2900     /*
2901      * If less than a full burst, re-enable notification and flush
2902      * anything that may have come in while we weren't looking.  If
2903      * we find something, assume the guest is still active and rearm
2904      */
2905     virtio_queue_set_notification(q->tx_vq, 1);
2906     ret = virtio_net_flush_tx(q);
2907     if (ret > 0) {
2908         virtio_queue_set_notification(q->tx_vq, 0);
2909         q->tx_waiting = 1;
2910         timer_mod(q->tx_timer,
2911                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2912     }
2913 }
2914 
2915 static void virtio_net_tx_bh(void *opaque)
2916 {
2917     VirtIONetQueue *q = opaque;
2918     VirtIONet *n = q->n;
2919     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2920     int32_t ret;
2921 
2922     /* This happens when device was stopped but BH wasn't. */
2923     if (!vdev->vm_running) {
2924         /* Make sure tx waiting is set, so we'll run when restarted. */
2925         assert(q->tx_waiting);
2926         return;
2927     }
2928 
2929     q->tx_waiting = 0;
2930 
2931     /* Just in case the driver is not ready on more */
2932     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2933         return;
2934     }
2935 
2936     ret = virtio_net_flush_tx(q);
2937     if (ret == -EBUSY || ret == -EINVAL) {
2938         return; /* Notification re-enable handled by tx_complete or device
2939                  * broken */
2940     }
2941 
2942     /* If we flush a full burst of packets, assume there are
2943      * more coming and immediately reschedule */
2944     if (ret >= n->tx_burst) {
2945         replay_bh_schedule_event(q->tx_bh);
2946         q->tx_waiting = 1;
2947         return;
2948     }
2949 
2950     /* If less than a full burst, re-enable notification and flush
2951      * anything that may have come in while we weren't looking.  If
2952      * we find something, assume the guest is still active and reschedule */
2953     virtio_queue_set_notification(q->tx_vq, 1);
2954     ret = virtio_net_flush_tx(q);
2955     if (ret == -EINVAL) {
2956         return;
2957     } else if (ret > 0) {
2958         virtio_queue_set_notification(q->tx_vq, 0);
2959         replay_bh_schedule_event(q->tx_bh);
2960         q->tx_waiting = 1;
2961     }
2962 }
2963 
2964 static void virtio_net_add_queue(VirtIONet *n, int index)
2965 {
2966     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2967 
2968     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2969                                            virtio_net_handle_rx);
2970 
2971     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2972         n->vqs[index].tx_vq =
2973             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2974                              virtio_net_handle_tx_timer);
2975         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2976                                               virtio_net_tx_timer,
2977                                               &n->vqs[index]);
2978     } else {
2979         n->vqs[index].tx_vq =
2980             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2981                              virtio_net_handle_tx_bh);
2982         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2983                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2984     }
2985 
2986     n->vqs[index].tx_waiting = 0;
2987     n->vqs[index].n = n;
2988 }
2989 
2990 static void virtio_net_del_queue(VirtIONet *n, int index)
2991 {
2992     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2993     VirtIONetQueue *q = &n->vqs[index];
2994     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2995 
2996     qemu_purge_queued_packets(nc);
2997 
2998     virtio_del_queue(vdev, index * 2);
2999     if (q->tx_timer) {
3000         timer_free(q->tx_timer);
3001         q->tx_timer = NULL;
3002     } else {
3003         qemu_bh_delete(q->tx_bh);
3004         q->tx_bh = NULL;
3005     }
3006     q->tx_waiting = 0;
3007     virtio_del_queue(vdev, index * 2 + 1);
3008 }
3009 
3010 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
3011 {
3012     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3013     int old_num_queues = virtio_get_num_queues(vdev);
3014     int new_num_queues = new_max_queue_pairs * 2 + 1;
3015     int i;
3016 
3017     assert(old_num_queues >= 3);
3018     assert(old_num_queues % 2 == 1);
3019 
3020     if (old_num_queues == new_num_queues) {
3021         return;
3022     }
3023 
3024     /*
3025      * We always need to remove and add ctrl vq if
3026      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3027      * and then we only enter one of the following two loops.
3028      */
3029     virtio_del_queue(vdev, old_num_queues - 1);
3030 
3031     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3032         /* new_num_queues < old_num_queues */
3033         virtio_net_del_queue(n, i / 2);
3034     }
3035 
3036     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3037         /* new_num_queues > old_num_queues */
3038         virtio_net_add_queue(n, i / 2);
3039     }
3040 
3041     /* add ctrl_vq last */
3042     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3043 }
3044 
3045 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3046 {
3047     int max = multiqueue ? n->max_queue_pairs : 1;
3048 
3049     n->multiqueue = multiqueue;
3050     virtio_net_change_num_queue_pairs(n, max);
3051 
3052     virtio_net_set_queue_pairs(n);
3053 }
3054 
3055 static int virtio_net_post_load_device(void *opaque, int version_id)
3056 {
3057     VirtIONet *n = opaque;
3058     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3059     int i, link_down;
3060 
3061     trace_virtio_net_post_load_device();
3062     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3063                                virtio_vdev_has_feature(vdev,
3064                                                        VIRTIO_F_VERSION_1),
3065                                virtio_vdev_has_feature(vdev,
3066                                                        VIRTIO_NET_F_HASH_REPORT));
3067 
3068     /* MAC_TABLE_ENTRIES may be different from the saved image */
3069     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3070         n->mac_table.in_use = 0;
3071     }
3072 
3073     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3074         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3075     }
3076 
3077     /*
3078      * curr_guest_offloads will be later overwritten by the
3079      * virtio_set_features_nocheck call done from the virtio_load.
3080      * Here we make sure it is preserved and restored accordingly
3081      * in the virtio_net_post_load_virtio callback.
3082      */
3083     n->saved_guest_offloads = n->curr_guest_offloads;
3084 
3085     virtio_net_set_queue_pairs(n);
3086 
3087     /* Find the first multicast entry in the saved MAC filter */
3088     for (i = 0; i < n->mac_table.in_use; i++) {
3089         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3090             break;
3091         }
3092     }
3093     n->mac_table.first_multi = i;
3094 
3095     /* nc.link_down can't be migrated, so infer link_down according
3096      * to link status bit in n->status */
3097     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3098     for (i = 0; i < n->max_queue_pairs; i++) {
3099         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3100     }
3101 
3102     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3103         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3104         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3105                                   QEMU_CLOCK_VIRTUAL,
3106                                   virtio_net_announce_timer, n);
3107         if (n->announce_timer.round) {
3108             timer_mod(n->announce_timer.tm,
3109                       qemu_clock_get_ms(n->announce_timer.type));
3110         } else {
3111             qemu_announce_timer_del(&n->announce_timer, false);
3112         }
3113     }
3114 
3115     virtio_net_commit_rss_config(n);
3116     return 0;
3117 }
3118 
3119 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3120 {
3121     VirtIONet *n = VIRTIO_NET(vdev);
3122     /*
3123      * The actual needed state is now in saved_guest_offloads,
3124      * see virtio_net_post_load_device for detail.
3125      * Restore it back and apply the desired offloads.
3126      */
3127     n->curr_guest_offloads = n->saved_guest_offloads;
3128     if (peer_has_vnet_hdr(n)) {
3129         virtio_net_apply_guest_offloads(n);
3130     }
3131 
3132     return 0;
3133 }
3134 
/*
 * tx_waiting field of a VirtIONetQueue.
 *
 * This description is referenced both for the first queue (directly from
 * vmstate_virtio_net_device) and for the remaining queues (through
 * vmstate_virtio_net_tx_waiting).
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
3143 
3144 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3145 {
3146     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3147 }
3148 
3149 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3150 {
3151     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3152                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3153 }
3154 
3155 static bool mac_table_fits(void *opaque, int version_id)
3156 {
3157     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3158 }
3159 
3160 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3161 {
3162     return !mac_table_fits(opaque, version_id);
3163 }
3164 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* The VirtIONet being saved or loaded; read by the pre/post hooks */
    VirtIONet      *parent;
    /* parent->vqs + 1, set by virtio_net_tx_waiting_pre_save() */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queue_pairs - 1, or 0 when curr_queue_pairs is 0 */
    uint16_t        curr_queue_pairs_1;
    /* Copy of parent->has_ufo, checked in virtio_net_ufo_post_load() */
    uint8_t         has_ufo;
    /* Copy of parent->has_vnet_hdr, checked in virtio_net_vnet_post_load() */
    uint32_t        has_vnet_hdr;
};
3175 
3176 /* The 2nd and subsequent tx_waiting flags are loaded later than
3177  * the 1st entry in the queue_pairs and only if there's more than one
3178  * entry.  We use the tmp mechanism to calculate a temporary
3179  * pointer and count and also validate the count.
3180  */
3181 
3182 static int virtio_net_tx_waiting_pre_save(void *opaque)
3183 {
3184     struct VirtIONetMigTmp *tmp = opaque;
3185 
3186     tmp->vqs_1 = tmp->parent->vqs + 1;
3187     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3188     if (tmp->parent->curr_queue_pairs == 0) {
3189         tmp->curr_queue_pairs_1 = 0;
3190     }
3191 
3192     return 0;
3193 }
3194 
3195 static int virtio_net_tx_waiting_pre_load(void *opaque)
3196 {
3197     struct VirtIONetMigTmp *tmp = opaque;
3198 
3199     /* Reuse the pointer setup from save */
3200     virtio_net_tx_waiting_pre_save(opaque);
3201 
3202     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3203         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3204             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3205 
3206         return -EINVAL;
3207     }
3208 
3209     return 0; /* all good */
3210 }
3211 
/*
 * tx_waiting of the 2nd and following queues, described relative to a
 * struct VirtIONetMigTmp: vqs_1 is the array start and curr_queue_pairs_1
 * the element count, both filled in by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3224 
3225 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3226  * flag set we need to check that we have it
3227  */
3228 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3229 {
3230     struct VirtIONetMigTmp *tmp = opaque;
3231 
3232     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3233         error_report("virtio-net: saved image requires TUN_F_UFO support");
3234         return -EINVAL;
3235     }
3236 
3237     return 0;
3238 }
3239 
3240 static int virtio_net_ufo_pre_save(void *opaque)
3241 {
3242     struct VirtIONetMigTmp *tmp = opaque;
3243 
3244     tmp->has_ufo = tmp->parent->has_ufo;
3245 
3246     return 0;
3247 }
3248 
/*
 * One-byte has_ufo flag, carried through struct VirtIONetMigTmp; it is
 * filled in by pre_save and validated against the peer in post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3258 
3259 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3260  * flag set we need to check that we have it
3261  */
3262 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3263 {
3264     struct VirtIONetMigTmp *tmp = opaque;
3265 
3266     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3267         error_report("virtio-net: saved image requires vnet_hdr=on");
3268         return -EINVAL;
3269     }
3270 
3271     return 0;
3272 }
3273 
3274 static int virtio_net_vnet_pre_save(void *opaque)
3275 {
3276     struct VirtIONetMigTmp *tmp = opaque;
3277 
3278     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3279 
3280     return 0;
3281 }
3282 
/*
 * 32-bit has_vnet_hdr flag, carried through struct VirtIONetMigTmp; it is
 * filled in by pre_save and validated against the peer in post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3292 
3293 static bool virtio_net_rss_needed(void *opaque)
3294 {
3295     return VIRTIO_NET(opaque)->rss_data.enabled;
3296 }
3297 
/*
 * Subsection with the RSS configuration (rss_data); it is only part of
 * the stream when virtio_net_rss_needed() returns true.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Table of indirections_len uint16_t entries */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3318 
/*
 * Migration description of the virtio-net specific device state.
 *
 * Only VIRTIO_NET_VM_VERSION is accepted (version_id equals
 * minimum_version_id).  The WITH_TMP entries go through
 * struct VirtIONetMigTmp so that values can be checked by hooks instead
 * of being stored in VirtIONet directly.  virtio_net_post_load_device()
 * runs as the post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue only; the rest follows further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The next two fields exist only when max_queue_pairs > 1 */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        /* tx_waiting of the 2nd and following queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3374 
/* Net client callbacks of the virtio-net NIC. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3384 
3385 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3386 {
3387     VirtIONet *n = VIRTIO_NET(vdev);
3388     NetClientState *nc;
3389     assert(n->vhost_started);
3390     if (!n->multiqueue && idx == 2) {
3391         /* Must guard against invalid features and bogus queue index
3392          * from being set by malicious guest, or penetrated through
3393          * buggy migration stream.
3394          */
3395         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3396             qemu_log_mask(LOG_GUEST_ERROR,
3397                           "%s: bogus vq index ignored\n", __func__);
3398             return false;
3399         }
3400         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3401     } else {
3402         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3403     }
3404     /*
3405      * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3406      * as the macro of configure interrupt's IDX, If this driver does not
3407      * support, the function will return false
3408      */
3409 
3410     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3411         return vhost_net_config_pending(get_vhost_net(nc->peer));
3412     }
3413     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3414 }
3415 
3416 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3417                                            bool mask)
3418 {
3419     VirtIONet *n = VIRTIO_NET(vdev);
3420     NetClientState *nc;
3421     assert(n->vhost_started);
3422     if (!n->multiqueue && idx == 2) {
3423         /* Must guard against invalid features and bogus queue index
3424          * from being set by malicious guest, or penetrated through
3425          * buggy migration stream.
3426          */
3427         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3428             qemu_log_mask(LOG_GUEST_ERROR,
3429                           "%s: bogus vq index ignored\n", __func__);
3430             return;
3431         }
3432         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3433     } else {
3434         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3435     }
3436     /*
3437      *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3438      * as the macro of configure interrupt's IDX, If this driver does not
3439      * support, the function will return
3440      */
3441 
3442     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3443         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3444         return;
3445     }
3446     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3447 }
3448 
3449 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3450 {
3451     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3452 
3453     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3454 }
3455 
3456 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3457                                    const char *type)
3458 {
3459     /*
3460      * The name can be NULL, the netclient name will be type.x.
3461      */
3462     assert(type != NULL);
3463 
3464     g_free(n->netclient_name);
3465     g_free(n->netclient_type);
3466     n->netclient_name = g_strdup(name);
3467     n->netclient_type = g_strdup(type);
3468 }
3469 
3470 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3471 {
3472     HotplugHandler *hotplug_ctrl;
3473     PCIDevice *pci_dev;
3474     Error *err = NULL;
3475 
3476     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3477     if (hotplug_ctrl) {
3478         pci_dev = PCI_DEVICE(dev);
3479         pci_dev->partially_hotplugged = true;
3480         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3481         if (err) {
3482             error_report_err(err);
3483             return false;
3484         }
3485     } else {
3486         return false;
3487     }
3488     return true;
3489 }
3490 
3491 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3492                                     Error **errp)
3493 {
3494     Error *err = NULL;
3495     HotplugHandler *hotplug_ctrl;
3496     PCIDevice *pdev = PCI_DEVICE(dev);
3497     BusState *primary_bus;
3498 
3499     if (!pdev->partially_hotplugged) {
3500         return true;
3501     }
3502     primary_bus = dev->parent_bus;
3503     if (!primary_bus) {
3504         error_setg(errp, "virtio_net: couldn't find primary bus");
3505         return false;
3506     }
3507     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3508     qatomic_set(&n->failover_primary_hidden, false);
3509     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3510     if (hotplug_ctrl) {
3511         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3512         if (err) {
3513             goto out;
3514         }
3515         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3516     }
3517     pdev->partially_hotplugged = false;
3518 
3519 out:
3520     error_propagate(errp, err);
3521     return !err;
3522 }
3523 
3524 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3525 {
3526     bool should_be_hidden;
3527     Error *err = NULL;
3528     DeviceState *dev = failover_find_primary_device(n);
3529 
3530     if (!dev) {
3531         return;
3532     }
3533 
3534     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3535 
3536     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3537         if (failover_unplug_primary(n, dev)) {
3538             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3539             qapi_event_send_unplug_primary(dev->id);
3540             qatomic_set(&n->failover_primary_hidden, true);
3541         } else {
3542             warn_report("couldn't unplug primary device");
3543         }
3544     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3545         /* We already unplugged the device let's plug it back */
3546         if (!failover_replug_primary(n, dev, &err)) {
3547             if (err) {
3548                 error_report_err(err);
3549             }
3550         }
3551     }
3552 }
3553 
3554 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3555                                                MigrationEvent *e, Error **errp)
3556 {
3557     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3558     virtio_net_handle_migration_primary(n, e);
3559     return 0;
3560 }
3561 
3562 static bool failover_hide_primary_device(DeviceListener *listener,
3563                                          const QDict *device_opts,
3564                                          bool from_json,
3565                                          Error **errp)
3566 {
3567     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3568     const char *standby_id;
3569 
3570     if (!device_opts) {
3571         return false;
3572     }
3573 
3574     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3575         return false;
3576     }
3577 
3578     if (!qdict_haskey(device_opts, "id")) {
3579         error_setg(errp, "Device with failover_pair_id needs to have id");
3580         return false;
3581     }
3582 
3583     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3584     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3585         return false;
3586     }
3587 
3588     /*
3589      * The hide helper can be called several times for a given device.
3590      * Check there is only one primary for a virtio-net device but
3591      * don't duplicate the qdict several times if it's called for the same
3592      * device.
3593      */
3594     if (n->primary_opts) {
3595         const char *old, *new;
3596         /* devices with failover_pair_id always have an id */
3597         old = qdict_get_str(n->primary_opts, "id");
3598         new = qdict_get_str(device_opts, "id");
3599         if (strcmp(old, new) != 0) {
3600             error_setg(errp, "Cannot attach more than one primary device to "
3601                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3602             return false;
3603         }
3604     } else {
3605         n->primary_opts = qdict_clone_shallow(device_opts);
3606         n->primary_opts_from_json = from_json;
3607     }
3608 
3609     /* failover_primary_hidden is set during feature negotiation */
3610     return qatomic_read(&n->failover_primary_hidden);
3611 }
3612 
3613 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3614 {
3615     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3616     VirtIONet *n = VIRTIO_NET(dev);
3617     NetClientState *nc;
3618     int i;
3619 
3620     if (n->net_conf.mtu) {
3621         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3622     }
3623 
3624     if (n->net_conf.duplex_str) {
3625         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3626             n->net_conf.duplex = DUPLEX_HALF;
3627         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3628             n->net_conf.duplex = DUPLEX_FULL;
3629         } else {
3630             error_setg(errp, "'duplex' must be 'half' or 'full'");
3631             return;
3632         }
3633         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3634     } else {
3635         n->net_conf.duplex = DUPLEX_UNKNOWN;
3636     }
3637 
3638     if (n->net_conf.speed < SPEED_UNKNOWN) {
3639         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3640         return;
3641     }
3642     if (n->net_conf.speed >= 0) {
3643         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3644     }
3645 
3646     if (n->failover) {
3647         n->primary_listener.hide_device = failover_hide_primary_device;
3648         qatomic_set(&n->failover_primary_hidden, true);
3649         device_listener_register(&n->primary_listener);
3650         migration_add_notifier(&n->migration_state,
3651                                virtio_net_migration_state_notifier);
3652         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3653     }
3654 
3655     virtio_net_set_config_size(n, n->host_features);
3656     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3657 
3658     /*
3659      * We set a lower limit on RX queue size to what it always was.
3660      * Guests that want a smaller ring can always resize it without
3661      * help from us (using virtio 1 and up).
3662      */
3663     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3664         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3665         !is_power_of_2(n->net_conf.rx_queue_size)) {
3666         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3667                    "must be a power of 2 between %d and %d.",
3668                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3669                    VIRTQUEUE_MAX_SIZE);
3670         virtio_cleanup(vdev);
3671         return;
3672     }
3673 
3674     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3675         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3676         !is_power_of_2(n->net_conf.tx_queue_size)) {
3677         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3678                    "must be a power of 2 between %d and %d",
3679                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3680                    virtio_net_max_tx_queue_size(n));
3681         virtio_cleanup(vdev);
3682         return;
3683     }
3684 
3685     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3686 
3687     /*
3688      * Figure out the datapath queue pairs since the backend could
3689      * provide control queue via peers as well.
3690      */
3691     if (n->nic_conf.peers.queues) {
3692         for (i = 0; i < n->max_ncs; i++) {
3693             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3694                 ++n->max_queue_pairs;
3695             }
3696         }
3697     }
3698     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3699 
3700     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3701         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3702                    "must be a positive integer less than %d.",
3703                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3704         virtio_cleanup(vdev);
3705         return;
3706     }
3707     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3708     n->curr_queue_pairs = 1;
3709     n->tx_timeout = n->net_conf.txtimer;
3710 
3711     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3712                        && strcmp(n->net_conf.tx, "bh")) {
3713         warn_report("virtio-net: "
3714                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3715                     n->net_conf.tx);
3716         error_printf("Defaulting to \"bh\"");
3717     }
3718 
3719     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3720                                     n->net_conf.tx_queue_size);
3721 
3722     virtio_net_add_queue(n, 0);
3723 
3724     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3725     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3726     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3727     n->status = VIRTIO_NET_S_LINK_UP;
3728     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3729                               QEMU_CLOCK_VIRTUAL,
3730                               virtio_net_announce_timer, n);
3731     n->announce_timer.round = 0;
3732 
3733     if (n->netclient_type) {
3734         /*
3735          * Happen when virtio_net_set_netclient_name has been called.
3736          */
3737         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3738                               n->netclient_type, n->netclient_name,
3739                               &dev->mem_reentrancy_guard, n);
3740     } else {
3741         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3742                               object_get_typename(OBJECT(dev)), dev->id,
3743                               &dev->mem_reentrancy_guard, n);
3744     }
3745 
3746     for (i = 0; i < n->max_queue_pairs; i++) {
3747         n->nic->ncs[i].do_not_pad = true;
3748     }
3749 
3750     peer_test_vnet_hdr(n);
3751     if (peer_has_vnet_hdr(n)) {
3752         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3753     } else {
3754         n->host_hdr_len = 0;
3755     }
3756 
3757     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3758 
3759     n->vqs[0].tx_waiting = 0;
3760     n->tx_burst = n->net_conf.txburst;
3761     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3762     n->promisc = 1; /* for compatibility */
3763 
3764     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3765 
3766     n->vlans = g_malloc0(MAX_VLAN >> 3);
3767 
3768     nc = qemu_get_queue(n->nic);
3769     nc->rxfilter_notify_enabled = 1;
3770 
3771    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3772         struct virtio_net_config netcfg = {};
3773         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3774         vhost_net_set_config(get_vhost_net(nc->peer),
3775             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3776     }
3777     QTAILQ_INIT(&n->rsc_chains);
3778     n->qdev = dev;
3779 
3780     net_rx_pkt_init(&n->rx_pkt);
3781 
3782     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3783         Error *err = NULL;
3784         if (!virtio_net_load_ebpf(n, &err)) {
3785             /*
3786              * If user explicitly gave QEMU RSS FDs to use, then
3787              * failing to use them must be considered a fatal
3788              * error. If no RSS FDs were provided, QEMU is trying
3789              * eBPF on a "best effort" basis only, so report a
3790              * warning and allow fallback to software RSS.
3791              */
3792             if (n->ebpf_rss_fds) {
3793                 error_propagate(errp, err);
3794             } else {
3795                 warn_report("unable to load eBPF RSS: %s",
3796                             error_get_pretty(err));
3797                 error_free(err);
3798             }
3799         }
3800     }
3801 }
3802 
3803 static void virtio_net_device_unrealize(DeviceState *dev)
3804 {
3805     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3806     VirtIONet *n = VIRTIO_NET(dev);
3807     int i, max_queue_pairs;
3808 
3809     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3810         virtio_net_unload_ebpf(n);
3811     }
3812 
3813     /* This will stop vhost backend if appropriate. */
3814     virtio_net_set_status(vdev, 0);
3815 
3816     g_free(n->netclient_name);
3817     n->netclient_name = NULL;
3818     g_free(n->netclient_type);
3819     n->netclient_type = NULL;
3820 
3821     g_free(n->mac_table.macs);
3822     g_free(n->vlans);
3823 
3824     if (n->failover) {
3825         qobject_unref(n->primary_opts);
3826         device_listener_unregister(&n->primary_listener);
3827         migration_remove_notifier(&n->migration_state);
3828     } else {
3829         assert(n->primary_opts == NULL);
3830     }
3831 
3832     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3833     for (i = 0; i < max_queue_pairs; i++) {
3834         virtio_net_del_queue(n, i);
3835     }
3836     /* delete also control vq */
3837     virtio_del_queue(vdev, max_queue_pairs * 2);
3838     qemu_announce_timer_del(&n->announce_timer, false);
3839     g_free(n->vqs);
3840     qemu_del_nic(n->nic);
3841     virtio_net_rsc_cleanup(n);
3842     g_free(n->rss_data.indirections_table);
3843     net_rx_pkt_uninit(n->rx_pkt);
3844     virtio_cleanup(vdev);
3845 }
3846 
3847 static void virtio_net_reset(VirtIODevice *vdev)
3848 {
3849     VirtIONet *n = VIRTIO_NET(vdev);
3850     int i;
3851 
3852     /* Reset back to compatibility mode */
3853     n->promisc = 1;
3854     n->allmulti = 0;
3855     n->alluni = 0;
3856     n->nomulti = 0;
3857     n->nouni = 0;
3858     n->nobcast = 0;
3859     /* multiqueue is disabled by default */
3860     n->curr_queue_pairs = 1;
3861     timer_del(n->announce_timer.tm);
3862     n->announce_timer.round = 0;
3863     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3864 
3865     /* Flush any MAC and VLAN filter table state */
3866     n->mac_table.in_use = 0;
3867     n->mac_table.first_multi = 0;
3868     n->mac_table.multi_overflow = 0;
3869     n->mac_table.uni_overflow = 0;
3870     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3871     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3872     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3873     memset(n->vlans, 0, MAX_VLAN >> 3);
3874 
3875     /* Flush any async TX */
3876     for (i = 0;  i < n->max_queue_pairs; i++) {
3877         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
3878     }
3879 
3880     virtio_net_disable_rss(n);
3881 }
3882 
3883 static void virtio_net_instance_init(Object *obj)
3884 {
3885     VirtIONet *n = VIRTIO_NET(obj);
3886 
3887     /*
3888      * The default config_size is sizeof(struct virtio_net_config).
3889      * Can be overridden with virtio_net_set_config_size.
3890      */
3891     n->config_size = sizeof(struct virtio_net_config);
3892     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3893                                   "bootindex", "/ethernet-phy@0",
3894                                   DEVICE(n));
3895 
3896     ebpf_rss_init(&n->ebpf_rss);
3897 }
3898 
3899 static int virtio_net_pre_save(void *opaque)
3900 {
3901     VirtIONet *n = opaque;
3902 
3903     /* At this point, backend must be stopped, otherwise
3904      * it might keep writing to memory. */
3905     assert(!n->vhost_started);
3906 
3907     return 0;
3908 }
3909 
3910 static bool primary_unplug_pending(void *opaque)
3911 {
3912     DeviceState *dev = opaque;
3913     DeviceState *primary;
3914     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3915     VirtIONet *n = VIRTIO_NET(vdev);
3916 
3917     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3918         return false;
3919     }
3920     primary = failover_find_primary_device(n);
3921     return primary ? primary->pending_deleted_event : false;
3922 }
3923 
3924 static bool dev_unplug_pending(void *opaque)
3925 {
3926     DeviceState *dev = opaque;
3927     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3928 
3929     return vdc->primary_unplug_pending(dev);
3930 }
3931 
3932 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3933 {
3934     VirtIONet *n = VIRTIO_NET(vdev);
3935     NetClientState *nc;
3936     struct vhost_net *net;
3937 
3938     if (!n->nic) {
3939         return NULL;
3940     }
3941 
3942     nc = qemu_get_queue(n->nic);
3943     if (!nc) {
3944         return NULL;
3945     }
3946 
3947     net = get_vhost_net(nc->peer);
3948     if (!net) {
3949         return NULL;
3950     }
3951 
3952     return &net->dev;
3953 }
3954 
3955 static const VMStateDescription vmstate_virtio_net = {
3956     .name = "virtio-net",
3957     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3958     .version_id = VIRTIO_NET_VM_VERSION,
3959     .fields = (const VMStateField[]) {
3960         VMSTATE_VIRTIO_DEVICE,
3961         VMSTATE_END_OF_LIST()
3962     },
3963     .pre_save = virtio_net_pre_save,
3964     .dev_unplug_pending = dev_unplug_pending,
3965 };
3966 
3967 static Property virtio_net_properties[] = {
3968     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3969                     VIRTIO_NET_F_CSUM, true),
3970     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3971                     VIRTIO_NET_F_GUEST_CSUM, true),
3972     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3973     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3974                     VIRTIO_NET_F_GUEST_TSO4, true),
3975     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3976                     VIRTIO_NET_F_GUEST_TSO6, true),
3977     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3978                     VIRTIO_NET_F_GUEST_ECN, true),
3979     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3980                     VIRTIO_NET_F_GUEST_UFO, true),
3981     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3982                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3983     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3984                     VIRTIO_NET_F_HOST_TSO4, true),
3985     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3986                     VIRTIO_NET_F_HOST_TSO6, true),
3987     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3988                     VIRTIO_NET_F_HOST_ECN, true),
3989     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3990                     VIRTIO_NET_F_HOST_UFO, true),
3991     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3992                     VIRTIO_NET_F_MRG_RXBUF, true),
3993     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3994                     VIRTIO_NET_F_STATUS, true),
3995     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3996                     VIRTIO_NET_F_CTRL_VQ, true),
3997     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3998                     VIRTIO_NET_F_CTRL_RX, true),
3999     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4000                     VIRTIO_NET_F_CTRL_VLAN, true),
4001     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4002                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4003     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4004                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4005     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4006                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4007     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4008     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4009                     VIRTIO_NET_F_RSS, false),
4010     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4011                     VIRTIO_NET_F_HASH_REPORT, false),
4012     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4013                       ebpf_rss_fds, qdev_prop_string, char*),
4014     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4015                     VIRTIO_NET_F_RSC_EXT, false),
4016     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4017                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4018     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4019     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4020                        TX_TIMER_INTERVAL),
4021     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4022     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4023     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4024                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4025     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4026                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4027     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4028     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4029                      true),
4030     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4031     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4032     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4033     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4034                       VIRTIO_NET_F_GUEST_USO4, true),
4035     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4036                       VIRTIO_NET_F_GUEST_USO6, true),
4037     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4038                       VIRTIO_NET_F_HOST_USO, true),
4039     DEFINE_PROP_END_OF_LIST(),
4040 };
4041 
4042 static void virtio_net_class_init(ObjectClass *klass, void *data)
4043 {
4044     DeviceClass *dc = DEVICE_CLASS(klass);
4045     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4046 
4047     device_class_set_props(dc, virtio_net_properties);
4048     dc->vmsd = &vmstate_virtio_net;
4049     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4050     vdc->realize = virtio_net_device_realize;
4051     vdc->unrealize = virtio_net_device_unrealize;
4052     vdc->get_config = virtio_net_get_config;
4053     vdc->set_config = virtio_net_set_config;
4054     vdc->get_features = virtio_net_get_features;
4055     vdc->set_features = virtio_net_set_features;
4056     vdc->bad_features = virtio_net_bad_features;
4057     vdc->reset = virtio_net_reset;
4058     vdc->queue_reset = virtio_net_queue_reset;
4059     vdc->queue_enable = virtio_net_queue_enable;
4060     vdc->set_status = virtio_net_set_status;
4061     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4062     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4063     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4064     vdc->post_load = virtio_net_post_load_virtio;
4065     vdc->vmsd = &vmstate_virtio_net_device;
4066     vdc->primary_unplug_pending = primary_unplug_pending;
4067     vdc->get_vhost = virtio_net_get_vhost;
4068     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4069 }
4070 
4071 static const TypeInfo virtio_net_info = {
4072     .name = TYPE_VIRTIO_NET,
4073     .parent = TYPE_VIRTIO_DEVICE,
4074     .instance_size = sizeof(VirtIONet),
4075     .instance_init = virtio_net_instance_init,
4076     .class_init = virtio_net_class_init,
4077 };
4078 
4079 static void virtio_register_types(void)
4080 {
4081     type_register_static(&virtio_net_info);
4082 }
4083 
4084 type_init(virtio_register_types)
4085