xref: /openbmc/qemu/hw/net/virtio-net.c (revision 17437418c4ed0b779f7e9747fbffb08e139aa0ae)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "sysemu/replay.h"
44 #include "trace.h"
45 #include "monitor/qdev.h"
46 #include "monitor/monitor.h"
47 #include "hw/pci/pci_device.h"
48 #include "net_rx_pkt.h"
49 #include "hw/virtio/vhost.h"
50 #include "sysemu/qtest.h"
51 
#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/*
 * For now the minimum queue size equals the default size, so only larger
 * queues can be configured; with virtio-1, the guest can downsize.
 */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge interval of the coalesced-packets timer.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Hash types reported in the supported_hash_types config field. */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static const VirtIOConfigSizeParams cfg_size_params = {
110     .min_size = endof(struct virtio_net_config, mac),
111     .max_size = sizeof(struct virtio_net_config),
112     .feature_sizes = feature_sizes
113 };
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
/* Convert a virtqueue index into the index of the queue pair it belongs to. */
static int vq2q(int queue_index)
{
    /* each pair consists of two consecutive virtqueues */
    const int pair_index = queue_index / 2;

    return pair_index;
}
126 
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143     VirtIONet *n = VIRTIO_NET(vdev);
144     struct virtio_net_config netcfg;
145     NetClientState *nc = qemu_get_queue(n->nic);
146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147 
148     int ret = 0;
149     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
150     virtio_stw_p(vdev, &netcfg.status, n->status);
151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155     netcfg.duplex = n->net_conf.duplex;
156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
162     memcpy(config, &netcfg, n->config_size);
163 
164     /*
165      * Is this VDPA? No peer means not VDPA: there's no way to
166      * disconnect/reconnect a VDPA peer.
167      */
168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170                                    n->config_size);
171         if (ret == -1) {
172             return;
173         }
174 
175         /*
176          * Some NIC/kernel combinations present 0 as the mac address.  As that
177          * is not a legal address, try to proceed with the address from the
178          * QEMU command line in the hope that the address has been configured
179          * correctly elsewhere - just not reported by the device.
180          */
181         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182             info_report("Zero hardware mac address detected. Ignoring.");
183             memcpy(netcfg.mac, n->mac, ETH_ALEN);
184         }
185 
186         netcfg.status |= virtio_tswap16(vdev,
187                                         n->status & VIRTIO_NET_S_ANNOUNCE);
188         memcpy(config, &netcfg, n->config_size);
189     }
190 }
191 
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194     VirtIONet *n = VIRTIO_NET(vdev);
195     struct virtio_net_config netcfg = {};
196     NetClientState *nc = qemu_get_queue(n->nic);
197 
198     memcpy(&netcfg, config, n->config_size);
199 
200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205     }
206 
207     /*
208      * Is this VDPA? No peer means not VDPA: there's no way to
209      * disconnect/reconnect a VDPA peer.
210      */
211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212         vhost_net_set_config(get_vhost_net(nc->peer),
213                              (uint8_t *)&netcfg, 0, n->config_size,
214                              VHOST_SET_CONFIG_TYPE_FRONTEND);
215       }
216 }
217 
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224 
225 static void virtio_net_announce_notify(VirtIONet *net)
226 {
227     VirtIODevice *vdev = VIRTIO_DEVICE(net);
228     trace_virtio_net_announce_notify();
229 
230     net->status |= VIRTIO_NET_S_ANNOUNCE;
231     virtio_notify_config(vdev);
232 }
233 
234 static void virtio_net_announce_timer(void *opaque)
235 {
236     VirtIONet *n = opaque;
237     trace_virtio_net_announce_timer(n->announce_timer.round);
238 
239     n->announce_timer.round--;
240     virtio_net_announce_notify(n);
241 }
242 
243 static void virtio_net_announce(NetClientState *nc)
244 {
245     VirtIONet *n = qemu_get_nic_opaque(nc);
246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
247 
248     /*
249      * Make sure the virtio migration announcement timer isn't running
250      * If it is, let it trigger announcement so that we do not cause
251      * confusion.
252      */
253     if (n->announce_timer.round) {
254         return;
255     }
256 
257     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259             virtio_net_announce_notify(n);
260     }
261 }
262 
263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
264 {
265     VirtIODevice *vdev = VIRTIO_DEVICE(n);
266     NetClientState *nc = qemu_get_queue(n->nic);
267     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
268     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
269               n->max_ncs - n->max_queue_pairs : 0;
270 
271     if (!get_vhost_net(nc->peer)) {
272         return;
273     }
274 
275     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
276         !!n->vhost_started) {
277         return;
278     }
279     if (!n->vhost_started) {
280         int r, i;
281 
282         if (n->needs_vnet_hdr_swap) {
283             error_report("backend does not support %s vnet headers; "
284                          "falling back on userspace virtio",
285                          virtio_is_big_endian(vdev) ? "BE" : "LE");
286             return;
287         }
288 
289         /* Any packets outstanding? Purge them to avoid touching rings
290          * when vhost is running.
291          */
292         for (i = 0;  i < queue_pairs; i++) {
293             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
294 
295             /* Purge both directions: TX and RX. */
296             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
297             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298         }
299 
300         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
301             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
302             if (r < 0) {
303                 error_report("%uBytes MTU not supported by the backend",
304                              n->net_conf.mtu);
305 
306                 return;
307             }
308         }
309 
310         n->vhost_started = 1;
311         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
312         if (r < 0) {
313             error_report("unable to start vhost net: %d: "
314                          "falling back on userspace virtio", -r);
315             n->vhost_started = 0;
316         }
317     } else {
318         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
319         n->vhost_started = 0;
320     }
321 }
322 
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324                                           NetClientState *peer,
325                                           bool enable)
326 {
327     if (virtio_is_big_endian(vdev)) {
328         return qemu_set_vnet_be(peer, enable);
329     } else {
330         return qemu_set_vnet_le(peer, enable);
331     }
332 }
333 
334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
335                                        int queue_pairs, bool enable)
336 {
337     int i;
338 
339     for (i = 0; i < queue_pairs; i++) {
340         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341             enable) {
342             while (--i >= 0) {
343                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344             }
345 
346             return true;
347         }
348     }
349 
350     return false;
351 }
352 
353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354 {
355     VirtIODevice *vdev = VIRTIO_DEVICE(n);
356     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
357 
358     if (virtio_net_started(n, status)) {
359         /* Before using the device, we tell the network backend about the
360          * endianness to use when parsing vnet headers. If the backend
361          * can't do it, we fallback onto fixing the headers in the core
362          * virtio-net code.
363          */
364         n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
365                                  virtio_net_set_vnet_endian(vdev, n->nic->ncs,
366                                                             queue_pairs, true);
367     } else if (virtio_net_started(n, vdev->status)) {
368         /* After using the device, we need to reset the network backend to
369          * the default (guest native endianness), otherwise the guest may
370          * lose network connectivity if it is rebooted into a different
371          * endianness.
372          */
373         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
374     }
375 }
376 
377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
378 {
379     unsigned int dropped = virtqueue_drop_all(vq);
380     if (dropped) {
381         virtio_notify(vdev, vq);
382     }
383 }
384 
385 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
386 {
387     VirtIONet *n = VIRTIO_NET(vdev);
388     VirtIONetQueue *q;
389     int i;
390     uint8_t queue_status;
391 
392     virtio_net_vnet_endian_status(n, status);
393     virtio_net_vhost_status(n, status);
394 
395     for (i = 0; i < n->max_queue_pairs; i++) {
396         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
397         bool queue_started;
398         q = &n->vqs[i];
399 
400         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
401             queue_status = 0;
402         } else {
403             queue_status = status;
404         }
405         queue_started =
406             virtio_net_started(n, queue_status) && !n->vhost_started;
407 
408         if (queue_started) {
409             qemu_flush_queued_packets(ncs);
410         }
411 
412         if (!q->tx_waiting) {
413             continue;
414         }
415 
416         if (queue_started) {
417             if (q->tx_timer) {
418                 timer_mod(q->tx_timer,
419                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
420             } else {
421                 replay_bh_schedule_event(q->tx_bh);
422             }
423         } else {
424             if (q->tx_timer) {
425                 timer_del(q->tx_timer);
426             } else {
427                 qemu_bh_cancel(q->tx_bh);
428             }
429             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
430                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
431                 vdev->vm_running) {
432                 /* if tx is waiting we are likely have some packets in tx queue
433                  * and disabled notification */
434                 q->tx_waiting = 0;
435                 virtio_queue_set_notification(q->tx_vq, 1);
436                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
437             }
438         }
439     }
440 }
441 
442 static void virtio_net_set_link_status(NetClientState *nc)
443 {
444     VirtIONet *n = qemu_get_nic_opaque(nc);
445     VirtIODevice *vdev = VIRTIO_DEVICE(n);
446     uint16_t old_status = n->status;
447 
448     if (nc->link_down)
449         n->status &= ~VIRTIO_NET_S_LINK_UP;
450     else
451         n->status |= VIRTIO_NET_S_LINK_UP;
452 
453     if (n->status != old_status)
454         virtio_notify_config(vdev);
455 
456     virtio_net_set_status(vdev, vdev->status);
457 }
458 
459 static void rxfilter_notify(NetClientState *nc)
460 {
461     VirtIONet *n = qemu_get_nic_opaque(nc);
462 
463     if (nc->rxfilter_notify_enabled) {
464         char *path = object_get_canonical_path(OBJECT(n->qdev));
465         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
466         g_free(path);
467 
468         /* disable event notification to avoid events flooding */
469         nc->rxfilter_notify_enabled = 0;
470     }
471 }
472 
473 static intList *get_vlan_table(VirtIONet *n)
474 {
475     intList *list;
476     int i, j;
477 
478     list = NULL;
479     for (i = 0; i < MAX_VLAN >> 5; i++) {
480         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
481             if (n->vlans[i] & (1U << j)) {
482                 QAPI_LIST_PREPEND(list, (i << 5) + j);
483             }
484         }
485     }
486 
487     return list;
488 }
489 
490 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
491 {
492     VirtIONet *n = qemu_get_nic_opaque(nc);
493     VirtIODevice *vdev = VIRTIO_DEVICE(n);
494     RxFilterInfo *info;
495     strList *str_list;
496     int i;
497 
498     info = g_malloc0(sizeof(*info));
499     info->name = g_strdup(nc->name);
500     info->promiscuous = n->promisc;
501 
502     if (n->nouni) {
503         info->unicast = RX_STATE_NONE;
504     } else if (n->alluni) {
505         info->unicast = RX_STATE_ALL;
506     } else {
507         info->unicast = RX_STATE_NORMAL;
508     }
509 
510     if (n->nomulti) {
511         info->multicast = RX_STATE_NONE;
512     } else if (n->allmulti) {
513         info->multicast = RX_STATE_ALL;
514     } else {
515         info->multicast = RX_STATE_NORMAL;
516     }
517 
518     info->broadcast_allowed = n->nobcast;
519     info->multicast_overflow = n->mac_table.multi_overflow;
520     info->unicast_overflow = n->mac_table.uni_overflow;
521 
522     info->main_mac = qemu_mac_strdup_printf(n->mac);
523 
524     str_list = NULL;
525     for (i = 0; i < n->mac_table.first_multi; i++) {
526         QAPI_LIST_PREPEND(str_list,
527                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
528     }
529     info->unicast_table = str_list;
530 
531     str_list = NULL;
532     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
533         QAPI_LIST_PREPEND(str_list,
534                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
535     }
536     info->multicast_table = str_list;
537     info->vlan_table = get_vlan_table(n);
538 
539     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
540         info->vlan = RX_STATE_ALL;
541     } else if (!info->vlan_table) {
542         info->vlan = RX_STATE_NONE;
543     } else {
544         info->vlan = RX_STATE_NORMAL;
545     }
546 
547     /* enable event notification after query */
548     nc->rxfilter_notify_enabled = 1;
549 
550     return info;
551 }
552 
553 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
554 {
555     VirtIONet *n = VIRTIO_NET(vdev);
556     NetClientState *nc;
557 
558     /* validate queue_index and skip for cvq */
559     if (queue_index >= n->max_queue_pairs * 2) {
560         return;
561     }
562 
563     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
564 
565     if (!nc->peer) {
566         return;
567     }
568 
569     if (get_vhost_net(nc->peer) &&
570         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
571         vhost_net_virtqueue_reset(vdev, nc, queue_index);
572     }
573 
574     flush_or_purge_queued_packets(nc);
575 }
576 
577 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
578 {
579     VirtIONet *n = VIRTIO_NET(vdev);
580     NetClientState *nc;
581     int r;
582 
583     /* validate queue_index and skip for cvq */
584     if (queue_index >= n->max_queue_pairs * 2) {
585         return;
586     }
587 
588     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
589 
590     if (!nc->peer || !vdev->vhost_started) {
591         return;
592     }
593 
594     if (get_vhost_net(nc->peer) &&
595         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
596         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
597         if (r < 0) {
598             error_report("unable to restart vhost net virtqueue: %d, "
599                             "when resetting the queue", queue_index);
600         }
601     }
602 }
603 
604 static void peer_test_vnet_hdr(VirtIONet *n)
605 {
606     NetClientState *nc = qemu_get_queue(n->nic);
607     if (!nc->peer) {
608         return;
609     }
610 
611     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
612 }
613 
614 static int peer_has_vnet_hdr(VirtIONet *n)
615 {
616     return n->has_vnet_hdr;
617 }
618 
619 static int peer_has_ufo(VirtIONet *n)
620 {
621     if (!peer_has_vnet_hdr(n))
622         return 0;
623 
624     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
625 
626     return n->has_ufo;
627 }
628 
629 static int peer_has_uso(VirtIONet *n)
630 {
631     if (!peer_has_vnet_hdr(n)) {
632         return 0;
633     }
634 
635     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
636 }
637 
638 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
639                                        int version_1, int hash_report)
640 {
641     int i;
642     NetClientState *nc;
643 
644     n->mergeable_rx_bufs = mergeable_rx_bufs;
645 
646     if (version_1) {
647         n->guest_hdr_len = hash_report ?
648             sizeof(struct virtio_net_hdr_v1_hash) :
649             sizeof(struct virtio_net_hdr_mrg_rxbuf);
650         n->rss_data.populate_hash = !!hash_report;
651     } else {
652         n->guest_hdr_len = n->mergeable_rx_bufs ?
653             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
654             sizeof(struct virtio_net_hdr);
655         n->rss_data.populate_hash = false;
656     }
657 
658     for (i = 0; i < n->max_queue_pairs; i++) {
659         nc = qemu_get_subqueue(n->nic, i);
660 
661         if (peer_has_vnet_hdr(n) &&
662             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
663             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
664             n->host_hdr_len = n->guest_hdr_len;
665         }
666     }
667 }
668 
669 static int virtio_net_max_tx_queue_size(VirtIONet *n)
670 {
671     NetClientState *peer = n->nic_conf.peers.ncs[0];
672 
673     /*
674      * Backends other than vhost-user or vhost-vdpa don't support max queue
675      * size.
676      */
677     if (!peer) {
678         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
679     }
680 
681     switch(peer->info->type) {
682     case NET_CLIENT_DRIVER_VHOST_USER:
683     case NET_CLIENT_DRIVER_VHOST_VDPA:
684         return VIRTQUEUE_MAX_SIZE;
685     default:
686         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
687     };
688 }
689 
690 static int peer_attach(VirtIONet *n, int index)
691 {
692     NetClientState *nc = qemu_get_subqueue(n->nic, index);
693 
694     if (!nc->peer) {
695         return 0;
696     }
697 
698     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
699         vhost_set_vring_enable(nc->peer, 1);
700     }
701 
702     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
703         return 0;
704     }
705 
706     if (n->max_queue_pairs == 1) {
707         return 0;
708     }
709 
710     return tap_enable(nc->peer);
711 }
712 
713 static int peer_detach(VirtIONet *n, int index)
714 {
715     NetClientState *nc = qemu_get_subqueue(n->nic, index);
716 
717     if (!nc->peer) {
718         return 0;
719     }
720 
721     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
722         vhost_set_vring_enable(nc->peer, 0);
723     }
724 
725     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
726         return 0;
727     }
728 
729     return tap_disable(nc->peer);
730 }
731 
732 static void virtio_net_set_queue_pairs(VirtIONet *n)
733 {
734     int i;
735     int r;
736 
737     if (n->nic->peer_deleted) {
738         return;
739     }
740 
741     for (i = 0; i < n->max_queue_pairs; i++) {
742         if (i < n->curr_queue_pairs) {
743             r = peer_attach(n, i);
744             assert(!r);
745         } else {
746             r = peer_detach(n, i);
747             assert(!r);
748         }
749     }
750 }
751 
752 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
753 
754 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
755                                         Error **errp)
756 {
757     VirtIONet *n = VIRTIO_NET(vdev);
758     NetClientState *nc = qemu_get_queue(n->nic);
759 
760     /* Firstly sync all virtio-net possible supported features */
761     features |= n->host_features;
762 
763     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
764 
765     if (!peer_has_vnet_hdr(n)) {
766         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
767         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
768         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
769         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
770 
771         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
772         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
773         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
774         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
775 
776         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
777         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
778         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
779 
780         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
781     }
782 
783     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
784         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
785         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
786     }
787 
788     if (!peer_has_uso(n)) {
789         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
790         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
791         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
792     }
793 
794     if (!get_vhost_net(nc->peer)) {
795         return features;
796     }
797 
798     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
799         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
800     }
801     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
802     vdev->backend_features = features;
803 
804     if (n->mtu_bypass_backend &&
805             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
806         features |= (1ULL << VIRTIO_NET_F_MTU);
807     }
808 
809     /*
810      * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
811      * enabled. This happens in the vDPA case.
812      *
813      * Make sure the feature set is not incoherent, as the driver could refuse
814      * to start.
815      *
816      * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
817      * helping guest to notify the new location with vDPA devices that does not
818      * support it.
819      */
820     if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
821         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
822     }
823 
824     return features;
825 }
826 
827 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
828 {
829     uint64_t features = 0;
830 
831     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
832      * but also these: */
833     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
834     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
835     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
836     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
837     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
838 
839     return features;
840 }
841 
842 static void virtio_net_apply_guest_offloads(VirtIONet *n)
843 {
844     qemu_set_offload(qemu_get_queue(n->nic)->peer,
845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
846             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
847             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
848             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
849             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
850             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
851             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
852 }
853 
854 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
855 {
856     static const uint64_t guest_offloads_mask =
857         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
858         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
859         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
860         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
861         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
862         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
863         (1ULL << VIRTIO_NET_F_GUEST_USO6);
864 
865     return guest_offloads_mask & features;
866 }
867 
868 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
869 {
870     VirtIODevice *vdev = VIRTIO_DEVICE(n);
871     return virtio_net_guest_offloads_by_features(vdev->guest_features);
872 }
873 
/* Search state passed to failover_set_primary() through its opaque pointer. */
typedef struct {
    VirtIONet *n;       /* device whose netclient_name is being matched */
    DeviceState *dev;   /* filled in by failover_set_primary() on a match */
} FailoverDevice;
878 
879 /**
880  * Set the failover primary device
881  *
882  * @opaque: FailoverId to setup
883  * @opts: opts for device we are handling
884  * @errp: returns an error if this function fails
885  */
886 static int failover_set_primary(DeviceState *dev, void *opaque)
887 {
888     FailoverDevice *fdev = opaque;
889     PCIDevice *pci_dev = (PCIDevice *)
890         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
891 
892     if (!pci_dev) {
893         return 0;
894     }
895 
896     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
897         fdev->dev = dev;
898         return 1;
899     }
900 
901     return 0;
902 }
903 
904 /**
905  * Find the primary device for this failover virtio-net
906  *
907  * @n: VirtIONet device
908  * @errp: returns an error if this function fails
909  */
910 static DeviceState *failover_find_primary_device(VirtIONet *n)
911 {
912     FailoverDevice fdev = {
913         .n = n,
914     };
915 
916     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
917                        NULL, NULL, &fdev);
918     return fdev.dev;
919 }
920 
921 static void failover_add_primary(VirtIONet *n, Error **errp)
922 {
923     Error *err = NULL;
924     DeviceState *dev = failover_find_primary_device(n);
925 
926     if (dev) {
927         return;
928     }
929 
930     if (!n->primary_opts) {
931         error_setg(errp, "Primary device not found");
932         error_append_hint(errp, "Virtio-net failover will not work. Make "
933                           "sure primary device has parameter"
934                           " failover_pair_id=%s\n", n->netclient_name);
935         return;
936     }
937 
938     dev = qdev_device_add_from_qdict(n->primary_opts,
939                                      n->primary_opts_from_json,
940                                      &err);
941     if (err) {
942         qobject_unref(n->primary_opts);
943         n->primary_opts = NULL;
944     } else {
945         object_unref(OBJECT(dev));
946     }
947     error_propagate(errp, err);
948 }
949 
950 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
951 {
952     VirtIONet *n = VIRTIO_NET(vdev);
953     Error *err = NULL;
954     int i;
955 
956     if (n->mtu_bypass_backend &&
957             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
958         features &= ~(1ULL << VIRTIO_NET_F_MTU);
959     }
960 
961     virtio_net_set_multiqueue(n,
962                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
963                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
964 
965     virtio_net_set_mrg_rx_bufs(n,
966                                virtio_has_feature(features,
967                                                   VIRTIO_NET_F_MRG_RXBUF),
968                                virtio_has_feature(features,
969                                                   VIRTIO_F_VERSION_1),
970                                virtio_has_feature(features,
971                                                   VIRTIO_NET_F_HASH_REPORT));
972 
973     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
974         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
975     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
976         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
977     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
978 
979     if (n->has_vnet_hdr) {
980         n->curr_guest_offloads =
981             virtio_net_guest_offloads_by_features(features);
982         virtio_net_apply_guest_offloads(n);
983     }
984 
985     for (i = 0;  i < n->max_queue_pairs; i++) {
986         NetClientState *nc = qemu_get_subqueue(n->nic, i);
987 
988         if (!get_vhost_net(nc->peer)) {
989             continue;
990         }
991         vhost_net_ack_features(get_vhost_net(nc->peer), features);
992 
993         /*
994          * keep acked_features in NetVhostUserState up-to-date so it
995          * can't miss any features configured by guest virtio driver.
996          */
997         vhost_net_save_acked_features(nc->peer);
998     }
999 
1000     if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1001         memset(n->vlans, 0xff, MAX_VLAN >> 3);
1002     }
1003 
1004     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1005         qapi_event_send_failover_negotiated(n->netclient_name);
1006         qatomic_set(&n->failover_primary_hidden, false);
1007         failover_add_primary(n, &err);
1008         if (err) {
1009             if (!qtest_enabled()) {
1010                 warn_report_err(err);
1011             } else {
1012                 error_free(err);
1013             }
1014         }
1015     }
1016 }
1017 
1018 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1019                                      struct iovec *iov, unsigned int iov_cnt)
1020 {
1021     uint8_t on;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024 
1025     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1026     if (s != sizeof(on)) {
1027         return VIRTIO_NET_ERR;
1028     }
1029 
1030     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1031         n->promisc = on;
1032     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1033         n->allmulti = on;
1034     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1035         n->alluni = on;
1036     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1037         n->nomulti = on;
1038     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1039         n->nouni = on;
1040     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1041         n->nobcast = on;
1042     } else {
1043         return VIRTIO_NET_ERR;
1044     }
1045 
1046     rxfilter_notify(nc);
1047 
1048     return VIRTIO_NET_OK;
1049 }
1050 
1051 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1052                                      struct iovec *iov, unsigned int iov_cnt)
1053 {
1054     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1055     uint64_t offloads;
1056     size_t s;
1057 
1058     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1059         return VIRTIO_NET_ERR;
1060     }
1061 
1062     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1063     if (s != sizeof(offloads)) {
1064         return VIRTIO_NET_ERR;
1065     }
1066 
1067     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1068         uint64_t supported_offloads;
1069 
1070         offloads = virtio_ldq_p(vdev, &offloads);
1071 
1072         if (!n->has_vnet_hdr) {
1073             return VIRTIO_NET_ERR;
1074         }
1075 
1076         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1077             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1078         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1079             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1080         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1081 
1082         supported_offloads = virtio_net_supported_guest_offloads(n);
1083         if (offloads & ~supported_offloads) {
1084             return VIRTIO_NET_ERR;
1085         }
1086 
1087         n->curr_guest_offloads = offloads;
1088         virtio_net_apply_guest_offloads(n);
1089 
1090         return VIRTIO_NET_OK;
1091     } else {
1092         return VIRTIO_NET_ERR;
1093     }
1094 }
1095 
1096 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1097                                  struct iovec *iov, unsigned int iov_cnt)
1098 {
1099     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1100     struct virtio_net_ctrl_mac mac_data;
1101     size_t s;
1102     NetClientState *nc = qemu_get_queue(n->nic);
1103 
1104     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1105         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1106             return VIRTIO_NET_ERR;
1107         }
1108         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1109         assert(s == sizeof(n->mac));
1110         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1111         rxfilter_notify(nc);
1112 
1113         return VIRTIO_NET_OK;
1114     }
1115 
1116     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1117         return VIRTIO_NET_ERR;
1118     }
1119 
1120     int in_use = 0;
1121     int first_multi = 0;
1122     uint8_t uni_overflow = 0;
1123     uint8_t multi_overflow = 0;
1124     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1125 
1126     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1127                    sizeof(mac_data.entries));
1128     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1129     if (s != sizeof(mac_data.entries)) {
1130         goto error;
1131     }
1132     iov_discard_front(&iov, &iov_cnt, s);
1133 
1134     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1135         goto error;
1136     }
1137 
1138     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1139         s = iov_to_buf(iov, iov_cnt, 0, macs,
1140                        mac_data.entries * ETH_ALEN);
1141         if (s != mac_data.entries * ETH_ALEN) {
1142             goto error;
1143         }
1144         in_use += mac_data.entries;
1145     } else {
1146         uni_overflow = 1;
1147     }
1148 
1149     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1150 
1151     first_multi = in_use;
1152 
1153     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1154                    sizeof(mac_data.entries));
1155     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1156     if (s != sizeof(mac_data.entries)) {
1157         goto error;
1158     }
1159 
1160     iov_discard_front(&iov, &iov_cnt, s);
1161 
1162     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1163         goto error;
1164     }
1165 
1166     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1167         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1168                        mac_data.entries * ETH_ALEN);
1169         if (s != mac_data.entries * ETH_ALEN) {
1170             goto error;
1171         }
1172         in_use += mac_data.entries;
1173     } else {
1174         multi_overflow = 1;
1175     }
1176 
1177     n->mac_table.in_use = in_use;
1178     n->mac_table.first_multi = first_multi;
1179     n->mac_table.uni_overflow = uni_overflow;
1180     n->mac_table.multi_overflow = multi_overflow;
1181     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1182     g_free(macs);
1183     rxfilter_notify(nc);
1184 
1185     return VIRTIO_NET_OK;
1186 
1187 error:
1188     g_free(macs);
1189     return VIRTIO_NET_ERR;
1190 }
1191 
1192 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1193                                         struct iovec *iov, unsigned int iov_cnt)
1194 {
1195     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1196     uint16_t vid;
1197     size_t s;
1198     NetClientState *nc = qemu_get_queue(n->nic);
1199 
1200     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1201     vid = virtio_lduw_p(vdev, &vid);
1202     if (s != sizeof(vid)) {
1203         return VIRTIO_NET_ERR;
1204     }
1205 
1206     if (vid >= MAX_VLAN)
1207         return VIRTIO_NET_ERR;
1208 
1209     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1210         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1211     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1212         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1213     else
1214         return VIRTIO_NET_ERR;
1215 
1216     rxfilter_notify(nc);
1217 
1218     return VIRTIO_NET_OK;
1219 }
1220 
1221 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1222                                       struct iovec *iov, unsigned int iov_cnt)
1223 {
1224     trace_virtio_net_handle_announce(n->announce_timer.round);
1225     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1226         n->status & VIRTIO_NET_S_ANNOUNCE) {
1227         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1228         if (n->announce_timer.round) {
1229             qemu_announce_timer_step(&n->announce_timer);
1230         }
1231         return VIRTIO_NET_OK;
1232     } else {
1233         return VIRTIO_NET_ERR;
1234     }
1235 }
1236 
1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1238 {
1239     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1240     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1241         return false;
1242     }
1243 
1244     trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
1245     return nc->info->set_steering_ebpf(nc, prog_fd);
1246 }
1247 
1248 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1249                                    struct EBPFRSSConfig *config)
1250 {
1251     config->redirect = data->redirect;
1252     config->populate_hash = data->populate_hash;
1253     config->hash_types = data->hash_types;
1254     config->indirections_len = data->indirections_len;
1255     config->default_queue = data->default_queue;
1256 }
1257 
1258 static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
1259 {
1260     struct EBPFRSSConfig config = {};
1261 
1262     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1263         return false;
1264     }
1265 
1266     rss_data_to_rss_config(&n->rss_data, &config);
1267 
1268     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1269                           n->rss_data.indirections_table, n->rss_data.key,
1270                           NULL)) {
1271         return false;
1272     }
1273 
1274     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1275         return false;
1276     }
1277 
1278     return true;
1279 }
1280 
1281 static void virtio_net_detach_ebpf_rss(VirtIONet *n)
1282 {
1283     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1284 }
1285 
1286 static void virtio_net_commit_rss_config(VirtIONet *n)
1287 {
1288     if (n->rss_data.enabled) {
1289         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
1290         if (n->rss_data.populate_hash) {
1291             virtio_net_detach_ebpf_rss(n);
1292         } else if (!virtio_net_attach_ebpf_rss(n)) {
1293             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1294                 warn_report("Can't load eBPF RSS for vhost");
1295             } else {
1296                 warn_report("Can't load eBPF RSS - fallback to software RSS");
1297                 n->rss_data.enabled_software_rss = true;
1298             }
1299         }
1300 
1301         trace_virtio_net_rss_enable(n,
1302                                     n->rss_data.hash_types,
1303                                     n->rss_data.indirections_len,
1304                                     sizeof(n->rss_data.key));
1305     } else {
1306         virtio_net_detach_ebpf_rss(n);
1307         trace_virtio_net_rss_disable(n);
1308     }
1309 }
1310 
1311 static void virtio_net_disable_rss(VirtIONet *n)
1312 {
1313     if (!n->rss_data.enabled) {
1314         return;
1315     }
1316 
1317     n->rss_data.enabled = false;
1318     virtio_net_commit_rss_config(n);
1319 }
1320 
1321 static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
1322 {
1323     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1324     int ret = true;
1325     int i = 0;
1326 
1327     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1328         error_setg(errp, "Expected %d file descriptors but got %d",
1329                    EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1330         return false;
1331     }
1332 
1333     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1334         fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
1335         if (fds[i] < 0) {
1336             ret = false;
1337             goto exit;
1338         }
1339     }
1340 
1341     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);
1342 
1343 exit:
1344     if (!ret) {
1345         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1346             close(fds[i]);
1347         }
1348     }
1349 
1350     return ret;
1351 }
1352 
1353 static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
1354 {
1355     bool ret = false;
1356 
1357     if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1358         trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
1359         if (n->ebpf_rss_fds) {
1360             ret = virtio_net_load_ebpf_fds(n, errp);
1361         } else {
1362             ret = ebpf_rss_load(&n->ebpf_rss, errp);
1363         }
1364     }
1365 
1366     return ret;
1367 }
1368 
1369 static void virtio_net_unload_ebpf(VirtIONet *n)
1370 {
1371     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1372     ebpf_rss_unload(&n->ebpf_rss);
1373 }
1374 
1375 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1376                                       struct iovec *iov,
1377                                       unsigned int iov_cnt,
1378                                       bool do_rss)
1379 {
1380     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1381     struct virtio_net_rss_config cfg;
1382     size_t s, offset = 0, size_get;
1383     uint16_t queue_pairs, i;
1384     struct {
1385         uint16_t us;
1386         uint8_t b;
1387     } QEMU_PACKED temp;
1388     const char *err_msg = "";
1389     uint32_t err_value = 0;
1390 
1391     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1392         err_msg = "RSS is not negotiated";
1393         goto error;
1394     }
1395     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1396         err_msg = "Hash report is not negotiated";
1397         goto error;
1398     }
1399     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1400     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1401     if (s != size_get) {
1402         err_msg = "Short command buffer";
1403         err_value = (uint32_t)s;
1404         goto error;
1405     }
1406     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1407     n->rss_data.indirections_len =
1408         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1409     if (!do_rss) {
1410         n->rss_data.indirections_len = 0;
1411     }
1412     if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1413         err_msg = "Too large indirection table";
1414         err_value = n->rss_data.indirections_len;
1415         goto error;
1416     }
1417     n->rss_data.indirections_len++;
1418     if (!is_power_of_2(n->rss_data.indirections_len)) {
1419         err_msg = "Invalid size of indirection table";
1420         err_value = n->rss_data.indirections_len;
1421         goto error;
1422     }
1423     n->rss_data.default_queue = do_rss ?
1424         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1425     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1426         err_msg = "Invalid default queue";
1427         err_value = n->rss_data.default_queue;
1428         goto error;
1429     }
1430     offset += size_get;
1431     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1432     g_free(n->rss_data.indirections_table);
1433     n->rss_data.indirections_table = g_malloc(size_get);
1434     if (!n->rss_data.indirections_table) {
1435         err_msg = "Can't allocate indirections table";
1436         err_value = n->rss_data.indirections_len;
1437         goto error;
1438     }
1439     s = iov_to_buf(iov, iov_cnt, offset,
1440                    n->rss_data.indirections_table, size_get);
1441     if (s != size_get) {
1442         err_msg = "Short indirection table buffer";
1443         err_value = (uint32_t)s;
1444         goto error;
1445     }
1446     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1447         uint16_t val = n->rss_data.indirections_table[i];
1448         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1449     }
1450     offset += size_get;
1451     size_get = sizeof(temp);
1452     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1453     if (s != size_get) {
1454         err_msg = "Can't get queue_pairs";
1455         err_value = (uint32_t)s;
1456         goto error;
1457     }
1458     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1459     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1460         err_msg = "Invalid number of queue_pairs";
1461         err_value = queue_pairs;
1462         goto error;
1463     }
1464     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1465         err_msg = "Invalid key size";
1466         err_value = temp.b;
1467         goto error;
1468     }
1469     if (!temp.b && n->rss_data.hash_types) {
1470         err_msg = "No key provided";
1471         err_value = 0;
1472         goto error;
1473     }
1474     if (!temp.b && !n->rss_data.hash_types) {
1475         virtio_net_disable_rss(n);
1476         return queue_pairs;
1477     }
1478     offset += size_get;
1479     size_get = temp.b;
1480     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1481     if (s != size_get) {
1482         err_msg = "Can get key buffer";
1483         err_value = (uint32_t)s;
1484         goto error;
1485     }
1486     n->rss_data.enabled = true;
1487     virtio_net_commit_rss_config(n);
1488     return queue_pairs;
1489 error:
1490     trace_virtio_net_rss_error(n, err_msg, err_value);
1491     virtio_net_disable_rss(n);
1492     return 0;
1493 }
1494 
1495 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1496                                 struct iovec *iov, unsigned int iov_cnt)
1497 {
1498     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1499     uint16_t queue_pairs;
1500     NetClientState *nc = qemu_get_queue(n->nic);
1501 
1502     virtio_net_disable_rss(n);
1503     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1504         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1505         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1506     }
1507     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1508         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1509     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1510         struct virtio_net_ctrl_mq mq;
1511         size_t s;
1512         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1513             return VIRTIO_NET_ERR;
1514         }
1515         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1516         if (s != sizeof(mq)) {
1517             return VIRTIO_NET_ERR;
1518         }
1519         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1520 
1521     } else {
1522         return VIRTIO_NET_ERR;
1523     }
1524 
1525     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1526         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1527         queue_pairs > n->max_queue_pairs ||
1528         !n->multiqueue) {
1529         return VIRTIO_NET_ERR;
1530     }
1531 
1532     n->curr_queue_pairs = queue_pairs;
1533     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1534         /*
1535          * Avoid updating the backend for a vdpa device: We're only interested
1536          * in updating the device model queues.
1537          */
1538         return VIRTIO_NET_OK;
1539     }
1540     /* stop the backend before changing the number of queue_pairs to avoid handling a
1541      * disabled queue */
1542     virtio_net_set_status(vdev, vdev->status);
1543     virtio_net_set_queue_pairs(n);
1544 
1545     return VIRTIO_NET_OK;
1546 }
1547 
1548 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1549                                   const struct iovec *in_sg, unsigned in_num,
1550                                   const struct iovec *out_sg,
1551                                   unsigned out_num)
1552 {
1553     VirtIONet *n = VIRTIO_NET(vdev);
1554     struct virtio_net_ctrl_hdr ctrl;
1555     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1556     size_t s;
1557     struct iovec *iov, *iov2;
1558 
1559     if (iov_size(in_sg, in_num) < sizeof(status) ||
1560         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1561         virtio_error(vdev, "virtio-net ctrl missing headers");
1562         return 0;
1563     }
1564 
1565     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1566     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1567     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1568     if (s != sizeof(ctrl)) {
1569         status = VIRTIO_NET_ERR;
1570     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1571         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1572     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1573         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1574     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1575         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1576     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1577         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1578     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1579         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1580     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1581         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1582     }
1583 
1584     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1585     assert(s == sizeof(status));
1586 
1587     g_free(iov2);
1588     return sizeof(status);
1589 }
1590 
1591 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1592 {
1593     VirtQueueElement *elem;
1594 
1595     for (;;) {
1596         size_t written;
1597         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1598         if (!elem) {
1599             break;
1600         }
1601 
1602         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1603                                              elem->out_sg, elem->out_num);
1604         if (written > 0) {
1605             virtqueue_push(vq, elem, written);
1606             virtio_notify(vdev, vq);
1607             g_free(elem);
1608         } else {
1609             virtqueue_detach_element(vq, elem, 0);
1610             g_free(elem);
1611             break;
1612         }
1613     }
1614 }
1615 
1616 /* RX */
1617 
1618 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1619 {
1620     VirtIONet *n = VIRTIO_NET(vdev);
1621     int queue_index = vq2q(virtio_get_queue_index(vq));
1622 
1623     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1624 }
1625 
1626 static bool virtio_net_can_receive(NetClientState *nc)
1627 {
1628     VirtIONet *n = qemu_get_nic_opaque(nc);
1629     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1630     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1631 
1632     if (!vdev->vm_running) {
1633         return false;
1634     }
1635 
1636     if (nc->queue_index >= n->curr_queue_pairs) {
1637         return false;
1638     }
1639 
1640     if (!virtio_queue_ready(q->rx_vq) ||
1641         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1642         return false;
1643     }
1644 
1645     return true;
1646 }
1647 
1648 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1649 {
1650     int opaque;
1651     unsigned int in_bytes;
1652     VirtIONet *n = q->n;
1653 
1654     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1655         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1656                                            bufsize, 0);
1657         /* Buffer is enough, disable notifiaction */
1658         if (bufsize <= in_bytes) {
1659             break;
1660         }
1661 
1662         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1663             /* Guest has added some buffers, try again */
1664             continue;
1665         } else {
1666             return 0;
1667         }
1668     }
1669 
1670     virtio_queue_set_notification(q->rx_vq, 0);
1671 
1672     return 1;
1673 }
1674 
1675 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1676 {
1677     virtio_tswap16s(vdev, &hdr->hdr_len);
1678     virtio_tswap16s(vdev, &hdr->gso_size);
1679     virtio_tswap16s(vdev, &hdr->csum_start);
1680     virtio_tswap16s(vdev, &hdr->csum_offset);
1681 }
1682 
1683 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1684  * it never finds out that the packets don't have valid checksums.  This
1685  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1686  * fix this with Xen but it hasn't appeared in an upstream release of
1687  * dhclient yet.
1688  *
1689  * To avoid breaking existing guests, we catch udp packets and add
1690  * checksums.  This is terrible but it's better than hacking the guest
1691  * kernels.
1692  *
1693  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1694  * we should provide a mechanism to disable it to avoid polluting the host
1695  * cache.
1696  */
1697 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1698                                         uint8_t *buf, size_t size)
1699 {
1700     size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
1701                        sizeof(struct udp_header);
1702 
1703     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1704         (size >= csum_size && size < 1500) && /* normal sized MTU */
1705         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1706         (buf[23] == 17) && /* ip.protocol == UDP */
1707         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1708         net_checksum_calculate(buf, size, CSUM_UDP);
1709         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1710     }
1711 }
1712 
1713 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1714                            const void *buf, size_t size)
1715 {
1716     if (n->has_vnet_hdr) {
1717         /* FIXME this cast is evil */
1718         void *wbuf = (void *)buf;
1719         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1720                                     size - n->host_hdr_len);
1721 
1722         if (n->needs_vnet_hdr_swap) {
1723             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1724         }
1725         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1726     } else {
1727         struct virtio_net_hdr hdr = {
1728             .flags = 0,
1729             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1730         };
1731         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1732     }
1733 }
1734 
1735 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1736 {
1737     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1738     static const uint8_t vlan[] = {0x81, 0x00};
1739     uint8_t *ptr = (uint8_t *)buf;
1740     int i;
1741 
1742     if (n->promisc)
1743         return 1;
1744 
1745     ptr += n->host_hdr_len;
1746 
1747     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1748         int vid = lduw_be_p(ptr + 14) & 0xfff;
1749         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1750             return 0;
1751     }
1752 
1753     if (ptr[0] & 1) { // multicast
1754         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1755             return !n->nobcast;
1756         } else if (n->nomulti) {
1757             return 0;
1758         } else if (n->allmulti || n->mac_table.multi_overflow) {
1759             return 1;
1760         }
1761 
1762         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1763             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1764                 return 1;
1765             }
1766         }
1767     } else { // unicast
1768         if (n->nouni) {
1769             return 0;
1770         } else if (n->alluni || n->mac_table.uni_overflow) {
1771             return 1;
1772         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1773             return 1;
1774         }
1775 
1776         for (i = 0; i < n->mac_table.first_multi; i++) {
1777             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1778                 return 1;
1779             }
1780         }
1781     }
1782 
1783     return 0;
1784 }
1785 
1786 static uint8_t virtio_net_get_hash_type(bool hasip4,
1787                                         bool hasip6,
1788                                         EthL4HdrProto l4hdr_proto,
1789                                         uint32_t types)
1790 {
1791     if (hasip4) {
1792         switch (l4hdr_proto) {
1793         case ETH_L4_HDR_PROTO_TCP:
1794             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1795                 return NetPktRssIpV4Tcp;
1796             }
1797             break;
1798 
1799         case ETH_L4_HDR_PROTO_UDP:
1800             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1801                 return NetPktRssIpV4Udp;
1802             }
1803             break;
1804 
1805         default:
1806             break;
1807         }
1808 
1809         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1810             return NetPktRssIpV4;
1811         }
1812     } else if (hasip6) {
1813         switch (l4hdr_proto) {
1814         case ETH_L4_HDR_PROTO_TCP:
1815             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1816                 return NetPktRssIpV6TcpEx;
1817             }
1818             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1819                 return NetPktRssIpV6Tcp;
1820             }
1821             break;
1822 
1823         case ETH_L4_HDR_PROTO_UDP:
1824             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1825                 return NetPktRssIpV6UdpEx;
1826             }
1827             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1828                 return NetPktRssIpV6Udp;
1829             }
1830             break;
1831 
1832         default:
1833             break;
1834         }
1835 
1836         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1837             return NetPktRssIpV6Ex;
1838         }
1839         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1840             return NetPktRssIpV6;
1841         }
1842     }
1843     return 0xff;
1844 }
1845 
1846 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1847                                   size_t size,
1848                                   struct virtio_net_hdr_v1_hash *hdr)
1849 {
1850     VirtIONet *n = qemu_get_nic_opaque(nc);
1851     unsigned int index = nc->queue_index, new_index = index;
1852     struct NetRxPkt *pkt = n->rx_pkt;
1853     uint8_t net_hash_type;
1854     uint32_t hash;
1855     bool hasip4, hasip6;
1856     EthL4HdrProto l4hdr_proto;
1857     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1858         VIRTIO_NET_HASH_REPORT_IPv4,
1859         VIRTIO_NET_HASH_REPORT_TCPv4,
1860         VIRTIO_NET_HASH_REPORT_TCPv6,
1861         VIRTIO_NET_HASH_REPORT_IPv6,
1862         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1863         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1864         VIRTIO_NET_HASH_REPORT_UDPv4,
1865         VIRTIO_NET_HASH_REPORT_UDPv6,
1866         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1867     };
1868     struct iovec iov = {
1869         .iov_base = (void *)buf,
1870         .iov_len = size
1871     };
1872 
1873     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1874     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1875     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1876                                              n->rss_data.hash_types);
1877     if (net_hash_type > NetPktRssIpV6UdpEx) {
1878         if (n->rss_data.populate_hash) {
1879             hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1880             hdr->hash_report = 0;
1881         }
1882         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1883     }
1884 
1885     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1886 
1887     if (n->rss_data.populate_hash) {
1888         hdr->hash_value = hash;
1889         hdr->hash_report = reports[net_hash_type];
1890     }
1891 
1892     if (n->rss_data.redirect) {
1893         new_index = hash & (n->rss_data.indirections_len - 1);
1894         new_index = n->rss_data.indirections_table[new_index];
1895     }
1896 
1897     return (index == new_index) ? -1 : new_index;
1898 }
1899 
1900 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1901                                       size_t size)
1902 {
1903     VirtIONet *n = qemu_get_nic_opaque(nc);
1904     VirtIONetQueue *q;
1905     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1906     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1907     size_t lens[VIRTQUEUE_MAX_SIZE];
1908     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1909     struct virtio_net_hdr_v1_hash extra_hdr;
1910     unsigned mhdr_cnt = 0;
1911     size_t offset, i, guest_offset, j;
1912     ssize_t err;
1913 
1914     memset(&extra_hdr, 0, sizeof(extra_hdr));
1915 
1916     if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1917         int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1918         if (index >= 0) {
1919             nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1920         }
1921     }
1922 
1923     if (!virtio_net_can_receive(nc)) {
1924         return -1;
1925     }
1926 
1927     q = virtio_net_get_subqueue(nc);
1928 
1929     /* hdr_len refers to the header we supply to the guest */
1930     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1931         return 0;
1932     }
1933 
1934     if (!receive_filter(n, buf, size))
1935         return size;
1936 
1937     offset = i = 0;
1938 
1939     while (offset < size) {
1940         VirtQueueElement *elem;
1941         int len, total;
1942         const struct iovec *sg;
1943 
1944         total = 0;
1945 
1946         if (i == VIRTQUEUE_MAX_SIZE) {
1947             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1948             err = size;
1949             goto err;
1950         }
1951 
1952         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1953         if (!elem) {
1954             if (i) {
1955                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1956                              "i %zd mergeable %d offset %zd, size %zd, "
1957                              "guest hdr len %zd, host hdr len %zd "
1958                              "guest features 0x%" PRIx64,
1959                              i, n->mergeable_rx_bufs, offset, size,
1960                              n->guest_hdr_len, n->host_hdr_len,
1961                              vdev->guest_features);
1962             }
1963             err = -1;
1964             goto err;
1965         }
1966 
1967         if (elem->in_num < 1) {
1968             virtio_error(vdev,
1969                          "virtio-net receive queue contains no in buffers");
1970             virtqueue_detach_element(q->rx_vq, elem, 0);
1971             g_free(elem);
1972             err = -1;
1973             goto err;
1974         }
1975 
1976         sg = elem->in_sg;
1977         if (i == 0) {
1978             assert(offset == 0);
1979             if (n->mergeable_rx_bufs) {
1980                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1981                                     sg, elem->in_num,
1982                                     offsetof(typeof(extra_hdr), hdr.num_buffers),
1983                                     sizeof(extra_hdr.hdr.num_buffers));
1984             }
1985 
1986             receive_header(n, sg, elem->in_num, buf, size);
1987             if (n->rss_data.populate_hash) {
1988                 offset = offsetof(typeof(extra_hdr), hash_value);
1989                 iov_from_buf(sg, elem->in_num, offset,
1990                              (char *)&extra_hdr + offset,
1991                              sizeof(extra_hdr.hash_value) +
1992                              sizeof(extra_hdr.hash_report));
1993             }
1994             offset = n->host_hdr_len;
1995             total += n->guest_hdr_len;
1996             guest_offset = n->guest_hdr_len;
1997         } else {
1998             guest_offset = 0;
1999         }
2000 
2001         /* copy in packet.  ugh */
2002         len = iov_from_buf(sg, elem->in_num, guest_offset,
2003                            buf + offset, size - offset);
2004         total += len;
2005         offset += len;
2006         /* If buffers can't be merged, at this point we
2007          * must have consumed the complete packet.
2008          * Otherwise, drop it. */
2009         if (!n->mergeable_rx_bufs && offset < size) {
2010             virtqueue_unpop(q->rx_vq, elem, total);
2011             g_free(elem);
2012             err = size;
2013             goto err;
2014         }
2015 
2016         elems[i] = elem;
2017         lens[i] = total;
2018         i++;
2019     }
2020 
2021     if (mhdr_cnt) {
2022         virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
2023         iov_from_buf(mhdr_sg, mhdr_cnt,
2024                      0,
2025                      &extra_hdr.hdr.num_buffers,
2026                      sizeof extra_hdr.hdr.num_buffers);
2027     }
2028 
2029     for (j = 0; j < i; j++) {
2030         /* signal other side */
2031         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2032         g_free(elems[j]);
2033     }
2034 
2035     virtqueue_flush(q->rx_vq, i);
2036     virtio_notify(vdev, q->rx_vq);
2037 
2038     return size;
2039 
2040 err:
2041     for (j = 0; j < i; j++) {
2042         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2043         g_free(elems[j]);
2044     }
2045 
2046     return err;
2047 }
2048 
2049 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2050                                   size_t size)
2051 {
2052     RCU_READ_LOCK_GUARD();
2053 
2054     return virtio_net_receive_rcu(nc, buf, size);
2055 }
2056 
/*
 * Accessors to read and write the IP packet data length field. This
 * is a potentially unaligned network-byte-order 16 bit unsigned integer
 * pointed to by unit->ip_plen.
 */
2062 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2063 {
2064     return lduw_be_p(unit->ip_plen);
2065 }
2066 
2067 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2068 {
2069     stw_be_p(unit->ip_plen, l);
2070 }
2071 
2072 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2073                                          const uint8_t *buf,
2074                                          VirtioNetRscUnit *unit)
2075 {
2076     uint16_t ip_hdrlen;
2077     struct ip_header *ip;
2078 
2079     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2080                               + sizeof(struct eth_header));
2081     unit->ip = (void *)ip;
2082     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2083     unit->ip_plen = &ip->ip_len;
2084     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2085     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2086     unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
2087 }
2088 
2089 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2090                                          const uint8_t *buf,
2091                                          VirtioNetRscUnit *unit)
2092 {
2093     struct ip6_header *ip6;
2094 
2095     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2096                                  + sizeof(struct eth_header));
2097     unit->ip = ip6;
2098     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2099     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2100                                         + sizeof(struct ip6_header));
2101     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2102 
2103     /* There is a difference between payload length in ipv4 and v6,
2104        ip header is excluded in ipv6 */
2105     unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
2106 }
2107 
2108 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2109                                        VirtioNetRscSeg *seg)
2110 {
2111     int ret;
2112     struct virtio_net_hdr_v1 *h;
2113 
2114     h = (struct virtio_net_hdr_v1 *)seg->buf;
2115     h->flags = 0;
2116     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2117 
2118     if (seg->is_coalesced) {
2119         h->rsc.segments = seg->packets;
2120         h->rsc.dup_acks = seg->dup_ack;
2121         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2122         if (chain->proto == ETH_P_IP) {
2123             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2124         } else {
2125             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2126         }
2127     }
2128 
2129     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2130     QTAILQ_REMOVE(&chain->buffers, seg, next);
2131     g_free(seg->buf);
2132     g_free(seg);
2133 
2134     return ret;
2135 }
2136 
2137 static void virtio_net_rsc_purge(void *opq)
2138 {
2139     VirtioNetRscSeg *seg, *rn;
2140     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2141 
2142     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2143         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2144             chain->stat.purge_failed++;
2145             continue;
2146         }
2147     }
2148 
2149     chain->stat.timer++;
2150     if (!QTAILQ_EMPTY(&chain->buffers)) {
2151         timer_mod(chain->drain_timer,
2152               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2153     }
2154 }
2155 
2156 static void virtio_net_rsc_cleanup(VirtIONet *n)
2157 {
2158     VirtioNetRscChain *chain, *rn_chain;
2159     VirtioNetRscSeg *seg, *rn_seg;
2160 
2161     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2162         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2163             QTAILQ_REMOVE(&chain->buffers, seg, next);
2164             g_free(seg->buf);
2165             g_free(seg);
2166         }
2167 
2168         timer_free(chain->drain_timer);
2169         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2170         g_free(chain);
2171     }
2172 }
2173 
2174 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2175                                      NetClientState *nc,
2176                                      const uint8_t *buf, size_t size)
2177 {
2178     uint16_t hdr_len;
2179     VirtioNetRscSeg *seg;
2180 
2181     hdr_len = chain->n->guest_hdr_len;
2182     seg = g_new(VirtioNetRscSeg, 1);
2183     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2184         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2185     memcpy(seg->buf, buf, size);
2186     seg->size = size;
2187     seg->packets = 1;
2188     seg->dup_ack = 0;
2189     seg->is_coalesced = 0;
2190     seg->nc = nc;
2191 
2192     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2193     chain->stat.cache++;
2194 
2195     switch (chain->proto) {
2196     case ETH_P_IP:
2197         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2198         break;
2199     case ETH_P_IPV6:
2200         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2201         break;
2202     default:
2203         g_assert_not_reached();
2204     }
2205 }
2206 
2207 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2208                                          VirtioNetRscSeg *seg,
2209                                          const uint8_t *buf,
2210                                          struct tcp_header *n_tcp,
2211                                          struct tcp_header *o_tcp)
2212 {
2213     uint32_t nack, oack;
2214     uint16_t nwin, owin;
2215 
2216     nack = htonl(n_tcp->th_ack);
2217     nwin = htons(n_tcp->th_win);
2218     oack = htonl(o_tcp->th_ack);
2219     owin = htons(o_tcp->th_win);
2220 
2221     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2222         chain->stat.ack_out_of_win++;
2223         return RSC_FINAL;
2224     } else if (nack == oack) {
2225         /* duplicated ack or window probe */
2226         if (nwin == owin) {
2227             /* duplicated ack, add dup ack count due to whql test up to 1 */
2228             chain->stat.dup_ack++;
2229             return RSC_FINAL;
2230         } else {
2231             /* Coalesce window update */
2232             o_tcp->th_win = n_tcp->th_win;
2233             chain->stat.win_update++;
2234             return RSC_COALESCE;
2235         }
2236     } else {
2237         /* pure ack, go to 'C', finalize*/
2238         chain->stat.pure_ack++;
2239         return RSC_FINAL;
2240     }
2241 }
2242 
2243 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2244                                             VirtioNetRscSeg *seg,
2245                                             const uint8_t *buf,
2246                                             VirtioNetRscUnit *n_unit)
2247 {
2248     void *data;
2249     uint16_t o_ip_len;
2250     uint32_t nseq, oseq;
2251     VirtioNetRscUnit *o_unit;
2252 
2253     o_unit = &seg->unit;
2254     o_ip_len = read_unit_ip_len(o_unit);
2255     nseq = htonl(n_unit->tcp->th_seq);
2256     oseq = htonl(o_unit->tcp->th_seq);
2257 
2258     /* out of order or retransmitted. */
2259     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2260         chain->stat.data_out_of_win++;
2261         return RSC_FINAL;
2262     }
2263 
2264     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2265     if (nseq == oseq) {
2266         if ((o_unit->payload == 0) && n_unit->payload) {
2267             /* From no payload to payload, normal case, not a dup ack or etc */
2268             chain->stat.data_after_pure_ack++;
2269             goto coalesce;
2270         } else {
2271             return virtio_net_rsc_handle_ack(chain, seg, buf,
2272                                              n_unit->tcp, o_unit->tcp);
2273         }
2274     } else if ((nseq - oseq) != o_unit->payload) {
2275         /* Not a consistent packet, out of order */
2276         chain->stat.data_out_of_order++;
2277         return RSC_FINAL;
2278     } else {
2279 coalesce:
2280         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2281             chain->stat.over_size++;
2282             return RSC_FINAL;
2283         }
2284 
2285         /* Here comes the right data, the payload length in v4/v6 is different,
2286            so use the field value to update and record the new data len */
2287         o_unit->payload += n_unit->payload; /* update new data len */
2288 
2289         /* update field in ip header */
2290         write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);
2291 
2292         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2293            for windows guest, while this may change the behavior for linux
2294            guest (only if it uses RSC feature). */
2295         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2296 
2297         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2298         o_unit->tcp->th_win = n_unit->tcp->th_win;
2299 
2300         memmove(seg->buf + seg->size, data, n_unit->payload);
2301         seg->size += n_unit->payload;
2302         seg->packets++;
2303         chain->stat.coalesced++;
2304         return RSC_COALESCE;
2305     }
2306 }
2307 
2308 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2309                                         VirtioNetRscSeg *seg,
2310                                         const uint8_t *buf, size_t size,
2311                                         VirtioNetRscUnit *unit)
2312 {
2313     struct ip_header *ip1, *ip2;
2314 
2315     ip1 = (struct ip_header *)(unit->ip);
2316     ip2 = (struct ip_header *)(seg->unit.ip);
2317     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2318         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2319         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2320         chain->stat.no_match++;
2321         return RSC_NO_MATCH;
2322     }
2323 
2324     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2325 }
2326 
2327 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2328                                         VirtioNetRscSeg *seg,
2329                                         const uint8_t *buf, size_t size,
2330                                         VirtioNetRscUnit *unit)
2331 {
2332     struct ip6_header *ip1, *ip2;
2333 
2334     ip1 = (struct ip6_header *)(unit->ip);
2335     ip2 = (struct ip6_header *)(seg->unit.ip);
2336     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2337         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2338         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2339         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2340             chain->stat.no_match++;
2341             return RSC_NO_MATCH;
2342     }
2343 
2344     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2345 }
2346 
2347 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2348  * to prevent out of order */
2349 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2350                                          struct tcp_header *tcp)
2351 {
2352     uint16_t tcp_hdr;
2353     uint16_t tcp_flag;
2354 
2355     tcp_flag = htons(tcp->th_offset_flags);
2356     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2357     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2358     if (tcp_flag & TH_SYN) {
2359         chain->stat.tcp_syn++;
2360         return RSC_BYPASS;
2361     }
2362 
2363     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2364         chain->stat.tcp_ctrl_drain++;
2365         return RSC_FINAL;
2366     }
2367 
2368     if (tcp_hdr > sizeof(struct tcp_header)) {
2369         chain->stat.tcp_all_opt++;
2370         return RSC_FINAL;
2371     }
2372 
2373     return RSC_CANDIDATE;
2374 }
2375 
2376 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2377                                          NetClientState *nc,
2378                                          const uint8_t *buf, size_t size,
2379                                          VirtioNetRscUnit *unit)
2380 {
2381     int ret;
2382     VirtioNetRscSeg *seg, *nseg;
2383 
2384     if (QTAILQ_EMPTY(&chain->buffers)) {
2385         chain->stat.empty_cache++;
2386         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2387         timer_mod(chain->drain_timer,
2388               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2389         return size;
2390     }
2391 
2392     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2393         if (chain->proto == ETH_P_IP) {
2394             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2395         } else {
2396             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2397         }
2398 
2399         if (ret == RSC_FINAL) {
2400             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2401                 /* Send failed */
2402                 chain->stat.final_failed++;
2403                 return 0;
2404             }
2405 
2406             /* Send current packet */
2407             return virtio_net_do_receive(nc, buf, size);
2408         } else if (ret == RSC_NO_MATCH) {
2409             continue;
2410         } else {
2411             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2412             seg->is_coalesced = 1;
2413             return size;
2414         }
2415     }
2416 
2417     chain->stat.no_match_cache++;
2418     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2419     return size;
2420 }
2421 
2422 /* Drain a connection data, this is to avoid out of order segments */
2423 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2424                                         NetClientState *nc,
2425                                         const uint8_t *buf, size_t size,
2426                                         uint16_t ip_start, uint16_t ip_size,
2427                                         uint16_t tcp_port)
2428 {
2429     VirtioNetRscSeg *seg, *nseg;
2430     uint32_t ppair1, ppair2;
2431 
2432     ppair1 = *(uint32_t *)(buf + tcp_port);
2433     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2434         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2435         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2436             || (ppair1 != ppair2)) {
2437             continue;
2438         }
2439         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2440             chain->stat.drain_failed++;
2441         }
2442 
2443         break;
2444     }
2445 
2446     return virtio_net_do_receive(nc, buf, size);
2447 }
2448 
2449 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2450                                             struct ip_header *ip,
2451                                             const uint8_t *buf, size_t size)
2452 {
2453     uint16_t ip_len;
2454 
2455     /* Not an ipv4 packet */
2456     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2457         chain->stat.ip_option++;
2458         return RSC_BYPASS;
2459     }
2460 
2461     /* Don't handle packets with ip option */
2462     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2463         chain->stat.ip_option++;
2464         return RSC_BYPASS;
2465     }
2466 
2467     if (ip->ip_p != IPPROTO_TCP) {
2468         chain->stat.bypass_not_tcp++;
2469         return RSC_BYPASS;
2470     }
2471 
2472     /* Don't handle packets with ip fragment */
2473     if (!(htons(ip->ip_off) & IP_DF)) {
2474         chain->stat.ip_frag++;
2475         return RSC_BYPASS;
2476     }
2477 
2478     /* Don't handle packets with ecn flag */
2479     if (IPTOS_ECN(ip->ip_tos)) {
2480         chain->stat.ip_ecn++;
2481         return RSC_BYPASS;
2482     }
2483 
2484     ip_len = htons(ip->ip_len);
2485     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2486         || ip_len > (size - chain->n->guest_hdr_len -
2487                      sizeof(struct eth_header))) {
2488         chain->stat.ip_hacked++;
2489         return RSC_BYPASS;
2490     }
2491 
2492     return RSC_CANDIDATE;
2493 }
2494 
2495 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2496                                       NetClientState *nc,
2497                                       const uint8_t *buf, size_t size)
2498 {
2499     int32_t ret;
2500     uint16_t hdr_len;
2501     VirtioNetRscUnit unit;
2502 
2503     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2504 
2505     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2506         + sizeof(struct tcp_header))) {
2507         chain->stat.bypass_not_tcp++;
2508         return virtio_net_do_receive(nc, buf, size);
2509     }
2510 
2511     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2512     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2513         != RSC_CANDIDATE) {
2514         return virtio_net_do_receive(nc, buf, size);
2515     }
2516 
2517     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2518     if (ret == RSC_BYPASS) {
2519         return virtio_net_do_receive(nc, buf, size);
2520     } else if (ret == RSC_FINAL) {
2521         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2522                 ((hdr_len + sizeof(struct eth_header)) + 12),
2523                 VIRTIO_NET_IP4_ADDR_SIZE,
2524                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2525     }
2526 
2527     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2528 }
2529 
2530 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2531                                             struct ip6_header *ip6,
2532                                             const uint8_t *buf, size_t size)
2533 {
2534     uint16_t ip_len;
2535 
2536     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2537         != IP_HEADER_VERSION_6) {
2538         return RSC_BYPASS;
2539     }
2540 
2541     /* Both option and protocol is checked in this */
2542     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2543         chain->stat.bypass_not_tcp++;
2544         return RSC_BYPASS;
2545     }
2546 
2547     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2548     if (ip_len < sizeof(struct tcp_header) ||
2549         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2550                   - sizeof(struct ip6_header))) {
2551         chain->stat.ip_hacked++;
2552         return RSC_BYPASS;
2553     }
2554 
2555     /* Don't handle packets with ecn flag */
2556     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2557         chain->stat.ip_ecn++;
2558         return RSC_BYPASS;
2559     }
2560 
2561     return RSC_CANDIDATE;
2562 }
2563 
2564 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2565                                       const uint8_t *buf, size_t size)
2566 {
2567     int32_t ret;
2568     uint16_t hdr_len;
2569     VirtioNetRscChain *chain;
2570     VirtioNetRscUnit unit;
2571 
2572     chain = opq;
2573     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2574 
2575     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2576         + sizeof(tcp_header))) {
2577         return virtio_net_do_receive(nc, buf, size);
2578     }
2579 
2580     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2581     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2582                                                  unit.ip, buf, size)) {
2583         return virtio_net_do_receive(nc, buf, size);
2584     }
2585 
2586     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2587     if (ret == RSC_BYPASS) {
2588         return virtio_net_do_receive(nc, buf, size);
2589     } else if (ret == RSC_FINAL) {
2590         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2591                 ((hdr_len + sizeof(struct eth_header)) + 8),
2592                 VIRTIO_NET_IP6_ADDR_SIZE,
2593                 hdr_len + sizeof(struct eth_header)
2594                 + sizeof(struct ip6_header));
2595     }
2596 
2597     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2598 }
2599 
2600 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2601                                                       NetClientState *nc,
2602                                                       uint16_t proto)
2603 {
2604     VirtioNetRscChain *chain;
2605 
2606     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2607         return NULL;
2608     }
2609 
2610     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2611         if (chain->proto == proto) {
2612             return chain;
2613         }
2614     }
2615 
2616     chain = g_malloc(sizeof(*chain));
2617     chain->n = n;
2618     chain->proto = proto;
2619     if (proto == (uint16_t)ETH_P_IP) {
2620         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2621         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2622     } else {
2623         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2624         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2625     }
2626     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2627                                       virtio_net_rsc_purge, chain);
2628     memset(&chain->stat, 0, sizeof(chain->stat));
2629 
2630     QTAILQ_INIT(&chain->buffers);
2631     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2632 
2633     return chain;
2634 }
2635 
2636 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2637                                       const uint8_t *buf,
2638                                       size_t size)
2639 {
2640     uint16_t proto;
2641     VirtioNetRscChain *chain;
2642     struct eth_header *eth;
2643     VirtIONet *n;
2644 
2645     n = qemu_get_nic_opaque(nc);
2646     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2647         return virtio_net_do_receive(nc, buf, size);
2648     }
2649 
2650     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2651     proto = htons(eth->h_proto);
2652 
2653     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2654     if (chain) {
2655         chain->stat.received++;
2656         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2657             return virtio_net_rsc_receive4(chain, nc, buf, size);
2658         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2659             return virtio_net_rsc_receive6(chain, nc, buf, size);
2660         }
2661     }
2662     return virtio_net_do_receive(nc, buf, size);
2663 }
2664 
2665 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2666                                   size_t size)
2667 {
2668     VirtIONet *n = qemu_get_nic_opaque(nc);
2669     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2670         return virtio_net_rsc_receive(nc, buf, size);
2671     } else {
2672         return virtio_net_do_receive(nc, buf, size);
2673     }
2674 }
2675 
2676 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2677 
2678 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2679 {
2680     VirtIONet *n = qemu_get_nic_opaque(nc);
2681     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2682     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2683     int ret;
2684 
2685     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2686     virtio_notify(vdev, q->tx_vq);
2687 
2688     g_free(q->async_tx.elem);
2689     q->async_tx.elem = NULL;
2690 
2691     virtio_queue_set_notification(q->tx_vq, 1);
2692     ret = virtio_net_flush_tx(q);
2693     if (ret >= n->tx_burst) {
2694         /*
2695          * the flush has been stopped by tx_burst
2696          * we will not receive notification for the
2697          * remainining part, so re-schedule
2698          */
2699         virtio_queue_set_notification(q->tx_vq, 0);
2700         if (q->tx_bh) {
2701             replay_bh_schedule_event(q->tx_bh);
2702         } else {
2703             timer_mod(q->tx_timer,
2704                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2705         }
2706         q->tx_waiting = 1;
2707     }
2708 }
2709 
2710 /* TX */
/*
 * Pop pending elements from the TX virtqueue of @q and pass each one to
 * the network backend with qemu_sendv_packet_async().
 *
 * Returns:
 *   - the number of elements completed by this call (the loop stops once
 *     that count reaches n->tx_burst); this is 0 when DRIVER_OK is not set
 *     or when an earlier element is still parked in q->async_tx.elem;
 *   - -EBUSY when qemu_sendv_packet_async() returned 0; the element is
 *     kept in q->async_tx.elem and virtio_net_tx_complete was registered
 *     as the completion callback;
 *   - -EINVAL when an element was malformed; virtio_error() has been
 *     called and the element detached from the queue.
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous send has not completed yet: keep notifications off */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr vhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            goto detach;
        }

        if (n->needs_vnet_hdr_swap) {
            /*
             * Copy the header out of the guest buffers, swap it, and build
             * a new iovec (sg2) whose first entry is the swapped copy and
             * whose remaining entries cover the data after the header.
             */
            if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
                sizeof(vhdr)) {
                virtio_error(vdev, "virtio-net header incorrect");
                goto detach;
            }
            virtio_net_hdr_swap(vdev, &vhdr);
            sg2[0].iov_base = &vhdr;
            sg2[0].iov_len = sizeof(vhdr);
            out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
                               sizeof(vhdr), -1);
            /* All remaining sg2 slots were used: complete without sending */
            if (out_num == VIRTQUEUE_MAX_SIZE) {
                goto drop;
            }
            out_num += 1;
            out_sg = sg2;
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header is invalid");
                goto detach;
            }
            /* First host_hdr_len bytes, then everything past guest_hdr_len */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;

            if (out_num < 1) {
                virtio_error(vdev, "virtio-net nothing to send");
                goto detach;
            }
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Keep the element until virtio_net_tx_complete() runs */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        /* Return the element to the guest; also reached via "goto drop" */
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;

detach:
    /* Malformed element: detach it without pushing it as used */
    virtqueue_detach_element(q->tx_vq, elem, 0);
    g_free(elem);
    return -EINVAL;
}
2812 
2813 static void virtio_net_tx_timer(void *opaque);
2814 
2815 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2816 {
2817     VirtIONet *n = VIRTIO_NET(vdev);
2818     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2819 
2820     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2821         virtio_net_drop_tx_queue_data(vdev, vq);
2822         return;
2823     }
2824 
2825     /* This happens when device was stopped but VCPU wasn't. */
2826     if (!vdev->vm_running) {
2827         q->tx_waiting = 1;
2828         return;
2829     }
2830 
2831     if (q->tx_waiting) {
2832         /* We already have queued packets, immediately flush */
2833         timer_del(q->tx_timer);
2834         virtio_net_tx_timer(q);
2835     } else {
2836         /* re-arm timer to flush it (and more) on next tick */
2837         timer_mod(q->tx_timer,
2838                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2839         q->tx_waiting = 1;
2840         virtio_queue_set_notification(vq, 0);
2841     }
2842 }
2843 
2844 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2845 {
2846     VirtIONet *n = VIRTIO_NET(vdev);
2847     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2848 
2849     if (unlikely(n->vhost_started)) {
2850         return;
2851     }
2852 
2853     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2854         virtio_net_drop_tx_queue_data(vdev, vq);
2855         return;
2856     }
2857 
2858     if (unlikely(q->tx_waiting)) {
2859         return;
2860     }
2861     q->tx_waiting = 1;
2862     /* This happens when device was stopped but VCPU wasn't. */
2863     if (!vdev->vm_running) {
2864         return;
2865     }
2866     virtio_queue_set_notification(vq, 0);
2867     replay_bh_schedule_event(q->tx_bh);
2868 }
2869 
2870 static void virtio_net_tx_timer(void *opaque)
2871 {
2872     VirtIONetQueue *q = opaque;
2873     VirtIONet *n = q->n;
2874     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2875     int ret;
2876 
2877     /* This happens when device was stopped but BH wasn't. */
2878     if (!vdev->vm_running) {
2879         /* Make sure tx waiting is set, so we'll run when restarted. */
2880         assert(q->tx_waiting);
2881         return;
2882     }
2883 
2884     q->tx_waiting = 0;
2885 
2886     /* Just in case the driver is not ready on more */
2887     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2888         return;
2889     }
2890 
2891     ret = virtio_net_flush_tx(q);
2892     if (ret == -EBUSY || ret == -EINVAL) {
2893         return;
2894     }
2895     /*
2896      * If we flush a full burst of packets, assume there are
2897      * more coming and immediately rearm
2898      */
2899     if (ret >= n->tx_burst) {
2900         q->tx_waiting = 1;
2901         timer_mod(q->tx_timer,
2902                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2903         return;
2904     }
2905     /*
2906      * If less than a full burst, re-enable notification and flush
2907      * anything that may have come in while we weren't looking.  If
2908      * we find something, assume the guest is still active and rearm
2909      */
2910     virtio_queue_set_notification(q->tx_vq, 1);
2911     ret = virtio_net_flush_tx(q);
2912     if (ret > 0) {
2913         virtio_queue_set_notification(q->tx_vq, 0);
2914         q->tx_waiting = 1;
2915         timer_mod(q->tx_timer,
2916                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2917     }
2918 }
2919 
2920 static void virtio_net_tx_bh(void *opaque)
2921 {
2922     VirtIONetQueue *q = opaque;
2923     VirtIONet *n = q->n;
2924     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2925     int32_t ret;
2926 
2927     /* This happens when device was stopped but BH wasn't. */
2928     if (!vdev->vm_running) {
2929         /* Make sure tx waiting is set, so we'll run when restarted. */
2930         assert(q->tx_waiting);
2931         return;
2932     }
2933 
2934     q->tx_waiting = 0;
2935 
2936     /* Just in case the driver is not ready on more */
2937     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2938         return;
2939     }
2940 
2941     ret = virtio_net_flush_tx(q);
2942     if (ret == -EBUSY || ret == -EINVAL) {
2943         return; /* Notification re-enable handled by tx_complete or device
2944                  * broken */
2945     }
2946 
2947     /* If we flush a full burst of packets, assume there are
2948      * more coming and immediately reschedule */
2949     if (ret >= n->tx_burst) {
2950         replay_bh_schedule_event(q->tx_bh);
2951         q->tx_waiting = 1;
2952         return;
2953     }
2954 
2955     /* If less than a full burst, re-enable notification and flush
2956      * anything that may have come in while we weren't looking.  If
2957      * we find something, assume the guest is still active and reschedule */
2958     virtio_queue_set_notification(q->tx_vq, 1);
2959     ret = virtio_net_flush_tx(q);
2960     if (ret == -EINVAL) {
2961         return;
2962     } else if (ret > 0) {
2963         virtio_queue_set_notification(q->tx_vq, 0);
2964         replay_bh_schedule_event(q->tx_bh);
2965         q->tx_waiting = 1;
2966     }
2967 }
2968 
2969 static void virtio_net_add_queue(VirtIONet *n, int index)
2970 {
2971     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2972 
2973     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2974                                            virtio_net_handle_rx);
2975 
2976     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2977         n->vqs[index].tx_vq =
2978             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2979                              virtio_net_handle_tx_timer);
2980         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2981                                               virtio_net_tx_timer,
2982                                               &n->vqs[index]);
2983     } else {
2984         n->vqs[index].tx_vq =
2985             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2986                              virtio_net_handle_tx_bh);
2987         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2988                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2989     }
2990 
2991     n->vqs[index].tx_waiting = 0;
2992     n->vqs[index].n = n;
2993 }
2994 
2995 static void virtio_net_del_queue(VirtIONet *n, int index)
2996 {
2997     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2998     VirtIONetQueue *q = &n->vqs[index];
2999     NetClientState *nc = qemu_get_subqueue(n->nic, index);
3000 
3001     qemu_purge_queued_packets(nc);
3002 
3003     virtio_del_queue(vdev, index * 2);
3004     if (q->tx_timer) {
3005         timer_free(q->tx_timer);
3006         q->tx_timer = NULL;
3007     } else {
3008         qemu_bh_delete(q->tx_bh);
3009         q->tx_bh = NULL;
3010     }
3011     q->tx_waiting = 0;
3012     virtio_del_queue(vdev, index * 2 + 1);
3013 }
3014 
3015 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
3016 {
3017     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3018     int old_num_queues = virtio_get_num_queues(vdev);
3019     int new_num_queues = new_max_queue_pairs * 2 + 1;
3020     int i;
3021 
3022     assert(old_num_queues >= 3);
3023     assert(old_num_queues % 2 == 1);
3024 
3025     if (old_num_queues == new_num_queues) {
3026         return;
3027     }
3028 
3029     /*
3030      * We always need to remove and add ctrl vq if
3031      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3032      * and then we only enter one of the following two loops.
3033      */
3034     virtio_del_queue(vdev, old_num_queues - 1);
3035 
3036     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3037         /* new_num_queues < old_num_queues */
3038         virtio_net_del_queue(n, i / 2);
3039     }
3040 
3041     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3042         /* new_num_queues > old_num_queues */
3043         virtio_net_add_queue(n, i / 2);
3044     }
3045 
3046     /* add ctrl_vq last */
3047     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3048 }
3049 
3050 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3051 {
3052     int max = multiqueue ? n->max_queue_pairs : 1;
3053 
3054     n->multiqueue = multiqueue;
3055     virtio_net_change_num_queue_pairs(n, max);
3056 
3057     virtio_net_set_queue_pairs(n);
3058 }
3059 
3060 static int virtio_net_post_load_device(void *opaque, int version_id)
3061 {
3062     VirtIONet *n = opaque;
3063     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3064     int i, link_down;
3065 
3066     trace_virtio_net_post_load_device();
3067     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3068                                virtio_vdev_has_feature(vdev,
3069                                                        VIRTIO_F_VERSION_1),
3070                                virtio_vdev_has_feature(vdev,
3071                                                        VIRTIO_NET_F_HASH_REPORT));
3072 
3073     /* MAC_TABLE_ENTRIES may be different from the saved image */
3074     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3075         n->mac_table.in_use = 0;
3076     }
3077 
3078     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3079         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3080     }
3081 
3082     /*
3083      * curr_guest_offloads will be later overwritten by the
3084      * virtio_set_features_nocheck call done from the virtio_load.
3085      * Here we make sure it is preserved and restored accordingly
3086      * in the virtio_net_post_load_virtio callback.
3087      */
3088     n->saved_guest_offloads = n->curr_guest_offloads;
3089 
3090     virtio_net_set_queue_pairs(n);
3091 
3092     /* Find the first multicast entry in the saved MAC filter */
3093     for (i = 0; i < n->mac_table.in_use; i++) {
3094         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3095             break;
3096         }
3097     }
3098     n->mac_table.first_multi = i;
3099 
3100     /* nc.link_down can't be migrated, so infer link_down according
3101      * to link status bit in n->status */
3102     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3103     for (i = 0; i < n->max_queue_pairs; i++) {
3104         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3105     }
3106 
3107     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3108         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3109         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3110                                   QEMU_CLOCK_VIRTUAL,
3111                                   virtio_net_announce_timer, n);
3112         if (n->announce_timer.round) {
3113             timer_mod(n->announce_timer.tm,
3114                       qemu_clock_get_ms(n->announce_timer.type));
3115         } else {
3116             qemu_announce_timer_del(&n->announce_timer, false);
3117         }
3118     }
3119 
3120     virtio_net_commit_rss_config(n);
3121     return 0;
3122 }
3123 
3124 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3125 {
3126     VirtIONet *n = VIRTIO_NET(vdev);
3127     /*
3128      * The actual needed state is now in saved_guest_offloads,
3129      * see virtio_net_post_load_device for detail.
3130      * Restore it back and apply the desired offloads.
3131      */
3132     n->curr_guest_offloads = n->saved_guest_offloads;
3133     if (peer_has_vnet_hdr(n)) {
3134         virtio_net_apply_guest_offloads(n);
3135     }
3136 
3137     return 0;
3138 }
3139 
/*
 * Migration description for a single VirtIONetQueue: only its tx_waiting
 * field is transferred, as a 32-bit unsigned value.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
3148 
3149 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3150 {
3151     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3152 }
3153 
3154 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3155 {
3156     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3157                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3158 }
3159 
3160 static bool mac_table_fits(void *opaque, int version_id)
3161 {
3162     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3163 }
3164 
/* VMState field test: logical negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
3169 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* Device being migrated; read by every hook below.
     * NOTE(review): presumably filled in by VMSTATE_WITH_TMP - confirm. */
    VirtIONet      *parent;
    /* parent->vqs + 1, set by virtio_net_tx_waiting_pre_save */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queue_pairs - 1, or 0 when curr_queue_pairs is 0 */
    uint16_t        curr_queue_pairs_1;
    /* Copy of parent->has_ufo on save; checked against the peer on load */
    uint8_t         has_ufo;
    /* Copy of parent->has_vnet_hdr on save; checked against the peer on load */
    uint32_t        has_vnet_hdr;
};
3180 
3181 /* The 2nd and subsequent tx_waiting flags are loaded later than
3182  * the 1st entry in the queue_pairs and only if there's more than one
3183  * entry.  We use the tmp mechanism to calculate a temporary
3184  * pointer and count and also validate the count.
3185  */
3186 
3187 static int virtio_net_tx_waiting_pre_save(void *opaque)
3188 {
3189     struct VirtIONetMigTmp *tmp = opaque;
3190 
3191     tmp->vqs_1 = tmp->parent->vqs + 1;
3192     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3193     if (tmp->parent->curr_queue_pairs == 0) {
3194         tmp->curr_queue_pairs_1 = 0;
3195     }
3196 
3197     return 0;
3198 }
3199 
3200 static int virtio_net_tx_waiting_pre_load(void *opaque)
3201 {
3202     struct VirtIONetMigTmp *tmp = opaque;
3203 
3204     /* Reuse the pointer setup from save */
3205     virtio_net_tx_waiting_pre_save(opaque);
3206 
3207     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3208         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3209             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3210 
3211         return -EINVAL;
3212     }
3213 
3214     return 0; /* all good */
3215 }
3216 
/*
 * Tmp-based section carrying tx_waiting for the queues starting at vqs_1.
 * The element count is curr_queue_pairs_1; both are computed by the
 * pre_save / pre_load hooks, and each element uses
 * vmstate_virtio_net_queue_tx_waiting.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3229 
3230 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3231  * flag set we need to check that we have it
3232  */
3233 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3234 {
3235     struct VirtIONetMigTmp *tmp = opaque;
3236 
3237     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3238         error_report("virtio-net: saved image requires TUN_F_UFO support");
3239         return -EINVAL;
3240     }
3241 
3242     return 0;
3243 }
3244 
3245 static int virtio_net_ufo_pre_save(void *opaque)
3246 {
3247     struct VirtIONetMigTmp *tmp = opaque;
3248 
3249     tmp->has_ufo = tmp->parent->has_ufo;
3250 
3251     return 0;
3252 }
3253 
/*
 * Tmp-based section carrying the single has_ufo byte; pre_save fills it
 * from the device and post_load checks it against the local peer.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3263 
3264 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3265  * flag set we need to check that we have it
3266  */
3267 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3268 {
3269     struct VirtIONetMigTmp *tmp = opaque;
3270 
3271     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3272         error_report("virtio-net: saved image requires vnet_hdr=on");
3273         return -EINVAL;
3274     }
3275 
3276     return 0;
3277 }
3278 
3279 static int virtio_net_vnet_pre_save(void *opaque)
3280 {
3281     struct VirtIONetMigTmp *tmp = opaque;
3282 
3283     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3284 
3285     return 0;
3286 }
3287 
/*
 * Tmp-based section carrying has_vnet_hdr as a 32-bit value; pre_save
 * fills it from the device and post_load checks it against the local peer.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3297 
3298 static bool virtio_net_rss_needed(void *opaque)
3299 {
3300     return VIRTIO_NET(opaque)->rss_data.enabled;
3301 }
3302 
/*
 * Subsection with the RSS state (rss_data).  It is only used when
 * virtio_net_rss_needed() returns true, i.e. when rss_data.enabled is set.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Table of uint16_t entries sized by indirections_len (sent above) */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3323 
/*
 * Device-specific migration state of virtio-net.
 *
 * The field list below defines the order of the data in the stream.
 * virtio_net_post_load_device() runs after the fields have been loaded,
 * and the RSS state travels in the vmstate_virtio_net_rss subsection.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue; the others follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr is only checked against the local peer on load */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo is only checked against the local peer on load */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The next two fields are present only when max_queue_pairs > 1 */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        /* tx_waiting of the second and subsequent queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3379 
/*
 * NetClientInfo callback table for the virtio-net NIC: wires the generic
 * net layer hooks to the virtio-net implementations in this file.
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3389 
3390 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3391 {
3392     VirtIONet *n = VIRTIO_NET(vdev);
3393     NetClientState *nc;
3394     assert(n->vhost_started);
3395     if (!n->multiqueue && idx == 2) {
3396         /* Must guard against invalid features and bogus queue index
3397          * from being set by malicious guest, or penetrated through
3398          * buggy migration stream.
3399          */
3400         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3401             qemu_log_mask(LOG_GUEST_ERROR,
3402                           "%s: bogus vq index ignored\n", __func__);
3403             return false;
3404         }
3405         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3406     } else {
3407         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3408     }
3409     /*
3410      * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3411      * as the macro of configure interrupt's IDX, If this driver does not
3412      * support, the function will return false
3413      */
3414 
3415     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3416         return vhost_net_config_pending(get_vhost_net(nc->peer));
3417     }
3418     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3419 }
3420 
3421 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3422                                            bool mask)
3423 {
3424     VirtIONet *n = VIRTIO_NET(vdev);
3425     NetClientState *nc;
3426     assert(n->vhost_started);
3427     if (!n->multiqueue && idx == 2) {
3428         /* Must guard against invalid features and bogus queue index
3429          * from being set by malicious guest, or penetrated through
3430          * buggy migration stream.
3431          */
3432         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3433             qemu_log_mask(LOG_GUEST_ERROR,
3434                           "%s: bogus vq index ignored\n", __func__);
3435             return;
3436         }
3437         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3438     } else {
3439         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3440     }
3441     /*
3442      *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3443      * as the macro of configure interrupt's IDX, If this driver does not
3444      * support, the function will return
3445      */
3446 
3447     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3448         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3449         return;
3450     }
3451     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3452 }
3453 
3454 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3455 {
3456     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3457 
3458     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3459 }
3460 
3461 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3462                                    const char *type)
3463 {
3464     /*
3465      * The name can be NULL, the netclient name will be type.x.
3466      */
3467     assert(type != NULL);
3468 
3469     g_free(n->netclient_name);
3470     g_free(n->netclient_type);
3471     n->netclient_name = g_strdup(name);
3472     n->netclient_type = g_strdup(type);
3473 }
3474 
3475 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3476 {
3477     HotplugHandler *hotplug_ctrl;
3478     PCIDevice *pci_dev;
3479     Error *err = NULL;
3480 
3481     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3482     if (hotplug_ctrl) {
3483         pci_dev = PCI_DEVICE(dev);
3484         pci_dev->partially_hotplugged = true;
3485         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3486         if (err) {
3487             error_report_err(err);
3488             return false;
3489         }
3490     } else {
3491         return false;
3492     }
3493     return true;
3494 }
3495 
3496 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3497                                     Error **errp)
3498 {
3499     Error *err = NULL;
3500     HotplugHandler *hotplug_ctrl;
3501     PCIDevice *pdev = PCI_DEVICE(dev);
3502     BusState *primary_bus;
3503 
3504     if (!pdev->partially_hotplugged) {
3505         return true;
3506     }
3507     primary_bus = dev->parent_bus;
3508     if (!primary_bus) {
3509         error_setg(errp, "virtio_net: couldn't find primary bus");
3510         return false;
3511     }
3512     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3513     qatomic_set(&n->failover_primary_hidden, false);
3514     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3515     if (hotplug_ctrl) {
3516         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3517         if (err) {
3518             goto out;
3519         }
3520         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3521     }
3522     pdev->partially_hotplugged = false;
3523 
3524 out:
3525     error_propagate(errp, err);
3526     return !err;
3527 }
3528 
3529 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3530 {
3531     bool should_be_hidden;
3532     Error *err = NULL;
3533     DeviceState *dev = failover_find_primary_device(n);
3534 
3535     if (!dev) {
3536         return;
3537     }
3538 
3539     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3540 
3541     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3542         if (failover_unplug_primary(n, dev)) {
3543             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3544             qapi_event_send_unplug_primary(dev->id);
3545             qatomic_set(&n->failover_primary_hidden, true);
3546         } else {
3547             warn_report("couldn't unplug primary device");
3548         }
3549     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3550         /* We already unplugged the device let's plug it back */
3551         if (!failover_replug_primary(n, dev, &err)) {
3552             if (err) {
3553                 error_report_err(err);
3554             }
3555         }
3556     }
3557 }
3558 
3559 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3560                                                MigrationEvent *e, Error **errp)
3561 {
3562     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3563     virtio_net_handle_migration_primary(n, e);
3564     return 0;
3565 }
3566 
3567 static bool failover_hide_primary_device(DeviceListener *listener,
3568                                          const QDict *device_opts,
3569                                          bool from_json,
3570                                          Error **errp)
3571 {
3572     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3573     const char *standby_id;
3574 
3575     if (!device_opts) {
3576         return false;
3577     }
3578 
3579     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3580         return false;
3581     }
3582 
3583     if (!qdict_haskey(device_opts, "id")) {
3584         error_setg(errp, "Device with failover_pair_id needs to have id");
3585         return false;
3586     }
3587 
3588     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3589     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3590         return false;
3591     }
3592 
3593     /*
3594      * The hide helper can be called several times for a given device.
3595      * Check there is only one primary for a virtio-net device but
3596      * don't duplicate the qdict several times if it's called for the same
3597      * device.
3598      */
3599     if (n->primary_opts) {
3600         const char *old, *new;
3601         /* devices with failover_pair_id always have an id */
3602         old = qdict_get_str(n->primary_opts, "id");
3603         new = qdict_get_str(device_opts, "id");
3604         if (strcmp(old, new) != 0) {
3605             error_setg(errp, "Cannot attach more than one primary device to "
3606                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3607             return false;
3608         }
3609     } else {
3610         n->primary_opts = qdict_clone_shallow(device_opts);
3611         n->primary_opts_from_json = from_json;
3612     }
3613 
3614     /* failover_primary_hidden is set during feature negotiation */
3615     return qatomic_read(&n->failover_primary_hidden);
3616 }
3617 
3618 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3619 {
3620     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3621     VirtIONet *n = VIRTIO_NET(dev);
3622     NetClientState *nc;
3623     int i;
3624 
3625     if (n->net_conf.mtu) {
3626         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3627     }
3628 
3629     if (n->net_conf.duplex_str) {
3630         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3631             n->net_conf.duplex = DUPLEX_HALF;
3632         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3633             n->net_conf.duplex = DUPLEX_FULL;
3634         } else {
3635             error_setg(errp, "'duplex' must be 'half' or 'full'");
3636             return;
3637         }
3638         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3639     } else {
3640         n->net_conf.duplex = DUPLEX_UNKNOWN;
3641     }
3642 
3643     if (n->net_conf.speed < SPEED_UNKNOWN) {
3644         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3645         return;
3646     }
3647     if (n->net_conf.speed >= 0) {
3648         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3649     }
3650 
3651     if (n->failover) {
3652         n->primary_listener.hide_device = failover_hide_primary_device;
3653         qatomic_set(&n->failover_primary_hidden, true);
3654         device_listener_register(&n->primary_listener);
3655         migration_add_notifier(&n->migration_state,
3656                                virtio_net_migration_state_notifier);
3657         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3658     }
3659 
3660     virtio_net_set_config_size(n, n->host_features);
3661     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3662 
3663     /*
3664      * We set a lower limit on RX queue size to what it always was.
3665      * Guests that want a smaller ring can always resize it without
3666      * help from us (using virtio 1 and up).
3667      */
3668     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3669         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3670         !is_power_of_2(n->net_conf.rx_queue_size)) {
3671         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3672                    "must be a power of 2 between %d and %d.",
3673                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3674                    VIRTQUEUE_MAX_SIZE);
3675         virtio_cleanup(vdev);
3676         return;
3677     }
3678 
3679     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3680         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3681         !is_power_of_2(n->net_conf.tx_queue_size)) {
3682         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3683                    "must be a power of 2 between %d and %d",
3684                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3685                    virtio_net_max_tx_queue_size(n));
3686         virtio_cleanup(vdev);
3687         return;
3688     }
3689 
3690     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3691 
3692     /*
3693      * Figure out the datapath queue pairs since the backend could
3694      * provide control queue via peers as well.
3695      */
3696     if (n->nic_conf.peers.queues) {
3697         for (i = 0; i < n->max_ncs; i++) {
3698             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3699                 ++n->max_queue_pairs;
3700             }
3701         }
3702     }
3703     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3704 
3705     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3706         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3707                    "must be a positive integer less than %d.",
3708                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3709         virtio_cleanup(vdev);
3710         return;
3711     }
3712     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3713     n->curr_queue_pairs = 1;
3714     n->tx_timeout = n->net_conf.txtimer;
3715 
3716     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3717                        && strcmp(n->net_conf.tx, "bh")) {
3718         warn_report("virtio-net: "
3719                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3720                     n->net_conf.tx);
3721         error_printf("Defaulting to \"bh\"");
3722     }
3723 
3724     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3725                                     n->net_conf.tx_queue_size);
3726 
3727     virtio_net_add_queue(n, 0);
3728 
3729     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3730     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3731     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3732     n->status = VIRTIO_NET_S_LINK_UP;
3733     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3734                               QEMU_CLOCK_VIRTUAL,
3735                               virtio_net_announce_timer, n);
3736     n->announce_timer.round = 0;
3737 
3738     if (n->netclient_type) {
3739         /*
3740          * Happen when virtio_net_set_netclient_name has been called.
3741          */
3742         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3743                               n->netclient_type, n->netclient_name,
3744                               &dev->mem_reentrancy_guard, n);
3745     } else {
3746         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3747                               object_get_typename(OBJECT(dev)), dev->id,
3748                               &dev->mem_reentrancy_guard, n);
3749     }
3750 
3751     for (i = 0; i < n->max_queue_pairs; i++) {
3752         n->nic->ncs[i].do_not_pad = true;
3753     }
3754 
3755     peer_test_vnet_hdr(n);
3756     if (peer_has_vnet_hdr(n)) {
3757         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3758     } else {
3759         n->host_hdr_len = 0;
3760     }
3761 
3762     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3763 
3764     n->vqs[0].tx_waiting = 0;
3765     n->tx_burst = n->net_conf.txburst;
3766     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3767     n->promisc = 1; /* for compatibility */
3768 
3769     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3770 
3771     n->vlans = g_malloc0(MAX_VLAN >> 3);
3772 
3773     nc = qemu_get_queue(n->nic);
3774     nc->rxfilter_notify_enabled = 1;
3775 
3776    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3777         struct virtio_net_config netcfg = {};
3778         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3779         vhost_net_set_config(get_vhost_net(nc->peer),
3780             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3781     }
3782     QTAILQ_INIT(&n->rsc_chains);
3783     n->qdev = dev;
3784 
3785     net_rx_pkt_init(&n->rx_pkt);
3786 
3787     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3788         Error *err = NULL;
3789         if (!virtio_net_load_ebpf(n, &err)) {
3790             /*
3791              * If user explicitly gave QEMU RSS FDs to use, then
3792              * failing to use them must be considered a fatal
3793              * error. If no RSS FDs were provided, QEMU is trying
3794              * eBPF on a "best effort" basis only, so report a
3795              * warning and allow fallback to software RSS.
3796              */
3797             if (n->ebpf_rss_fds) {
3798                 error_propagate(errp, err);
3799             } else {
3800                 warn_report("unable to load eBPF RSS: %s",
3801                             error_get_pretty(err));
3802                 error_free(err);
3803             }
3804         }
3805     }
3806 }
3807 
3808 static void virtio_net_device_unrealize(DeviceState *dev)
3809 {
3810     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3811     VirtIONet *n = VIRTIO_NET(dev);
3812     int i, max_queue_pairs;
3813 
3814     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3815         virtio_net_unload_ebpf(n);
3816     }
3817 
3818     /* This will stop vhost backend if appropriate. */
3819     virtio_net_set_status(vdev, 0);
3820 
3821     g_free(n->netclient_name);
3822     n->netclient_name = NULL;
3823     g_free(n->netclient_type);
3824     n->netclient_type = NULL;
3825 
3826     g_free(n->mac_table.macs);
3827     g_free(n->vlans);
3828 
3829     if (n->failover) {
3830         qobject_unref(n->primary_opts);
3831         device_listener_unregister(&n->primary_listener);
3832         migration_remove_notifier(&n->migration_state);
3833     } else {
3834         assert(n->primary_opts == NULL);
3835     }
3836 
3837     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3838     for (i = 0; i < max_queue_pairs; i++) {
3839         virtio_net_del_queue(n, i);
3840     }
3841     /* delete also control vq */
3842     virtio_del_queue(vdev, max_queue_pairs * 2);
3843     qemu_announce_timer_del(&n->announce_timer, false);
3844     g_free(n->vqs);
3845     qemu_del_nic(n->nic);
3846     virtio_net_rsc_cleanup(n);
3847     g_free(n->rss_data.indirections_table);
3848     net_rx_pkt_uninit(n->rx_pkt);
3849     virtio_cleanup(vdev);
3850 }
3851 
3852 static void virtio_net_reset(VirtIODevice *vdev)
3853 {
3854     VirtIONet *n = VIRTIO_NET(vdev);
3855     int i;
3856 
3857     /* Reset back to compatibility mode */
3858     n->promisc = 1;
3859     n->allmulti = 0;
3860     n->alluni = 0;
3861     n->nomulti = 0;
3862     n->nouni = 0;
3863     n->nobcast = 0;
3864     /* multiqueue is disabled by default */
3865     n->curr_queue_pairs = 1;
3866     timer_del(n->announce_timer.tm);
3867     n->announce_timer.round = 0;
3868     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3869 
3870     /* Flush any MAC and VLAN filter table state */
3871     n->mac_table.in_use = 0;
3872     n->mac_table.first_multi = 0;
3873     n->mac_table.multi_overflow = 0;
3874     n->mac_table.uni_overflow = 0;
3875     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3876     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3877     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3878     memset(n->vlans, 0, MAX_VLAN >> 3);
3879 
3880     /* Flush any async TX */
3881     for (i = 0;  i < n->max_queue_pairs; i++) {
3882         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
3883     }
3884 
3885     virtio_net_disable_rss(n);
3886 }
3887 
3888 static void virtio_net_instance_init(Object *obj)
3889 {
3890     VirtIONet *n = VIRTIO_NET(obj);
3891 
3892     /*
3893      * The default config_size is sizeof(struct virtio_net_config).
3894      * Can be overridden with virtio_net_set_config_size.
3895      */
3896     n->config_size = sizeof(struct virtio_net_config);
3897     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3898                                   "bootindex", "/ethernet-phy@0",
3899                                   DEVICE(n));
3900 
3901     ebpf_rss_init(&n->ebpf_rss);
3902 }
3903 
3904 static int virtio_net_pre_save(void *opaque)
3905 {
3906     VirtIONet *n = opaque;
3907 
3908     /* At this point, backend must be stopped, otherwise
3909      * it might keep writing to memory. */
3910     assert(!n->vhost_started);
3911 
3912     return 0;
3913 }
3914 
3915 static bool primary_unplug_pending(void *opaque)
3916 {
3917     DeviceState *dev = opaque;
3918     DeviceState *primary;
3919     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3920     VirtIONet *n = VIRTIO_NET(vdev);
3921 
3922     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3923         return false;
3924     }
3925     primary = failover_find_primary_device(n);
3926     return primary ? primary->pending_deleted_event : false;
3927 }
3928 
3929 static bool dev_unplug_pending(void *opaque)
3930 {
3931     DeviceState *dev = opaque;
3932     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3933 
3934     return vdc->primary_unplug_pending(dev);
3935 }
3936 
3937 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3938 {
3939     VirtIONet *n = VIRTIO_NET(vdev);
3940     NetClientState *nc;
3941     struct vhost_net *net;
3942 
3943     if (!n->nic) {
3944         return NULL;
3945     }
3946 
3947     nc = qemu_get_queue(n->nic);
3948     if (!nc) {
3949         return NULL;
3950     }
3951 
3952     net = get_vhost_net(nc->peer);
3953     if (!net) {
3954         return NULL;
3955     }
3956 
3957     return &net->dev;
3958 }
3959 
3960 static const VMStateDescription vmstate_virtio_net = {
3961     .name = "virtio-net",
3962     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3963     .version_id = VIRTIO_NET_VM_VERSION,
3964     .fields = (const VMStateField[]) {
3965         VMSTATE_VIRTIO_DEVICE,
3966         VMSTATE_END_OF_LIST()
3967     },
3968     .pre_save = virtio_net_pre_save,
3969     .dev_unplug_pending = dev_unplug_pending,
3970 };
3971 
3972 static Property virtio_net_properties[] = {
3973     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3974                     VIRTIO_NET_F_CSUM, true),
3975     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3976                     VIRTIO_NET_F_GUEST_CSUM, true),
3977     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3978     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3979                     VIRTIO_NET_F_GUEST_TSO4, true),
3980     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3981                     VIRTIO_NET_F_GUEST_TSO6, true),
3982     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3983                     VIRTIO_NET_F_GUEST_ECN, true),
3984     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3985                     VIRTIO_NET_F_GUEST_UFO, true),
3986     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3987                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3988     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3989                     VIRTIO_NET_F_HOST_TSO4, true),
3990     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3991                     VIRTIO_NET_F_HOST_TSO6, true),
3992     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3993                     VIRTIO_NET_F_HOST_ECN, true),
3994     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3995                     VIRTIO_NET_F_HOST_UFO, true),
3996     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3997                     VIRTIO_NET_F_MRG_RXBUF, true),
3998     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3999                     VIRTIO_NET_F_STATUS, true),
4000     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
4001                     VIRTIO_NET_F_CTRL_VQ, true),
4002     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
4003                     VIRTIO_NET_F_CTRL_RX, true),
4004     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4005                     VIRTIO_NET_F_CTRL_VLAN, true),
4006     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4007                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4008     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4009                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4010     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4011                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4012     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4013     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4014                     VIRTIO_NET_F_RSS, false),
4015     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4016                     VIRTIO_NET_F_HASH_REPORT, false),
4017     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4018                       ebpf_rss_fds, qdev_prop_string, char*),
4019     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4020                     VIRTIO_NET_F_RSC_EXT, false),
4021     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4022                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4023     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4024     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4025                        TX_TIMER_INTERVAL),
4026     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4027     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4028     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4029                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4030     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4031                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4032     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4033     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4034                      true),
4035     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4036     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4037     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4038     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4039                       VIRTIO_NET_F_GUEST_USO4, true),
4040     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4041                       VIRTIO_NET_F_GUEST_USO6, true),
4042     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4043                       VIRTIO_NET_F_HOST_USO, true),
4044     DEFINE_PROP_END_OF_LIST(),
4045 };
4046 
4047 static void virtio_net_class_init(ObjectClass *klass, void *data)
4048 {
4049     DeviceClass *dc = DEVICE_CLASS(klass);
4050     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4051 
4052     device_class_set_props(dc, virtio_net_properties);
4053     dc->vmsd = &vmstate_virtio_net;
4054     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4055     vdc->realize = virtio_net_device_realize;
4056     vdc->unrealize = virtio_net_device_unrealize;
4057     vdc->get_config = virtio_net_get_config;
4058     vdc->set_config = virtio_net_set_config;
4059     vdc->get_features = virtio_net_get_features;
4060     vdc->set_features = virtio_net_set_features;
4061     vdc->bad_features = virtio_net_bad_features;
4062     vdc->reset = virtio_net_reset;
4063     vdc->queue_reset = virtio_net_queue_reset;
4064     vdc->queue_enable = virtio_net_queue_enable;
4065     vdc->set_status = virtio_net_set_status;
4066     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4067     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4068     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4069     vdc->post_load = virtio_net_post_load_virtio;
4070     vdc->vmsd = &vmstate_virtio_net_device;
4071     vdc->primary_unplug_pending = primary_unplug_pending;
4072     vdc->get_vhost = virtio_net_get_vhost;
4073     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4074 }
4075 
4076 static const TypeInfo virtio_net_info = {
4077     .name = TYPE_VIRTIO_NET,
4078     .parent = TYPE_VIRTIO_DEVICE,
4079     .instance_size = sizeof(VirtIONet),
4080     .instance_init = virtio_net_instance_init,
4081     .class_init = virtio_net_class_init,
4082 };
4083 
4084 static void virtio_register_types(void)
4085 {
4086     type_register_static(&virtio_net_info);
4087 }
4088 
4089 type_init(virtio_register_types)
4090