xref: /openbmc/qemu/hw/net/virtio-net.c (revision 0d70c5aa1bbfb0f5099d53d6e084337a8246cc0c)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qobject/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "system/system.h"
43 #include "system/replay.h"
44 #include "trace.h"
45 #include "monitor/qdev.h"
46 #include "monitor/monitor.h"
47 #include "hw/pci/pci_device.h"
48 #include "net_rx_pkt.h"
49 #include "hw/virtio/vhost.h"
50 #include "system/qtest.h"
51 
52 #define VIRTIO_NET_VM_VERSION    11
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length value in ip header without option */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval, This value affects the performance
78    a lot, and should be tuned carefully, '300000'(300us) is the recommended
79    value to pass the WHQL test, '50000' can gain 2x netperf throughput with
80    tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
/*
 * Per-feature config-space sizing: when a feature bit in .flags is
 * offered, the device config must extend at least to .end (the offset
 * just past the last field that feature needs).
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}  /* terminator */
};
108 
/*
 * Config-space size bounds: at minimum up to the MAC field, at most the
 * whole struct, grown per negotiated feature via feature_sizes above.
 */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
/* Map a virtqueue index to its queue-pair index (each pair is RX + TX). */
static int vq2q(int queue_index)
{
    /* Integer division: vq 2n and 2n+1 both belong to pair n. */
    return queue_index / 2;
}
126 
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
/*
 * Fill guest-visible @config from device state: status, max queue pairs,
 * MTU, MAC, speed/duplex and RSS limits, stored with guest endianness.
 * For a vhost-vdpa peer, the backend's own config is then queried and
 * merged over the locally built one.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Advertise a real indirection table only when RSS is offered */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 n->rss_data.supported_hash_types);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            /* Backend query failed: keep the locally built config. */
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        /* Preserve any pending announce request in the reported status */
        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}
191 
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194     VirtIONet *n = VIRTIO_NET(vdev);
195     struct virtio_net_config netcfg = {};
196     NetClientState *nc = qemu_get_queue(n->nic);
197 
198     memcpy(&netcfg, config, n->config_size);
199 
200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205     }
206 
207     /*
208      * Is this VDPA? No peer means not VDPA: there's no way to
209      * disconnect/reconnect a VDPA peer.
210      */
211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212         vhost_net_set_config(get_vhost_net(nc->peer),
213                              (uint8_t *)&netcfg, 0, n->config_size,
214                              VHOST_SET_CONFIG_TYPE_FRONTEND);
215       }
216 }
217 
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224 
/*
 * Raise the ANNOUNCE status bit and ping the guest with a config-change
 * interrupt so it sends a gratuitous self-announcement.
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
233 
/* Announce-timer callback: consume one round and notify the guest. */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
242 
/*
 * NetClientInfo announce hook: request a guest self-announcement,
 * provided the migration announce timer isn't already driving one and
 * the guest negotiated both GUEST_ANNOUNCE and CTRL_VQ.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            virtio_net_announce_notify(n);
    }
}
262 
/*
 * Start or stop the vhost backend so it tracks the combined device,
 * link and VM-running state.  Falls back to userspace virtio when the
 * backend cannot honor the required vnet-header endianness or MTU.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* The extra control queue only exists when CTRL_VQ was negotiated */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Nothing to do if the desired state already matches vhost_started */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't byte-swap headers for us, so bail out to userspace */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Flag set before starting so the state is consistent during start */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
322 
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324                                           NetClientState *peer,
325                                           bool enable)
326 {
327     if (virtio_is_big_endian(vdev)) {
328         return qemu_set_vnet_be(peer, enable);
329     } else {
330         return qemu_set_vnet_le(peer, enable);
331     }
332 }
333 
/*
 * Apply the requested vnet-header endianness to the peers of all
 * @queue_pairs.  If enabling fails part-way, roll back the peers already
 * switched.  Returns true when the backend could not be configured (so
 * QEMU must swap headers itself), false on success.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* Undo the peers configured before the failure */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
352 
/*
 * Track device start/stop transitions and configure the backend's vnet
 * header endianness accordingly.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
376 
377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
378 {
379     unsigned int dropped = virtqueue_drop_all(vq);
380     if (dropped) {
381         virtio_notify(vdev, vq);
382     }
383 }
384 
/*
 * VirtIODevice status callback: propagate the new status to the
 * vnet-header endianness setup, the vhost backend, and each queue
 * pair's TX timer / bottom half.  Always returns 0.
 */
static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Pairs beyond curr_queue_pairs (or all but 0 without MQ) are off */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Userspace virtio drives the queue only while vhost is stopped */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        /* Remaining work only matters if a TX kick is pending */
        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm the pending TX work (timer or bottom half) */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            /* Queue stopped: cancel outstanding TX work */
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
    return 0;
}
442 
443 static void virtio_net_set_link_status(NetClientState *nc)
444 {
445     VirtIONet *n = qemu_get_nic_opaque(nc);
446     VirtIODevice *vdev = VIRTIO_DEVICE(n);
447     uint16_t old_status = n->status;
448 
449     if (nc->link_down)
450         n->status &= ~VIRTIO_NET_S_LINK_UP;
451     else
452         n->status |= VIRTIO_NET_S_LINK_UP;
453 
454     if (n->status != old_status)
455         virtio_notify_config(vdev);
456 
457     virtio_net_set_status(vdev, vdev->status);
458 }
459 
/*
 * Emit a QMP NIC_RX_FILTER_CHANGED event for @nc, then suppress further
 * events until management re-reads the filter (query-rx-filter re-arms
 * notification in virtio_net_query_rxfilter()).
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
473 
474 static intList *get_vlan_table(VirtIONet *n)
475 {
476     intList *list;
477     int i, j;
478 
479     list = NULL;
480     for (i = 0; i < MAX_VLAN >> 5; i++) {
481         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
482             if (n->vlans[i] & (1U << j)) {
483                 QAPI_LIST_PREPEND(list, (i << 5) + j);
484             }
485         }
486     }
487 
488     return list;
489 }
490 
/*
 * Build the QMP query-rx-filter reply for @nc: current unicast/multicast
 * modes, MAC tables, VLAN table and overflow flags.  Re-enables rx-filter
 * change events, which rxfilter_notify() disables after each event.
 * Caller owns the returned RxFilterInfo.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * NOTE(review): this reports n->nobcast ("no broadcast") directly as
     * broadcast_allowed, which looks inverted — confirm intended semantics
     * against the QAPI schema before changing.
     */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast MACs occupy table slots [0, first_multi) */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    /* Multicast MACs occupy table slots [first_multi, in_use) */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
553 
/*
 * Per-virtqueue reset: stop the matching vhost virtqueue (tap backends
 * only) and drop any packets still queued for this queue pair.  Indexes
 * at or beyond the data queues (i.e. the control queue) are ignored.
 */
static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}
577 
578 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
579 {
580     VirtIONet *n = VIRTIO_NET(vdev);
581     NetClientState *nc;
582     int r;
583 
584     /* validate queue_index and skip for cvq */
585     if (queue_index >= n->max_queue_pairs * 2) {
586         return;
587     }
588 
589     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
590 
591     if (!nc->peer || !vdev->vhost_started) {
592         return;
593     }
594 
595     if (get_vhost_net(nc->peer) &&
596         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
597         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
598         if (r < 0) {
599             error_report("unable to restart vhost net virtqueue: %d, "
600                             "when resetting the queue", queue_index);
601         }
602     }
603 }
604 
605 static void peer_test_vnet_hdr(VirtIONet *n)
606 {
607     NetClientState *nc = qemu_get_queue(n->nic);
608     if (!nc->peer) {
609         return;
610     }
611 
612     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
613 }
614 
/* Cached result of peer_test_vnet_hdr(): backend supports vnet headers. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
619 
620 static int peer_has_ufo(VirtIONet *n)
621 {
622     if (!peer_has_vnet_hdr(n))
623         return 0;
624 
625     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
626 
627     return n->has_ufo;
628 }
629 
630 static int peer_has_uso(VirtIONet *n)
631 {
632     if (!peer_has_vnet_hdr(n)) {
633         return 0;
634     }
635 
636     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
637 }
638 
/*
 * Recompute guest/host vnet-header lengths after feature negotiation.
 * VERSION_1 drivers always use the mergeable-buffer layout (extended
 * with a hash field when hash reporting is on); legacy drivers use the
 * short header unless MRG_RXBUF was negotiated.  Peers that can handle
 * the chosen length are switched to it so headers need no translation.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        /* Align the backend's header length with the guest's if possible */
        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
669 
670 static int virtio_net_max_tx_queue_size(VirtIONet *n)
671 {
672     NetClientState *peer = n->nic_conf.peers.ncs[0];
673     struct vhost_net *net;
674 
675     if (!peer) {
676         goto default_value;
677     }
678 
679     net = get_vhost_net(peer);
680 
681     if (!net || !net->max_tx_queue_size) {
682         goto default_value;
683     }
684 
685     return net->max_tx_queue_size;
686 
687 default_value:
688     return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
689 }
690 
/*
 * Enable the backend queue at @index: re-enable the vhost-user vring
 * and, for multiqueue tap peers, enable the tap queue.  Returns 0 on
 * success or tap_enable()'s error.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    /* Single-queue tap is always active; nothing to toggle */
    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}
715 
716 static int peer_detach(VirtIONet *n, int index)
717 {
718     NetClientState *nc = qemu_get_subqueue(n->nic, index);
719     struct vhost_net *net;
720 
721     if (!nc->peer) {
722         return 0;
723     }
724 
725     net = get_vhost_net(nc->peer);
726     if (net && net->is_vhost_user) {
727         vhost_net_set_vring_enable(nc->peer, 0);
728     }
729 
730     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
731         return 0;
732     }
733 
734     return tap_disable(nc->peer);
735 }
736 
737 static void virtio_net_set_queue_pairs(VirtIONet *n)
738 {
739     int i;
740     int r;
741 
742     if (n->nic->peer_deleted) {
743         return;
744     }
745 
746     for (i = 0; i < n->max_queue_pairs; i++) {
747         if (i < n->curr_queue_pairs) {
748             r = peer_attach(n, i);
749             assert(!r);
750         } else {
751             r = peer_detach(n, i);
752             assert(!r);
753         }
754     }
755 }
756 
757 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
758 
/*
 * Features to assume for old guests that never acknowledged a feature
 * set (see VirtioDeviceClass::bad_features usage).
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
773 
/*
 * Push the currently negotiated guest RX offloads (checksum, TSO4/6,
 * ECN, UFO, USO4/6) down to the backend peer.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}
785 
786 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
787 {
788     static const uint64_t guest_offloads_mask =
789         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
790         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
791         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
792         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
793         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
794         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
795         (1ULL << VIRTIO_NET_F_GUEST_USO6);
796 
797     return guest_offloads_mask & features;
798 }
799 
/* Guest offloads implied by the features the guest actually acked. */
uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
805 
/* Cookie for the qbus walk that locates the failover primary device. */
typedef struct {
    VirtIONet *n;      /* the standby virtio-net device */
    DeviceState *dev;  /* matching primary, filled in by failover_set_primary */
} FailoverDevice;
810 
/**
 * Set the failover primary device
 *
 * @dev: device currently visited by the qbus walk
 * @opaque: FailoverDevice to fill in with the matching primary
 *
 * Returns: 1 to stop the walk once the primary is found, 0 to continue
 */
818 static int failover_set_primary(DeviceState *dev, void *opaque)
819 {
820     FailoverDevice *fdev = opaque;
821     PCIDevice *pci_dev = (PCIDevice *)
822         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
823 
824     if (!pci_dev) {
825         return 0;
826     }
827 
828     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
829         fdev->dev = dev;
830         return 1;
831     }
832 
833     return 0;
834 }
835 
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns: the primary DeviceState, or NULL if none was found
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    /* Walk the whole machine; failover_set_primary() stops at the match */
    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
852 
/*
 * Plug in the failover primary device once the guest negotiated
 * VIRTIO_NET_F_STANDBY.  If a primary already exists, leave it alone;
 * otherwise create it from the options stashed in n->primary_opts.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        /* Creation failed: drop the stashed opts so they aren't reused */
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        /* qdev_device_add took its own reference */
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
881 
/*
 * VirtIODevice set_features callback: apply the feature set the guest
 * acked — multiqueue mode, vnet header layout, RSC/RSS state, guest
 * offloads, vhost feature ack, VLAN filtering, and failover standby.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Drop MTU feature if the backend itself didn't offer it */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC (receive segment coalescing) requires both RSC_EXT and TSO */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * keep acked_features in NetVhostUserState up-to-date so it
         * can't miss any features configured by guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    /* Without CTRL_VLAN, accept every VLAN (set all filter bits) */
    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* Under qtest there is no real primary; suppress the warning */
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
949 
950 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
951                                      struct iovec *iov, unsigned int iov_cnt)
952 {
953     uint8_t on;
954     size_t s;
955     NetClientState *nc = qemu_get_queue(n->nic);
956 
957     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
958     if (s != sizeof(on)) {
959         return VIRTIO_NET_ERR;
960     }
961 
962     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
963         n->promisc = on;
964     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
965         n->allmulti = on;
966     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
967         n->alluni = on;
968     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
969         n->nomulti = on;
970     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
971         n->nouni = on;
972     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
973         n->nobcast = on;
974     } else {
975         return VIRTIO_NET_ERR;
976     }
977 
978     rxfilter_notify(nc);
979 
980     return VIRTIO_NET_OK;
981 }
982 
/*
 * Handle VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET: the guest dynamically
 * enables/disables RX offloads (csum, TSO, UFO, ...) at runtime.
 * Returns VIRTIO_NET_OK / VIRTIO_NET_ERR for the status reply.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    /* The command is only valid if the feature was negotiated. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        /* Convert from guest endianness before inspecting bits. */
        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        /* RSC (receive segment coalescing) state is tracked separately;
         * its bit is stripped before the supported-offloads check below. */
        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        /* Reject any offload bit the device did not offer. */
        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
1027 
/*
 * Handle VIRTIO_NET_CTRL_MAC commands: set the primary MAC address
 * (MAC_ADDR_SET) or replace the unicast+multicast filter table
 * (MAC_TABLE_SET).
 *
 * The MAC_TABLE_SET payload is: an entry count, that many unicast MACs,
 * then a second count and that many multicast MACs. If a section exceeds
 * MAC_TABLE_ENTRIES it is not stored; the corresponding overflow flag is
 * set instead (the RX filter then accepts that whole address class).
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* Payload must be exactly one MAC address. */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First section: unicast entry count, then the entries. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* The claimed entries must fit in the remaining payload. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many unicast entries: remember only the overflow. */
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    /* Multicast entries are appended after the unicast ones. */
    first_multi = in_use;

    /* Second section: multicast entry count, then the entries. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume the rest of the payload exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Commit the fully-validated table to the device state in one go. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1123 
1124 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1125                                         struct iovec *iov, unsigned int iov_cnt)
1126 {
1127     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1128     uint16_t vid;
1129     size_t s;
1130     NetClientState *nc = qemu_get_queue(n->nic);
1131 
1132     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1133     vid = virtio_lduw_p(vdev, &vid);
1134     if (s != sizeof(vid)) {
1135         return VIRTIO_NET_ERR;
1136     }
1137 
1138     if (vid >= MAX_VLAN)
1139         return VIRTIO_NET_ERR;
1140 
1141     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1142         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1143     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1144         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1145     else
1146         return VIRTIO_NET_ERR;
1147 
1148     rxfilter_notify(nc);
1149 
1150     return VIRTIO_NET_OK;
1151 }
1152 
1153 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1154                                       struct iovec *iov, unsigned int iov_cnt)
1155 {
1156     trace_virtio_net_handle_announce(n->announce_timer.round);
1157     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1158         n->status & VIRTIO_NET_S_ANNOUNCE) {
1159         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1160         if (n->announce_timer.round) {
1161             qemu_announce_timer_step(&n->announce_timer);
1162         }
1163         return VIRTIO_NET_OK;
1164     } else {
1165         return VIRTIO_NET_ERR;
1166     }
1167 }
1168 
1169 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1170 {
1171     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1172     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1173         return false;
1174     }
1175 
1176     trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
1177     return nc->info->set_steering_ebpf(nc, prog_fd);
1178 }
1179 
1180 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1181                                    struct EBPFRSSConfig *config)
1182 {
1183     config->redirect = data->redirect;
1184     config->populate_hash = data->populate_hash;
1185     config->hash_types = data->runtime_hash_types;
1186     config->indirections_len = data->indirections_len;
1187     config->default_queue = data->default_queue;
1188 }
1189 
1190 static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
1191 {
1192     struct EBPFRSSConfig config = {};
1193 
1194     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1195         return false;
1196     }
1197 
1198     rss_data_to_rss_config(&n->rss_data, &config);
1199 
1200     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1201                           n->rss_data.indirections_table, n->rss_data.key,
1202                           NULL)) {
1203         return false;
1204     }
1205 
1206     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1207         return false;
1208     }
1209 
1210     return true;
1211 }
1212 
/* Detach any steering eBPF program from the backend (prog_fd of -1). */
static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1217 
/*
 * Apply the current RSS state: choose between eBPF steering in the
 * backend and software RSS in the device model, or disable both.
 */
static void virtio_net_commit_rss_config(VirtIONet *n)
{
    /* If the peer provides hashing itself, leave steering to it. */
    if (n->rss_data.peer_hash_available) {
        return;
    }

    if (n->rss_data.enabled) {
        /* Hash reporting (populate_hash) requires QEMU to process each
         * packet, so it forces software RSS; otherwise try eBPF first. */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                /* vhost datapath bypasses QEMU: software RSS can't help. */
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.runtime_hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}
1246 
1247 static void virtio_net_disable_rss(VirtIONet *n)
1248 {
1249     if (!n->rss_data.enabled) {
1250         return;
1251     }
1252 
1253     n->rss_data.enabled = false;
1254     virtio_net_commit_rss_config(n);
1255 }
1256 
1257 static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
1258 {
1259     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1260     int ret = true;
1261     int i = 0;
1262 
1263     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1264         error_setg(errp, "Expected %d file descriptors but got %d",
1265                    EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1266         return false;
1267     }
1268 
1269     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1270         fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
1271         if (fds[i] < 0) {
1272             ret = false;
1273             goto exit;
1274         }
1275     }
1276 
1277     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);
1278 
1279 exit:
1280     if (!ret) {
1281         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1282             close(fds[i]);
1283         }
1284     }
1285 
1286     return ret;
1287 }
1288 
/*
 * Try to set up eBPF RSS. Returns false only when user-provided RSS fds
 * were present and failed to load (@errp set in that case).
 */
static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    /* Probe by detaching (-1): if the backend cannot attach steering
     * programs at all, eBPF RSS does not apply - report success. */
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        return true;
    }

    trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);

    /*
     * If user explicitly gave QEMU RSS FDs to use, then
     * failing to use them must be considered a fatal
     * error. If no RSS FDs were provided, QEMU is trying
     * eBPF on a "best effort" basis only, so report a
     * warning and allow fallback to software RSS.
     */
    if (n->ebpf_rss_fds) {
        return virtio_net_load_ebpf_fds(n, errp);
    }

    ebpf_rss_load(&n->ebpf_rss, &error_warn);
    return true;
}
1311 
/* Detach the steering program from the backend and free the eBPF state. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1317 
1318 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1319                                       struct iovec *iov,
1320                                       unsigned int iov_cnt,
1321                                       bool do_rss)
1322 {
1323     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1324     struct virtio_net_rss_config cfg;
1325     size_t s, offset = 0, size_get;
1326     uint16_t queue_pairs, i;
1327     struct {
1328         uint16_t us;
1329         uint8_t b;
1330     } QEMU_PACKED temp;
1331     const char *err_msg = "";
1332     uint32_t err_value = 0;
1333 
1334     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1335         err_msg = "RSS is not negotiated";
1336         goto error;
1337     }
1338     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1339         err_msg = "Hash report is not negotiated";
1340         goto error;
1341     }
1342     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1343     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1344     if (s != size_get) {
1345         err_msg = "Short command buffer";
1346         err_value = (uint32_t)s;
1347         goto error;
1348     }
1349     n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1350     n->rss_data.indirections_len =
1351         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1352     if (!do_rss) {
1353         n->rss_data.indirections_len = 0;
1354     }
1355     if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1356         err_msg = "Too large indirection table";
1357         err_value = n->rss_data.indirections_len;
1358         goto error;
1359     }
1360     n->rss_data.indirections_len++;
1361     if (!is_power_of_2(n->rss_data.indirections_len)) {
1362         err_msg = "Invalid size of indirection table";
1363         err_value = n->rss_data.indirections_len;
1364         goto error;
1365     }
1366     n->rss_data.default_queue = do_rss ?
1367         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1368     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1369         err_msg = "Invalid default queue";
1370         err_value = n->rss_data.default_queue;
1371         goto error;
1372     }
1373     offset += size_get;
1374     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1375     g_free(n->rss_data.indirections_table);
1376     n->rss_data.indirections_table = g_malloc(size_get);
1377     if (!n->rss_data.indirections_table) {
1378         err_msg = "Can't allocate indirections table";
1379         err_value = n->rss_data.indirections_len;
1380         goto error;
1381     }
1382     s = iov_to_buf(iov, iov_cnt, offset,
1383                    n->rss_data.indirections_table, size_get);
1384     if (s != size_get) {
1385         err_msg = "Short indirection table buffer";
1386         err_value = (uint32_t)s;
1387         goto error;
1388     }
1389     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1390         uint16_t val = n->rss_data.indirections_table[i];
1391         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1392     }
1393     offset += size_get;
1394     size_get = sizeof(temp);
1395     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1396     if (s != size_get) {
1397         err_msg = "Can't get queue_pairs";
1398         err_value = (uint32_t)s;
1399         goto error;
1400     }
1401     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1402     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1403         err_msg = "Invalid number of queue_pairs";
1404         err_value = queue_pairs;
1405         goto error;
1406     }
1407     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1408         err_msg = "Invalid key size";
1409         err_value = temp.b;
1410         goto error;
1411     }
1412     if (!temp.b && n->rss_data.runtime_hash_types) {
1413         err_msg = "No key provided";
1414         err_value = 0;
1415         goto error;
1416     }
1417     if (!temp.b && !n->rss_data.runtime_hash_types) {
1418         virtio_net_disable_rss(n);
1419         return queue_pairs;
1420     }
1421     offset += size_get;
1422     size_get = temp.b;
1423     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1424     if (s != size_get) {
1425         err_msg = "Can get key buffer";
1426         err_value = (uint32_t)s;
1427         goto error;
1428     }
1429     n->rss_data.enabled = true;
1430     virtio_net_commit_rss_config(n);
1431     return queue_pairs;
1432 error:
1433     trace_virtio_net_rss_error(n, err_msg, err_value);
1434     virtio_net_disable_rss(n);
1435     return 0;
1436 }
1437 
/*
 * Handle VIRTIO_NET_CTRL_MQ commands: hash-report config, RSS config or
 * a plain change of the active queue-pair count, then resize the
 * device-model (and, except for vdpa, backend) queues accordingly.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Any MQ command invalidates the previous RSS configuration. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Validate against spec limits and the device configuration. */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1490 
/*
 * Process one control-queue request: parse the class/cmd header from
 * @out_sg, dispatch to the per-class handler, and write the one-byte
 * status reply into @in_sg.
 *
 * Returns the number of bytes written back (sizeof(status)), or 0 when
 * the element is malformed (the device is then marked broken).
 */
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    /* Work on a copy: iov_discard_front() modifies the iovec array. */
    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    /* Write the status byte back; header sizes were validated above. */
    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
1533 
1534 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1535 {
1536     VirtQueueElement *elem;
1537 
1538     for (;;) {
1539         size_t written;
1540         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1541         if (!elem) {
1542             break;
1543         }
1544 
1545         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1546                                              elem->out_sg, elem->out_num);
1547         if (written > 0) {
1548             virtqueue_push(vq, elem, written);
1549             virtio_notify(vdev, vq);
1550             g_free(elem);
1551         } else {
1552             virtqueue_detach_element(vq, elem, 0);
1553             g_free(elem);
1554             break;
1555         }
1556     }
1557 }
1558 
1559 /* RX */
1560 
1561 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1562 {
1563     VirtIONet *n = VIRTIO_NET(vdev);
1564     int queue_index = vq2q(virtio_get_queue_index(vq));
1565 
1566     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1567 }
1568 
1569 static bool virtio_net_can_receive(NetClientState *nc)
1570 {
1571     VirtIONet *n = qemu_get_nic_opaque(nc);
1572     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1573     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1574 
1575     if (!vdev->vm_running) {
1576         return false;
1577     }
1578 
1579     if (nc->queue_index >= n->curr_queue_pairs) {
1580         return false;
1581     }
1582 
1583     if (!virtio_queue_ready(q->rx_vq) ||
1584         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1585         return false;
1586     }
1587 
1588     return true;
1589 }
1590 
/*
 * Check whether the RX virtqueue has at least @bufsize bytes of guest
 * buffer space, managing RX notifications along the way.
 * Returns 1 when enough buffer space is available, 0 otherwise.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    int opaque;
    unsigned int in_bytes;
    VirtIONet *n = q->n;

    /* With mergeable RX buffers a packet may span several descriptors,
     * so keep re-checking byte counts even when the queue is non-empty. */
    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
                                           bufsize, 0);
        /* Buffer is enough, disable notification */
        if (bufsize <= in_bytes) {
            break;
        }

        if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
            /* Guest has added some buffers, try again */
            continue;
        } else {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);

    return 1;
}
1617 
/* Byte-swap the 16-bit fields of a virtio-net header to/from guest
 * endianness (flags and gso_type are single bytes, no swap needed). */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1625 
1626 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1627  * it never finds out that the packets don't have valid checksums.  This
1628  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1629  * fix this with Xen but it hasn't appeared in an upstream release of
1630  * dhclient yet.
1631  *
1632  * To avoid breaking existing guests, we catch udp packets and add
1633  * checksums.  This is terrible but it's better than hacking the guest
1634  * kernels.
1635  *
1636  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1637  * we should provide a mechanism to disable it to avoid polluting the host
1638  * cache.
1639  */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    /* Smallest frame that can hold Ethernet + IPv4 + UDP headers. */
    size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
                       sizeof(struct udp_header);

    /* The fixed byte offsets below assume an untagged Ethernet frame
     * with an option-less (20-byte) IPv4 header. */
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size >= csum_size && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1655 
/*
 * Fill in the virtio-net header at the front of the guest buffers:
 * either copy (and fix up) the header supplied by the backend, or
 * synthesize an empty no-op header when the backend has none.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        /* May patch the UDP checksum in place; see the comment above
         * work_around_broken_dhclient(). */
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1677 
1678 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1679 {
1680     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1681     static const uint8_t vlan[] = {0x81, 0x00};
1682     uint8_t *ptr = (uint8_t *)buf;
1683     int i;
1684 
1685     if (n->promisc)
1686         return 1;
1687 
1688     ptr += n->host_hdr_len;
1689 
1690     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1691         int vid = lduw_be_p(ptr + 14) & 0xfff;
1692         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1693             return 0;
1694     }
1695 
1696     if (ptr[0] & 1) { // multicast
1697         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1698             return !n->nobcast;
1699         } else if (n->nomulti) {
1700             return 0;
1701         } else if (n->allmulti || n->mac_table.multi_overflow) {
1702             return 1;
1703         }
1704 
1705         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1706             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1707                 return 1;
1708             }
1709         }
1710     } else { // unicast
1711         if (n->nouni) {
1712             return 0;
1713         } else if (n->alluni || n->mac_table.uni_overflow) {
1714             return 1;
1715         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1716             return 1;
1717         }
1718 
1719         for (i = 0; i < n->mac_table.first_multi; i++) {
1720             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1721                 return 1;
1722             }
1723         }
1724     }
1725 
1726     return 0;
1727 }
1728 
/*
 * Map the parsed packet type (IPv4/IPv6, TCP/UDP) and the enabled hash
 * type bitmask @types to a NetPktRss* hash kind. More specific L4
 * hashes take precedence over plain IP hashes, and the "EX" (extension
 * header) IPv6 variants take precedence over the plain IPv6 ones.
 * Returns 0xff when no enabled hash type matches the packet.
 */
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        /* Fall back to the plain IPv4 hash if enabled. */
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        /* Fall back to the IPv6 hashes if enabled. */
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    /* No applicable hash type. */
    return 0xff;
}
1788 
/*
 * Software RSS: compute the packet's RSS hash, optionally record it in
 * @hdr for hash reporting, and look up the destination queue in the
 * indirection table.
 * Returns the new queue index, or -1 to stay on the current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size,
                                  struct virtio_net_hdr_v1_hash *hdr)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    /* Maps NetPktRss* values to VIRTIO_NET_HASH_REPORT_* codes. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.runtime_hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No enabled hash type matches: report no hash and redirect to
         * the default queue (or keep the current one). */
        if (n->rss_data.populate_hash) {
            hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
            hdr->hash_report = 0;
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        hdr->hash_value = hash;
        hdr->hash_report = reports[net_hash_type];
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so '&' masks the hash. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1842 
/*
 * Deliver one packet from the backend into the guest's RX virtqueue.
 *
 * Must be called with the RCU read lock held (see virtio_net_do_receive).
 * The packet may span several virtqueue elements when mergeable RX
 * buffers are negotiated; elements are popped and only pushed back to
 * the guest once the whole packet has been copied.
 *
 * Returns the number of bytes consumed, 0 if the queue cannot hold the
 * packet yet (retry later), or -1 on error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE];
    QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_v1_hash extra_hdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    memset(&extra_hdr, 0, sizeof(extra_hdr));

    /* Software RSS may steer the packet to a different subqueue */
    if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
        if (index >= 0) {
            nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
        }
    }

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    q = virtio_net_get_subqueue(nc);

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered packets are reported as consumed so they are dropped */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /*
             * Running dry mid-packet is a guest bug: has_buffers() said
             * there was room, so only complain if we already popped some.
             */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        /* First element carries the virtio-net header */
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in guest memory; it is
                 * patched once the final element count is known below.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(extra_hdr), hdr.num_buffers),
                                    sizeof(extra_hdr.hdr.num_buffers));
            } else {
                extra_hdr.hdr.num_buffers = cpu_to_le16(1);
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy hash_value + hash_report filled in by RSS above */
                offset = offsetof(typeof(extra_hdr), hash_value);
                iov_from_buf(sg, elem->in_num, offset,
                             (char *)&extra_hdr + offset,
                             sizeof(extra_hdr.hash_value) +
                             sizeof(extra_hdr.hash_report));
            }
            /* Skip the backend's header; we already wrote the guest's */
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Patch the real element count into the mergeable header */
        virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &extra_hdr.hdr.num_buffers,
                     sizeof extra_hdr.hdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Return every popped-but-unfilled element to the queue */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
1993 
1994 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1995                                   size_t size)
1996 {
1997     RCU_READ_LOCK_GUARD();
1998 
1999     return virtio_net_receive_rcu(nc, buf, size);
2000 }
2001 
2002 /*
2003  * Accessors to read and write the IP packet data length field. This
2004  * is a potentially unaligned network-byte-order 16 bit unsigned integer
2005  * pointed to by unit->ip_len.
2006  */
2007 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2008 {
2009     return lduw_be_p(unit->ip_plen);
2010 }
2011 
2012 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2013 {
2014     stw_be_p(unit->ip_plen, l);
2015 }
2016 
/*
 * Locate the IPv4 and TCP headers inside @buf and record them, plus the
 * TCP payload length, in @unit.  Callers must have verified that @buf is
 * large enough for guest header + eth + ip + tcp headers.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL is in 32-bit words, hence << 2 to get bytes */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* Data offset: top 4 bits in words -> (>> 12) * 4 == >> 10 with mask */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* IPv4 total length includes the IP and TCP headers; strip both */
    unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
}
2033 
/*
 * Locate the IPv6 and TCP headers inside @buf and record them, plus the
 * TCP payload length, in @unit.  Extension headers are not handled; the
 * sanity check rejects anything whose next-header isn't TCP.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    /* Data offset: top 4 bits in words -> (>> 12) * 4 == >> 10 with mask */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6,
       ip header is excluded in ipv6 */
    unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
}
2052 
2053 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2054                                        VirtioNetRscSeg *seg)
2055 {
2056     int ret;
2057     struct virtio_net_hdr_v1 *h;
2058 
2059     h = (struct virtio_net_hdr_v1 *)seg->buf;
2060     h->flags = 0;
2061     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2062 
2063     if (seg->is_coalesced) {
2064         h->rsc.segments = seg->packets;
2065         h->rsc.dup_acks = seg->dup_ack;
2066         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2067         if (chain->proto == ETH_P_IP) {
2068             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2069         } else {
2070             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2071         }
2072     }
2073 
2074     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2075     QTAILQ_REMOVE(&chain->buffers, seg, next);
2076     g_free(seg->buf);
2077     g_free(seg);
2078 
2079     return ret;
2080 }
2081 
2082 static void virtio_net_rsc_purge(void *opq)
2083 {
2084     VirtioNetRscSeg *seg, *rn;
2085     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2086 
2087     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2088         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2089             chain->stat.purge_failed++;
2090             continue;
2091         }
2092     }
2093 
2094     chain->stat.timer++;
2095     if (!QTAILQ_EMPTY(&chain->buffers)) {
2096         timer_mod(chain->drain_timer,
2097               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2098     }
2099 }
2100 
2101 static void virtio_net_rsc_cleanup(VirtIONet *n)
2102 {
2103     VirtioNetRscChain *chain, *rn_chain;
2104     VirtioNetRscSeg *seg, *rn_seg;
2105 
2106     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2107         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2108             QTAILQ_REMOVE(&chain->buffers, seg, next);
2109             g_free(seg->buf);
2110             g_free(seg);
2111         }
2112 
2113         timer_free(chain->drain_timer);
2114         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2115         g_free(chain);
2116     }
2117 }
2118 
2119 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2120                                      NetClientState *nc,
2121                                      const uint8_t *buf, size_t size)
2122 {
2123     uint16_t hdr_len;
2124     VirtioNetRscSeg *seg;
2125 
2126     hdr_len = chain->n->guest_hdr_len;
2127     seg = g_new(VirtioNetRscSeg, 1);
2128     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2129         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2130     memcpy(seg->buf, buf, size);
2131     seg->size = size;
2132     seg->packets = 1;
2133     seg->dup_ack = 0;
2134     seg->is_coalesced = 0;
2135     seg->nc = nc;
2136 
2137     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2138     chain->stat.cache++;
2139 
2140     switch (chain->proto) {
2141     case ETH_P_IP:
2142         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2143         break;
2144     case ETH_P_IPV6:
2145         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2146         break;
2147     default:
2148         g_assert_not_reached();
2149     }
2150 }
2151 
2152 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2153                                          VirtioNetRscSeg *seg,
2154                                          const uint8_t *buf,
2155                                          struct tcp_header *n_tcp,
2156                                          struct tcp_header *o_tcp)
2157 {
2158     uint32_t nack, oack;
2159     uint16_t nwin, owin;
2160 
2161     nack = htonl(n_tcp->th_ack);
2162     nwin = htons(n_tcp->th_win);
2163     oack = htonl(o_tcp->th_ack);
2164     owin = htons(o_tcp->th_win);
2165 
2166     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2167         chain->stat.ack_out_of_win++;
2168         return RSC_FINAL;
2169     } else if (nack == oack) {
2170         /* duplicated ack or window probe */
2171         if (nwin == owin) {
2172             /* duplicated ack, add dup ack count due to whql test up to 1 */
2173             chain->stat.dup_ack++;
2174             return RSC_FINAL;
2175         } else {
2176             /* Coalesce window update */
2177             o_tcp->th_win = n_tcp->th_win;
2178             chain->stat.win_update++;
2179             return RSC_COALESCE;
2180         }
2181     } else {
2182         /* pure ack, go to 'C', finalize*/
2183         chain->stat.pure_ack++;
2184         return RSC_FINAL;
2185     }
2186 }
2187 
/*
 * Try to append the new segment described by @n_unit to the cached
 * segment @seg of the same flow.
 *
 * Returns RSC_COALESCE when the data (or a window update) was merged,
 * or RSC_FINAL when the cached segment must be flushed first (out of
 * window/order, over-size, dup/pure ACK).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = read_unit_ip_len(o_unit);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* New payload starts right after the new TCP header */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        /* Would exceed the per-protocol maximum coalesced payload */
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* Carry the latest ACK number and window forward */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the new payload at the tail of the cached buffer */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2252 
2253 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2254                                         VirtioNetRscSeg *seg,
2255                                         const uint8_t *buf, size_t size,
2256                                         VirtioNetRscUnit *unit)
2257 {
2258     struct ip_header *ip1, *ip2;
2259 
2260     ip1 = (struct ip_header *)(unit->ip);
2261     ip2 = (struct ip_header *)(seg->unit.ip);
2262     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2263         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2264         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2265         chain->stat.no_match++;
2266         return RSC_NO_MATCH;
2267     }
2268 
2269     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2270 }
2271 
2272 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2273                                         VirtioNetRscSeg *seg,
2274                                         const uint8_t *buf, size_t size,
2275                                         VirtioNetRscUnit *unit)
2276 {
2277     struct ip6_header *ip1, *ip2;
2278 
2279     ip1 = (struct ip6_header *)(unit->ip);
2280     ip2 = (struct ip6_header *)(seg->unit.ip);
2281     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2282         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2283         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2284         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2285             chain->stat.no_match++;
2286             return RSC_NO_MATCH;
2287     }
2288 
2289     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2290 }
2291 
2292 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2293  * to prevent out of order */
2294 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2295                                          struct tcp_header *tcp)
2296 {
2297     uint16_t tcp_hdr;
2298     uint16_t tcp_flag;
2299 
2300     tcp_flag = htons(tcp->th_offset_flags);
2301     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2302     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2303     if (tcp_flag & TH_SYN) {
2304         chain->stat.tcp_syn++;
2305         return RSC_BYPASS;
2306     }
2307 
2308     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2309         chain->stat.tcp_ctrl_drain++;
2310         return RSC_FINAL;
2311     }
2312 
2313     if (tcp_hdr > sizeof(struct tcp_header)) {
2314         chain->stat.tcp_all_opt++;
2315         return RSC_FINAL;
2316     }
2317 
2318     return RSC_CANDIDATE;
2319 }
2320 
/*
 * Core coalescing step: try to merge the incoming packet with a cached
 * segment of the same flow, caching it as a new segment otherwise.
 *
 * Returns the number of bytes "consumed" (size on cache/coalesce), the
 * result of delivering the packet on a drain, or 0 on delivery failure.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    /* First packet of the chain: cache it and start the drain timer */
    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            /* Flush the cached segment first to preserve ordering */
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* No flow matched: start a new cached segment for this one */
    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2366 
2367 /* Drain a connection data, this is to avoid out of order segments */
2368 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2369                                         NetClientState *nc,
2370                                         const uint8_t *buf, size_t size,
2371                                         uint16_t ip_start, uint16_t ip_size,
2372                                         uint16_t tcp_port)
2373 {
2374     VirtioNetRscSeg *seg, *nseg;
2375     uint32_t ppair1, ppair2;
2376 
2377     ppair1 = *(uint32_t *)(buf + tcp_port);
2378     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2379         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2380         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2381             || (ppair1 != ppair2)) {
2382             continue;
2383         }
2384         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2385             chain->stat.drain_failed++;
2386         }
2387 
2388         break;
2389     }
2390 
2391     return virtio_net_do_receive(nc, buf, size);
2392 }
2393 
/*
 * Decide whether an IPv4 packet is eligible for coalescing.
 * Returns RSC_CANDIDATE for plain, unfragmented, non-ECN TCP/IPv4
 * packets with a consistent length; RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    /* NOTE(review): non-IPv4 is counted under ip_option too — confirm
     * whether a dedicated counter was intended. */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* Total length must cover ip+tcp headers and fit inside the frame */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2439 
/*
 * RSC entry point for IPv4 packets: bypass, drain-then-deliver, or
 * coalesce depending on the sanity and TCP-control checks.
 */
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    /* Too short for virtio + eth + ipv4 + tcp headers: deliver as-is */
    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* 12 is the offset of ip_src within the IPv4 header */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
2474 
/*
 * Decide whether an IPv6 packet is eligible for coalescing.
 * Returns RSC_CANDIDATE for plain, non-ECN TCP/IPv6 packets with a
 * consistent payload length; RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* NOTE(review): this masks a byte of the 32-bit flow word without a
     * byte-order conversion; verify the version check on big-endian
     * hosts. */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Payload length (excludes the IPv6 header) must hold a TCP header
       and fit inside the frame */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2508 
2509 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2510                                       const uint8_t *buf, size_t size)
2511 {
2512     int32_t ret;
2513     uint16_t hdr_len;
2514     VirtioNetRscChain *chain;
2515     VirtioNetRscUnit unit;
2516 
2517     chain = opq;
2518     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2519 
2520     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2521         + sizeof(tcp_header))) {
2522         return virtio_net_do_receive(nc, buf, size);
2523     }
2524 
2525     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2526     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2527                                                  unit.ip, buf, size)) {
2528         return virtio_net_do_receive(nc, buf, size);
2529     }
2530 
2531     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2532     if (ret == RSC_BYPASS) {
2533         return virtio_net_do_receive(nc, buf, size);
2534     } else if (ret == RSC_FINAL) {
2535         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2536                 ((hdr_len + sizeof(struct eth_header)) + 8),
2537                 VIRTIO_NET_IP6_ADDR_SIZE,
2538                 hdr_len + sizeof(struct eth_header)
2539                 + sizeof(struct ip6_header));
2540     }
2541 
2542     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2543 }
2544 
2545 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2546                                                       NetClientState *nc,
2547                                                       uint16_t proto)
2548 {
2549     VirtioNetRscChain *chain;
2550 
2551     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2552         return NULL;
2553     }
2554 
2555     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2556         if (chain->proto == proto) {
2557             return chain;
2558         }
2559     }
2560 
2561     chain = g_malloc(sizeof(*chain));
2562     chain->n = n;
2563     chain->proto = proto;
2564     if (proto == (uint16_t)ETH_P_IP) {
2565         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2566         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2567     } else {
2568         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2569         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2570     }
2571     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2572                                       virtio_net_rsc_purge, chain);
2573     memset(&chain->stat, 0, sizeof(chain->stat));
2574 
2575     QTAILQ_INIT(&chain->buffers);
2576     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2577 
2578     return chain;
2579 }
2580 
/*
 * Receive path when RSC is enabled: dispatch IPv4/IPv6 packets to the
 * per-protocol coalescing handlers, everything else straight through.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /* NOTE(review): size is checked against host_hdr_len but the eth
     * header is read at guest_hdr_len; confirm this is safe when the
     * two header lengths differ. */
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        /* Only coalesce protocols the guest has enabled RSC for */
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
2609 
2610 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2611                                   size_t size)
2612 {
2613     VirtIONet *n = qemu_get_nic_opaque(nc);
2614     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2615         return virtio_net_rsc_receive(nc, buf, size);
2616     } else {
2617         return virtio_net_do_receive(nc, buf, size);
2618     }
2619 }
2620 
2621 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2622 
/*
 * Completion callback for an asynchronous TX submission: push the
 * in-flight element back to the guest, then resume flushing the queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* Complete the element whose send was pending */
    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    /* Re-enable guest kicks and drain whatever queued up meanwhile */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret >= n->tx_burst) {
        /*
         * the flush has been stopped by tx_burst
         * we will not receive notification for the
         * remainining part, so re-schedule
         */
        virtio_queue_set_notification(q->tx_vq, 0);
        if (q->tx_bh) {
            replay_bh_schedule_event(q->tx_bh);
        } else {
            timer_mod(q->tx_timer,
                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        }
        q->tx_waiting = 1;
    }
}
2654 
2655 /* TX */
/*
 * Flush up to n->tx_burst packets from @q's TX virtqueue to the peer.
 *
 * Returns the number of packets sent, -EBUSY if the peer could not
 * accept a packet (the element is parked in q->async_tx and the send
 * completion callback resumes the flush), or -EINVAL if a malformed
 * descriptor made us mark the device broken via virtio_error().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* An earlier async send is still in flight; wait for completion. */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr vhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            goto detach;
        }

        /*
         * The guest's vnet header needs byte-swapping before the backend
         * sees it: copy it out, swap it, and splice the swapped copy in
         * front of the remaining buffers via sg2.
         */
        if (n->needs_vnet_hdr_swap) {
            if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
                sizeof(vhdr)) {
                virtio_error(vdev, "virtio-net header incorrect");
                goto detach;
            }
            virtio_net_hdr_swap(vdev, &vhdr);
            sg2[0].iov_base = &vhdr;
            sg2[0].iov_len = sizeof(vhdr);
            out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
                               sizeof(vhdr), -1);
            if (out_num == VIRTQUEUE_MAX_SIZE) {
                /* No room left for the prepended header: drop the packet. */
                goto drop;
            }
            out_num += 1;
            out_sg = sg2;
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header is invalid");
                goto detach;
            }
            /* Keep the host-sized header, then append the payload that
             * follows the (larger) guest header. */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;

            if (out_num < 1) {
                virtio_error(vdev, "virtio-net nothing to send");
                goto detach;
            }
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /*
             * Peer could not take the packet now: park the element and
             * stop TX notifications until virtio_net_tx_complete() runs.
             */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;

detach:
    /* Device marked broken: return the element to the queue unconsumed. */
    virtqueue_detach_element(q->tx_vq, elem, 0);
    g_free(elem);
    return -EINVAL;
}
2757 
2758 static void virtio_net_tx_timer(void *opaque);
2759 
/*
 * TX virtqueue notification handler for the "timer" TX mode: batch
 * guest kicks behind a one-shot timer instead of flushing immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* Link is down: drain and complete the buffers without sending. */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* We already have queued packets, immediately flush */
        timer_del(q->tx_timer);
        virtio_net_tx_timer(q);
    } else {
        /* re-arm timer to flush it (and more) on next tick */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
2788 
/*
 * TX virtqueue notification handler for the "bh" TX mode: defer the
 * actual flush to a bottom half so the vCPU thread returns quickly.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* TX is owned by the vhost backend while it is running. */
    if (unlikely(n->vhost_started)) {
        return;
    }

    /* Link is down: drain and complete the buffers without sending. */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A flush is already pending; the bh will pick up the new buffers. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    replay_bh_schedule_event(q->tx_bh);
}
2814 
/*
 * Timer callback for the "timer" TX mode: flush the queue, and either
 * rearm (burst exhausted / more work arrived) or re-enable guest
 * notifications when the queue looks idle.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        /* -EBUSY: completion callback resumes; -EINVAL: device broken. */
        return;
    }
    /*
     * If we flush a full burst of packets, assume there are
     * more coming and immediately rearm
     */
    if (ret >= n->tx_burst) {
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        return;
    }
    /*
     * If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and rearm
     */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        q->tx_waiting = 1;
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
    }
}
2864 
/*
 * Bottom-half callback for the "bh" TX mode: flush the queue, and
 * either reschedule (burst exhausted / more work arrived) or re-enable
 * guest notifications when the queue looks idle.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        replay_bh_schedule_event(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        /* Device became broken during the second flush. */
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        replay_bh_schedule_event(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2913 
2914 static void virtio_net_add_queue(VirtIONet *n, int index)
2915 {
2916     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2917 
2918     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2919                                            virtio_net_handle_rx);
2920 
2921     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2922         n->vqs[index].tx_vq =
2923             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2924                              virtio_net_handle_tx_timer);
2925         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2926                                               virtio_net_tx_timer,
2927                                               &n->vqs[index]);
2928     } else {
2929         n->vqs[index].tx_vq =
2930             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2931                              virtio_net_handle_tx_bh);
2932         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2933                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2934     }
2935 
2936     n->vqs[index].tx_waiting = 0;
2937     n->vqs[index].n = n;
2938 }
2939 
2940 static void virtio_net_del_queue(VirtIONet *n, int index)
2941 {
2942     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2943     VirtIONetQueue *q = &n->vqs[index];
2944     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2945 
2946     qemu_purge_queued_packets(nc);
2947 
2948     virtio_del_queue(vdev, index * 2);
2949     if (q->tx_timer) {
2950         timer_free(q->tx_timer);
2951         q->tx_timer = NULL;
2952     } else {
2953         qemu_bh_delete(q->tx_bh);
2954         q->tx_bh = NULL;
2955     }
2956     q->tx_waiting = 0;
2957     virtio_del_queue(vdev, index * 2 + 1);
2958 }
2959 
/*
 * Grow or shrink the device's virtqueue set to @new_num_queues total
 * queues (RX/TX pairs plus one control queue).  The ctrl vq must stay
 * the last queue, so it is removed first and re-added last.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int i;

    /* At least one RX/TX pair plus the ctrl vq; total is always odd. */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2993 
2994 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2995 {
2996     int max = multiqueue ? n->max_queue_pairs : 1;
2997 
2998     n->multiqueue = multiqueue;
2999     virtio_net_change_num_queues(n, max * 2 + 1);
3000 
3001     virtio_net_set_queue_pairs(n);
3002 }
3003 
3004 static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n)
3005 {
3006     virtio_net_change_num_queues(VIRTIO_NET(vdev), n);
3007 
3008     return 0;
3009 }
3010 
/*
 * Return the feature set the device can offer to the guest, starting
 * from @features: add everything the device model supports, then strip
 * what the peer (tap/vhost backend) cannot provide.  RSS/hash-report
 * bits additionally depend on whether we can hash ourselves (eBPF) or
 * the peer can (vDPA-provided hash types).
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    uint32_t supported_hash_types = n->rss_data.supported_hash_types;
    uint32_t peer_hash_types = n->rss_data.peer_hash_types;
    /* true when every configured hash type is one we can compute ourselves */
    bool use_own_hash =
        (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) ==
        supported_hash_types;
    /* true when the peer advertises at least the configured hash types */
    bool use_peer_hash =
        n->rss_data.peer_hash_available &&
        (supported_hash_types & peer_hash_types) == supported_hash_types;

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* Without a vnet header the peer can do no offloads at all. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    /* No vhost backend: RSS/hash must be done by our own eBPF program. */
    if (!get_vhost_net(nc->peer)) {
        if (!use_own_hash) {
            virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
            virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
        } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) {
            virtio_net_load_ebpf(n, errp);
        }

        return features;
    }

    /* vhost backend present but its hash support doesn't cover us. */
    if (!use_peer_hash) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);

        if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
            if (!virtio_net_load_ebpf(n, errp)) {
                return features;
            }

            virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
        }
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* MTU may be offered even when the backend does not support it. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
     * enabled. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping guest to notify the new location with vDPA devices that does not
     * support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}
3107 
/*
 * Device-level post_load: re-derive state that is not migrated
 * directly (header sizes, link status per subqueue, first multicast
 * index, announce timer) from the fields that were.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume a guest-announce sequence that was interrupted mid-round. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    virtio_net_commit_rss_config(n);
    return 0;
}
3171 
3172 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3173 {
3174     VirtIONet *n = VIRTIO_NET(vdev);
3175     /*
3176      * The actual needed state is now in saved_guest_offloads,
3177      * see virtio_net_post_load_device for detail.
3178      * Restore it back and apply the desired offloads.
3179      */
3180     n->curr_guest_offloads = n->saved_guest_offloads;
3181     if (peer_has_vnet_hdr(n)) {
3182         virtio_net_apply_guest_offloads(n);
3183     }
3184 
3185     return 0;
3186 }
3187 
/* tx_waiting field of a VirtIONetQueue */
/* Used both per-queue (via WITH_TMP) and for queue 0 in the main vmstate. */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
3196 
3197 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3198 {
3199     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3200 }
3201 
3202 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3203 {
3204     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3205                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3206 }
3207 
3208 static bool mac_table_fits(void *opaque, int version_id)
3209 {
3210     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3211 }
3212 
3213 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3214 {
3215     return !mac_table_fits(opaque, version_id);
3216 }
3217 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;             /* device being migrated */
    VirtIONetQueue *vqs_1;              /* parent->vqs + 1: queues after the first */
    uint16_t        curr_queue_pairs_1; /* number of entries at vqs_1 */
    uint8_t         has_ufo;            /* snapshot of parent->has_ufo */
    uint32_t        has_vnet_hdr;       /* snapshot of parent->has_vnet_hdr */
};
3228 
3229 /* The 2nd and subsequent tx_waiting flags are loaded later than
3230  * the 1st entry in the queue_pairs and only if there's more than one
3231  * entry.  We use the tmp mechanism to calculate a temporary
3232  * pointer and count and also validate the count.
3233  */
3234 
3235 static int virtio_net_tx_waiting_pre_save(void *opaque)
3236 {
3237     struct VirtIONetMigTmp *tmp = opaque;
3238 
3239     tmp->vqs_1 = tmp->parent->vqs + 1;
3240     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3241     if (tmp->parent->curr_queue_pairs == 0) {
3242         tmp->curr_queue_pairs_1 = 0;
3243     }
3244 
3245     return 0;
3246 }
3247 
3248 static int virtio_net_tx_waiting_pre_load(void *opaque)
3249 {
3250     struct VirtIONetMigTmp *tmp = opaque;
3251 
3252     /* Reuse the pointer setup from save */
3253     virtio_net_tx_waiting_pre_save(opaque);
3254 
3255     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3256         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3257             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3258 
3259         return -EINVAL;
3260     }
3261 
3262     return 0; /* all good */
3263 }
3264 
/* tx_waiting flags for queue pairs beyond the first (see pre_save/pre_load). */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3277 
3278 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3279  * flag set we need to check that we have it
3280  */
3281 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3282 {
3283     struct VirtIONetMigTmp *tmp = opaque;
3284 
3285     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3286         error_report("virtio-net: saved image requires TUN_F_UFO support");
3287         return -EINVAL;
3288     }
3289 
3290     return 0;
3291 }
3292 
3293 static int virtio_net_ufo_pre_save(void *opaque)
3294 {
3295     struct VirtIONetMigTmp *tmp = opaque;
3296 
3297     tmp->has_ufo = tmp->parent->has_ufo;
3298 
3299     return 0;
3300 }
3301 
/* Migrates the has_ufo flag so the destination can verify peer support. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3311 
3312 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3313  * flag set we need to check that we have it
3314  */
3315 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3316 {
3317     struct VirtIONetMigTmp *tmp = opaque;
3318 
3319     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3320         error_report("virtio-net: saved image requires vnet_hdr=on");
3321         return -EINVAL;
3322     }
3323 
3324     return 0;
3325 }
3326 
3327 static int virtio_net_vnet_pre_save(void *opaque)
3328 {
3329     struct VirtIONetMigTmp *tmp = opaque;
3330 
3331     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3332 
3333     return 0;
3334 }
3335 
/* Migrates the has_vnet_hdr flag so the destination can verify peer support. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3345 
3346 static int virtio_net_rss_post_load(void *opaque, int version_id)
3347 {
3348     VirtIONet *n = VIRTIO_NET(opaque);
3349 
3350     if (version_id == 1) {
3351         n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES;
3352     }
3353 
3354     return 0;
3355 }
3356 
3357 static bool virtio_net_rss_needed(void *opaque)
3358 {
3359     return VIRTIO_NET(opaque)->rss_data.enabled;
3360 }
3361 
/* Optional subsection carrying the guest-programmed RSS configuration. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 2,
    .minimum_version_id = 1,
    .post_load = virtio_net_rss_post_load,
    .needed = virtio_net_rss_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet),
        /* only present from version 2 on; version 1 gets a default in post_load */
        VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3384 
3385 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3386 {
3387     VirtIONet *n = VIRTIO_NET(vdev);
3388     NetClientState *nc;
3389     struct vhost_net *net;
3390 
3391     if (!n->nic) {
3392         return NULL;
3393     }
3394 
3395     nc = qemu_get_queue(n->nic);
3396     if (!nc) {
3397         return NULL;
3398     }
3399 
3400     net = get_vhost_net(nc->peer);
3401     if (!net) {
3402         return NULL;
3403     }
3404 
3405     return &net->dev;
3406 }
3407 
3408 static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size,
3409                                      const VMStateField *field,
3410                                      JSONWriter *vmdesc)
3411 {
3412     VirtIONet *n = pv;
3413     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3414     struct vhost_dev *vhdev;
3415     Error *local_error = NULL;
3416     int ret;
3417 
3418     vhdev = virtio_net_get_vhost(vdev);
3419     if (vhdev == NULL) {
3420         error_reportf_err(local_error,
3421                           "Error getting vhost back-end of %s device %s: ",
3422                           vdev->name, vdev->parent_obj.canonical_path);
3423         return -1;
3424     }
3425 
3426     ret = vhost_save_backend_state(vhdev, f, &local_error);
3427     if (ret < 0) {
3428         error_reportf_err(local_error,
3429                           "Error saving back-end state of %s device %s: ",
3430                           vdev->name, vdev->parent_obj.canonical_path);
3431         return ret;
3432     }
3433 
3434     return 0;
3435 }
3436 
3437 static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size,
3438                                      const VMStateField *field)
3439 {
3440     VirtIONet *n = pv;
3441     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3442     struct vhost_dev *vhdev;
3443     Error *local_error = NULL;
3444     int ret;
3445 
3446     vhdev = virtio_net_get_vhost(vdev);
3447     if (vhdev == NULL) {
3448         error_reportf_err(local_error,
3449                           "Error getting vhost back-end of %s device %s: ",
3450                           vdev->name, vdev->parent_obj.canonical_path);
3451         return -1;
3452     }
3453 
3454     ret = vhost_load_backend_state(vhdev, f, &local_error);
3455     if (ret < 0) {
3456         error_reportf_err(local_error,
3457                           "Error loading  back-end state of %s device %s: ",
3458                           vdev->name, vdev->parent_obj.canonical_path);
3459         return ret;
3460     }
3461 
3462     return 0;
3463 }
3464 
3465 static bool vhost_user_net_is_internal_migration(void *opaque)
3466 {
3467     VirtIONet *n = opaque;
3468     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3469     struct vhost_dev *vhdev;
3470 
3471     vhdev = virtio_net_get_vhost(vdev);
3472     if (vhdev == NULL) {
3473         return false;
3474     }
3475 
3476     return vhost_supports_device_state(vhdev);
3477 }
3478 
/* Optional subsection streaming the vhost-user back-end's opaque state. */
static const VMStateDescription vhost_user_net_backend_state = {
    .name = "virtio-net-device/backend",
    .version_id = 0,
    .needed = vhost_user_net_is_internal_migration,
    .fields = (const VMStateField[]) {
        {
            .name = "backend",
            .info = &(const VMStateInfo) {
                .name = "virtio-net vhost-user backend state",
                .get = vhost_user_net_load_state,
                .put = vhost_user_net_save_state,
            },
         },
         VMSTATE_END_OF_LIST()
    }
};
3495 
/*
 * Main device vmstate.  Field order is the migration wire format:
 * never reorder or remove entries; extend via versioned fields or
 * subsections only.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_net_rss,
        &vhost_user_net_backend_state,
        NULL
    }
};
3552 
/* Net client callbacks for the virtio-net NIC front end. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3562 
3563 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3564 {
3565     VirtIONet *n = VIRTIO_NET(vdev);
3566     NetClientState *nc;
3567     assert(n->vhost_started);
3568     if (!n->multiqueue && idx == 2) {
3569         /* Must guard against invalid features and bogus queue index
3570          * from being set by malicious guest, or penetrated through
3571          * buggy migration stream.
3572          */
3573         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3574             qemu_log_mask(LOG_GUEST_ERROR,
3575                           "%s: bogus vq index ignored\n", __func__);
3576             return false;
3577         }
3578         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3579     } else {
3580         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3581     }
3582     /*
3583      * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3584      * as the macro of configure interrupt's IDX, If this driver does not
3585      * support, the function will return false
3586      */
3587 
3588     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3589         return vhost_net_config_pending(get_vhost_net(nc->peer));
3590     }
3591     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3592 }
3593 
3594 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3595                                            bool mask)
3596 {
3597     VirtIONet *n = VIRTIO_NET(vdev);
3598     NetClientState *nc;
3599     assert(n->vhost_started);
3600     if (!n->multiqueue && idx == 2) {
3601         /* Must guard against invalid features and bogus queue index
3602          * from being set by malicious guest, or penetrated through
3603          * buggy migration stream.
3604          */
3605         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3606             qemu_log_mask(LOG_GUEST_ERROR,
3607                           "%s: bogus vq index ignored\n", __func__);
3608             return;
3609         }
3610         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3611     } else {
3612         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3613     }
3614     /*
3615      *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3616      * as the macro of configure interrupt's IDX, If this driver does not
3617      * support, the function will return
3618      */
3619 
3620     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3621         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3622         return;
3623     }
3624     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3625 }
3626 
/*
 * Compute the size of the config space exposed to the guest based on the
 * offered host features.
 *
 * host_features is passed by value on purpose: VIRTIO_NET_F_MAC is added
 * only to this local copy, so the config space always accounts for the
 * MAC field without altering the device's advertised feature set.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}
3633 
3634 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3635                                    const char *type)
3636 {
3637     /*
3638      * The name can be NULL, the netclient name will be type.x.
3639      */
3640     assert(type != NULL);
3641 
3642     g_free(n->netclient_name);
3643     g_free(n->netclient_type);
3644     n->netclient_name = g_strdup(name);
3645     n->netclient_type = g_strdup(type);
3646 }
3647 
3648 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3649 {
3650     HotplugHandler *hotplug_ctrl;
3651     PCIDevice *pci_dev;
3652     Error *err = NULL;
3653 
3654     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3655     if (hotplug_ctrl) {
3656         pci_dev = PCI_DEVICE(dev);
3657         pci_dev->partially_hotplugged = true;
3658         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3659         if (err) {
3660             error_report_err(err);
3661             return false;
3662         }
3663     } else {
3664         return false;
3665     }
3666     return true;
3667 }
3668 
/*
 * Re-plug the failover primary device after a failed or cancelled
 * migration.  Returns true on success (or if the device was never
 * unplugged), false with *errp set on failure.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        /* Nothing to do: the device was never unplugged. */
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    /* Reattach the device to its original bus and stop hiding it. */
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    /*
     * Note: reached even when hotplug_handler_plug() set err; only a
     * pre-plug failure skips clearing the flag.
     */
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3701 
3702 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3703 {
3704     bool should_be_hidden;
3705     Error *err = NULL;
3706     DeviceState *dev = failover_find_primary_device(n);
3707 
3708     if (!dev) {
3709         return;
3710     }
3711 
3712     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3713 
3714     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3715         if (failover_unplug_primary(n, dev)) {
3716             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3717             qapi_event_send_unplug_primary(dev->id);
3718             qatomic_set(&n->failover_primary_hidden, true);
3719         } else {
3720             warn_report("couldn't unplug primary device");
3721         }
3722     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3723         /* We already unplugged the device let's plug it back */
3724         if (!failover_replug_primary(n, dev, &err)) {
3725             if (err) {
3726                 error_report_err(err);
3727             }
3728         }
3729     }
3730 }
3731 
/*
 * Migration notifier: forwards migration events to the failover
 * primary handling logic.  Always succeeds.
 */
static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
                                               MigrationEvent *e, Error **errp)
{
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, e);
    return 0;
}
3739 
3740 static bool failover_hide_primary_device(DeviceListener *listener,
3741                                          const QDict *device_opts,
3742                                          bool from_json,
3743                                          Error **errp)
3744 {
3745     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3746     const char *standby_id;
3747 
3748     if (!device_opts) {
3749         return false;
3750     }
3751 
3752     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3753         return false;
3754     }
3755 
3756     if (!qdict_haskey(device_opts, "id")) {
3757         error_setg(errp, "Device with failover_pair_id needs to have id");
3758         return false;
3759     }
3760 
3761     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3762     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3763         return false;
3764     }
3765 
3766     /*
3767      * The hide helper can be called several times for a given device.
3768      * Check there is only one primary for a virtio-net device but
3769      * don't duplicate the qdict several times if it's called for the same
3770      * device.
3771      */
3772     if (n->primary_opts) {
3773         const char *old, *new;
3774         /* devices with failover_pair_id always have an id */
3775         old = qdict_get_str(n->primary_opts, "id");
3776         new = qdict_get_str(device_opts, "id");
3777         if (strcmp(old, new) != 0) {
3778             error_setg(errp, "Cannot attach more than one primary device to "
3779                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3780             return false;
3781         }
3782     } else {
3783         n->primary_opts = qdict_clone_shallow(device_opts);
3784         n->primary_opts_from_json = from_json;
3785     }
3786 
3787     /* failover_primary_hidden is set during feature negotiation */
3788     return qatomic_read(&n->failover_primary_hidden);
3789 }
3790 
/*
 * Realize the virtio-net device: validate user configuration, size and
 * create the virtqueues, instantiate the NIC backend and initialize
 * filter/RSS state.  On error, *errp is set and any virtio state
 * initialized so far is cleaned up.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* A user-supplied MTU means we offer VIRTIO_NET_F_MTU. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* SPEED_UNKNOWN (negative sentinel) is the only accepted non-zero
     * negative value; any explicit speed also enables SPEED_DUPLEX. */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    /* Failover: hide the primary until VIRTIO_NET_F_STANDBY is negotiated. */
    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        migration_add_notifier(&n->migration_state,
                               virtio_net_migration_state_notifier);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    /* Each queue pair needs two vqs, plus one for the control queue. */
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    /*
     * Only queue pair 0 is created here; the remaining pairs are
     * presumably added later (e.g. via the pre_load_queues hook or when
     * the guest enables multiqueue) — TODO confirm.
     */
    virtio_net_add_queue(n, 0);

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name,
                              &dev->mem_reentrancy_guard, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id,
                              &dev->mem_reentrancy_guard, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* Host header length depends on whether the peer handles vnet headers. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* VLAN filter bitmap: one bit per possible VLAN id. */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /* Push the configured MAC down to the vhost-vdpa backend. */
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt);

    /*
     * Resolve the effective RSS hash types: if the peer reports its
     * supported types, AUTO-selected types are narrowed to that set;
     * otherwise AUTO types are taken as-is.
     */
    if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer,
                                           &n->rss_data.peer_hash_types)) {
        n->rss_data.peer_hash_available = true;
        n->rss_data.supported_hash_types =
            n->rss_data.specified_hash_types.on_bits |
            (n->rss_data.specified_hash_types.auto_bits &
             n->rss_data.peer_hash_types);
    } else {
        n->rss_data.supported_hash_types =
            n->rss_data.specified_hash_types.on_bits |
            n->rss_data.specified_hash_types.auto_bits;
    }
}
3973 
/*
 * Tear down the device: stop any vhost backend, release filter tables
 * and failover state, delete the virtqueues, the NIC and the RSC/RSS
 * resources, and finally undo virtio_init().
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        migration_remove_notifier(&n->migration_state);
    } else {
        /* primary_opts is only ever populated when failover is enabled */
        assert(n->primary_opts == NULL);
    }

    /* Only one queue pair exists if multiqueue was never negotiated. */
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
4017 
/*
 * Device reset: restore all guest-controllable state (RX modes, filter
 * tables, queue count, announce timer, RSS) to its power-on defaults.
 * Host features are left untouched.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* Restore the configured MAC, discarding any guest override. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }

    virtio_net_disable_rss(n);
}
4053 
/* QOM instance init: establish defaults that realize may later override. */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    /* Initialize the eBPF RSS context; program loading happens elsewhere. */
    ebpf_rss_init(&n->ebpf_rss);
}
4069 
/* Migration pre-save hook: only sanity-checks that vhost is stopped. */
static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}
4080 
4081 static bool primary_unplug_pending(void *opaque)
4082 {
4083     DeviceState *dev = opaque;
4084     DeviceState *primary;
4085     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4086     VirtIONet *n = VIRTIO_NET(vdev);
4087 
4088     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
4089         return false;
4090     }
4091     primary = failover_find_primary_device(n);
4092     return primary ? primary->pending_deleted_event : false;
4093 }
4094 
/*
 * dev_unplug_pending callback of vmstate_virtio_net: forwards to the
 * virtio device class hook (primary_unplug_pending for this device).
 */
static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}
4102 
/*
 * Outer migration wrapper: the device's own fields are migrated through
 * vdc->vmsd (vmstate_virtio_net_device) via VMSTATE_VIRTIO_DEVICE.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
4114 
/*
 * User-configurable properties.  The DEFINE_PROP_BIT64 entries toggle
 * individual bits in host_features; defaults reflect what this device
 * has historically offered.
 */
static const Property virtio_net_properties[] = {
    /* Checksum and segmentation offload feature bits (on by default) */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    /* Status reporting and control virtqueue features */
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue, RSS and hash reporting (off by default) */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
                      ebpf_rss_fds, qdev_prop_string, char*),
    /* Receive segment coalescing */
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    /* Generic NIC configuration, TX policy and queue sizes */
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    /* Failover (standby/primary) support */
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO4, true),
    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_USO6, true),
    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_USO, true),
    /*
     * Per-hash-type RSS toggles; AUTO lets realize narrow the set to
     * what the peer reports as supported.
     */
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_IPv4 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_TCPv4 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_UDPv4 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_IPv6 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_TCPv6 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_UDPv6 - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_IPv6_EX - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1,
                                  ON_OFF_AUTO_AUTO),
    DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet,
                                  rss_data.specified_hash_types,
                                  VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1,
                                  ON_OFF_AUTO_AUTO),
};
4224 
/*
 * Class init: wire up properties, migration descriptions and all virtio
 * device callbacks.  Note there are two vmsd's: dc->vmsd is the outer
 * migration wrapper, vdc->vmsd carries the actual device state.
 */
static void virtio_net_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    /* GSO is a legacy-only feature bit */
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->pre_load_queues = virtio_net_pre_load_queues;
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
}
4254 
/* QOM type description for the virtio-net device. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};
4262 
/* Register the virtio-net type with QOM at module init time. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
4269