xref: /openbmc/qemu/hw/net/virtio-net.c (revision 854ee02b22220377f3fa3806adf7e0718c3a5c5a)
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qobject/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "system/system.h"
#include "system/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "system/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue sizes; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

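/*
 * Masks for the flag bits and the data-offset field packed into the
 * 16-bit offset/flags word of the TCP header (used by the RSC code).
 */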
#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in the IP header, in 32-bit words, for no options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced (RSC) packets. This value
 * affects performance a lot and should be tuned carefully: '300000'
 * (300us) is the recommended value for passing the WHQL test, while
 * '50000' can double netperf throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

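/*
 * The guest-visible config space always covers at least the 'mac' field
 * and grows to the end of the last field enabled by the negotiated
 * features, per feature_sizes above.
 */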
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

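/*
 * Data virtqueues come in rx/tx pairs (rx0/tx0, rx1/tx1, ...), with the
 * control virtqueue, if any, last; dividing a data virtqueue index by
 * two therefore yields its queue-pair index.
 */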
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%u-byte MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

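/*
 * Drop everything the guest queued on a tx virtqueue and notify it, so
 * that it can reclaim the buffers.
 */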
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and disabled notification. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
    return 0;
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid event flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

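/*
 * n->vlans is a MAX_VLAN-bit filter bitmap stored in 32-bit words;
 * convert the set bits back into a list of VLAN ids for the rx-filter
 * query.
 */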
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue %d "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

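/*
 * Pick the vnet header layout the guest will see: virtio 1.0 always uses
 * the 12-byte header, extended with a hash report when negotiated, while
 * legacy guests only get the mergeable-rxbuf variant when that feature is
 * set. The host header length is matched to it only when the peer
 * supports the chosen length.
 */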
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];
    struct vhost_net *net;

    if (!peer) {
        goto default_value;
    }

    net = get_vhost_net(peer);

    if (!net || !net->max_tx_queue_size) {
        goto default_value;
    }

    return net->max_tx_queue_size;

default_value:
    return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    struct vhost_net *net;

    if (!nc->peer) {
        return 0;
    }

    net = get_vhost_net(nc->peer);
    if (net && net->is_vhost_user) {
        vhost_net_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all features the virtio-net device could support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set
     * without the backend enabling it. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * refuse to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce
     * purposes, helping the guest notify its new location with vDPA
     * devices that do not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked, matched against the failover pair id
 * @opaque: the FailoverDevice to fill in when the primary is found
 *
 * Returns 1 to stop the walk once the primary device has been found.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the primary device, or NULL if none is found.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure the primary device has the parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up to date so that it
         * does not miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

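    /*
     * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
     * back to back: the unicast list followed by the multicast list. A
     * list that overflows MAC_TABLE_ENTRIES is dropped and only raises the
     * corresponding overflow flag.
     */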
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

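/*
 * Point the backend at the RSS steering eBPF program; a prog_fd of -1
 * detaches any previously attached program.
 */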
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - falling back to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    bool ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}

static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        return true;
    }

    trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);

    /*
     * If the user explicitly gave QEMU RSS FDs to use, then
     * failing to use them must be considered a fatal
     * error. If no RSS FDs were provided, QEMU is trying
     * eBPF on a "best effort" basis only, so report a
     * warning and allow fallback to software RSS.
     */
    if (n->ebpf_rss_fds) {
        return virtio_net_load_ebpf_fds(n, errp);
    }

    ebpf_rss_load(&n->ebpf_rss, &error_warn);
    return true;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
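    /*
     * 'temp' mirrors the max_tx_vq (__le16) and hash_key_length (u8)
     * fields that follow the variable-length indirection table in
     * struct virtio_net_rss_config.
     */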
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: we're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /*
     * Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue.
     */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

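    /* Work on a copy of the out sg, as iov_discard_front() modifies it. */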
    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

1660 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1661 {
1662     int opaque;
1663     unsigned int in_bytes;
1664     VirtIONet *n = q->n;
1665 
1666     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1667         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1668                                            bufsize, 0);
1669         /* Buffers are sufficient, disable notification */
1670         if (bufsize <= in_bytes) {
1671             break;
1672         }
1673 
1674         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1675             /* Guest has added some buffers, try again */
1676             continue;
1677         } else {
1678             return 0;
1679         }
1680     }
1681 
1682     virtio_queue_set_notification(q->rx_vq, 0);
1683 
1684     return 1;
1685 }
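
/*
 * The loop above is the usual virtio race-avoidance pattern, roughly:
 *
 *   while (available space < bufsize) {
 *       enable guest notifications;
 *       if (nothing new arrived) {
 *           return 0;           // wait for the guest to kick us
 *       }
 *   }
 *   disable notifications;      // we will consume buffers without kicks
 *
 * Re-checking after enabling notifications ensures a buffer added between
 * the size check and the enable is never missed.
 */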
1686 
1687 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1688 {
1689     virtio_tswap16s(vdev, &hdr->hdr_len);
1690     virtio_tswap16s(vdev, &hdr->gso_size);
1691     virtio_tswap16s(vdev, &hdr->csum_start);
1692     virtio_tswap16s(vdev, &hdr->csum_offset);
1693 }
1694 
1695 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1696  * it never finds out that the packets don't have valid checksums.  This
1697  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1698  * fix this with Xen but it hasn't appeared in an upstream release of
1699  * dhclient yet.
1700  *
1701  * To avoid breaking existing guests, we catch udp packets and add
1702  * checksums.  This is terrible but it's better than hacking the guest
1703  * kernels.
1704  *
1705  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1706  * we should provide a mechanism to disable it to avoid polluting the host
1707  * cache.
1708  */
1709 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1710                                         uint8_t *buf, size_t size)
1711 {
1712     size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
1713                        sizeof(struct udp_header);
1714 
1715     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1716         (size >= csum_size && size < 1500) && /* normal sized MTU */
1717         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1718         (buf[23] == 17) && /* ip.protocol == UDP */
1719         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1720         net_checksum_calculate(buf, size, CSUM_UDP);
1721         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1722     }
1723 }
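
/*
 * Offset map for the checks above, assuming an untagged Ethernet frame
 * and the option-less 20-byte IPv4 header that DHCP replies normally use:
 *
 *   buf[12..13]  ethertype        (0x0800 = IPv4)
 *   buf[23]      ip.protocol      (byte 9 of the IP header, 17 = UDP)
 *   buf[34..35]  udp.srcport      (bytes 0..1 of the UDP header, 67 = bootps)
 *
 * A frame carrying IP options shifts the UDP header and simply fails the
 * port test, leaving the packet untouched.
 */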
1724 
1725 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1726                            const void *buf, size_t size)
1727 {
1728     if (n->has_vnet_hdr) {
1729         /* FIXME this cast is evil */
1730         void *wbuf = (void *)buf;
1731         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1732                                     size - n->host_hdr_len);
1733 
1734         if (n->needs_vnet_hdr_swap) {
1735             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1736         }
1737         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1738     } else {
1739         struct virtio_net_hdr hdr = {
1740             .flags = 0,
1741             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1742         };
1743         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1744     }
1745 }
1746 
1747 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1748 {
1749     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1750     static const uint8_t vlan[] = {0x81, 0x00};
1751     uint8_t *ptr = (uint8_t *)buf;
1752     int i;
1753 
1754     if (n->promisc)
1755         return 1;
1756 
1757     ptr += n->host_hdr_len;
1758 
1759     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1760         int vid = lduw_be_p(ptr + 14) & 0xfff;
1761         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1762             return 0;
1763     }
1764 
1765     if (ptr[0] & 1) { /* multicast */
1766         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1767             return !n->nobcast;
1768         } else if (n->nomulti) {
1769             return 0;
1770         } else if (n->allmulti || n->mac_table.multi_overflow) {
1771             return 1;
1772         }
1773 
1774         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1775             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1776                 return 1;
1777             }
1778         }
1779     } else { /* unicast */
1780         if (n->nouni) {
1781             return 0;
1782         } else if (n->alluni || n->mac_table.uni_overflow) {
1783             return 1;
1784         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1785             return 1;
1786         }
1787 
1788         for (i = 0; i < n->mac_table.first_multi; i++) {
1789             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1790                 return 1;
1791             }
1792         }
1793     }
1794 
1795     return 0;
1796 }
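
/*
 * Summary of the filter above, in decision order:
 *
 *   promisc          -> accept everything
 *   VLAN table miss  -> drop
 *   broadcast        -> accept unless nobcast
 *   multicast        -> nomulti drops; allmulti/multi_overflow accepts;
 *                       otherwise exact match in macs[first_multi..in_use)
 *   unicast          -> nouni drops; alluni/uni_overflow accepts; otherwise
 *                       match n->mac or macs[0..first_multi)
 */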
1797 
1798 static uint8_t virtio_net_get_hash_type(bool hasip4,
1799                                         bool hasip6,
1800                                         EthL4HdrProto l4hdr_proto,
1801                                         uint32_t types)
1802 {
1803     if (hasip4) {
1804         switch (l4hdr_proto) {
1805         case ETH_L4_HDR_PROTO_TCP:
1806             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1807                 return NetPktRssIpV4Tcp;
1808             }
1809             break;
1810 
1811         case ETH_L4_HDR_PROTO_UDP:
1812             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1813                 return NetPktRssIpV4Udp;
1814             }
1815             break;
1816 
1817         default:
1818             break;
1819         }
1820 
1821         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1822             return NetPktRssIpV4;
1823         }
1824     } else if (hasip6) {
1825         switch (l4hdr_proto) {
1826         case ETH_L4_HDR_PROTO_TCP:
1827             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1828                 return NetPktRssIpV6TcpEx;
1829             }
1830             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1831                 return NetPktRssIpV6Tcp;
1832             }
1833             break;
1834 
1835         case ETH_L4_HDR_PROTO_UDP:
1836             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1837                 return NetPktRssIpV6UdpEx;
1838             }
1839             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1840                 return NetPktRssIpV6Udp;
1841             }
1842             break;
1843 
1844         default:
1845             break;
1846         }
1847 
1848         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1849             return NetPktRssIpV6Ex;
1850         }
1851         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1852             return NetPktRssIpV6;
1853         }
1854     }
1855     return 0xff;
1856 }
1857 
1858 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1859                                   size_t size,
1860                                   struct virtio_net_hdr_v1_hash *hdr)
1861 {
1862     VirtIONet *n = qemu_get_nic_opaque(nc);
1863     unsigned int index = nc->queue_index, new_index = index;
1864     struct NetRxPkt *pkt = n->rx_pkt;
1865     uint8_t net_hash_type;
1866     uint32_t hash;
1867     bool hasip4, hasip6;
1868     EthL4HdrProto l4hdr_proto;
1869     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1870         VIRTIO_NET_HASH_REPORT_IPv4,
1871         VIRTIO_NET_HASH_REPORT_TCPv4,
1872         VIRTIO_NET_HASH_REPORT_TCPv6,
1873         VIRTIO_NET_HASH_REPORT_IPv6,
1874         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1875         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1876         VIRTIO_NET_HASH_REPORT_UDPv4,
1877         VIRTIO_NET_HASH_REPORT_UDPv6,
1878         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1879     };
1880     struct iovec iov = {
1881         .iov_base = (void *)buf,
1882         .iov_len = size
1883     };
1884 
1885     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1886     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1887     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1888                                              n->rss_data.hash_types);
1889     if (net_hash_type > NetPktRssIpV6UdpEx) {
1890         if (n->rss_data.populate_hash) {
1891             hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1892             hdr->hash_report = 0;
1893         }
1894         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1895     }
1896 
1897     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1898 
1899     if (n->rss_data.populate_hash) {
1900         hdr->hash_value = hash;
1901         hdr->hash_report = reports[net_hash_type];
1902     }
1903 
1904     if (n->rss_data.redirect) {
1905         new_index = hash & (n->rss_data.indirections_len - 1);
1906         new_index = n->rss_data.indirections_table[new_index];
1907     }
1908 
1909     return (index == new_index) ? -1 : new_index;
1910 }
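
/*
 * Worked example for the redirection above: with an indirection table of
 * 128 entries (the masking assumes a power-of-two length) and a computed
 * hash of 0x1234abcd:
 *
 *   new_index = 0x1234abcd & (128 - 1)    = 77
 *   new_index = indirections_table[77]     -> destination queue
 *
 * A return of -1 means the packet is already on the right queue and the
 * caller skips the requeue.
 */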
1911 
1912 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1913                                       size_t size)
1914 {
1915     VirtIONet *n = qemu_get_nic_opaque(nc);
1916     VirtIONetQueue *q;
1917     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1918     QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1919     QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE];
1920     QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1921     struct virtio_net_hdr_v1_hash extra_hdr;
1922     unsigned mhdr_cnt = 0;
1923     size_t offset, i, guest_offset, j;
1924     ssize_t err;
1925 
1926     memset(&extra_hdr, 0, sizeof(extra_hdr));
1927 
1928     if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1929         int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1930         if (index >= 0) {
1931             nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1932         }
1933     }
1934 
1935     if (!virtio_net_can_receive(nc)) {
1936         return -1;
1937     }
1938 
1939     q = virtio_net_get_subqueue(nc);
1940 
1941     /* hdr_len refers to the header we supply to the guest */
1942     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1943         return 0;
1944     }
1945 
1946     if (!receive_filter(n, buf, size))
1947         return size;
1948 
1949     offset = i = 0;
1950 
1951     while (offset < size) {
1952         VirtQueueElement *elem;
1953         int len, total;
1954         const struct iovec *sg;
1955 
1956         total = 0;
1957 
1958         if (i == VIRTQUEUE_MAX_SIZE) {
1959             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1960             err = size;
1961             goto err;
1962         }
1963 
1964         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1965         if (!elem) {
1966             if (i) {
1967                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1968                              "i %zd mergeable %d offset %zd, size %zd, "
1969                              "guest hdr len %zd, host hdr len %zd "
1970                              "guest features 0x%" PRIx64,
1971                              i, n->mergeable_rx_bufs, offset, size,
1972                              n->guest_hdr_len, n->host_hdr_len,
1973                              vdev->guest_features);
1974             }
1975             err = -1;
1976             goto err;
1977         }
1978 
1979         if (elem->in_num < 1) {
1980             virtio_error(vdev,
1981                          "virtio-net receive queue contains no in buffers");
1982             virtqueue_detach_element(q->rx_vq, elem, 0);
1983             g_free(elem);
1984             err = -1;
1985             goto err;
1986         }
1987 
1988         sg = elem->in_sg;
1989         if (i == 0) {
1990             assert(offset == 0);
1991             if (n->mergeable_rx_bufs) {
1992                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1993                                     sg, elem->in_num,
1994                                     offsetof(typeof(extra_hdr), hdr.num_buffers),
1995                                     sizeof(extra_hdr.hdr.num_buffers));
1996             } else {
1997                 extra_hdr.hdr.num_buffers = cpu_to_le16(1);
1998             }
1999 
2000             receive_header(n, sg, elem->in_num, buf, size);
2001             if (n->rss_data.populate_hash) {
2002                 offset = offsetof(typeof(extra_hdr), hash_value);
2003                 iov_from_buf(sg, elem->in_num, offset,
2004                              (char *)&extra_hdr + offset,
2005                              sizeof(extra_hdr.hash_value) +
2006                              sizeof(extra_hdr.hash_report));
2007             }
2008             offset = n->host_hdr_len;
2009             total += n->guest_hdr_len;
2010             guest_offset = n->guest_hdr_len;
2011         } else {
2012             guest_offset = 0;
2013         }
2014 
2015         /* copy in packet.  ugh */
2016         len = iov_from_buf(sg, elem->in_num, guest_offset,
2017                            buf + offset, size - offset);
2018         total += len;
2019         offset += len;
2020         /* If buffers can't be merged, at this point we
2021          * must have consumed the complete packet.
2022          * Otherwise, drop it. */
2023         if (!n->mergeable_rx_bufs && offset < size) {
2024             virtqueue_unpop(q->rx_vq, elem, total);
2025             g_free(elem);
2026             err = size;
2027             goto err;
2028         }
2029 
2030         elems[i] = elem;
2031         lens[i] = total;
2032         i++;
2033     }
2034 
2035     if (mhdr_cnt) {
2036         virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
2037         iov_from_buf(mhdr_sg, mhdr_cnt,
2038                      0,
2039                      &extra_hdr.hdr.num_buffers,
2040                      sizeof extra_hdr.hdr.num_buffers);
2041     }
2042 
2043     for (j = 0; j < i; j++) {
2044         /* signal other side */
2045         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2046         g_free(elems[j]);
2047     }
2048 
2049     virtqueue_flush(q->rx_vq, i);
2050     virtio_notify(vdev, q->rx_vq);
2051 
2052     return size;
2053 
2054 err:
2055     for (j = 0; j < i; j++) {
2056         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2057         g_free(elems[j]);
2058     }
2059 
2060     return err;
2061 }
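
/*
 * Sketch of the mergeable-rx bookkeeping above: a packet may span several
 * descriptor chains, so num_buffers is only known after the copy loop.
 * mhdr_sg records where the num_buffers field of the first chain's header
 * landed, and that spot is patched once 'i' is final:
 *
 *   chain 0:   [virtio_net_hdr_v1 | data...]   <- num_buffers written last
 *   chain 1:   [data...]
 *   ...
 *   chain i-1: [data...]                          num_buffers = i
 */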
2062 
2063 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2064                                   size_t size)
2065 {
2066     RCU_READ_LOCK_GUARD();
2067 
2068     return virtio_net_receive_rcu(nc, buf, size);
2069 }
2070 
2071 /*
2072  * Accessors to read and write the IP packet data length field. This
2073  * is a potentially unaligned network-byte-order 16 bit unsigned integer
2074  * pointed to by unit->ip_plen.
2075  */
2076 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2077 {
2078     return lduw_be_p(unit->ip_plen);
2079 }
2080 
2081 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2082 {
2083     stw_be_p(unit->ip_plen, l);
2084 }
2085 
2086 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2087                                          const uint8_t *buf,
2088                                          VirtioNetRscUnit *unit)
2089 {
2090     uint16_t ip_hdrlen;
2091     struct ip_header *ip;
2092 
2093     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2094                               + sizeof(struct eth_header));
2095     unit->ip = (void *)ip;
2096     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2097     unit->ip_plen = &ip->ip_len;
2098     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2099     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2100     unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
2101 }
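
/*
 * The tcp_hdrlen computation above folds two steps into one shift: the
 * TCP data offset occupies the top 4 bits of th_offset_flags and counts
 * 32-bit words, so
 *
 *   bytes = ((flags & 0xF000) >> 12) * 4  ==  (flags & 0xF000) >> 10
 *
 * e.g. flags = 0x5018 (offset 5, ACK|PSH) gives 0x5000 >> 10 = 20 bytes.
 */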
2102 
2103 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2104                                          const uint8_t *buf,
2105                                          VirtioNetRscUnit *unit)
2106 {
2107     struct ip6_header *ip6;
2108 
2109     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2110                                  + sizeof(struct eth_header));
2111     unit->ip = ip6;
2112     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2113     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2114                                         + sizeof(struct ip6_header));
2115     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2116 
2117     /* The payload length differs between IPv4 and IPv6:
2118        in IPv6 the IP header is excluded from it */
2119     unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
2120 }
2121 
2122 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2123                                        VirtioNetRscSeg *seg)
2124 {
2125     int ret;
2126     struct virtio_net_hdr_v1 *h;
2127 
2128     h = (struct virtio_net_hdr_v1 *)seg->buf;
2129     h->flags = 0;
2130     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2131 
2132     if (seg->is_coalesced) {
2133         h->rsc.segments = seg->packets;
2134         h->rsc.dup_acks = seg->dup_ack;
2135         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2136         if (chain->proto == ETH_P_IP) {
2137             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2138         } else {
2139             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2140         }
2141     }
2142 
2143     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2144     QTAILQ_REMOVE(&chain->buffers, seg, next);
2145     g_free(seg->buf);
2146     g_free(seg);
2147 
2148     return ret;
2149 }
2150 
2151 static void virtio_net_rsc_purge(void *opq)
2152 {
2153     VirtioNetRscSeg *seg, *rn;
2154     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2155 
2156     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2157         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2158             chain->stat.purge_failed++;
2159             continue;
2160         }
2161     }
2162 
2163     chain->stat.timer++;
2164     if (!QTAILQ_EMPTY(&chain->buffers)) {
2165         timer_mod(chain->drain_timer,
2166               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2167     }
2168 }
2169 
2170 static void virtio_net_rsc_cleanup(VirtIONet *n)
2171 {
2172     VirtioNetRscChain *chain, *rn_chain;
2173     VirtioNetRscSeg *seg, *rn_seg;
2174 
2175     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2176         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2177             QTAILQ_REMOVE(&chain->buffers, seg, next);
2178             g_free(seg->buf);
2179             g_free(seg);
2180         }
2181 
2182         timer_free(chain->drain_timer);
2183         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2184         g_free(chain);
2185     }
2186 }
2187 
2188 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2189                                      NetClientState *nc,
2190                                      const uint8_t *buf, size_t size)
2191 {
2192     uint16_t hdr_len;
2193     VirtioNetRscSeg *seg;
2194 
2195     hdr_len = chain->n->guest_hdr_len;
2196     seg = g_new(VirtioNetRscSeg, 1);
2197     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2198         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2199     memcpy(seg->buf, buf, size);
2200     seg->size = size;
2201     seg->packets = 1;
2202     seg->dup_ack = 0;
2203     seg->is_coalesced = 0;
2204     seg->nc = nc;
2205 
2206     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2207     chain->stat.cache++;
2208 
2209     switch (chain->proto) {
2210     case ETH_P_IP:
2211         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2212         break;
2213     case ETH_P_IPV6:
2214         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2215         break;
2216     default:
2217         g_assert_not_reached();
2218     }
2219 }
2220 
2221 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2222                                          VirtioNetRscSeg *seg,
2223                                          const uint8_t *buf,
2224                                          struct tcp_header *n_tcp,
2225                                          struct tcp_header *o_tcp)
2226 {
2227     uint32_t nack, oack;
2228     uint16_t nwin, owin;
2229 
2230     nack = htonl(n_tcp->th_ack);
2231     nwin = htons(n_tcp->th_win);
2232     oack = htonl(o_tcp->th_ack);
2233     owin = htons(o_tcp->th_win);
2234 
2235     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2236         chain->stat.ack_out_of_win++;
2237         return RSC_FINAL;
2238     } else if (nack == oack) {
2239         /* duplicated ack or window probe */
2240         if (nwin == owin) {
2241             /* duplicated ack; per the WHQL test at most one dup ack is merged, finalize */
2242             chain->stat.dup_ack++;
2243             return RSC_FINAL;
2244         } else {
2245             /* Coalesce window update */
2246             o_tcp->th_win = n_tcp->th_win;
2247             chain->stat.win_update++;
2248             return RSC_COALESCE;
2249         }
2250     } else {
2251         /* pure ack, go to 'C', finalize */
2252         chain->stat.pure_ack++;
2253         return RSC_FINAL;
2254     }
2255 }
2256 
2257 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2258                                             VirtioNetRscSeg *seg,
2259                                             const uint8_t *buf,
2260                                             VirtioNetRscUnit *n_unit)
2261 {
2262     void *data;
2263     uint16_t o_ip_len;
2264     uint32_t nseq, oseq;
2265     VirtioNetRscUnit *o_unit;
2266 
2267     o_unit = &seg->unit;
2268     o_ip_len = read_unit_ip_len(o_unit);
2269     nseq = htonl(n_unit->tcp->th_seq);
2270     oseq = htonl(o_unit->tcp->th_seq);
2271 
2272     /* out of order or retransmitted. */
2273     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2274         chain->stat.data_out_of_win++;
2275         return RSC_FINAL;
2276     }
2277 
2278     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2279     if (nseq == oseq) {
2280         if ((o_unit->payload == 0) && n_unit->payload) {
2281             /* From no payload to payload: the normal case, not a dup ack etc. */
2282             chain->stat.data_after_pure_ack++;
2283             goto coalesce;
2284         } else {
2285             return virtio_net_rsc_handle_ack(chain, seg, buf,
2286                                              n_unit->tcp, o_unit->tcp);
2287         }
2288     } else if ((nseq - oseq) != o_unit->payload) {
2289         /* Not a consistent packet, out of order */
2290         chain->stat.data_out_of_order++;
2291         return RSC_FINAL;
2292     } else {
2293 coalesce:
2294         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2295             chain->stat.over_size++;
2296             return RSC_FINAL;
2297         }
2298 
2299         /* The data is in sequence.  The payload length field differs between
2300            v4 and v6, so use the accessors to update it and record the new data len */
2301         o_unit->payload += n_unit->payload; /* update new data len */
2302 
2303         /* update field in ip header */
2304         write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);
2305 
2306         /* Carry over the 'PUSH' flag: the WHQL test guide says 'PUSH' can be
2307            coalesced for Windows guests, though this may change the behavior for
2308            Linux guests (only if they use the RSC feature). */
2309         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2310 
2311         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2312         o_unit->tcp->th_win = n_unit->tcp->th_win;
2313 
2314         memmove(seg->buf + seg->size, data, n_unit->payload);
2315         seg->size += n_unit->payload;
2316         seg->packets++;
2317         chain->stat.coalesced++;
2318         return RSC_COALESCE;
2319     }
2320 }
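
/*
 * Worked example: suppose the cached segment starts at oseq = 1000 with
 * payload 1460.  An arriving segment with nseq = 2460 satisfies
 * (nseq - oseq) == o_unit->payload, so it is appended: the payload grows
 * to 2920, the IP length field is rewritten to match, and ack/win/flags
 * are taken from the newer segment.  nseq = 2000 instead counts as
 * data_out_of_order and finalizes the chain.
 */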
2321 
2322 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2323                                         VirtioNetRscSeg *seg,
2324                                         const uint8_t *buf, size_t size,
2325                                         VirtioNetRscUnit *unit)
2326 {
2327     struct ip_header *ip1, *ip2;
2328 
2329     ip1 = (struct ip_header *)(unit->ip);
2330     ip2 = (struct ip_header *)(seg->unit.ip);
2331     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2332         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2333         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2334         chain->stat.no_match++;
2335         return RSC_NO_MATCH;
2336     }
2337 
2338     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2339 }
2340 
2341 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2342                                         VirtioNetRscSeg *seg,
2343                                         const uint8_t *buf, size_t size,
2344                                         VirtioNetRscUnit *unit)
2345 {
2346     struct ip6_header *ip1, *ip2;
2347 
2348     ip1 = (struct ip6_header *)(unit->ip);
2349     ip2 = (struct ip6_header *)(seg->unit.ip);
2350     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2351         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2352         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2353         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2354             chain->stat.no_match++;
2355             return RSC_NO_MATCH;
2356     }
2357 
2358     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2359 }
2360 
2361 /* Packets with 'SYN' should bypass; packets with any other control flag
2362  * should be sent only after the chain is drained, to prevent reordering */
2363 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2364                                          struct tcp_header *tcp)
2365 {
2366     uint16_t tcp_hdr;
2367     uint16_t tcp_flag;
2368 
2369     tcp_flag = htons(tcp->th_offset_flags);
2370     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2371     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2372     if (tcp_flag & TH_SYN) {
2373         chain->stat.tcp_syn++;
2374         return RSC_BYPASS;
2375     }
2376 
2377     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2378         chain->stat.tcp_ctrl_drain++;
2379         return RSC_FINAL;
2380     }
2381 
2382     if (tcp_hdr > sizeof(struct tcp_header)) {
2383         chain->stat.tcp_all_opt++;
2384         return RSC_FINAL;
2385     }
2386 
2387     return RSC_CANDIDATE;
2388 }
2389 
2390 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2391                                          NetClientState *nc,
2392                                          const uint8_t *buf, size_t size,
2393                                          VirtioNetRscUnit *unit)
2394 {
2395     int ret;
2396     VirtioNetRscSeg *seg, *nseg;
2397 
2398     if (QTAILQ_EMPTY(&chain->buffers)) {
2399         chain->stat.empty_cache++;
2400         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2401         timer_mod(chain->drain_timer,
2402               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2403         return size;
2404     }
2405 
2406     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2407         if (chain->proto == ETH_P_IP) {
2408             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2409         } else {
2410             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2411         }
2412 
2413         if (ret == RSC_FINAL) {
2414             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2415                 /* Send failed */
2416                 chain->stat.final_failed++;
2417                 return 0;
2418             }
2419 
2420             /* Send current packet */
2421             return virtio_net_do_receive(nc, buf, size);
2422         } else if (ret == RSC_NO_MATCH) {
2423             continue;
2424         } else {
2425             /* Coalesced: set the flag so the checksum is recalculated for ipv4 */
2426             seg->is_coalesced = 1;
2427             return size;
2428         }
2429     }
2430 
2431     chain->stat.no_match_cache++;
2432     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2433     return size;
2434 }
2435 
2436 /* Drain a connection's buffered data, to avoid out-of-order segments */
2437 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2438                                         NetClientState *nc,
2439                                         const uint8_t *buf, size_t size,
2440                                         uint16_t ip_start, uint16_t ip_size,
2441                                         uint16_t tcp_port)
2442 {
2443     VirtioNetRscSeg *seg, *nseg;
2444     uint32_t ppair1, ppair2;
2445 
2446     ppair1 = *(uint32_t *)(buf + tcp_port);
2447     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2448         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2449         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2450             || (ppair1 != ppair2)) {
2451             continue;
2452         }
2453         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2454             chain->stat.drain_failed++;
2455         }
2456 
2457         break;
2458     }
2459 
2460     return virtio_net_do_receive(nc, buf, size);
2461 }
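
/*
 * The single 32-bit compare above covers both TCP ports at once:
 * th_sport and th_dport are adjacent 16-bit fields at the start of the
 * TCP header, and tcp_port is the offset of that pair within the buffer.
 * The preceding memcmp handles the IP address pair the same way, with
 * ip_start/ip_size supplied by the v4 and v6 callers.
 */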
2462 
2463 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2464                                             struct ip_header *ip,
2465                                             const uint8_t *buf, size_t size)
2466 {
2467     uint16_t ip_len;
2468 
2469     /* Not an ipv4 packet */
2470     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2471         chain->stat.ip_option++;
2472         return RSC_BYPASS;
2473     }
2474 
2475     /* Don't handle packets with ip options */
2476     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2477         chain->stat.ip_option++;
2478         return RSC_BYPASS;
2479     }
2480 
2481     if (ip->ip_p != IPPROTO_TCP) {
2482         chain->stat.bypass_not_tcp++;
2483         return RSC_BYPASS;
2484     }
2485 
2486     /* Don't handle packets that may be fragmented (DF not set) */
2487     if (!(htons(ip->ip_off) & IP_DF)) {
2488         chain->stat.ip_frag++;
2489         return RSC_BYPASS;
2490     }
2491 
2492     /* Don't handle packets with ecn flag */
2493     if (IPTOS_ECN(ip->ip_tos)) {
2494         chain->stat.ip_ecn++;
2495         return RSC_BYPASS;
2496     }
2497 
2498     ip_len = htons(ip->ip_len);
2499     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2500         || ip_len > (size - chain->n->guest_hdr_len -
2501                      sizeof(struct eth_header))) {
2502         chain->stat.ip_hacked++;
2503         return RSC_BYPASS;
2504     }
2505 
2506     return RSC_CANDIDATE;
2507 }
2508 
2509 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2510                                       NetClientState *nc,
2511                                       const uint8_t *buf, size_t size)
2512 {
2513     int32_t ret;
2514     uint16_t hdr_len;
2515     VirtioNetRscUnit unit;
2516 
2517     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2518 
2519     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2520         + sizeof(struct tcp_header))) {
2521         chain->stat.bypass_not_tcp++;
2522         return virtio_net_do_receive(nc, buf, size);
2523     }
2524 
2525     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2526     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2527         != RSC_CANDIDATE) {
2528         return virtio_net_do_receive(nc, buf, size);
2529     }
2530 
2531     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2532     if (ret == RSC_BYPASS) {
2533         return virtio_net_do_receive(nc, buf, size);
2534     } else if (ret == RSC_FINAL) {
2535         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2536                 ((hdr_len + sizeof(struct eth_header)) + 12),
2537                 VIRTIO_NET_IP4_ADDR_SIZE,
2538                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2539     }
2540 
2541     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2542 }
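
/*
 * The offsets passed to the drain above: within the IPv4 header the
 * source address starts at byte 12 and saddr+daddr together span
 * VIRTIO_NET_IP4_ADDR_SIZE (8) bytes; the TCP ports follow the 20-byte
 * option-less header.  The IPv6 path below uses byte 8 and
 * VIRTIO_NET_IP6_ADDR_SIZE (32) for the same purpose.
 */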
2543 
2544 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2545                                             struct ip6_header *ip6,
2546                                             const uint8_t *buf, size_t size)
2547 {
2548     uint16_t ip_len;
2549 
2550     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2551         != IP_HEADER_VERSION_6) {
2552         return RSC_BYPASS;
2553     }
2554 
2555     /* Both options and the protocol are checked by this test */
2556     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2557         chain->stat.bypass_not_tcp++;
2558         return RSC_BYPASS;
2559     }
2560 
2561     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2562     if (ip_len < sizeof(struct tcp_header) ||
2563         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2564                   - sizeof(struct ip6_header))) {
2565         chain->stat.ip_hacked++;
2566         return RSC_BYPASS;
2567     }
2568 
2569     /* Don't handle packets with ecn flag */
2570     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2571         chain->stat.ip_ecn++;
2572         return RSC_BYPASS;
2573     }
2574 
2575     return RSC_CANDIDATE;
2576 }
2577 
2578 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2579                                       const uint8_t *buf, size_t size)
2580 {
2581     int32_t ret;
2582     uint16_t hdr_len;
2583     VirtioNetRscChain *chain;
2584     VirtioNetRscUnit unit;
2585 
2586     chain = opq;
2587     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2588 
2589     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2590         + sizeof(struct tcp_header))) {
2591         return virtio_net_do_receive(nc, buf, size);
2592     }
2593 
2594     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2595     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2596                                                  unit.ip, buf, size)) {
2597         return virtio_net_do_receive(nc, buf, size);
2598     }
2599 
2600     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2601     if (ret == RSC_BYPASS) {
2602         return virtio_net_do_receive(nc, buf, size);
2603     } else if (ret == RSC_FINAL) {
2604         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2605                 ((hdr_len + sizeof(struct eth_header)) + 8),
2606                 VIRTIO_NET_IP6_ADDR_SIZE,
2607                 hdr_len + sizeof(struct eth_header)
2608                 + sizeof(struct ip6_header));
2609     }
2610 
2611     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2612 }
2613 
2614 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2615                                                       NetClientState *nc,
2616                                                       uint16_t proto)
2617 {
2618     VirtioNetRscChain *chain;
2619 
2620     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2621         return NULL;
2622     }
2623 
2624     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2625         if (chain->proto == proto) {
2626             return chain;
2627         }
2628     }
2629 
2630     chain = g_malloc(sizeof(*chain));
2631     chain->n = n;
2632     chain->proto = proto;
2633     if (proto == (uint16_t)ETH_P_IP) {
2634         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2635         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2636     } else {
2637         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2638         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2639     }
2640     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2641                                       virtio_net_rsc_purge, chain);
2642     memset(&chain->stat, 0, sizeof(chain->stat));
2643 
2644     QTAILQ_INIT(&chain->buffers);
2645     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2646 
2647     return chain;
2648 }
2649 
2650 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2651                                       const uint8_t *buf,
2652                                       size_t size)
2653 {
2654     uint16_t proto;
2655     VirtioNetRscChain *chain;
2656     struct eth_header *eth;
2657     VirtIONet *n;
2658 
2659     n = qemu_get_nic_opaque(nc);
2660     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2661         return virtio_net_do_receive(nc, buf, size);
2662     }
2663 
2664     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2665     proto = htons(eth->h_proto);
2666 
2667     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2668     if (chain) {
2669         chain->stat.received++;
2670         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2671             return virtio_net_rsc_receive4(chain, nc, buf, size);
2672         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2673             return virtio_net_rsc_receive6(chain, nc, buf, size);
2674         }
2675     }
2676     return virtio_net_do_receive(nc, buf, size);
2677 }
2678 
2679 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2680                                   size_t size)
2681 {
2682     VirtIONet *n = qemu_get_nic_opaque(nc);
2683     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2684         return virtio_net_rsc_receive(nc, buf, size);
2685     } else {
2686         return virtio_net_do_receive(nc, buf, size);
2687     }
2688 }
2689 
2690 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2691 
2692 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2693 {
2694     VirtIONet *n = qemu_get_nic_opaque(nc);
2695     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2696     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2697     int ret;
2698 
2699     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2700     virtio_notify(vdev, q->tx_vq);
2701 
2702     g_free(q->async_tx.elem);
2703     q->async_tx.elem = NULL;
2704 
2705     virtio_queue_set_notification(q->tx_vq, 1);
2706     ret = virtio_net_flush_tx(q);
2707     if (ret >= n->tx_burst) {
2708         /*
2709          * the flush has been stopped by tx_burst;
2710          * we will not receive a notification for the
2711          * remaining part, so re-schedule
2712          */
2713         virtio_queue_set_notification(q->tx_vq, 0);
2714         if (q->tx_bh) {
2715             replay_bh_schedule_event(q->tx_bh);
2716         } else {
2717             timer_mod(q->tx_timer,
2718                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2719         }
2720         q->tx_waiting = 1;
2721     }
2722 }
2723 
2724 /* TX */
2725 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2726 {
2727     VirtIONet *n = q->n;
2728     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2729     VirtQueueElement *elem;
2730     int32_t num_packets = 0;
2731     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2732     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2733         return num_packets;
2734     }
2735 
2736     if (q->async_tx.elem) {
2737         virtio_queue_set_notification(q->tx_vq, 0);
2738         return num_packets;
2739     }
2740 
2741     for (;;) {
2742         ssize_t ret;
2743         unsigned int out_num;
2744         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2745         struct virtio_net_hdr vhdr;
2746 
2747         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2748         if (!elem) {
2749             break;
2750         }
2751 
2752         out_num = elem->out_num;
2753         out_sg = elem->out_sg;
2754         if (out_num < 1) {
2755             virtio_error(vdev, "virtio-net header not in first element");
2756             goto detach;
2757         }
2758 
2759         if (n->needs_vnet_hdr_swap) {
2760             if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2761                 sizeof(vhdr)) {
2762                 virtio_error(vdev, "virtio-net header incorrect");
2763                 goto detach;
2764             }
2765             virtio_net_hdr_swap(vdev, &vhdr);
2766             sg2[0].iov_base = &vhdr;
2767             sg2[0].iov_len = sizeof(vhdr);
2768             out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2769                                sizeof(vhdr), -1);
2770             if (out_num == VIRTQUEUE_MAX_SIZE) {
2771                 goto drop;
2772             }
2773             out_num += 1;
2774             out_sg = sg2;
2775         }
2776         /*
2777          * If host wants to see the guest header as is, we can
2778          * pass it on unchanged. Otherwise, copy just the parts
2779          * that host is interested in.
2780          */
2781         assert(n->host_hdr_len <= n->guest_hdr_len);
2782         if (n->host_hdr_len != n->guest_hdr_len) {
2783             if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2784                 virtio_error(vdev, "virtio-net header is invalid");
2785                 goto detach;
2786             }
2787             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2788                                        out_sg, out_num,
2789                                        0, n->host_hdr_len);
2790             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2791                              out_sg, out_num,
2792                              n->guest_hdr_len, -1);
2793             out_num = sg_num;
2794             out_sg = sg;
2795 
2796             if (out_num < 1) {
2797                 virtio_error(vdev, "virtio-net nothing to send");
2798                 goto detach;
2799             }
2800         }
2801 
2802         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2803                                       out_sg, out_num, virtio_net_tx_complete);
2804         if (ret == 0) {
2805             virtio_queue_set_notification(q->tx_vq, 0);
2806             q->async_tx.elem = elem;
2807             return -EBUSY;
2808         }
2809 
2810 drop:
2811         virtqueue_push(q->tx_vq, elem, 0);
2812         virtio_notify(vdev, q->tx_vq);
2813         g_free(elem);
2814 
2815         if (++num_packets >= n->tx_burst) {
2816             break;
2817         }
2818     }
2819     return num_packets;
2820 
2821 detach:
2822     virtqueue_detach_element(q->tx_vq, elem, 0);
2823     g_free(elem);
2824     return -EINVAL;
2825 }
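
/*
 * Sketch of the sg rewriting above: when the guest header is longer than
 * the host one (e.g. mergeable buffers append num_buffers), the flush
 * rebuilds the out sg as
 *
 *   [guest hdr, truncated to host_hdr_len][payload from guest_hdr_len on]
 *
 * so the backend sees a contiguous host-sized header, while sg2[] is used
 * first when the header also needs byte swapping.
 */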
2826 
2827 static void virtio_net_tx_timer(void *opaque);
2828 
2829 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2830 {
2831     VirtIONet *n = VIRTIO_NET(vdev);
2832     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2833 
2834     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2835         virtio_net_drop_tx_queue_data(vdev, vq);
2836         return;
2837     }
2838 
2839     /* This happens when the device was stopped but the VCPU wasn't. */
2840     if (!vdev->vm_running) {
2841         q->tx_waiting = 1;
2842         return;
2843     }
2844 
2845     if (q->tx_waiting) {
2846         /* We already have queued packets, immediately flush */
2847         timer_del(q->tx_timer);
2848         virtio_net_tx_timer(q);
2849     } else {
2850         /* re-arm timer to flush it (and more) on next tick */
2851         timer_mod(q->tx_timer,
2852                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2853         q->tx_waiting = 1;
2854         virtio_queue_set_notification(vq, 0);
2855     }
2856 }
2857 
2858 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2859 {
2860     VirtIONet *n = VIRTIO_NET(vdev);
2861     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2862 
2863     if (unlikely(n->vhost_started)) {
2864         return;
2865     }
2866 
2867     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2868         virtio_net_drop_tx_queue_data(vdev, vq);
2869         return;
2870     }
2871 
2872     if (unlikely(q->tx_waiting)) {
2873         return;
2874     }
2875     q->tx_waiting = 1;
2876     /* This happens when the device was stopped but the VCPU wasn't. */
2877     if (!vdev->vm_running) {
2878         return;
2879     }
2880     virtio_queue_set_notification(vq, 0);
2881     replay_bh_schedule_event(q->tx_bh);
2882 }
2883 
2884 static void virtio_net_tx_timer(void *opaque)
2885 {
2886     VirtIONetQueue *q = opaque;
2887     VirtIONet *n = q->n;
2888     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2889     int ret;
2890 
2891     /* This happens when the device was stopped but the BH wasn't. */
2892     if (!vdev->vm_running) {
2893         /* Make sure tx waiting is set, so we'll run when restarted. */
2894         assert(q->tx_waiting);
2895         return;
2896     }
2897 
2898     q->tx_waiting = 0;
2899 
2900     /* Just in case the driver is not ready anymore */
2901     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2902         return;
2903     }
2904 
2905     ret = virtio_net_flush_tx(q);
2906     if (ret == -EBUSY || ret == -EINVAL) {
2907         return;
2908     }
2909     /*
2910      * If we flush a full burst of packets, assume there are
2911      * more coming and immediately rearm
2912      */
2913     if (ret >= n->tx_burst) {
2914         q->tx_waiting = 1;
2915         timer_mod(q->tx_timer,
2916                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2917         return;
2918     }
2919     /*
2920      * If less than a full burst, re-enable notification and flush
2921      * anything that may have come in while we weren't looking.  If
2922      * we find something, assume the guest is still active and rearm
2923      */
2924     virtio_queue_set_notification(q->tx_vq, 1);
2925     ret = virtio_net_flush_tx(q);
2926     if (ret > 0) {
2927         virtio_queue_set_notification(q->tx_vq, 0);
2928         q->tx_waiting = 1;
2929         timer_mod(q->tx_timer,
2930                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2931     }
2932 }
2933 
2934 static void virtio_net_tx_bh(void *opaque)
2935 {
2936     VirtIONetQueue *q = opaque;
2937     VirtIONet *n = q->n;
2938     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2939     int32_t ret;
2940 
2941     /* This happens when the device was stopped but the BH wasn't. */
2942     if (!vdev->vm_running) {
2943         /* Make sure tx waiting is set, so we'll run when restarted. */
2944         assert(q->tx_waiting);
2945         return;
2946     }
2947 
2948     q->tx_waiting = 0;
2949 
2950     /* Just in case the driver is not ready anymore */
2951     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2952         return;
2953     }
2954 
2955     ret = virtio_net_flush_tx(q);
2956     if (ret == -EBUSY || ret == -EINVAL) {
2957         return; /* Notification re-enable handled by tx_complete or device
2958                  * broken */
2959     }
2960 
2961     /* If we flush a full burst of packets, assume there are
2962      * more coming and immediately reschedule */
2963     if (ret >= n->tx_burst) {
2964         replay_bh_schedule_event(q->tx_bh);
2965         q->tx_waiting = 1;
2966         return;
2967     }
2968 
2969     /* If less than a full burst, re-enable notification and flush
2970      * anything that may have come in while we weren't looking.  If
2971      * we find something, assume the guest is still active and reschedule */
2972     virtio_queue_set_notification(q->tx_vq, 1);
2973     ret = virtio_net_flush_tx(q);
2974     if (ret == -EINVAL) {
2975         return;
2976     } else if (ret > 0) {
2977         virtio_queue_set_notification(q->tx_vq, 0);
2978         replay_bh_schedule_event(q->tx_bh);
2979         q->tx_waiting = 1;
2980     }
2981 }
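
/*
 * Both tx flavours above follow the same pattern: flush up to tx_burst
 * packets with notifications disabled, and only when a flush comes back
 * short re-enable notifications and flush once more, closing the race
 * with a guest that queued packets in the meantime.  A full burst instead
 * reschedules (timer or bottom half) without re-enabling notifications.
 */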
2982 
2983 static void virtio_net_add_queue(VirtIONet *n, int index)
2984 {
2985     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2986 
2987     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2988                                            virtio_net_handle_rx);
2989 
2990     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2991         n->vqs[index].tx_vq =
2992             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2993                              virtio_net_handle_tx_timer);
2994         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2995                                               virtio_net_tx_timer,
2996                                               &n->vqs[index]);
2997     } else {
2998         n->vqs[index].tx_vq =
2999             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
3000                              virtio_net_handle_tx_bh);
3001         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
3002                                                   &DEVICE(vdev)->mem_reentrancy_guard);
3003     }
3004 
3005     n->vqs[index].tx_waiting = 0;
3006     n->vqs[index].n = n;
3007 }
3008 
3009 static void virtio_net_del_queue(VirtIONet *n, int index)
3010 {
3011     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3012     VirtIONetQueue *q = &n->vqs[index];
3013     NetClientState *nc = qemu_get_subqueue(n->nic, index);
3014 
3015     qemu_purge_queued_packets(nc);
3016 
3017     virtio_del_queue(vdev, index * 2);
3018     if (q->tx_timer) {
3019         timer_free(q->tx_timer);
3020         q->tx_timer = NULL;
3021     } else {
3022         qemu_bh_delete(q->tx_bh);
3023         q->tx_bh = NULL;
3024     }
3025     q->tx_waiting = 0;
3026     virtio_del_queue(vdev, index * 2 + 1);
3027 }
3028 
3029 static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues)
3030 {
3031     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3032     int old_num_queues = virtio_get_num_queues(vdev);
3033     int i;
3034 
3035     assert(old_num_queues >= 3);
3036     assert(old_num_queues % 2 == 1);
3037 
3038     if (old_num_queues == new_num_queues) {
3039         return;
3040     }
3041 
3042     /*
3043      * We always need to remove and add ctrl vq if
3044      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3045      * and then we only enter one of the following two loops.
3046      */
3047     virtio_del_queue(vdev, old_num_queues - 1);
3048 
3049     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3050         /* new_num_queues < old_num_queues */
3051         virtio_net_del_queue(n, i / 2);
3052     }
3053 
3054     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3055         /* new_num_queues > old_num_queues */
3056         virtio_net_add_queue(n, i / 2);
3057     }
3058 
3059     /* add ctrl_vq last */
3060     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3061 }
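
/*
 * Queue layout assumed by the loops above, for a device with N pairs:
 *
 *   index 2*i     rx for pair i
 *   index 2*i + 1 tx for pair i
 *   index 2*N     ctrl (always last, hence delete-first/add-last)
 *
 * which is why the loops step by 2 and divide by 2 to recover the pair.
 */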
3062 
3063 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3064 {
3065     int max = multiqueue ? n->max_queue_pairs : 1;
3066 
3067     n->multiqueue = multiqueue;
3068     virtio_net_change_num_queues(n, max * 2 + 1);
3069 
3070     virtio_net_set_queue_pairs(n);
3071 }
3072 
3073 static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n)
3074 {
3075     virtio_net_change_num_queues(VIRTIO_NET(vdev), n);
3076 
3077     return 0;
3078 }
3079 
3080 static int virtio_net_post_load_device(void *opaque, int version_id)
3081 {
3082     VirtIONet *n = opaque;
3083     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3084     int i, link_down;
3085 
3086     trace_virtio_net_post_load_device();
3087     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3088                                virtio_vdev_has_feature(vdev,
3089                                                        VIRTIO_F_VERSION_1),
3090                                virtio_vdev_has_feature(vdev,
3091                                                        VIRTIO_NET_F_HASH_REPORT));
3092 
3093     /* MAC_TABLE_ENTRIES may be different from the saved image */
3094     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3095         n->mac_table.in_use = 0;
3096     }
3097 
3098     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3099         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3100     }
3101 
3102     /*
3103      * curr_guest_offloads will be later overwritten by the
3104      * virtio_set_features_nocheck call done from the virtio_load.
3105      * Here we make sure it is preserved and restored accordingly
3106      * in the virtio_net_post_load_virtio callback.
3107      */
3108     n->saved_guest_offloads = n->curr_guest_offloads;
3109 
3110     virtio_net_set_queue_pairs(n);
3111 
3112     /* Find the first multicast entry in the saved MAC filter */
3113     for (i = 0; i < n->mac_table.in_use; i++) {
3114         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3115             break;
3116         }
3117     }
3118     n->mac_table.first_multi = i;
3119 
3120     /* nc.link_down can't be migrated, so infer link_down from
3121      * the link status bit in n->status */
3122     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3123     for (i = 0; i < n->max_queue_pairs; i++) {
3124         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3125     }
3126 
3127     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3128         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3129         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3130                                   QEMU_CLOCK_VIRTUAL,
3131                                   virtio_net_announce_timer, n);
3132         if (n->announce_timer.round) {
3133             timer_mod(n->announce_timer.tm,
3134                       qemu_clock_get_ms(n->announce_timer.type));
3135         } else {
3136             qemu_announce_timer_del(&n->announce_timer, false);
3137         }
3138     }
3139 
3140     virtio_net_commit_rss_config(n);
3141     return 0;
3142 }
3143 
3144 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3145 {
3146     VirtIONet *n = VIRTIO_NET(vdev);
3147     /*
3148      * The actual needed state is now in saved_guest_offloads,
3149      * see virtio_net_post_load_device for detail.
3150      * Restore it back and apply the desired offloads.
3151      */
3152     n->curr_guest_offloads = n->saved_guest_offloads;
3153     if (peer_has_vnet_hdr(n)) {
3154         virtio_net_apply_guest_offloads(n);
3155     }
3156 
3157     return 0;
3158 }
3159 
3160 /* tx_waiting field of a VirtIONetQueue */
3161 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3162     .name = "virtio-net-queue-tx_waiting",
3163     .fields = (const VMStateField[]) {
3164         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3165         VMSTATE_END_OF_LIST()
3166     },
3167 };
3168 
3169 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3170 {
3171     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3172 }
3173 
3174 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3175 {
3176     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3177                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3178 }
3179 
3180 static bool mac_table_fits(void *opaque, int version_id)
3181 {
3182     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3183 }
3184 
3185 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3186 {
3187     return !mac_table_fits(opaque, version_id);
3188 }
3189 
3190 /* This temporary type is shared by all the WITH_TMP methods
3191  * although only some fields are used by each.
3192  */
3193 struct VirtIONetMigTmp {
3194     VirtIONet      *parent;
3195     VirtIONetQueue *vqs_1;
3196     uint16_t        curr_queue_pairs_1;
3197     uint8_t         has_ufo;
3198     uint32_t        has_vnet_hdr;
3199 };
3200 
3201 /* The tx_waiting flags of the 2nd and subsequent queue pairs are
3202  * loaded after the 1st one, and only if there is more than one
3203  * queue pair.  We use the tmp mechanism to compute a temporary
3204  * pointer and count, and also to validate the count.
3205  */
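/*
 * Roughly, VMSTATE_WITH_TMP gives the nested vmsd a scratch struct whose
 * first member (parent) points back at the real device, so the
 * pre_save/pre_load hooks can derive fields that are never stored in
 * VirtIONet itself.  A sketch of what the generic tmp helpers do on save
 * (assumed shape, not a literal expansion of the macro):
 *
 *     struct VirtIONetMigTmp tmp = { .parent = n };
 *     vmsd->pre_save(&tmp);             // fill vqs_1/curr_queue_pairs_1
 *     vmstate_save_state(f, vmsd, &tmp, ...);
 */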
3206 
3207 static int virtio_net_tx_waiting_pre_save(void *opaque)
3208 {
3209     struct VirtIONetMigTmp *tmp = opaque;
3210 
3211     tmp->vqs_1 = tmp->parent->vqs + 1;
3212     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3213     if (tmp->parent->curr_queue_pairs == 0) {
3214         tmp->curr_queue_pairs_1 = 0;
3215     }
3216 
3217     return 0;
3218 }
3219 
3220 static int virtio_net_tx_waiting_pre_load(void *opaque)
3221 {
3222     struct VirtIONetMigTmp *tmp = opaque;
3223 
3224     /* Reuse the pointer setup from save */
3225     virtio_net_tx_waiting_pre_save(opaque);
3226 
3227     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3228         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3229             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3230 
3231         return -EINVAL;
3232     }
3233 
3234     return 0; /* all good */
3235 }
3236 
3237 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3238     .name      = "virtio-net-tx_waiting",
3239     .pre_load  = virtio_net_tx_waiting_pre_load,
3240     .pre_save  = virtio_net_tx_waiting_pre_save,
3241     .fields    = (const VMStateField[]) {
3242         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3243                                      curr_queue_pairs_1,
3244                                      vmstate_virtio_net_queue_tx_waiting,
3245                                      struct VirtIONetQueue),
3246         VMSTATE_END_OF_LIST()
3247     },
3248 };
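/*
 * Worked example: with curr_queue_pairs == 4, pre_save sets vqs_1 to
 * &vqs[1] and curr_queue_pairs_1 to 3, so the VARRAY above streams
 * tx_waiting for queue pairs 1..3.  Queue pair 0's tx_waiting travels
 * separately in vmstate_virtio_net_device via
 * VMSTATE_STRUCT_POINTER(vqs, ...), which is why only the "minus one"
 * tail is handled here.
 */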
3249 
3250 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3251  * flag set we need to check that we have it
3252  */
3253 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3254 {
3255     struct VirtIONetMigTmp *tmp = opaque;
3256 
3257     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3258         error_report("virtio-net: saved image requires TUN_F_UFO support");
3259         return -EINVAL;
3260     }
3261 
3262     return 0;
3263 }
3264 
3265 static int virtio_net_ufo_pre_save(void *opaque)
3266 {
3267     struct VirtIONetMigTmp *tmp = opaque;
3268 
3269     tmp->has_ufo = tmp->parent->has_ufo;
3270 
3271     return 0;
3272 }
3273 
3274 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3275     .name      = "virtio-net-ufo",
3276     .post_load = virtio_net_ufo_post_load,
3277     .pre_save  = virtio_net_ufo_pre_save,
3278     .fields    = (const VMStateField[]) {
3279         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3280         VMSTATE_END_OF_LIST()
3281     },
3282 };
3283 
3284 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3285  * flag set we need to check that we have it
3286  */
3287 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3288 {
3289     struct VirtIONetMigTmp *tmp = opaque;
3290 
3291     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3292         error_report("virtio-net: saved image requires vnet_hdr=on");
3293         return -EINVAL;
3294     }
3295 
3296     return 0;
3297 }
3298 
3299 static int virtio_net_vnet_pre_save(void *opaque)
3300 {
3301     struct VirtIONetMigTmp *tmp = opaque;
3302 
3303     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3304 
3305     return 0;
3306 }
3307 
3308 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3309     .name      = "virtio-net-vnet",
3310     .post_load = virtio_net_vnet_post_load,
3311     .pre_save  = virtio_net_vnet_pre_save,
3312     .fields    = (const VMStateField[]) {
3313         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3314         VMSTATE_END_OF_LIST()
3315     },
3316 };
3317 
3318 static bool virtio_net_rss_needed(void *opaque)
3319 {
3320     return VIRTIO_NET(opaque)->rss_data.enabled;
3321 }
3322 
3323 static const VMStateDescription vmstate_virtio_net_rss = {
3324     .name      = "virtio-net-device/rss",
3325     .version_id = 1,
3326     .minimum_version_id = 1,
3327     .needed = virtio_net_rss_needed,
3328     .fields = (const VMStateField[]) {
3329         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3330         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3331         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3332         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3333         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3334         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3335         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3336                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3337         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3338                                     rss_data.indirections_len, 0,
3339                                     vmstate_info_uint16, uint16_t),
3340         VMSTATE_END_OF_LIST()
3341     },
3342 };
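/*
 * Because this is a .needed subsection, it is omitted from the stream
 * whenever rss_data.enabled is false, so streams from guests that never
 * enabled RSS remain loadable by QEMUs predating the subsection.  On
 * load, VMSTATE_VARRAY_UINT16_ALLOC sizes the indirection table from the
 * just-loaded indirections_len; conceptually the table then maps a packet
 * hash to a queue (a sketch, assuming a power-of-two length):
 *
 *     queue = rss_data.indirections_table[hash &
 *                                         (rss_data.indirections_len - 1)];
 */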
3343 
3344 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3345 {
3346     VirtIONet *n = VIRTIO_NET(vdev);
3347     NetClientState *nc;
3348     struct vhost_net *net;
3349 
3350     if (!n->nic) {
3351         return NULL;
3352     }
3353 
3354     nc = qemu_get_queue(n->nic);
3355     if (!nc) {
3356         return NULL;
3357     }
3358 
3359     net = get_vhost_net(nc->peer);
3360     if (!net) {
3361         return NULL;
3362     }
3363 
3364     return &net->dev;
3365 }
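/*
 * The NULL ladder above makes this safe to call at any point in the
 * device's lifetime, so callers only need a single check, e.g.:
 *
 *     struct vhost_dev *vhdev = virtio_net_get_vhost(vdev);
 *     if (vhdev && vhost_supports_device_state(vhdev)) {
 *         ... migrate the backend state in-band ...
 *     }
 */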
3366 
3367 static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size,
3368                                      const VMStateField *field,
3369                                      JSONWriter *vmdesc)
3370 {
3371     VirtIONet *n = pv;
3372     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3373     struct vhost_dev *vhdev;
3374     Error *local_error = NULL;
3375     int ret;
3376 
3377     vhdev = virtio_net_get_vhost(vdev);
3378     if (vhdev == NULL) {
3379         /* local_error is still NULL here; report directly */
3380         error_report("Error getting vhost back-end of %s device %s",
3381                      vdev->name, vdev->parent_obj.canonical_path);
3382         return -1;
3383     }
3384 
3385     ret = vhost_save_backend_state(vhdev, f, &local_error);
3386     if (ret < 0) {
3387         error_reportf_err(local_error,
3388                           "Error saving back-end state of %s device %s: ",
3389                           vdev->name, vdev->parent_obj.canonical_path);
3390         return ret;
3391     }
3392 
3393     return 0;
3394 }
3395 
3396 static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size,
3397                                      const VMStateField *field)
3398 {
3399     VirtIONet *n = pv;
3400     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3401     struct vhost_dev *vhdev;
3402     Error *local_error = NULL;
3403     int ret;
3404 
3405     vhdev = virtio_net_get_vhost(vdev);
3406     if (vhdev == NULL) {
3407         /* local_error is still NULL here; report directly */
3408         error_report("Error getting vhost back-end of %s device %s",
3409                      vdev->name, vdev->parent_obj.canonical_path);
3410         return -1;
3411     }
3412 
3413     ret = vhost_load_backend_state(vhdev, f, &local_error);
3414     if (ret < 0) {
3415         error_reportf_err(local_error,
3416                           "Error loading back-end state of %s device %s: ",
3417                           vdev->name, vdev->parent_obj.canonical_path);
3418         return ret;
3419     }
3420 
3421     return 0;
3422 }
3423 
3424 static bool vhost_user_net_is_internal_migration(void *opaque)
3425 {
3426     VirtIONet *n = opaque;
3427     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3428     struct vhost_dev *vhdev;
3429 
3430     vhdev = virtio_net_get_vhost(vdev);
3431     if (vhdev == NULL) {
3432         return false;
3433     }
3434 
3435     return vhost_supports_device_state(vhdev);
3436 }
3437 
3438 static const VMStateDescription vhost_user_net_backend_state = {
3439     .name = "virtio-net-device/backend",
3440     .version_id = 0,
3441     .needed = vhost_user_net_is_internal_migration,
3442     .fields = (const VMStateField[]) {
3443         {
3444             .name = "backend",
3445             .info = &(const VMStateInfo) {
3446                 .name = "virtio-net vhost-user backend state",
3447                 .get = vhost_user_net_load_state,
3448                 .put = vhost_user_net_save_state,
3449             },
3450         },
3451         VMSTATE_END_OF_LIST()
3452     }
3453 };
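/*
 * "backend" has no fixed wire format that QEMU itself understands; the
 * custom VMStateInfo simply hands the QEMUFile to the vhost layer.  The
 * general shape of such a field (a sketch with hypothetical names, using
 * the callback signatures shown above):
 *
 *     static const VMStateInfo my_opaque_info = {
 *         .name = "my-opaque-blob",
 *         .get  = my_load_state,   // int (QEMUFile *, void *, size_t,
 *                                  //      const VMStateField *)
 *         .put  = my_save_state,   // same, plus a JSONWriter *vmdesc
 *     };
 */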
3454 
3455 static const VMStateDescription vmstate_virtio_net_device = {
3456     .name = "virtio-net-device",
3457     .version_id = VIRTIO_NET_VM_VERSION,
3458     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3459     .post_load = virtio_net_post_load_device,
3460     .fields = (const VMStateField[]) {
3461         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3462         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3463                                vmstate_virtio_net_queue_tx_waiting,
3464                                VirtIONetQueue),
3465         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3466         VMSTATE_UINT16(status, VirtIONet),
3467         VMSTATE_UINT8(promisc, VirtIONet),
3468         VMSTATE_UINT8(allmulti, VirtIONet),
3469         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3470 
3471         /* Guarded pair: if it fits we load it, else we throw it away
3472          * - this can happen if the source has a larger MAC table;
3473          * post-load sets flags in this case.
3474          */
3475         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3476                                  0, mac_table_fits, mac_table.in_use,
3477                                  ETH_ALEN),
3478         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3479                                      mac_table.in_use, ETH_ALEN),
3480 
3481         /* Note: this is an array of uint32_t that has always been saved
3482          * as a raw buffer, so mind the endianness; it is actually used
3483          * as a bitmap built on those uint32_t words.
3484          */
3485         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3486         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3487                          vmstate_virtio_net_has_vnet),
3488         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3489         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3490         VMSTATE_UINT8(alluni, VirtIONet),
3491         VMSTATE_UINT8(nomulti, VirtIONet),
3492         VMSTATE_UINT8(nouni, VirtIONet),
3493         VMSTATE_UINT8(nobcast, VirtIONet),
3494         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3495                          vmstate_virtio_net_has_ufo),
3496         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3497                             vmstate_info_uint16_equal, uint16_t),
3498         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3499         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3500                          vmstate_virtio_net_tx_waiting),
3501         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3502                             has_ctrl_guest_offloads),
3503         VMSTATE_END_OF_LIST()
3504     },
3505     .subsections = (const VMStateDescription * const []) {
3506         &vmstate_virtio_net_rss,
3507         &vhost_user_net_backend_state,
3508         NULL
3509     }
3510 };
3511 
3512 static NetClientInfo net_virtio_info = {
3513     .type = NET_CLIENT_DRIVER_NIC,
3514     .size = sizeof(NICState),
3515     .can_receive = virtio_net_can_receive,
3516     .receive = virtio_net_receive,
3517     .link_status_changed = virtio_net_set_link_status,
3518     .query_rx_filter = virtio_net_query_rxfilter,
3519     .announce = virtio_net_announce,
3520 };
3521 
3522 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3523 {
3524     VirtIONet *n = VIRTIO_NET(vdev);
3525     NetClientState *nc;
3526     assert(n->vhost_started);
3527     if (!n->multiqueue && idx == 2) {
3528         /* Must guard against invalid features and a bogus queue index
3529          * being set by a malicious guest or arriving through a buggy
3530          * migration stream.
3531          */
3532         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3533             qemu_log_mask(LOG_GUEST_ERROR,
3534                           "%s: bogus vq index ignored\n", __func__);
3535             return false;
3536         }
3537         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3538     } else {
3539         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3540     }
3541     /*
3542      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3543      * used as the index for the config interrupt.  If this is not
3544      * supported by the driver, the function returns false.
3545      */
3546 
3547     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3548         return vhost_net_config_pending(get_vhost_net(nc->peer));
3549     }
3550     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3551 }
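/*
 * Mapping between virtqueue index and queue pair, as used above (vq2q is
 * assumed to be idx / 2, matching how queues are added in realize):
 *
 *     idx 0 -> rx of pair 0      idx 1 -> tx of pair 0
 *     idx 2 -> rx of pair 1 (multiqueue) or the ctrl vq (single queue)
 *     ...
 *     idx 2 * max_queue_pairs -> ctrl vq (multiqueue)
 *
 * which is why idx == 2 needs the special-casing when !multiqueue.
 */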
3552 
3553 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3554                                            bool mask)
3555 {
3556     VirtIONet *n = VIRTIO_NET(vdev);
3557     NetClientState *nc;
3558     assert(n->vhost_started);
3559     if (!n->multiqueue && idx == 2) {
3560         /* Must guard against invalid features and a bogus queue index
3561          * being set by a malicious guest or arriving through a buggy
3562          * migration stream.
3563          */
3564         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3565             qemu_log_mask(LOG_GUEST_ERROR,
3566                           "%s: bogus vq index ignored\n", __func__);
3567             return;
3568         }
3569         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3570     } else {
3571         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3572     }
3573     /*
3574      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3575      * used as the index for the config interrupt.  If this is not
3576      * supported by the driver, the function returns early.
3577      */
3578 
3579     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3580         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3581         return;
3582     }
3583     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3584 }
3585 
3586 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3587 {
3588     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3589 
3590     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3591 }
3592 
3593 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3594                                    const char *type)
3595 {
3596     /*
3597      * The name can be NULL; in that case the netclient name will be type.x.
3598      */
3599     assert(type != NULL);
3600 
3601     g_free(n->netclient_name);
3602     g_free(n->netclient_type);
3603     n->netclient_name = g_strdup(name);
3604     n->netclient_type = g_strdup(type);
3605 }
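/*
 * Typical use, e.g. from a transport that wants the netclient named after
 * itself rather than after the generic type (names here are hypothetical):
 *
 *     virtio_net_set_netclient_name(n, "vhost-user-net0", "vhost-user");
 *
 * If this is never called, realize falls back to the object's type name
 * and dev->id (see virtio_net_device_realize below).
 */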
3606 
3607 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3608 {
3609     HotplugHandler *hotplug_ctrl;
3610     PCIDevice *pci_dev;
3611     Error *err = NULL;
3612 
3613     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3614     if (hotplug_ctrl) {
3615         pci_dev = PCI_DEVICE(dev);
3616         pci_dev->partially_hotplugged = true;
3617         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3618         if (err) {
3619             error_report_err(err);
3620             return false;
3621         }
3622     } else {
3623         return false;
3624     }
3625     return true;
3626 }
3627 
3628 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3629                                     Error **errp)
3630 {
3631     Error *err = NULL;
3632     HotplugHandler *hotplug_ctrl;
3633     PCIDevice *pdev = PCI_DEVICE(dev);
3634     BusState *primary_bus;
3635 
3636     if (!pdev->partially_hotplugged) {
3637         return true;
3638     }
3639     primary_bus = dev->parent_bus;
3640     if (!primary_bus) {
3641         error_setg(errp, "virtio_net: couldn't find primary bus");
3642         return false;
3643     }
3644     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3645     qatomic_set(&n->failover_primary_hidden, false);
3646     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3647     if (hotplug_ctrl) {
3648         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3649         if (err) {
3650             goto out;
3651         }
3652         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3653     }
3654     pdev->partially_hotplugged = false;
3655 
3656 out:
3657     error_propagate(errp, err);
3658     return !err;
3659 }
3660 
3661 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3662 {
3663     bool should_be_hidden;
3664     Error *err = NULL;
3665     DeviceState *dev = failover_find_primary_device(n);
3666 
3667     if (!dev) {
3668         return;
3669     }
3670 
3671     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3672 
3673     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3674         if (failover_unplug_primary(n, dev)) {
3675             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3676             qapi_event_send_unplug_primary(dev->id);
3677             qatomic_set(&n->failover_primary_hidden, true);
3678         } else {
3679             warn_report("couldn't unplug primary device");
3680         }
3681     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3682         /* We already unplugged the device; let's plug it back */
3683         if (!failover_replug_primary(n, dev, &err)) {
3684             if (err) {
3685                 error_report_err(err);
3686             }
3687         }
3688     }
3689 }
3690 
3691 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3692                                                MigrationEvent *e, Error **errp)
3693 {
3694     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3695     virtio_net_handle_migration_primary(n, e);
3696     return 0;
3697 }
3698 
3699 static bool failover_hide_primary_device(DeviceListener *listener,
3700                                          const QDict *device_opts,
3701                                          bool from_json,
3702                                          Error **errp)
3703 {
3704     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3705     const char *standby_id;
3706 
3707     if (!device_opts) {
3708         return false;
3709     }
3710 
3711     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3712         return false;
3713     }
3714 
3715     if (!qdict_haskey(device_opts, "id")) {
3716         error_setg(errp, "Device with failover_pair_id needs to have id");
3717         return false;
3718     }
3719 
3720     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3721     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3722         return false;
3723     }
3724 
3725     /*
3726      * The hide helper can be called several times for a given device.
3727      * Check that there is only one primary per virtio-net device, but
3728      * don't clone the qdict again if it's called repeatedly for the
3729      * same device.
3730      */
3731     if (n->primary_opts) {
3732         const char *old, *new;
3733         /* devices with failover_pair_id always have an id */
3734         old = qdict_get_str(n->primary_opts, "id");
3735         new = qdict_get_str(device_opts, "id");
3736         if (strcmp(old, new) != 0) {
3737             error_setg(errp, "Cannot attach more than one primary device to "
3738                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3739             return false;
3740         }
3741     } else {
3742         n->primary_opts = qdict_clone_shallow(device_opts);
3743         n->primary_opts_from_json = from_json;
3744     }
3745 
3746     /* failover_primary_hidden is set during feature negotiation */
3747     return qatomic_read(&n->failover_primary_hidden);
3748 }
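/*
 * A command line that exercises this listener (IDs and the host address
 * are hypothetical):
 *
 *     -device virtio-net-pci,id=net1,netdev=hn1,failover=on
 *     -device vfio-pci,host=01:00.0,id=hostdev0,failover_pair_id=net1
 *
 * The vfio-pci primary is hidden at startup because its failover_pair_id
 * matches the standby's netclient name and failover_primary_hidden is
 * still true; it is plugged in once the guest acks VIRTIO_NET_F_STANDBY.
 */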
3749 
3750 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3751 {
3752     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3753     VirtIONet *n = VIRTIO_NET(dev);
3754     NetClientState *nc;
3755     int i;
3756 
3757     if (n->net_conf.mtu) {
3758         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3759     }
3760 
3761     if (n->net_conf.duplex_str) {
3762         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3763             n->net_conf.duplex = DUPLEX_HALF;
3764         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3765             n->net_conf.duplex = DUPLEX_FULL;
3766         } else {
3767             error_setg(errp, "'duplex' must be 'half' or 'full'");
3768             return;
3769         }
3770         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3771     } else {
3772         n->net_conf.duplex = DUPLEX_UNKNOWN;
3773     }
3774 
3775     if (n->net_conf.speed < SPEED_UNKNOWN) {
3776         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3777         return;
3778     }
3779     if (n->net_conf.speed >= 0) {
3780         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3781     }
3782 
3783     if (n->failover) {
3784         n->primary_listener.hide_device = failover_hide_primary_device;
3785         qatomic_set(&n->failover_primary_hidden, true);
3786         device_listener_register(&n->primary_listener);
3787         migration_add_notifier(&n->migration_state,
3788                                virtio_net_migration_state_notifier);
3789         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3790     }
3791 
3792     virtio_net_set_config_size(n, n->host_features);
3793     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3794 
3795     /*
3796      * We set the lower limit on the RX queue size to the value it has
3797      * always had.  Guests that want a smaller ring can resize it
3798      * themselves (with virtio 1 and up) without help from us.
3799      */
3800     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3801         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3802         !is_power_of_2(n->net_conf.rx_queue_size)) {
3803         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3804                    "must be a power of 2 between %d and %d.",
3805                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3806                    VIRTQUEUE_MAX_SIZE);
3807         virtio_cleanup(vdev);
3808         return;
3809     }
3810 
3811     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3812         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3813         !is_power_of_2(n->net_conf.tx_queue_size)) {
3814         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3815                    "must be a power of 2 between %d and %d",
3816                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3817                    virtio_net_max_tx_queue_size(n));
3818         virtio_cleanup(vdev);
3819         return;
3820     }
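    /*
     * Concretely: with the 256-entry defaults and VIRTQUEUE_MAX_SIZE of
     * 1024, the accepted rx_queue_size values are 256, 512 and 1024; the
     * tx upper bound additionally depends on the backend via
     * virtio_net_max_tx_queue_size(n).
     */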
3821 
3822     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3823 
3824     /*
3825      * Figure out the number of datapath queue pairs, since the backend
3826      * could provide a control queue via the peers as well.
3827      */
3828     if (n->nic_conf.peers.queues) {
3829         for (i = 0; i < n->max_ncs; i++) {
3830             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3831                 ++n->max_queue_pairs;
3832             }
3833         }
3834     }
3835     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3836 
3837     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3838         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3839                    "must be a positive integer less than %d.",
3840                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3841         virtio_cleanup(vdev);
3842         return;
3843     }
3844     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3845     n->curr_queue_pairs = 1;
3846     n->tx_timeout = n->net_conf.txtimer;
3847 
3848     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3849                        && strcmp(n->net_conf.tx, "bh")) {
3850         warn_report("virtio-net: "
3851                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3852                     n->net_conf.tx);
3853         error_printf("Defaulting to \"bh\"");
3854     }
3855 
3856     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3857                                     n->net_conf.tx_queue_size);
3858 
3859     virtio_net_add_queue(n, 0);
3860 
3861     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3862     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3863     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3864     n->status = VIRTIO_NET_S_LINK_UP;
3865     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3866                               QEMU_CLOCK_VIRTUAL,
3867                               virtio_net_announce_timer, n);
3868     n->announce_timer.round = 0;
3869 
3870     if (n->netclient_type) {
3871         /*
3872          * This happens when virtio_net_set_netclient_name has been called.
3873          */
3874         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3875                               n->netclient_type, n->netclient_name,
3876                               &dev->mem_reentrancy_guard, n);
3877     } else {
3878         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3879                               object_get_typename(OBJECT(dev)), dev->id,
3880                               &dev->mem_reentrancy_guard, n);
3881     }
3882 
3883     for (i = 0; i < n->max_queue_pairs; i++) {
3884         n->nic->ncs[i].do_not_pad = true;
3885     }
3886 
3887     peer_test_vnet_hdr(n);
3888     if (peer_has_vnet_hdr(n)) {
3889         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3890     } else {
3891         n->host_hdr_len = 0;
3892     }
3893 
3894     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3895 
3896     n->vqs[0].tx_waiting = 0;
3897     n->tx_burst = n->net_conf.txburst;
3898     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3899     n->promisc = 1; /* for compatibility */
3900 
3901     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3902 
3903     n->vlans = g_malloc0(MAX_VLAN >> 3);
3904 
3905     nc = qemu_get_queue(n->nic);
3906     nc->rxfilter_notify_enabled = 1;
3907 
3908     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3909         struct virtio_net_config netcfg = {};
3910         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3911         vhost_net_set_config(get_vhost_net(nc->peer),
3912             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3913     }
3914     QTAILQ_INIT(&n->rsc_chains);
3915     n->qdev = dev;
3916 
3917     net_rx_pkt_init(&n->rx_pkt);
3918 
3919     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3920         virtio_net_load_ebpf(n, errp);
3921     }
3922 }
3923 
3924 static void virtio_net_device_unrealize(DeviceState *dev)
3925 {
3926     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3927     VirtIONet *n = VIRTIO_NET(dev);
3928     int i, max_queue_pairs;
3929 
3930     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3931         virtio_net_unload_ebpf(n);
3932     }
3933 
3934     /* This will stop vhost backend if appropriate. */
3935     virtio_net_set_status(vdev, 0);
3936 
3937     g_free(n->netclient_name);
3938     n->netclient_name = NULL;
3939     g_free(n->netclient_type);
3940     n->netclient_type = NULL;
3941 
3942     g_free(n->mac_table.macs);
3943     g_free(n->vlans);
3944 
3945     if (n->failover) {
3946         qobject_unref(n->primary_opts);
3947         device_listener_unregister(&n->primary_listener);
3948         migration_remove_notifier(&n->migration_state);
3949     } else {
3950         assert(n->primary_opts == NULL);
3951     }
3952 
3953     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3954     for (i = 0; i < max_queue_pairs; i++) {
3955         virtio_net_del_queue(n, i);
3956     }
3957     /* also delete the control vq */
3958     virtio_del_queue(vdev, max_queue_pairs * 2);
3959     qemu_announce_timer_del(&n->announce_timer, false);
3960     g_free(n->vqs);
3961     qemu_del_nic(n->nic);
3962     virtio_net_rsc_cleanup(n);
3963     g_free(n->rss_data.indirections_table);
3964     net_rx_pkt_uninit(n->rx_pkt);
3965     virtio_cleanup(vdev);
3966 }
3967 
3968 static void virtio_net_reset(VirtIODevice *vdev)
3969 {
3970     VirtIONet *n = VIRTIO_NET(vdev);
3971     int i;
3972 
3973     /* Reset back to compatibility mode */
3974     n->promisc = 1;
3975     n->allmulti = 0;
3976     n->alluni = 0;
3977     n->nomulti = 0;
3978     n->nouni = 0;
3979     n->nobcast = 0;
3980     /* multiqueue is disabled by default */
3981     n->curr_queue_pairs = 1;
3982     timer_del(n->announce_timer.tm);
3983     n->announce_timer.round = 0;
3984     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3985 
3986     /* Flush any MAC and VLAN filter table state */
3987     n->mac_table.in_use = 0;
3988     n->mac_table.first_multi = 0;
3989     n->mac_table.multi_overflow = 0;
3990     n->mac_table.uni_overflow = 0;
3991     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3992     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3993     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3994     memset(n->vlans, 0, MAX_VLAN >> 3);
3995 
3996     /* Flush any async TX */
3997     for (i = 0; i < n->max_queue_pairs; i++) {
3998         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
3999     }
4000 
4001     virtio_net_disable_rss(n);
4002 }
4003 
4004 static void virtio_net_instance_init(Object *obj)
4005 {
4006     VirtIONet *n = VIRTIO_NET(obj);
4007 
4008     /*
4009      * The default config_size is sizeof(struct virtio_net_config).
4010      * Can be overridden with virtio_net_set_config_size.
4011      */
4012     n->config_size = sizeof(struct virtio_net_config);
4013     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
4014                                   "bootindex", "/ethernet-phy@0",
4015                                   DEVICE(n));
4016 
4017     ebpf_rss_init(&n->ebpf_rss);
4018 }
4019 
4020 static int virtio_net_pre_save(void *opaque)
4021 {
4022     VirtIONet *n = opaque;
4023 
4024     /* At this point the backend must be stopped; otherwise
4025      * it might keep writing to memory. */
4026     assert(!n->vhost_started);
4027 
4028     return 0;
4029 }
4030 
4031 static bool primary_unplug_pending(void *opaque)
4032 {
4033     DeviceState *dev = opaque;
4034     DeviceState *primary;
4035     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
4036     VirtIONet *n = VIRTIO_NET(vdev);
4037 
4038     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
4039         return false;
4040     }
4041     primary = failover_find_primary_device(n);
4042     return primary ? primary->pending_deleted_event : false;
4043 }
4044 
4045 static bool dev_unplug_pending(void *opaque)
4046 {
4047     DeviceState *dev = opaque;
4048     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
4049 
4050     return vdc->primary_unplug_pending(dev);
4051 }
4052 
4053 static const VMStateDescription vmstate_virtio_net = {
4054     .name = "virtio-net",
4055     .minimum_version_id = VIRTIO_NET_VM_VERSION,
4056     .version_id = VIRTIO_NET_VM_VERSION,
4057     .fields = (const VMStateField[]) {
4058         VMSTATE_VIRTIO_DEVICE,
4059         VMSTATE_END_OF_LIST()
4060     },
4061     .pre_save = virtio_net_pre_save,
4062     .dev_unplug_pending = dev_unplug_pending,
4063 };
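/*
 * Note the two-level arrangement: this wrapper is installed as dc->vmsd
 * and contains only VMSTATE_VIRTIO_DEVICE, which pulls in the common
 * virtio state and then the device payload in vdc->vmsd
 * (vmstate_virtio_net_device above); virtio_net_class_init below wires
 * up both:
 *
 *     dc->vmsd  = &vmstate_virtio_net;         // outer wrapper
 *     vdc->vmsd = &vmstate_virtio_net_device;  // device payload
 */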
4064 
4065 static const Property virtio_net_properties[] = {
4066     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
4067                     VIRTIO_NET_F_CSUM, true),
4068     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
4069                     VIRTIO_NET_F_GUEST_CSUM, true),
4070     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
4071     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
4072                     VIRTIO_NET_F_GUEST_TSO4, true),
4073     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
4074                     VIRTIO_NET_F_GUEST_TSO6, true),
4075     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
4076                     VIRTIO_NET_F_GUEST_ECN, true),
4077     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
4078                     VIRTIO_NET_F_GUEST_UFO, true),
4079     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
4080                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
4081     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
4082                     VIRTIO_NET_F_HOST_TSO4, true),
4083     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
4084                     VIRTIO_NET_F_HOST_TSO6, true),
4085     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
4086                     VIRTIO_NET_F_HOST_ECN, true),
4087     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
4088                     VIRTIO_NET_F_HOST_UFO, true),
4089     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
4090                     VIRTIO_NET_F_MRG_RXBUF, true),
4091     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
4092                     VIRTIO_NET_F_STATUS, true),
4093     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
4094                     VIRTIO_NET_F_CTRL_VQ, true),
4095     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
4096                     VIRTIO_NET_F_CTRL_RX, true),
4097     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4098                     VIRTIO_NET_F_CTRL_VLAN, true),
4099     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4100                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4101     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4102                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4103     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4104                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4105     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4106     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4107                     VIRTIO_NET_F_RSS, false),
4108     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4109                     VIRTIO_NET_F_HASH_REPORT, false),
4110     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4111                       ebpf_rss_fds, qdev_prop_string, char*),
4112     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4113                     VIRTIO_NET_F_RSC_EXT, false),
4114     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4115                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4116     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4117     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4118                        TX_TIMER_INTERVAL),
4119     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4120     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4121     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4122                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4123     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4124                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4125     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4126     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4127                      true),
4128     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4129     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4130     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4131     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4132                       VIRTIO_NET_F_GUEST_USO4, true),
4133     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4134                       VIRTIO_NET_F_GUEST_USO6, true),
4135     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4136                       VIRTIO_NET_F_HOST_USO, true),
4137 };
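/*
 * These properties surface on the device's command line, e.g. (values
 * are illustrative):
 *
 *     -device virtio-net-pci,netdev=n0,mq=on,rss=on,rx_queue_size=1024,
 *             tx_queue_size=1024,host_mtu=9000
 */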
4138 
4139 static void virtio_net_class_init(ObjectClass *klass, const void *data)
4140 {
4141     DeviceClass *dc = DEVICE_CLASS(klass);
4142     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4143 
4144     device_class_set_props(dc, virtio_net_properties);
4145     dc->vmsd = &vmstate_virtio_net;
4146     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4147     vdc->realize = virtio_net_device_realize;
4148     vdc->unrealize = virtio_net_device_unrealize;
4149     vdc->get_config = virtio_net_get_config;
4150     vdc->set_config = virtio_net_set_config;
4151     vdc->get_features = virtio_net_get_features;
4152     vdc->set_features = virtio_net_set_features;
4153     vdc->bad_features = virtio_net_bad_features;
4154     vdc->reset = virtio_net_reset;
4155     vdc->queue_reset = virtio_net_queue_reset;
4156     vdc->queue_enable = virtio_net_queue_enable;
4157     vdc->set_status = virtio_net_set_status;
4158     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4159     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4160     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4161     vdc->pre_load_queues = virtio_net_pre_load_queues;
4162     vdc->post_load = virtio_net_post_load_virtio;
4163     vdc->vmsd = &vmstate_virtio_net_device;
4164     vdc->primary_unplug_pending = primary_unplug_pending;
4165     vdc->get_vhost = virtio_net_get_vhost;
4166     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4167 }
4168 
4169 static const TypeInfo virtio_net_info = {
4170     .name = TYPE_VIRTIO_NET,
4171     .parent = TYPE_VIRTIO_DEVICE,
4172     .instance_size = sizeof(VirtIONet),
4173     .instance_init = virtio_net_instance_init,
4174     .class_init = virtio_net_class_init,
4175 };
4176 
4177 static void virtio_register_types(void)
4178 {
4179     type_register_static(&virtio_net_info);
4180 }
4181 
4182 type_init(virtio_register_types)
4183