xref: /openbmc/qemu/hw/net/virtio-net.c (revision c63ca4ff)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
#define VIRTIO_NET_VM_VERSION    11

/* Number of ETH_ALEN-sized slots in the MAC filter table */
#define MAC_TABLE_ENTRIES    64
/* Number of distinct VLAN IDs, per the 802.1Q definition */
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* Default queue sizes; 256 was previously the fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* For now, only allow larger queues; with virtio-1, the guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload; the length field in the header is 16 bits wide */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* Header length value in an IP header that carries no options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Hash types reported in the supported_hash_types config field */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
/*
 * For each feature bit (or group of bits), the offset just past the last
 * struct virtio_net_config field associated with that feature.  The table is
 * terminated by an all-zero entry.
 * NOTE(review): presumably consumed when sizing the config space
 * (n->config_size) from the offered features -- confirm against the user.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    /* Either RSS or hash reporting needs the fields up to the hash types */
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
/* Map a virtqueue index to a queue index: integer division by two. */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126     VirtIONet *n = VIRTIO_NET(vdev);
127     struct virtio_net_config netcfg;
128     NetClientState *nc = qemu_get_queue(n->nic);
129 
130     int ret = 0;
131     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
132     virtio_stw_p(vdev, &netcfg.status, n->status);
133     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
134     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
135     memcpy(netcfg.mac, n->mac, ETH_ALEN);
136     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
137     netcfg.duplex = n->net_conf.duplex;
138     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
139     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
140                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
141                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
142     virtio_stl_p(vdev, &netcfg.supported_hash_types,
143                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
144     memcpy(config, &netcfg, n->config_size);
145 
146     /*
147      * Is this VDPA? No peer means not VDPA: there's no way to
148      * disconnect/reconnect a VDPA peer.
149      */
150     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
151         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
152                                    n->config_size);
153         if (ret != -1) {
154             memcpy(config, &netcfg, n->config_size);
155         }
156     }
157 }
158 
159 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
160 {
161     VirtIONet *n = VIRTIO_NET(vdev);
162     struct virtio_net_config netcfg = {};
163     NetClientState *nc = qemu_get_queue(n->nic);
164 
165     memcpy(&netcfg, config, n->config_size);
166 
167     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
168         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
169         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
170         memcpy(n->mac, netcfg.mac, ETH_ALEN);
171         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
172     }
173 
174     /*
175      * Is this VDPA? No peer means not VDPA: there's no way to
176      * disconnect/reconnect a VDPA peer.
177      */
178     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
179         vhost_net_set_config(get_vhost_net(nc->peer),
180                              (uint8_t *)&netcfg, 0, n->config_size,
181                              VHOST_SET_CONFIG_TYPE_MASTER);
182       }
183 }
184 
185 static bool virtio_net_started(VirtIONet *n, uint8_t status)
186 {
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
189         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
190 }
191 
192 static void virtio_net_announce_notify(VirtIONet *net)
193 {
194     VirtIODevice *vdev = VIRTIO_DEVICE(net);
195     trace_virtio_net_announce_notify();
196 
197     net->status |= VIRTIO_NET_S_ANNOUNCE;
198     virtio_notify_config(vdev);
199 }
200 
201 static void virtio_net_announce_timer(void *opaque)
202 {
203     VirtIONet *n = opaque;
204     trace_virtio_net_announce_timer(n->announce_timer.round);
205 
206     n->announce_timer.round--;
207     virtio_net_announce_notify(n);
208 }
209 
210 static void virtio_net_announce(NetClientState *nc)
211 {
212     VirtIONet *n = qemu_get_nic_opaque(nc);
213     VirtIODevice *vdev = VIRTIO_DEVICE(n);
214 
215     /*
216      * Make sure the virtio migration announcement timer isn't running
217      * If it is, let it trigger announcement so that we do not cause
218      * confusion.
219      */
220     if (n->announce_timer.round) {
221         return;
222     }
223 
224     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
225         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
226             virtio_net_announce_notify(n);
227     }
228 }
229 
230 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
231 {
232     VirtIODevice *vdev = VIRTIO_DEVICE(n);
233     NetClientState *nc = qemu_get_queue(n->nic);
234     int queues = n->multiqueue ? n->max_queues : 1;
235 
236     if (!get_vhost_net(nc->peer)) {
237         return;
238     }
239 
240     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
241         !!n->vhost_started) {
242         return;
243     }
244     if (!n->vhost_started) {
245         int r, i;
246 
247         if (n->needs_vnet_hdr_swap) {
248             error_report("backend does not support %s vnet headers; "
249                          "falling back on userspace virtio",
250                          virtio_is_big_endian(vdev) ? "BE" : "LE");
251             return;
252         }
253 
254         /* Any packets outstanding? Purge them to avoid touching rings
255          * when vhost is running.
256          */
257         for (i = 0;  i < queues; i++) {
258             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
259 
260             /* Purge both directions: TX and RX. */
261             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
262             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
263         }
264 
265         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
266             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
267             if (r < 0) {
268                 error_report("%uBytes MTU not supported by the backend",
269                              n->net_conf.mtu);
270 
271                 return;
272             }
273         }
274 
275         n->vhost_started = 1;
276         r = vhost_net_start(vdev, n->nic->ncs, queues);
277         if (r < 0) {
278             error_report("unable to start vhost net: %d: "
279                          "falling back on userspace virtio", -r);
280             n->vhost_started = 0;
281         }
282     } else {
283         vhost_net_stop(vdev, n->nic->ncs, queues);
284         n->vhost_started = 0;
285     }
286 }
287 
288 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
289                                           NetClientState *peer,
290                                           bool enable)
291 {
292     if (virtio_is_big_endian(vdev)) {
293         return qemu_set_vnet_be(peer, enable);
294     } else {
295         return qemu_set_vnet_le(peer, enable);
296     }
297 }
298 
299 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
300                                        int queues, bool enable)
301 {
302     int i;
303 
304     for (i = 0; i < queues; i++) {
305         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
306             enable) {
307             while (--i >= 0) {
308                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
309             }
310 
311             return true;
312         }
313     }
314 
315     return false;
316 }
317 
318 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
319 {
320     VirtIODevice *vdev = VIRTIO_DEVICE(n);
321     int queues = n->multiqueue ? n->max_queues : 1;
322 
323     if (virtio_net_started(n, status)) {
324         /* Before using the device, we tell the network backend about the
325          * endianness to use when parsing vnet headers. If the backend
326          * can't do it, we fallback onto fixing the headers in the core
327          * virtio-net code.
328          */
329         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
330                                                             queues, true);
331     } else if (virtio_net_started(n, vdev->status)) {
332         /* After using the device, we need to reset the network backend to
333          * the default (guest native endianness), otherwise the guest may
334          * lose network connectivity if it is rebooted into a different
335          * endianness.
336          */
337         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
338     }
339 }
340 
341 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
342 {
343     unsigned int dropped = virtqueue_drop_all(vq);
344     if (dropped) {
345         virtio_notify(vdev, vq);
346     }
347 }
348 
349 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
350 {
351     VirtIONet *n = VIRTIO_NET(vdev);
352     VirtIONetQueue *q;
353     int i;
354     uint8_t queue_status;
355 
356     virtio_net_vnet_endian_status(n, status);
357     virtio_net_vhost_status(n, status);
358 
359     for (i = 0; i < n->max_queues; i++) {
360         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
361         bool queue_started;
362         q = &n->vqs[i];
363 
364         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
365             queue_status = 0;
366         } else {
367             queue_status = status;
368         }
369         queue_started =
370             virtio_net_started(n, queue_status) && !n->vhost_started;
371 
372         if (queue_started) {
373             qemu_flush_queued_packets(ncs);
374         }
375 
376         if (!q->tx_waiting) {
377             continue;
378         }
379 
380         if (queue_started) {
381             if (q->tx_timer) {
382                 timer_mod(q->tx_timer,
383                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
384             } else {
385                 qemu_bh_schedule(q->tx_bh);
386             }
387         } else {
388             if (q->tx_timer) {
389                 timer_del(q->tx_timer);
390             } else {
391                 qemu_bh_cancel(q->tx_bh);
392             }
393             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
394                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
395                 vdev->vm_running) {
396                 /* if tx is waiting we are likely have some packets in tx queue
397                  * and disabled notification */
398                 q->tx_waiting = 0;
399                 virtio_queue_set_notification(q->tx_vq, 1);
400                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
401             }
402         }
403     }
404 }
405 
406 static void virtio_net_set_link_status(NetClientState *nc)
407 {
408     VirtIONet *n = qemu_get_nic_opaque(nc);
409     VirtIODevice *vdev = VIRTIO_DEVICE(n);
410     uint16_t old_status = n->status;
411 
412     if (nc->link_down)
413         n->status &= ~VIRTIO_NET_S_LINK_UP;
414     else
415         n->status |= VIRTIO_NET_S_LINK_UP;
416 
417     if (n->status != old_status)
418         virtio_notify_config(vdev);
419 
420     virtio_net_set_status(vdev, vdev->status);
421 }
422 
423 static void rxfilter_notify(NetClientState *nc)
424 {
425     VirtIONet *n = qemu_get_nic_opaque(nc);
426 
427     if (nc->rxfilter_notify_enabled) {
428         char *path = object_get_canonical_path(OBJECT(n->qdev));
429         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
430                                               n->netclient_name, path);
431         g_free(path);
432 
433         /* disable event notification to avoid events flooding */
434         nc->rxfilter_notify_enabled = 0;
435     }
436 }
437 
438 static intList *get_vlan_table(VirtIONet *n)
439 {
440     intList *list, *entry;
441     int i, j;
442 
443     list = NULL;
444     for (i = 0; i < MAX_VLAN >> 5; i++) {
445         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
446             if (n->vlans[i] & (1U << j)) {
447                 entry = g_malloc0(sizeof(*entry));
448                 entry->value = (i << 5) + j;
449                 entry->next = list;
450                 list = entry;
451             }
452         }
453     }
454 
455     return list;
456 }
457 
458 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
459 {
460     VirtIONet *n = qemu_get_nic_opaque(nc);
461     VirtIODevice *vdev = VIRTIO_DEVICE(n);
462     RxFilterInfo *info;
463     strList *str_list, *entry;
464     int i;
465 
466     info = g_malloc0(sizeof(*info));
467     info->name = g_strdup(nc->name);
468     info->promiscuous = n->promisc;
469 
470     if (n->nouni) {
471         info->unicast = RX_STATE_NONE;
472     } else if (n->alluni) {
473         info->unicast = RX_STATE_ALL;
474     } else {
475         info->unicast = RX_STATE_NORMAL;
476     }
477 
478     if (n->nomulti) {
479         info->multicast = RX_STATE_NONE;
480     } else if (n->allmulti) {
481         info->multicast = RX_STATE_ALL;
482     } else {
483         info->multicast = RX_STATE_NORMAL;
484     }
485 
486     info->broadcast_allowed = n->nobcast;
487     info->multicast_overflow = n->mac_table.multi_overflow;
488     info->unicast_overflow = n->mac_table.uni_overflow;
489 
490     info->main_mac = qemu_mac_strdup_printf(n->mac);
491 
492     str_list = NULL;
493     for (i = 0; i < n->mac_table.first_multi; i++) {
494         entry = g_malloc0(sizeof(*entry));
495         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
496         entry->next = str_list;
497         str_list = entry;
498     }
499     info->unicast_table = str_list;
500 
501     str_list = NULL;
502     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
503         entry = g_malloc0(sizeof(*entry));
504         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
505         entry->next = str_list;
506         str_list = entry;
507     }
508     info->multicast_table = str_list;
509     info->vlan_table = get_vlan_table(n);
510 
511     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
512         info->vlan = RX_STATE_ALL;
513     } else if (!info->vlan_table) {
514         info->vlan = RX_STATE_NONE;
515     } else {
516         info->vlan = RX_STATE_NORMAL;
517     }
518 
519     /* enable event notification after query */
520     nc->rxfilter_notify_enabled = 1;
521 
522     return info;
523 }
524 
525 static void virtio_net_reset(VirtIODevice *vdev)
526 {
527     VirtIONet *n = VIRTIO_NET(vdev);
528     int i;
529 
530     /* Reset back to compatibility mode */
531     n->promisc = 1;
532     n->allmulti = 0;
533     n->alluni = 0;
534     n->nomulti = 0;
535     n->nouni = 0;
536     n->nobcast = 0;
537     /* multiqueue is disabled by default */
538     n->curr_queues = 1;
539     timer_del(n->announce_timer.tm);
540     n->announce_timer.round = 0;
541     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
542 
543     /* Flush any MAC and VLAN filter table state */
544     n->mac_table.in_use = 0;
545     n->mac_table.first_multi = 0;
546     n->mac_table.multi_overflow = 0;
547     n->mac_table.uni_overflow = 0;
548     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
549     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
550     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
551     memset(n->vlans, 0, MAX_VLAN >> 3);
552 
553     /* Flush any async TX */
554     for (i = 0;  i < n->max_queues; i++) {
555         NetClientState *nc = qemu_get_subqueue(n->nic, i);
556 
557         if (nc->peer) {
558             qemu_flush_or_purge_queued_packets(nc->peer, true);
559             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
560         }
561     }
562 }
563 
564 static void peer_test_vnet_hdr(VirtIONet *n)
565 {
566     NetClientState *nc = qemu_get_queue(n->nic);
567     if (!nc->peer) {
568         return;
569     }
570 
571     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
572 }
573 
574 static int peer_has_vnet_hdr(VirtIONet *n)
575 {
576     return n->has_vnet_hdr;
577 }
578 
579 static int peer_has_ufo(VirtIONet *n)
580 {
581     if (!peer_has_vnet_hdr(n))
582         return 0;
583 
584     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
585 
586     return n->has_ufo;
587 }
588 
589 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
590                                        int version_1, int hash_report)
591 {
592     int i;
593     NetClientState *nc;
594 
595     n->mergeable_rx_bufs = mergeable_rx_bufs;
596 
597     if (version_1) {
598         n->guest_hdr_len = hash_report ?
599             sizeof(struct virtio_net_hdr_v1_hash) :
600             sizeof(struct virtio_net_hdr_mrg_rxbuf);
601         n->rss_data.populate_hash = !!hash_report;
602     } else {
603         n->guest_hdr_len = n->mergeable_rx_bufs ?
604             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
605             sizeof(struct virtio_net_hdr);
606     }
607 
608     for (i = 0; i < n->max_queues; i++) {
609         nc = qemu_get_subqueue(n->nic, i);
610 
611         if (peer_has_vnet_hdr(n) &&
612             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
613             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
614             n->host_hdr_len = n->guest_hdr_len;
615         }
616     }
617 }
618 
619 static int virtio_net_max_tx_queue_size(VirtIONet *n)
620 {
621     NetClientState *peer = n->nic_conf.peers.ncs[0];
622 
623     /*
624      * Backends other than vhost-user don't support max queue size.
625      */
626     if (!peer) {
627         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
628     }
629 
630     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
631         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
632     }
633 
634     return VIRTQUEUE_MAX_SIZE;
635 }
636 
637 static int peer_attach(VirtIONet *n, int index)
638 {
639     NetClientState *nc = qemu_get_subqueue(n->nic, index);
640 
641     if (!nc->peer) {
642         return 0;
643     }
644 
645     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
646         vhost_set_vring_enable(nc->peer, 1);
647     }
648 
649     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
650         return 0;
651     }
652 
653     if (n->max_queues == 1) {
654         return 0;
655     }
656 
657     return tap_enable(nc->peer);
658 }
659 
660 static int peer_detach(VirtIONet *n, int index)
661 {
662     NetClientState *nc = qemu_get_subqueue(n->nic, index);
663 
664     if (!nc->peer) {
665         return 0;
666     }
667 
668     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
669         vhost_set_vring_enable(nc->peer, 0);
670     }
671 
672     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
673         return 0;
674     }
675 
676     return tap_disable(nc->peer);
677 }
678 
679 static void virtio_net_set_queues(VirtIONet *n)
680 {
681     int i;
682     int r;
683 
684     if (n->nic->peer_deleted) {
685         return;
686     }
687 
688     for (i = 0; i < n->max_queues; i++) {
689         if (i < n->curr_queues) {
690             r = peer_attach(n, i);
691             assert(!r);
692         } else {
693             r = peer_detach(n, i);
694             assert(!r);
695         }
696     }
697 }
698 
699 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
700 
701 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
702                                         Error **errp)
703 {
704     VirtIONet *n = VIRTIO_NET(vdev);
705     NetClientState *nc = qemu_get_queue(n->nic);
706 
707     /* Firstly sync all virtio-net possible supported features */
708     features |= n->host_features;
709 
710     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
711 
712     if (!peer_has_vnet_hdr(n)) {
713         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
714         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
715         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
716         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
717 
718         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
719         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
720         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
721         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
722 
723         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
724     }
725 
726     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
727         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
728         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
729     }
730 
731     if (!get_vhost_net(nc->peer)) {
732         return features;
733     }
734 
735     virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
736     virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
737     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
738     vdev->backend_features = features;
739 
740     if (n->mtu_bypass_backend &&
741             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
742         features |= (1ULL << VIRTIO_NET_F_MTU);
743     }
744 
745     return features;
746 }
747 
748 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
749 {
750     uint64_t features = 0;
751 
752     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
753      * but also these: */
754     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
755     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
756     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
757     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
758     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
759 
760     return features;
761 }
762 
763 static void virtio_net_apply_guest_offloads(VirtIONet *n)
764 {
765     qemu_set_offload(qemu_get_queue(n->nic)->peer,
766             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
767             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
768             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
769             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
770             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
771 }
772 
773 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
774 {
775     static const uint64_t guest_offloads_mask =
776         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
777         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
778         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
779         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
780         (1ULL << VIRTIO_NET_F_GUEST_UFO);
781 
782     return guest_offloads_mask & features;
783 }
784 
785 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
786 {
787     VirtIODevice *vdev = VIRTIO_DEVICE(n);
788     return virtio_net_guest_offloads_by_features(vdev->guest_features);
789 }
790 
/* Search state passed to failover_set_primary() through its opaque pointer */
typedef struct {
    VirtIONet *n;   /* device whose netclient_name is being matched */
    char *id;       /* g_strdup()'d id of the matching device opts, set on a match */
} FailoverId;
795 
796 /**
797  * Set the id of the failover primary device
798  *
799  * @opaque: FailoverId to setup
800  * @opts: opts for device we are handling
801  * @errp: returns an error if this function fails
802  */
803 static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
804 {
805     FailoverId *fid = opaque;
806     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
807 
808     if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
809         fid->id = g_strdup(opts->id);
810         return 1;
811     }
812 
813     return 0;
814 }
815 
816 /**
817  * Find the primary device id for this failover virtio-net
818  *
819  * @n: VirtIONet device
820  * @errp: returns an error if this function fails
821  */
822 static char *failover_find_primary_device_id(VirtIONet *n)
823 {
824     Error *err = NULL;
825     FailoverId fid;
826 
827     fid.n = n;
828     if (!qemu_opts_foreach(qemu_find_opts("device"),
829                            failover_set_primary, &fid, &err)) {
830         return NULL;
831     }
832     return fid.id;
833 }
834 
835 /**
836  * Find the primary device for this failover virtio-net
837  *
838  * @n: VirtIONet device
839  * @errp: returns an error if this function fails
840  */
841 static DeviceState *failover_find_primary_device(VirtIONet *n)
842 {
843     char *id = failover_find_primary_device_id(n);
844 
845     if (!id) {
846         return NULL;
847     }
848 
849     return qdev_find_recursive(sysbus_get_default(), id);
850 }
851 
852 static void failover_add_primary(VirtIONet *n, Error **errp)
853 {
854     Error *err = NULL;
855     QemuOpts *opts;
856     char *id;
857     DeviceState *dev = failover_find_primary_device(n);
858 
859     if (dev) {
860         return;
861     }
862 
863     id = failover_find_primary_device_id(n);
864     if (!id) {
865         return;
866     }
867     opts = qemu_opts_find(qemu_find_opts("device"), id);
868     if (opts) {
869         dev = qdev_device_add(opts, &err);
870         if (err) {
871             qemu_opts_del(opts);
872         }
873     } else {
874         error_setg(errp, "Primary device not found");
875         error_append_hint(errp, "Virtio-net failover will not work. Make "
876                           "sure primary device has parameter"
877                           " failover_pair_id=<virtio-net-id>\n");
878     }
879     error_propagate(errp, err);
880 }
881 
882 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
883 {
884     VirtIONet *n = VIRTIO_NET(vdev);
885     Error *err = NULL;
886     int i;
887 
888     if (n->mtu_bypass_backend &&
889             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
890         features &= ~(1ULL << VIRTIO_NET_F_MTU);
891     }
892 
893     virtio_net_set_multiqueue(n,
894                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
895                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
896 
897     virtio_net_set_mrg_rx_bufs(n,
898                                virtio_has_feature(features,
899                                                   VIRTIO_NET_F_MRG_RXBUF),
900                                virtio_has_feature(features,
901                                                   VIRTIO_F_VERSION_1),
902                                virtio_has_feature(features,
903                                                   VIRTIO_NET_F_HASH_REPORT));
904 
905     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
906         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
907     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
908         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
909     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
910 
911     if (n->has_vnet_hdr) {
912         n->curr_guest_offloads =
913             virtio_net_guest_offloads_by_features(features);
914         virtio_net_apply_guest_offloads(n);
915     }
916 
917     for (i = 0;  i < n->max_queues; i++) {
918         NetClientState *nc = qemu_get_subqueue(n->nic, i);
919 
920         if (!get_vhost_net(nc->peer)) {
921             continue;
922         }
923         vhost_net_ack_features(get_vhost_net(nc->peer), features);
924     }
925 
926     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
927         memset(n->vlans, 0, MAX_VLAN >> 3);
928     } else {
929         memset(n->vlans, 0xff, MAX_VLAN >> 3);
930     }
931 
932     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
933         qapi_event_send_failover_negotiated(n->netclient_name);
934         qatomic_set(&n->failover_primary_hidden, false);
935         failover_add_primary(n, &err);
936         if (err) {
937             warn_report_err(err);
938         }
939     }
940 }
941 
942 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
943                                      struct iovec *iov, unsigned int iov_cnt)
944 {
945     uint8_t on;
946     size_t s;
947     NetClientState *nc = qemu_get_queue(n->nic);
948 
949     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
950     if (s != sizeof(on)) {
951         return VIRTIO_NET_ERR;
952     }
953 
954     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
955         n->promisc = on;
956     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
957         n->allmulti = on;
958     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
959         n->alluni = on;
960     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
961         n->nomulti = on;
962     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
963         n->nouni = on;
964     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
965         n->nobcast = on;
966     } else {
967         return VIRTIO_NET_ERR;
968     }
969 
970     rxfilter_notify(nc);
971 
972     return VIRTIO_NET_OK;
973 }
974 
975 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
976                                      struct iovec *iov, unsigned int iov_cnt)
977 {
978     VirtIODevice *vdev = VIRTIO_DEVICE(n);
979     uint64_t offloads;
980     size_t s;
981 
982     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
983         return VIRTIO_NET_ERR;
984     }
985 
986     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
987     if (s != sizeof(offloads)) {
988         return VIRTIO_NET_ERR;
989     }
990 
991     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
992         uint64_t supported_offloads;
993 
994         offloads = virtio_ldq_p(vdev, &offloads);
995 
996         if (!n->has_vnet_hdr) {
997             return VIRTIO_NET_ERR;
998         }
999 
1000         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1001             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1002         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1003             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1004         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1005 
1006         supported_offloads = virtio_net_supported_guest_offloads(n);
1007         if (offloads & ~supported_offloads) {
1008             return VIRTIO_NET_ERR;
1009         }
1010 
1011         n->curr_guest_offloads = offloads;
1012         virtio_net_apply_guest_offloads(n);
1013 
1014         return VIRTIO_NET_OK;
1015     } else {
1016         return VIRTIO_NET_ERR;
1017     }
1018 }
1019 
1020 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1021                                  struct iovec *iov, unsigned int iov_cnt)
1022 {
1023     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1024     struct virtio_net_ctrl_mac mac_data;
1025     size_t s;
1026     NetClientState *nc = qemu_get_queue(n->nic);
1027 
1028     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1029         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1030             return VIRTIO_NET_ERR;
1031         }
1032         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1033         assert(s == sizeof(n->mac));
1034         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1035         rxfilter_notify(nc);
1036 
1037         return VIRTIO_NET_OK;
1038     }
1039 
1040     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1041         return VIRTIO_NET_ERR;
1042     }
1043 
1044     int in_use = 0;
1045     int first_multi = 0;
1046     uint8_t uni_overflow = 0;
1047     uint8_t multi_overflow = 0;
1048     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1049 
1050     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1051                    sizeof(mac_data.entries));
1052     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1053     if (s != sizeof(mac_data.entries)) {
1054         goto error;
1055     }
1056     iov_discard_front(&iov, &iov_cnt, s);
1057 
1058     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1059         goto error;
1060     }
1061 
1062     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1063         s = iov_to_buf(iov, iov_cnt, 0, macs,
1064                        mac_data.entries * ETH_ALEN);
1065         if (s != mac_data.entries * ETH_ALEN) {
1066             goto error;
1067         }
1068         in_use += mac_data.entries;
1069     } else {
1070         uni_overflow = 1;
1071     }
1072 
1073     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1074 
1075     first_multi = in_use;
1076 
1077     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1078                    sizeof(mac_data.entries));
1079     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1080     if (s != sizeof(mac_data.entries)) {
1081         goto error;
1082     }
1083 
1084     iov_discard_front(&iov, &iov_cnt, s);
1085 
1086     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1087         goto error;
1088     }
1089 
1090     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1091         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1092                        mac_data.entries * ETH_ALEN);
1093         if (s != mac_data.entries * ETH_ALEN) {
1094             goto error;
1095         }
1096         in_use += mac_data.entries;
1097     } else {
1098         multi_overflow = 1;
1099     }
1100 
1101     n->mac_table.in_use = in_use;
1102     n->mac_table.first_multi = first_multi;
1103     n->mac_table.uni_overflow = uni_overflow;
1104     n->mac_table.multi_overflow = multi_overflow;
1105     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1106     g_free(macs);
1107     rxfilter_notify(nc);
1108 
1109     return VIRTIO_NET_OK;
1110 
1111 error:
1112     g_free(macs);
1113     return VIRTIO_NET_ERR;
1114 }
1115 
1116 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1117                                         struct iovec *iov, unsigned int iov_cnt)
1118 {
1119     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1120     uint16_t vid;
1121     size_t s;
1122     NetClientState *nc = qemu_get_queue(n->nic);
1123 
1124     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1125     vid = virtio_lduw_p(vdev, &vid);
1126     if (s != sizeof(vid)) {
1127         return VIRTIO_NET_ERR;
1128     }
1129 
1130     if (vid >= MAX_VLAN)
1131         return VIRTIO_NET_ERR;
1132 
1133     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1134         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1135     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1136         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1137     else
1138         return VIRTIO_NET_ERR;
1139 
1140     rxfilter_notify(nc);
1141 
1142     return VIRTIO_NET_OK;
1143 }
1144 
1145 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1146                                       struct iovec *iov, unsigned int iov_cnt)
1147 {
1148     trace_virtio_net_handle_announce(n->announce_timer.round);
1149     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1150         n->status & VIRTIO_NET_S_ANNOUNCE) {
1151         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1152         if (n->announce_timer.round) {
1153             qemu_announce_timer_step(&n->announce_timer);
1154         }
1155         return VIRTIO_NET_OK;
1156     } else {
1157         return VIRTIO_NET_ERR;
1158     }
1159 }
1160 
1161 static void virtio_net_disable_rss(VirtIONet *n)
1162 {
1163     if (n->rss_data.enabled) {
1164         trace_virtio_net_rss_disable();
1165     }
1166     n->rss_data.enabled = false;
1167 }
1168 
1169 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1170                                       struct iovec *iov,
1171                                       unsigned int iov_cnt,
1172                                       bool do_rss)
1173 {
1174     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1175     struct virtio_net_rss_config cfg;
1176     size_t s, offset = 0, size_get;
1177     uint16_t queues, i;
1178     struct {
1179         uint16_t us;
1180         uint8_t b;
1181     } QEMU_PACKED temp;
1182     const char *err_msg = "";
1183     uint32_t err_value = 0;
1184 
1185     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1186         err_msg = "RSS is not negotiated";
1187         goto error;
1188     }
1189     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1190         err_msg = "Hash report is not negotiated";
1191         goto error;
1192     }
1193     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1194     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1195     if (s != size_get) {
1196         err_msg = "Short command buffer";
1197         err_value = (uint32_t)s;
1198         goto error;
1199     }
1200     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1201     n->rss_data.indirections_len =
1202         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1203     n->rss_data.indirections_len++;
1204     if (!do_rss) {
1205         n->rss_data.indirections_len = 1;
1206     }
1207     if (!is_power_of_2(n->rss_data.indirections_len)) {
1208         err_msg = "Invalid size of indirection table";
1209         err_value = n->rss_data.indirections_len;
1210         goto error;
1211     }
1212     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1213         err_msg = "Too large indirection table";
1214         err_value = n->rss_data.indirections_len;
1215         goto error;
1216     }
1217     n->rss_data.default_queue = do_rss ?
1218         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1219     if (n->rss_data.default_queue >= n->max_queues) {
1220         err_msg = "Invalid default queue";
1221         err_value = n->rss_data.default_queue;
1222         goto error;
1223     }
1224     offset += size_get;
1225     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1226     g_free(n->rss_data.indirections_table);
1227     n->rss_data.indirections_table = g_malloc(size_get);
1228     if (!n->rss_data.indirections_table) {
1229         err_msg = "Can't allocate indirections table";
1230         err_value = n->rss_data.indirections_len;
1231         goto error;
1232     }
1233     s = iov_to_buf(iov, iov_cnt, offset,
1234                    n->rss_data.indirections_table, size_get);
1235     if (s != size_get) {
1236         err_msg = "Short indirection table buffer";
1237         err_value = (uint32_t)s;
1238         goto error;
1239     }
1240     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1241         uint16_t val = n->rss_data.indirections_table[i];
1242         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1243     }
1244     offset += size_get;
1245     size_get = sizeof(temp);
1246     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1247     if (s != size_get) {
1248         err_msg = "Can't get queues";
1249         err_value = (uint32_t)s;
1250         goto error;
1251     }
1252     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1253     if (queues == 0 || queues > n->max_queues) {
1254         err_msg = "Invalid number of queues";
1255         err_value = queues;
1256         goto error;
1257     }
1258     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1259         err_msg = "Invalid key size";
1260         err_value = temp.b;
1261         goto error;
1262     }
1263     if (!temp.b && n->rss_data.hash_types) {
1264         err_msg = "No key provided";
1265         err_value = 0;
1266         goto error;
1267     }
1268     if (!temp.b && !n->rss_data.hash_types) {
1269         virtio_net_disable_rss(n);
1270         return queues;
1271     }
1272     offset += size_get;
1273     size_get = temp.b;
1274     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1275     if (s != size_get) {
1276         err_msg = "Can get key buffer";
1277         err_value = (uint32_t)s;
1278         goto error;
1279     }
1280     n->rss_data.enabled = true;
1281     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1282                                 n->rss_data.indirections_len,
1283                                 temp.b);
1284     return queues;
1285 error:
1286     trace_virtio_net_rss_error(err_msg, err_value);
1287     virtio_net_disable_rss(n);
1288     return 0;
1289 }
1290 
1291 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1292                                 struct iovec *iov, unsigned int iov_cnt)
1293 {
1294     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1295     uint16_t queues;
1296 
1297     virtio_net_disable_rss(n);
1298     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1299         queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1300         return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1301     }
1302     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1303         queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1304     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1305         struct virtio_net_ctrl_mq mq;
1306         size_t s;
1307         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1308             return VIRTIO_NET_ERR;
1309         }
1310         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1311         if (s != sizeof(mq)) {
1312             return VIRTIO_NET_ERR;
1313         }
1314         queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1315 
1316     } else {
1317         return VIRTIO_NET_ERR;
1318     }
1319 
1320     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1321         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1322         queues > n->max_queues ||
1323         !n->multiqueue) {
1324         return VIRTIO_NET_ERR;
1325     }
1326 
1327     n->curr_queues = queues;
1328     /* stop the backend before changing the number of queues to avoid handling a
1329      * disabled queue */
1330     virtio_net_set_status(vdev, vdev->status);
1331     virtio_net_set_queues(n);
1332 
1333     return VIRTIO_NET_OK;
1334 }
1335 
1336 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1337 {
1338     VirtIONet *n = VIRTIO_NET(vdev);
1339     struct virtio_net_ctrl_hdr ctrl;
1340     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1341     VirtQueueElement *elem;
1342     size_t s;
1343     struct iovec *iov, *iov2;
1344     unsigned int iov_cnt;
1345 
1346     for (;;) {
1347         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1348         if (!elem) {
1349             break;
1350         }
1351         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1352             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1353             virtio_error(vdev, "virtio-net ctrl missing headers");
1354             virtqueue_detach_element(vq, elem, 0);
1355             g_free(elem);
1356             break;
1357         }
1358 
1359         iov_cnt = elem->out_num;
1360         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1361         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1362         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1363         if (s != sizeof(ctrl)) {
1364             status = VIRTIO_NET_ERR;
1365         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1366             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1367         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1368             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1369         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1370             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1371         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1372             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1373         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1374             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1375         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1376             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1377         }
1378 
1379         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1380         assert(s == sizeof(status));
1381 
1382         virtqueue_push(vq, elem, sizeof(status));
1383         virtio_notify(vdev, vq);
1384         g_free(iov2);
1385         g_free(elem);
1386     }
1387 }
1388 
1389 /* RX */
1390 
1391 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1392 {
1393     VirtIONet *n = VIRTIO_NET(vdev);
1394     int queue_index = vq2q(virtio_get_queue_index(vq));
1395 
1396     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1397 }
1398 
1399 static bool virtio_net_can_receive(NetClientState *nc)
1400 {
1401     VirtIONet *n = qemu_get_nic_opaque(nc);
1402     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1403     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1404 
1405     if (!vdev->vm_running) {
1406         return false;
1407     }
1408 
1409     if (nc->queue_index >= n->curr_queues) {
1410         return false;
1411     }
1412 
1413     if (!virtio_queue_ready(q->rx_vq) ||
1414         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1415         return false;
1416     }
1417 
1418     return true;
1419 }
1420 
1421 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1422 {
1423     VirtIONet *n = q->n;
1424     if (virtio_queue_empty(q->rx_vq) ||
1425         (n->mergeable_rx_bufs &&
1426          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1427         virtio_queue_set_notification(q->rx_vq, 1);
1428 
1429         /* To avoid a race condition where the guest has made some buffers
1430          * available after the above check but before notification was
1431          * enabled, check for available buffers again.
1432          */
1433         if (virtio_queue_empty(q->rx_vq) ||
1434             (n->mergeable_rx_bufs &&
1435              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1436             return 0;
1437         }
1438     }
1439 
1440     virtio_queue_set_notification(q->rx_vq, 0);
1441     return 1;
1442 }
1443 
1444 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1445 {
1446     virtio_tswap16s(vdev, &hdr->hdr_len);
1447     virtio_tswap16s(vdev, &hdr->gso_size);
1448     virtio_tswap16s(vdev, &hdr->csum_start);
1449     virtio_tswap16s(vdev, &hdr->csum_offset);
1450 }
1451 
1452 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1453  * it never finds out that the packets don't have valid checksums.  This
1454  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1455  * fix this with Xen but it hasn't appeared in an upstream release of
1456  * dhclient yet.
1457  *
1458  * To avoid breaking existing guests, we catch udp packets and add
1459  * checksums.  This is terrible but it's better than hacking the guest
1460  * kernels.
1461  *
1462  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1463  * we should provide a mechanism to disable it to avoid polluting the host
1464  * cache.
1465  */
1466 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1467                                         uint8_t *buf, size_t size)
1468 {
1469     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1470         (size > 27 && size < 1500) && /* normal sized MTU */
1471         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1472         (buf[23] == 17) && /* ip.protocol == UDP */
1473         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1474         net_checksum_calculate(buf, size);
1475         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1476     }
1477 }
1478 
1479 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1480                            const void *buf, size_t size)
1481 {
1482     if (n->has_vnet_hdr) {
1483         /* FIXME this cast is evil */
1484         void *wbuf = (void *)buf;
1485         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1486                                     size - n->host_hdr_len);
1487 
1488         if (n->needs_vnet_hdr_swap) {
1489             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1490         }
1491         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1492     } else {
1493         struct virtio_net_hdr hdr = {
1494             .flags = 0,
1495             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1496         };
1497         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1498     }
1499 }
1500 
1501 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1502 {
1503     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1504     static const uint8_t vlan[] = {0x81, 0x00};
1505     uint8_t *ptr = (uint8_t *)buf;
1506     int i;
1507 
1508     if (n->promisc)
1509         return 1;
1510 
1511     ptr += n->host_hdr_len;
1512 
1513     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1514         int vid = lduw_be_p(ptr + 14) & 0xfff;
1515         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1516             return 0;
1517     }
1518 
1519     if (ptr[0] & 1) { // multicast
1520         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1521             return !n->nobcast;
1522         } else if (n->nomulti) {
1523             return 0;
1524         } else if (n->allmulti || n->mac_table.multi_overflow) {
1525             return 1;
1526         }
1527 
1528         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1529             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1530                 return 1;
1531             }
1532         }
1533     } else { // unicast
1534         if (n->nouni) {
1535             return 0;
1536         } else if (n->alluni || n->mac_table.uni_overflow) {
1537             return 1;
1538         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1539             return 1;
1540         }
1541 
1542         for (i = 0; i < n->mac_table.first_multi; i++) {
1543             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1544                 return 1;
1545             }
1546         }
1547     }
1548 
1549     return 0;
1550 }
1551 
1552 static uint8_t virtio_net_get_hash_type(bool isip4,
1553                                         bool isip6,
1554                                         bool isudp,
1555                                         bool istcp,
1556                                         uint32_t types)
1557 {
1558     if (isip4) {
1559         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1560             return NetPktRssIpV4Tcp;
1561         }
1562         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1563             return NetPktRssIpV4Udp;
1564         }
1565         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1566             return NetPktRssIpV4;
1567         }
1568     } else if (isip6) {
1569         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1570                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1571 
1572         if (istcp && (types & mask)) {
1573             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1574                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1575         }
1576         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1577         if (isudp && (types & mask)) {
1578             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1579                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1580         }
1581         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1582         if (types & mask) {
1583             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1584                 NetPktRssIpV6Ex : NetPktRssIpV6;
1585         }
1586     }
1587     return 0xff;
1588 }
1589 
1590 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1591                                    uint32_t hash)
1592 {
1593     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1594     hdr->hash_value = hash;
1595     hdr->hash_report = report;
1596 }
1597 
1598 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1599                                   size_t size)
1600 {
1601     VirtIONet *n = qemu_get_nic_opaque(nc);
1602     unsigned int index = nc->queue_index, new_index = index;
1603     struct NetRxPkt *pkt = n->rx_pkt;
1604     uint8_t net_hash_type;
1605     uint32_t hash;
1606     bool isip4, isip6, isudp, istcp;
1607     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1608         VIRTIO_NET_HASH_REPORT_IPv4,
1609         VIRTIO_NET_HASH_REPORT_TCPv4,
1610         VIRTIO_NET_HASH_REPORT_TCPv6,
1611         VIRTIO_NET_HASH_REPORT_IPv6,
1612         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1613         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1614         VIRTIO_NET_HASH_REPORT_UDPv4,
1615         VIRTIO_NET_HASH_REPORT_UDPv6,
1616         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1617     };
1618 
1619     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1620                              size - n->host_hdr_len);
1621     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1622     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1623         istcp = isudp = false;
1624     }
1625     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1626         istcp = isudp = false;
1627     }
1628     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1629                                              n->rss_data.hash_types);
1630     if (net_hash_type > NetPktRssIpV6UdpEx) {
1631         if (n->rss_data.populate_hash) {
1632             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1633         }
1634         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1635     }
1636 
1637     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1638 
1639     if (n->rss_data.populate_hash) {
1640         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1641     }
1642 
1643     if (n->rss_data.redirect) {
1644         new_index = hash & (n->rss_data.indirections_len - 1);
1645         new_index = n->rss_data.indirections_table[new_index];
1646     }
1647 
1648     return (index == new_index) ? -1 : new_index;
1649 }
1650 
1651 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1652                                       size_t size, bool no_rss)
1653 {
1654     VirtIONet *n = qemu_get_nic_opaque(nc);
1655     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1656     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1657     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1658     struct virtio_net_hdr_mrg_rxbuf mhdr;
1659     unsigned mhdr_cnt = 0;
1660     size_t offset, i, guest_offset;
1661 
1662     if (!virtio_net_can_receive(nc)) {
1663         return -1;
1664     }
1665 
1666     if (!no_rss && n->rss_data.enabled) {
1667         int index = virtio_net_process_rss(nc, buf, size);
1668         if (index >= 0) {
1669             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1670             return virtio_net_receive_rcu(nc2, buf, size, true);
1671         }
1672     }
1673 
1674     /* hdr_len refers to the header we supply to the guest */
1675     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1676         return 0;
1677     }
1678 
1679     if (!receive_filter(n, buf, size))
1680         return size;
1681 
1682     offset = i = 0;
1683 
1684     while (offset < size) {
1685         VirtQueueElement *elem;
1686         int len, total;
1687         const struct iovec *sg;
1688 
1689         total = 0;
1690 
1691         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1692         if (!elem) {
1693             if (i) {
1694                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1695                              "i %zd mergeable %d offset %zd, size %zd, "
1696                              "guest hdr len %zd, host hdr len %zd "
1697                              "guest features 0x%" PRIx64,
1698                              i, n->mergeable_rx_bufs, offset, size,
1699                              n->guest_hdr_len, n->host_hdr_len,
1700                              vdev->guest_features);
1701             }
1702             return -1;
1703         }
1704 
1705         if (elem->in_num < 1) {
1706             virtio_error(vdev,
1707                          "virtio-net receive queue contains no in buffers");
1708             virtqueue_detach_element(q->rx_vq, elem, 0);
1709             g_free(elem);
1710             return -1;
1711         }
1712 
1713         sg = elem->in_sg;
1714         if (i == 0) {
1715             assert(offset == 0);
1716             if (n->mergeable_rx_bufs) {
1717                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1718                                     sg, elem->in_num,
1719                                     offsetof(typeof(mhdr), num_buffers),
1720                                     sizeof(mhdr.num_buffers));
1721             }
1722 
1723             receive_header(n, sg, elem->in_num, buf, size);
1724             if (n->rss_data.populate_hash) {
1725                 offset = sizeof(mhdr);
1726                 iov_from_buf(sg, elem->in_num, offset,
1727                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1728             }
1729             offset = n->host_hdr_len;
1730             total += n->guest_hdr_len;
1731             guest_offset = n->guest_hdr_len;
1732         } else {
1733             guest_offset = 0;
1734         }
1735 
1736         /* copy in packet.  ugh */
1737         len = iov_from_buf(sg, elem->in_num, guest_offset,
1738                            buf + offset, size - offset);
1739         total += len;
1740         offset += len;
1741         /* If buffers can't be merged, at this point we
1742          * must have consumed the complete packet.
1743          * Otherwise, drop it. */
1744         if (!n->mergeable_rx_bufs && offset < size) {
1745             virtqueue_unpop(q->rx_vq, elem, total);
1746             g_free(elem);
1747             return size;
1748         }
1749 
1750         /* signal other side */
1751         virtqueue_fill(q->rx_vq, elem, total, i++);
1752         g_free(elem);
1753     }
1754 
1755     if (mhdr_cnt) {
1756         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1757         iov_from_buf(mhdr_sg, mhdr_cnt,
1758                      0,
1759                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1760     }
1761 
1762     virtqueue_flush(q->rx_vq, i);
1763     virtio_notify(vdev, q->rx_vq);
1764 
1765     return size;
1766 }
1767 
1768 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1769                                   size_t size)
1770 {
1771     RCU_READ_LOCK_GUARD();
1772 
1773     return virtio_net_receive_rcu(nc, buf, size, false);
1774 }
1775 
1776 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1777                                          const uint8_t *buf,
1778                                          VirtioNetRscUnit *unit)
1779 {
1780     uint16_t ip_hdrlen;
1781     struct ip_header *ip;
1782 
1783     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1784                               + sizeof(struct eth_header));
1785     unit->ip = (void *)ip;
1786     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1787     unit->ip_plen = &ip->ip_len;
1788     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1789     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1790     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1791 }
1792 
1793 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1794                                          const uint8_t *buf,
1795                                          VirtioNetRscUnit *unit)
1796 {
1797     struct ip6_header *ip6;
1798 
1799     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1800                                  + sizeof(struct eth_header));
1801     unit->ip = ip6;
1802     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1803     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1804                                         + sizeof(struct ip6_header));
1805     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1806 
1807     /* There is a difference between payload lenght in ipv4 and v6,
1808        ip header is excluded in ipv6 */
1809     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1810 }
1811 
1812 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1813                                        VirtioNetRscSeg *seg)
1814 {
1815     int ret;
1816     struct virtio_net_hdr_v1 *h;
1817 
1818     h = (struct virtio_net_hdr_v1 *)seg->buf;
1819     h->flags = 0;
1820     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1821 
1822     if (seg->is_coalesced) {
1823         h->rsc.segments = seg->packets;
1824         h->rsc.dup_acks = seg->dup_ack;
1825         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1826         if (chain->proto == ETH_P_IP) {
1827             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1828         } else {
1829             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1830         }
1831     }
1832 
1833     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1834     QTAILQ_REMOVE(&chain->buffers, seg, next);
1835     g_free(seg->buf);
1836     g_free(seg);
1837 
1838     return ret;
1839 }
1840 
1841 static void virtio_net_rsc_purge(void *opq)
1842 {
1843     VirtioNetRscSeg *seg, *rn;
1844     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1845 
1846     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1847         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1848             chain->stat.purge_failed++;
1849             continue;
1850         }
1851     }
1852 
1853     chain->stat.timer++;
1854     if (!QTAILQ_EMPTY(&chain->buffers)) {
1855         timer_mod(chain->drain_timer,
1856               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1857     }
1858 }
1859 
1860 static void virtio_net_rsc_cleanup(VirtIONet *n)
1861 {
1862     VirtioNetRscChain *chain, *rn_chain;
1863     VirtioNetRscSeg *seg, *rn_seg;
1864 
1865     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1866         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1867             QTAILQ_REMOVE(&chain->buffers, seg, next);
1868             g_free(seg->buf);
1869             g_free(seg);
1870         }
1871 
1872         timer_del(chain->drain_timer);
1873         timer_free(chain->drain_timer);
1874         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1875         g_free(chain);
1876     }
1877 }
1878 
1879 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1880                                      NetClientState *nc,
1881                                      const uint8_t *buf, size_t size)
1882 {
1883     uint16_t hdr_len;
1884     VirtioNetRscSeg *seg;
1885 
1886     hdr_len = chain->n->guest_hdr_len;
1887     seg = g_malloc(sizeof(VirtioNetRscSeg));
1888     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1889         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1890     memcpy(seg->buf, buf, size);
1891     seg->size = size;
1892     seg->packets = 1;
1893     seg->dup_ack = 0;
1894     seg->is_coalesced = 0;
1895     seg->nc = nc;
1896 
1897     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1898     chain->stat.cache++;
1899 
1900     switch (chain->proto) {
1901     case ETH_P_IP:
1902         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1903         break;
1904     case ETH_P_IPV6:
1905         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1906         break;
1907     default:
1908         g_assert_not_reached();
1909     }
1910 }
1911 
1912 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1913                                          VirtioNetRscSeg *seg,
1914                                          const uint8_t *buf,
1915                                          struct tcp_header *n_tcp,
1916                                          struct tcp_header *o_tcp)
1917 {
1918     uint32_t nack, oack;
1919     uint16_t nwin, owin;
1920 
1921     nack = htonl(n_tcp->th_ack);
1922     nwin = htons(n_tcp->th_win);
1923     oack = htonl(o_tcp->th_ack);
1924     owin = htons(o_tcp->th_win);
1925 
1926     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1927         chain->stat.ack_out_of_win++;
1928         return RSC_FINAL;
1929     } else if (nack == oack) {
1930         /* duplicated ack or window probe */
1931         if (nwin == owin) {
1932             /* duplicated ack, add dup ack count due to whql test up to 1 */
1933             chain->stat.dup_ack++;
1934             return RSC_FINAL;
1935         } else {
1936             /* Coalesce window update */
1937             o_tcp->th_win = n_tcp->th_win;
1938             chain->stat.win_update++;
1939             return RSC_COALESCE;
1940         }
1941     } else {
1942         /* pure ack, go to 'C', finalize*/
1943         chain->stat.pure_ack++;
1944         return RSC_FINAL;
1945     }
1946 }
1947 
1948 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1949                                             VirtioNetRscSeg *seg,
1950                                             const uint8_t *buf,
1951                                             VirtioNetRscUnit *n_unit)
1952 {
1953     void *data;
1954     uint16_t o_ip_len;
1955     uint32_t nseq, oseq;
1956     VirtioNetRscUnit *o_unit;
1957 
1958     o_unit = &seg->unit;
1959     o_ip_len = htons(*o_unit->ip_plen);
1960     nseq = htonl(n_unit->tcp->th_seq);
1961     oseq = htonl(o_unit->tcp->th_seq);
1962 
1963     /* out of order or retransmitted. */
1964     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1965         chain->stat.data_out_of_win++;
1966         return RSC_FINAL;
1967     }
1968 
1969     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1970     if (nseq == oseq) {
1971         if ((o_unit->payload == 0) && n_unit->payload) {
1972             /* From no payload to payload, normal case, not a dup ack or etc */
1973             chain->stat.data_after_pure_ack++;
1974             goto coalesce;
1975         } else {
1976             return virtio_net_rsc_handle_ack(chain, seg, buf,
1977                                              n_unit->tcp, o_unit->tcp);
1978         }
1979     } else if ((nseq - oseq) != o_unit->payload) {
1980         /* Not a consistent packet, out of order */
1981         chain->stat.data_out_of_order++;
1982         return RSC_FINAL;
1983     } else {
1984 coalesce:
1985         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1986             chain->stat.over_size++;
1987             return RSC_FINAL;
1988         }
1989 
1990         /* Here comes the right data, the payload length in v4/v6 is different,
1991            so use the field value to update and record the new data len */
1992         o_unit->payload += n_unit->payload; /* update new data len */
1993 
1994         /* update field in ip header */
1995         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1996 
1997         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
1998            for windows guest, while this may change the behavior for linux
1999            guest (only if it uses RSC feature). */
2000         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2001 
2002         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2003         o_unit->tcp->th_win = n_unit->tcp->th_win;
2004 
2005         memmove(seg->buf + seg->size, data, n_unit->payload);
2006         seg->size += n_unit->payload;
2007         seg->packets++;
2008         chain->stat.coalesced++;
2009         return RSC_COALESCE;
2010     }
2011 }
2012 
2013 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2014                                         VirtioNetRscSeg *seg,
2015                                         const uint8_t *buf, size_t size,
2016                                         VirtioNetRscUnit *unit)
2017 {
2018     struct ip_header *ip1, *ip2;
2019 
2020     ip1 = (struct ip_header *)(unit->ip);
2021     ip2 = (struct ip_header *)(seg->unit.ip);
2022     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2023         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2024         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2025         chain->stat.no_match++;
2026         return RSC_NO_MATCH;
2027     }
2028 
2029     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2030 }
2031 
2032 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2033                                         VirtioNetRscSeg *seg,
2034                                         const uint8_t *buf, size_t size,
2035                                         VirtioNetRscUnit *unit)
2036 {
2037     struct ip6_header *ip1, *ip2;
2038 
2039     ip1 = (struct ip6_header *)(unit->ip);
2040     ip2 = (struct ip6_header *)(seg->unit.ip);
2041     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2042         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2043         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2044         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2045             chain->stat.no_match++;
2046             return RSC_NO_MATCH;
2047     }
2048 
2049     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2050 }
2051 
2052 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2053  * to prevent out of order */
2054 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2055                                          struct tcp_header *tcp)
2056 {
2057     uint16_t tcp_hdr;
2058     uint16_t tcp_flag;
2059 
2060     tcp_flag = htons(tcp->th_offset_flags);
2061     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2062     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2063     if (tcp_flag & TH_SYN) {
2064         chain->stat.tcp_syn++;
2065         return RSC_BYPASS;
2066     }
2067 
2068     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2069         chain->stat.tcp_ctrl_drain++;
2070         return RSC_FINAL;
2071     }
2072 
2073     if (tcp_hdr > sizeof(struct tcp_header)) {
2074         chain->stat.tcp_all_opt++;
2075         return RSC_FINAL;
2076     }
2077 
2078     return RSC_CANDIDATE;
2079 }
2080 
2081 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2082                                          NetClientState *nc,
2083                                          const uint8_t *buf, size_t size,
2084                                          VirtioNetRscUnit *unit)
2085 {
2086     int ret;
2087     VirtioNetRscSeg *seg, *nseg;
2088 
2089     if (QTAILQ_EMPTY(&chain->buffers)) {
2090         chain->stat.empty_cache++;
2091         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2092         timer_mod(chain->drain_timer,
2093               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2094         return size;
2095     }
2096 
2097     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2098         if (chain->proto == ETH_P_IP) {
2099             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2100         } else {
2101             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2102         }
2103 
2104         if (ret == RSC_FINAL) {
2105             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2106                 /* Send failed */
2107                 chain->stat.final_failed++;
2108                 return 0;
2109             }
2110 
2111             /* Send current packet */
2112             return virtio_net_do_receive(nc, buf, size);
2113         } else if (ret == RSC_NO_MATCH) {
2114             continue;
2115         } else {
2116             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2117             seg->is_coalesced = 1;
2118             return size;
2119         }
2120     }
2121 
2122     chain->stat.no_match_cache++;
2123     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2124     return size;
2125 }
2126 
2127 /* Drain a connection data, this is to avoid out of order segments */
2128 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2129                                         NetClientState *nc,
2130                                         const uint8_t *buf, size_t size,
2131                                         uint16_t ip_start, uint16_t ip_size,
2132                                         uint16_t tcp_port)
2133 {
2134     VirtioNetRscSeg *seg, *nseg;
2135     uint32_t ppair1, ppair2;
2136 
2137     ppair1 = *(uint32_t *)(buf + tcp_port);
2138     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2139         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2140         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2141             || (ppair1 != ppair2)) {
2142             continue;
2143         }
2144         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2145             chain->stat.drain_failed++;
2146         }
2147 
2148         break;
2149     }
2150 
2151     return virtio_net_do_receive(nc, buf, size);
2152 }
2153 
2154 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2155                                             struct ip_header *ip,
2156                                             const uint8_t *buf, size_t size)
2157 {
2158     uint16_t ip_len;
2159 
2160     /* Not an ipv4 packet */
2161     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2162         chain->stat.ip_option++;
2163         return RSC_BYPASS;
2164     }
2165 
2166     /* Don't handle packets with ip option */
2167     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2168         chain->stat.ip_option++;
2169         return RSC_BYPASS;
2170     }
2171 
2172     if (ip->ip_p != IPPROTO_TCP) {
2173         chain->stat.bypass_not_tcp++;
2174         return RSC_BYPASS;
2175     }
2176 
2177     /* Don't handle packets with ip fragment */
2178     if (!(htons(ip->ip_off) & IP_DF)) {
2179         chain->stat.ip_frag++;
2180         return RSC_BYPASS;
2181     }
2182 
2183     /* Don't handle packets with ecn flag */
2184     if (IPTOS_ECN(ip->ip_tos)) {
2185         chain->stat.ip_ecn++;
2186         return RSC_BYPASS;
2187     }
2188 
2189     ip_len = htons(ip->ip_len);
2190     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2191         || ip_len > (size - chain->n->guest_hdr_len -
2192                      sizeof(struct eth_header))) {
2193         chain->stat.ip_hacked++;
2194         return RSC_BYPASS;
2195     }
2196 
2197     return RSC_CANDIDATE;
2198 }
2199 
2200 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2201                                       NetClientState *nc,
2202                                       const uint8_t *buf, size_t size)
2203 {
2204     int32_t ret;
2205     uint16_t hdr_len;
2206     VirtioNetRscUnit unit;
2207 
2208     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2209 
2210     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2211         + sizeof(struct tcp_header))) {
2212         chain->stat.bypass_not_tcp++;
2213         return virtio_net_do_receive(nc, buf, size);
2214     }
2215 
2216     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2217     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2218         != RSC_CANDIDATE) {
2219         return virtio_net_do_receive(nc, buf, size);
2220     }
2221 
2222     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2223     if (ret == RSC_BYPASS) {
2224         return virtio_net_do_receive(nc, buf, size);
2225     } else if (ret == RSC_FINAL) {
2226         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2227                 ((hdr_len + sizeof(struct eth_header)) + 12),
2228                 VIRTIO_NET_IP4_ADDR_SIZE,
2229                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2230     }
2231 
2232     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2233 }
2234 
2235 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2236                                             struct ip6_header *ip6,
2237                                             const uint8_t *buf, size_t size)
2238 {
2239     uint16_t ip_len;
2240 
2241     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2242         != IP_HEADER_VERSION_6) {
2243         return RSC_BYPASS;
2244     }
2245 
2246     /* Both option and protocol is checked in this */
2247     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2248         chain->stat.bypass_not_tcp++;
2249         return RSC_BYPASS;
2250     }
2251 
2252     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2253     if (ip_len < sizeof(struct tcp_header) ||
2254         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2255                   - sizeof(struct ip6_header))) {
2256         chain->stat.ip_hacked++;
2257         return RSC_BYPASS;
2258     }
2259 
2260     /* Don't handle packets with ecn flag */
2261     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2262         chain->stat.ip_ecn++;
2263         return RSC_BYPASS;
2264     }
2265 
2266     return RSC_CANDIDATE;
2267 }
2268 
2269 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2270                                       const uint8_t *buf, size_t size)
2271 {
2272     int32_t ret;
2273     uint16_t hdr_len;
2274     VirtioNetRscChain *chain;
2275     VirtioNetRscUnit unit;
2276 
2277     chain = (VirtioNetRscChain *)opq;
2278     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2279 
2280     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2281         + sizeof(tcp_header))) {
2282         return virtio_net_do_receive(nc, buf, size);
2283     }
2284 
2285     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2286     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2287                                                  unit.ip, buf, size)) {
2288         return virtio_net_do_receive(nc, buf, size);
2289     }
2290 
2291     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2292     if (ret == RSC_BYPASS) {
2293         return virtio_net_do_receive(nc, buf, size);
2294     } else if (ret == RSC_FINAL) {
2295         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2296                 ((hdr_len + sizeof(struct eth_header)) + 8),
2297                 VIRTIO_NET_IP6_ADDR_SIZE,
2298                 hdr_len + sizeof(struct eth_header)
2299                 + sizeof(struct ip6_header));
2300     }
2301 
2302     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2303 }
2304 
2305 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2306                                                       NetClientState *nc,
2307                                                       uint16_t proto)
2308 {
2309     VirtioNetRscChain *chain;
2310 
2311     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2312         return NULL;
2313     }
2314 
2315     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2316         if (chain->proto == proto) {
2317             return chain;
2318         }
2319     }
2320 
2321     chain = g_malloc(sizeof(*chain));
2322     chain->n = n;
2323     chain->proto = proto;
2324     if (proto == (uint16_t)ETH_P_IP) {
2325         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2326         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2327     } else {
2328         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2329         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2330     }
2331     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2332                                       virtio_net_rsc_purge, chain);
2333     memset(&chain->stat, 0, sizeof(chain->stat));
2334 
2335     QTAILQ_INIT(&chain->buffers);
2336     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2337 
2338     return chain;
2339 }
2340 
2341 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2342                                       const uint8_t *buf,
2343                                       size_t size)
2344 {
2345     uint16_t proto;
2346     VirtioNetRscChain *chain;
2347     struct eth_header *eth;
2348     VirtIONet *n;
2349 
2350     n = qemu_get_nic_opaque(nc);
2351     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2352         return virtio_net_do_receive(nc, buf, size);
2353     }
2354 
2355     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2356     proto = htons(eth->h_proto);
2357 
2358     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2359     if (chain) {
2360         chain->stat.received++;
2361         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2362             return virtio_net_rsc_receive4(chain, nc, buf, size);
2363         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2364             return virtio_net_rsc_receive6(chain, nc, buf, size);
2365         }
2366     }
2367     return virtio_net_do_receive(nc, buf, size);
2368 }
2369 
2370 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2371                                   size_t size)
2372 {
2373     VirtIONet *n = qemu_get_nic_opaque(nc);
2374     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2375         return virtio_net_rsc_receive(nc, buf, size);
2376     } else {
2377         return virtio_net_do_receive(nc, buf, size);
2378     }
2379 }
2380 
2381 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2382 
2383 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2384 {
2385     VirtIONet *n = qemu_get_nic_opaque(nc);
2386     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2387     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2388 
2389     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2390     virtio_notify(vdev, q->tx_vq);
2391 
2392     g_free(q->async_tx.elem);
2393     q->async_tx.elem = NULL;
2394 
2395     virtio_queue_set_notification(q->tx_vq, 1);
2396     virtio_net_flush_tx(q);
2397 }
2398 
2399 /* TX */
2400 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2401 {
2402     VirtIONet *n = q->n;
2403     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2404     VirtQueueElement *elem;
2405     int32_t num_packets = 0;
2406     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2407     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2408         return num_packets;
2409     }
2410 
2411     if (q->async_tx.elem) {
2412         virtio_queue_set_notification(q->tx_vq, 0);
2413         return num_packets;
2414     }
2415 
2416     for (;;) {
2417         ssize_t ret;
2418         unsigned int out_num;
2419         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2420         struct virtio_net_hdr_mrg_rxbuf mhdr;
2421 
2422         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2423         if (!elem) {
2424             break;
2425         }
2426 
2427         out_num = elem->out_num;
2428         out_sg = elem->out_sg;
2429         if (out_num < 1) {
2430             virtio_error(vdev, "virtio-net header not in first element");
2431             virtqueue_detach_element(q->tx_vq, elem, 0);
2432             g_free(elem);
2433             return -EINVAL;
2434         }
2435 
2436         if (n->has_vnet_hdr) {
2437             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2438                 n->guest_hdr_len) {
2439                 virtio_error(vdev, "virtio-net header incorrect");
2440                 virtqueue_detach_element(q->tx_vq, elem, 0);
2441                 g_free(elem);
2442                 return -EINVAL;
2443             }
2444             if (n->needs_vnet_hdr_swap) {
2445                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2446                 sg2[0].iov_base = &mhdr;
2447                 sg2[0].iov_len = n->guest_hdr_len;
2448                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2449                                    out_sg, out_num,
2450                                    n->guest_hdr_len, -1);
2451                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2452                     goto drop;
2453                 }
2454                 out_num += 1;
2455                 out_sg = sg2;
2456             }
2457         }
2458         /*
2459          * If host wants to see the guest header as is, we can
2460          * pass it on unchanged. Otherwise, copy just the parts
2461          * that host is interested in.
2462          */
2463         assert(n->host_hdr_len <= n->guest_hdr_len);
2464         if (n->host_hdr_len != n->guest_hdr_len) {
2465             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2466                                        out_sg, out_num,
2467                                        0, n->host_hdr_len);
2468             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2469                              out_sg, out_num,
2470                              n->guest_hdr_len, -1);
2471             out_num = sg_num;
2472             out_sg = sg;
2473         }
2474 
2475         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2476                                       out_sg, out_num, virtio_net_tx_complete);
2477         if (ret == 0) {
2478             virtio_queue_set_notification(q->tx_vq, 0);
2479             q->async_tx.elem = elem;
2480             return -EBUSY;
2481         }
2482 
2483 drop:
2484         virtqueue_push(q->tx_vq, elem, 0);
2485         virtio_notify(vdev, q->tx_vq);
2486         g_free(elem);
2487 
2488         if (++num_packets >= n->tx_burst) {
2489             break;
2490         }
2491     }
2492     return num_packets;
2493 }
2494 
2495 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2496 {
2497     VirtIONet *n = VIRTIO_NET(vdev);
2498     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2499 
2500     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2501         virtio_net_drop_tx_queue_data(vdev, vq);
2502         return;
2503     }
2504 
2505     /* This happens when device was stopped but VCPU wasn't. */
2506     if (!vdev->vm_running) {
2507         q->tx_waiting = 1;
2508         return;
2509     }
2510 
2511     if (q->tx_waiting) {
2512         virtio_queue_set_notification(vq, 1);
2513         timer_del(q->tx_timer);
2514         q->tx_waiting = 0;
2515         if (virtio_net_flush_tx(q) == -EINVAL) {
2516             return;
2517         }
2518     } else {
2519         timer_mod(q->tx_timer,
2520                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2521         q->tx_waiting = 1;
2522         virtio_queue_set_notification(vq, 0);
2523     }
2524 }
2525 
2526 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2527 {
2528     VirtIONet *n = VIRTIO_NET(vdev);
2529     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2530 
2531     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2532         virtio_net_drop_tx_queue_data(vdev, vq);
2533         return;
2534     }
2535 
2536     if (unlikely(q->tx_waiting)) {
2537         return;
2538     }
2539     q->tx_waiting = 1;
2540     /* This happens when device was stopped but VCPU wasn't. */
2541     if (!vdev->vm_running) {
2542         return;
2543     }
2544     virtio_queue_set_notification(vq, 0);
2545     qemu_bh_schedule(q->tx_bh);
2546 }
2547 
2548 static void virtio_net_tx_timer(void *opaque)
2549 {
2550     VirtIONetQueue *q = opaque;
2551     VirtIONet *n = q->n;
2552     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2553     /* This happens when device was stopped but BH wasn't. */
2554     if (!vdev->vm_running) {
2555         /* Make sure tx waiting is set, so we'll run when restarted. */
2556         assert(q->tx_waiting);
2557         return;
2558     }
2559 
2560     q->tx_waiting = 0;
2561 
2562     /* Just in case the driver is not ready on more */
2563     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2564         return;
2565     }
2566 
2567     virtio_queue_set_notification(q->tx_vq, 1);
2568     virtio_net_flush_tx(q);
2569 }
2570 
2571 static void virtio_net_tx_bh(void *opaque)
2572 {
2573     VirtIONetQueue *q = opaque;
2574     VirtIONet *n = q->n;
2575     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2576     int32_t ret;
2577 
2578     /* This happens when device was stopped but BH wasn't. */
2579     if (!vdev->vm_running) {
2580         /* Make sure tx waiting is set, so we'll run when restarted. */
2581         assert(q->tx_waiting);
2582         return;
2583     }
2584 
2585     q->tx_waiting = 0;
2586 
2587     /* Just in case the driver is not ready on more */
2588     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2589         return;
2590     }
2591 
2592     ret = virtio_net_flush_tx(q);
2593     if (ret == -EBUSY || ret == -EINVAL) {
2594         return; /* Notification re-enable handled by tx_complete or device
2595                  * broken */
2596     }
2597 
2598     /* If we flush a full burst of packets, assume there are
2599      * more coming and immediately reschedule */
2600     if (ret >= n->tx_burst) {
2601         qemu_bh_schedule(q->tx_bh);
2602         q->tx_waiting = 1;
2603         return;
2604     }
2605 
2606     /* If less than a full burst, re-enable notification and flush
2607      * anything that may have come in while we weren't looking.  If
2608      * we find something, assume the guest is still active and reschedule */
2609     virtio_queue_set_notification(q->tx_vq, 1);
2610     ret = virtio_net_flush_tx(q);
2611     if (ret == -EINVAL) {
2612         return;
2613     } else if (ret > 0) {
2614         virtio_queue_set_notification(q->tx_vq, 0);
2615         qemu_bh_schedule(q->tx_bh);
2616         q->tx_waiting = 1;
2617     }
2618 }
2619 
2620 static void virtio_net_add_queue(VirtIONet *n, int index)
2621 {
2622     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2623 
2624     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2625                                            virtio_net_handle_rx);
2626 
2627     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2628         n->vqs[index].tx_vq =
2629             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2630                              virtio_net_handle_tx_timer);
2631         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2632                                               virtio_net_tx_timer,
2633                                               &n->vqs[index]);
2634     } else {
2635         n->vqs[index].tx_vq =
2636             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2637                              virtio_net_handle_tx_bh);
2638         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2639     }
2640 
2641     n->vqs[index].tx_waiting = 0;
2642     n->vqs[index].n = n;
2643 }
2644 
2645 static void virtio_net_del_queue(VirtIONet *n, int index)
2646 {
2647     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2648     VirtIONetQueue *q = &n->vqs[index];
2649     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2650 
2651     qemu_purge_queued_packets(nc);
2652 
2653     virtio_del_queue(vdev, index * 2);
2654     if (q->tx_timer) {
2655         timer_del(q->tx_timer);
2656         timer_free(q->tx_timer);
2657         q->tx_timer = NULL;
2658     } else {
2659         qemu_bh_delete(q->tx_bh);
2660         q->tx_bh = NULL;
2661     }
2662     q->tx_waiting = 0;
2663     virtio_del_queue(vdev, index * 2 + 1);
2664 }
2665 
2666 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2667 {
2668     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2669     int old_num_queues = virtio_get_num_queues(vdev);
2670     int new_num_queues = new_max_queues * 2 + 1;
2671     int i;
2672 
2673     assert(old_num_queues >= 3);
2674     assert(old_num_queues % 2 == 1);
2675 
2676     if (old_num_queues == new_num_queues) {
2677         return;
2678     }
2679 
2680     /*
2681      * We always need to remove and add ctrl vq if
2682      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2683      * and then we only enter one of the following two loops.
2684      */
2685     virtio_del_queue(vdev, old_num_queues - 1);
2686 
2687     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2688         /* new_num_queues < old_num_queues */
2689         virtio_net_del_queue(n, i / 2);
2690     }
2691 
2692     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2693         /* new_num_queues > old_num_queues */
2694         virtio_net_add_queue(n, i / 2);
2695     }
2696 
2697     /* add ctrl_vq last */
2698     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2699 }
2700 
2701 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2702 {
2703     int max = multiqueue ? n->max_queues : 1;
2704 
2705     n->multiqueue = multiqueue;
2706     virtio_net_change_num_queues(n, max);
2707 
2708     virtio_net_set_queues(n);
2709 }
2710 
2711 static int virtio_net_post_load_device(void *opaque, int version_id)
2712 {
2713     VirtIONet *n = opaque;
2714     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2715     int i, link_down;
2716 
2717     trace_virtio_net_post_load_device();
2718     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2719                                virtio_vdev_has_feature(vdev,
2720                                                        VIRTIO_F_VERSION_1),
2721                                virtio_vdev_has_feature(vdev,
2722                                                        VIRTIO_NET_F_HASH_REPORT));
2723 
2724     /* MAC_TABLE_ENTRIES may be different from the saved image */
2725     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2726         n->mac_table.in_use = 0;
2727     }
2728 
2729     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2730         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2731     }
2732 
2733     /*
2734      * curr_guest_offloads will be later overwritten by the
2735      * virtio_set_features_nocheck call done from the virtio_load.
2736      * Here we make sure it is preserved and restored accordingly
2737      * in the virtio_net_post_load_virtio callback.
2738      */
2739     n->saved_guest_offloads = n->curr_guest_offloads;
2740 
2741     virtio_net_set_queues(n);
2742 
2743     /* Find the first multicast entry in the saved MAC filter */
2744     for (i = 0; i < n->mac_table.in_use; i++) {
2745         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2746             break;
2747         }
2748     }
2749     n->mac_table.first_multi = i;
2750 
2751     /* nc.link_down can't be migrated, so infer link_down according
2752      * to link status bit in n->status */
2753     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2754     for (i = 0; i < n->max_queues; i++) {
2755         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2756     }
2757 
2758     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2759         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2760         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2761                                   QEMU_CLOCK_VIRTUAL,
2762                                   virtio_net_announce_timer, n);
2763         if (n->announce_timer.round) {
2764             timer_mod(n->announce_timer.tm,
2765                       qemu_clock_get_ms(n->announce_timer.type));
2766         } else {
2767             qemu_announce_timer_del(&n->announce_timer, false);
2768         }
2769     }
2770 
2771     if (n->rss_data.enabled) {
2772         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2773                                     n->rss_data.indirections_len,
2774                                     sizeof(n->rss_data.key));
2775     } else {
2776         trace_virtio_net_rss_disable();
2777     }
2778     return 0;
2779 }
2780 
2781 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2782 {
2783     VirtIONet *n = VIRTIO_NET(vdev);
2784     /*
2785      * The actual needed state is now in saved_guest_offloads,
2786      * see virtio_net_post_load_device for detail.
2787      * Restore it back and apply the desired offloads.
2788      */
2789     n->curr_guest_offloads = n->saved_guest_offloads;
2790     if (peer_has_vnet_hdr(n)) {
2791         virtio_net_apply_guest_offloads(n);
2792     }
2793 
2794     return 0;
2795 }
2796 
/*
 * tx_waiting field of a VirtIONetQueue.
 *
 * This is the per-queue migration description: the only member of a
 * VirtIONetQueue that is transferred is the 32-bit tx_waiting flag.
 * It is referenced both by vmstate_virtio_net_device (through the vqs
 * pointer) and by vmstate_virtio_net_tx_waiting (for the entries
 * starting at vqs + 1).
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2805 
2806 static bool max_queues_gt_1(void *opaque, int version_id)
2807 {
2808     return VIRTIO_NET(opaque)->max_queues > 1;
2809 }
2810 
2811 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2812 {
2813     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2814                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2815 }
2816 
2817 static bool mac_table_fits(void *opaque, int version_id)
2818 {
2819     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2820 }
2821 
2822 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2823 {
2824     return !mac_table_fits(opaque, version_id);
2825 }
2826 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 *
 * Each VMSTATE_WITH_TMP user below fills in (pre_save) or checks
 * (post_load/pre_load) the fields it needs through the parent pointer.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;         /* the device being saved or loaded */
    VirtIONetQueue *vqs_1;          /* parent->vqs + 1 (tx_waiting hooks) */
    uint16_t        curr_queues_1;  /* parent->curr_queues - 1, or 0 */
    uint8_t         has_ufo;        /* copy of parent->has_ufo on save */
    uint32_t        has_vnet_hdr;   /* copy of parent->has_vnet_hdr on save */
};
2837 
2838 /* The 2nd and subsequent tx_waiting flags are loaded later than
2839  * the 1st entry in the queues and only if there's more than one
2840  * entry.  We use the tmp mechanism to calculate a temporary
2841  * pointer and count and also validate the count.
2842  */
2843 
2844 static int virtio_net_tx_waiting_pre_save(void *opaque)
2845 {
2846     struct VirtIONetMigTmp *tmp = opaque;
2847 
2848     tmp->vqs_1 = tmp->parent->vqs + 1;
2849     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2850     if (tmp->parent->curr_queues == 0) {
2851         tmp->curr_queues_1 = 0;
2852     }
2853 
2854     return 0;
2855 }
2856 
2857 static int virtio_net_tx_waiting_pre_load(void *opaque)
2858 {
2859     struct VirtIONetMigTmp *tmp = opaque;
2860 
2861     /* Reuse the pointer setup from save */
2862     virtio_net_tx_waiting_pre_save(opaque);
2863 
2864     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2865         error_report("virtio-net: curr_queues %x > max_queues %x",
2866             tmp->parent->curr_queues, tmp->parent->max_queues);
2867 
2868         return -EINVAL;
2869     }
2870 
2871     return 0; /* all good */
2872 }
2873 
/*
 * Description used via VMSTATE_WITH_TMP for the tx_waiting flags of the
 * queues after the first one: curr_queues_1 VirtIONetQueue entries starting
 * at vqs_1, each encoded with vmstate_virtio_net_queue_tx_waiting.  Both
 * vqs_1 and curr_queues_1 are prepared by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2886 
2887 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2888  * flag set we need to check that we have it
2889  */
2890 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2891 {
2892     struct VirtIONetMigTmp *tmp = opaque;
2893 
2894     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2895         error_report("virtio-net: saved image requires TUN_F_UFO support");
2896         return -EINVAL;
2897     }
2898 
2899     return 0;
2900 }
2901 
2902 static int virtio_net_ufo_pre_save(void *opaque)
2903 {
2904     struct VirtIONetMigTmp *tmp = opaque;
2905 
2906     tmp->has_ufo = tmp->parent->has_ufo;
2907 
2908     return 0;
2909 }
2910 
/*
 * Description used via VMSTATE_WITH_TMP for the one-byte has_ufo flag:
 * filled from the device in pre_save and checked against the peer's
 * capabilities in post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2920 
2921 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2922  * flag set we need to check that we have it
2923  */
2924 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2925 {
2926     struct VirtIONetMigTmp *tmp = opaque;
2927 
2928     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2929         error_report("virtio-net: saved image requires vnet_hdr=on");
2930         return -EINVAL;
2931     }
2932 
2933     return 0;
2934 }
2935 
2936 static int virtio_net_vnet_pre_save(void *opaque)
2937 {
2938     struct VirtIONetMigTmp *tmp = opaque;
2939 
2940     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2941 
2942     return 0;
2943 }
2944 
/*
 * Description used via VMSTATE_WITH_TMP for the 32-bit has_vnet_hdr flag:
 * filled from the device in pre_save and checked against the peer's
 * capabilities in post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2954 
2955 static bool virtio_net_rss_needed(void *opaque)
2956 {
2957     return VIRTIO_NET(opaque)->rss_data.enabled;
2958 }
2959 
/*
 * Subsection of vmstate_virtio_net_device carrying the RSS configuration.
 * It is gated by virtio_net_rss_needed(), i.e. by rss_data.enabled.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /*
         * Variable-length table of indirections_len uint16 entries;
         * presumably allocated on load by the _ALLOC variant (the table is
         * g_free()d in virtio_net_device_unrealize).
         */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
2980 
/*
 * Migration description of the virtio-net specific device state.
 *
 * Only VIRTIO_NET_VM_VERSION is accepted (version_id and
 * minimum_version_id are equal).  After the fields are loaded,
 * virtio_net_post_load_device() rebuilds the derived state.
 * NOTE(review): the field list presumably defines the stream layout, so
 * entries must not be reordered -- confirm against the VMState docs.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue (vqs points at queue 0) */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr: checked against the peer on load, not stored */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo: checked against the peer on load, not stored */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /*
         * The multiqueue fields are only present when max_queues > 1.
         * max_queues uses the "equal" info type -- presumably the load
         * fails when the stream value differs from the local one.
         */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        /* tx_waiting of the queues after the first one */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        /* Only present when the CTRL_GUEST_OFFLOADS feature is set */
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3036 
/*
 * Net client callbacks for the virtio-net NIC; handed to qemu_new_nic()
 * in virtio_net_device_realize().
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3046 
3047 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3048 {
3049     VirtIONet *n = VIRTIO_NET(vdev);
3050     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3051     assert(n->vhost_started);
3052     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3053 }
3054 
3055 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3056                                            bool mask)
3057 {
3058     VirtIONet *n = VIRTIO_NET(vdev);
3059     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3060     assert(n->vhost_started);
3061     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3062                              vdev, idx, mask);
3063 }
3064 
3065 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3066 {
3067     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3068 
3069     n->config_size = virtio_feature_get_config_size(feature_sizes,
3070                                                     host_features);
3071 }
3072 
3073 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3074                                    const char *type)
3075 {
3076     /*
3077      * The name can be NULL, the netclient name will be type.x.
3078      */
3079     assert(type != NULL);
3080 
3081     g_free(n->netclient_name);
3082     g_free(n->netclient_type);
3083     n->netclient_name = g_strdup(name);
3084     n->netclient_type = g_strdup(type);
3085 }
3086 
3087 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3088 {
3089     HotplugHandler *hotplug_ctrl;
3090     PCIDevice *pci_dev;
3091     Error *err = NULL;
3092 
3093     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3094     if (hotplug_ctrl) {
3095         pci_dev = PCI_DEVICE(dev);
3096         pci_dev->partially_hotplugged = true;
3097         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3098         if (err) {
3099             error_report_err(err);
3100             return false;
3101         }
3102     } else {
3103         return false;
3104     }
3105     return true;
3106 }
3107 
3108 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3109                                     Error **errp)
3110 {
3111     Error *err = NULL;
3112     HotplugHandler *hotplug_ctrl;
3113     PCIDevice *pdev = PCI_DEVICE(dev);
3114     BusState *primary_bus;
3115 
3116     if (!pdev->partially_hotplugged) {
3117         return true;
3118     }
3119     primary_bus = dev->parent_bus;
3120     if (!primary_bus) {
3121         error_setg(errp, "virtio_net: couldn't find primary bus");
3122         return false;
3123     }
3124     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3125     qatomic_set(&n->failover_primary_hidden, false);
3126     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3127     if (hotplug_ctrl) {
3128         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3129         if (err) {
3130             goto out;
3131         }
3132         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3133     }
3134 
3135 out:
3136     error_propagate(errp, err);
3137     return !err;
3138 }
3139 
3140 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3141 {
3142     bool should_be_hidden;
3143     Error *err = NULL;
3144     DeviceState *dev = failover_find_primary_device(n);
3145 
3146     if (!dev) {
3147         return;
3148     }
3149 
3150     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3151 
3152     if (migration_in_setup(s) && !should_be_hidden) {
3153         if (failover_unplug_primary(n, dev)) {
3154             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3155             qapi_event_send_unplug_primary(dev->id);
3156             qatomic_set(&n->failover_primary_hidden, true);
3157         } else {
3158             warn_report("couldn't unplug primary device");
3159         }
3160     } else if (migration_has_failed(s)) {
3161         /* We already unplugged the device let's plug it back */
3162         if (!failover_replug_primary(n, dev, &err)) {
3163             if (err) {
3164                 error_report_err(err);
3165             }
3166         }
3167     }
3168 }
3169 
3170 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3171 {
3172     MigrationState *s = data;
3173     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3174     virtio_net_handle_migration_primary(n, s);
3175 }
3176 
3177 static bool failover_hide_primary_device(DeviceListener *listener,
3178                                          QemuOpts *device_opts)
3179 {
3180     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3181     const char *standby_id;
3182 
3183     if (!device_opts) {
3184         return false;
3185     }
3186     standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3187     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3188         return false;
3189     }
3190 
3191     /* failover_primary_hidden is set during feature negotiation */
3192     return qatomic_read(&n->failover_primary_hidden);
3193 }
3194 
3195 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3196 {
3197     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3198     VirtIONet *n = VIRTIO_NET(dev);
3199     NetClientState *nc;
3200     int i;
3201 
3202     if (n->net_conf.mtu) {
3203         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3204     }
3205 
3206     if (n->net_conf.duplex_str) {
3207         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3208             n->net_conf.duplex = DUPLEX_HALF;
3209         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3210             n->net_conf.duplex = DUPLEX_FULL;
3211         } else {
3212             error_setg(errp, "'duplex' must be 'half' or 'full'");
3213             return;
3214         }
3215         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3216     } else {
3217         n->net_conf.duplex = DUPLEX_UNKNOWN;
3218     }
3219 
3220     if (n->net_conf.speed < SPEED_UNKNOWN) {
3221         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3222         return;
3223     }
3224     if (n->net_conf.speed >= 0) {
3225         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3226     }
3227 
3228     if (n->failover) {
3229         n->primary_listener.hide_device = failover_hide_primary_device;
3230         qatomic_set(&n->failover_primary_hidden, true);
3231         device_listener_register(&n->primary_listener);
3232         n->migration_state.notify = virtio_net_migration_state_notifier;
3233         add_migration_state_change_notifier(&n->migration_state);
3234         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3235     }
3236 
3237     virtio_net_set_config_size(n, n->host_features);
3238     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3239 
3240     /*
3241      * We set a lower limit on RX queue size to what it always was.
3242      * Guests that want a smaller ring can always resize it without
3243      * help from us (using virtio 1 and up).
3244      */
3245     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3246         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3247         !is_power_of_2(n->net_conf.rx_queue_size)) {
3248         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3249                    "must be a power of 2 between %d and %d.",
3250                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3251                    VIRTQUEUE_MAX_SIZE);
3252         virtio_cleanup(vdev);
3253         return;
3254     }
3255 
3256     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3257         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3258         !is_power_of_2(n->net_conf.tx_queue_size)) {
3259         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3260                    "must be a power of 2 between %d and %d",
3261                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3262                    VIRTQUEUE_MAX_SIZE);
3263         virtio_cleanup(vdev);
3264         return;
3265     }
3266 
3267     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3268     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3269         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3270                    "must be a positive integer less than %d.",
3271                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3272         virtio_cleanup(vdev);
3273         return;
3274     }
3275     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3276     n->curr_queues = 1;
3277     n->tx_timeout = n->net_conf.txtimer;
3278 
3279     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3280                        && strcmp(n->net_conf.tx, "bh")) {
3281         warn_report("virtio-net: "
3282                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3283                     n->net_conf.tx);
3284         error_printf("Defaulting to \"bh\"");
3285     }
3286 
3287     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3288                                     n->net_conf.tx_queue_size);
3289 
3290     for (i = 0; i < n->max_queues; i++) {
3291         virtio_net_add_queue(n, i);
3292     }
3293 
3294     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3295     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3296     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3297     n->status = VIRTIO_NET_S_LINK_UP;
3298     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3299                               QEMU_CLOCK_VIRTUAL,
3300                               virtio_net_announce_timer, n);
3301     n->announce_timer.round = 0;
3302 
3303     if (n->netclient_type) {
3304         /*
3305          * Happen when virtio_net_set_netclient_name has been called.
3306          */
3307         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3308                               n->netclient_type, n->netclient_name, n);
3309     } else {
3310         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3311                               object_get_typename(OBJECT(dev)), dev->id, n);
3312     }
3313 
3314     peer_test_vnet_hdr(n);
3315     if (peer_has_vnet_hdr(n)) {
3316         for (i = 0; i < n->max_queues; i++) {
3317             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3318         }
3319         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3320     } else {
3321         n->host_hdr_len = 0;
3322     }
3323 
3324     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3325 
3326     n->vqs[0].tx_waiting = 0;
3327     n->tx_burst = n->net_conf.txburst;
3328     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3329     n->promisc = 1; /* for compatibility */
3330 
3331     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3332 
3333     n->vlans = g_malloc0(MAX_VLAN >> 3);
3334 
3335     nc = qemu_get_queue(n->nic);
3336     nc->rxfilter_notify_enabled = 1;
3337 
3338    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3339         struct virtio_net_config netcfg = {};
3340         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3341         vhost_net_set_config(get_vhost_net(nc->peer),
3342             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3343     }
3344     QTAILQ_INIT(&n->rsc_chains);
3345     n->qdev = dev;
3346 
3347     net_rx_pkt_init(&n->rx_pkt, false);
3348 }
3349 
3350 static void virtio_net_device_unrealize(DeviceState *dev)
3351 {
3352     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3353     VirtIONet *n = VIRTIO_NET(dev);
3354     int i, max_queues;
3355 
3356     /* This will stop vhost backend if appropriate. */
3357     virtio_net_set_status(vdev, 0);
3358 
3359     g_free(n->netclient_name);
3360     n->netclient_name = NULL;
3361     g_free(n->netclient_type);
3362     n->netclient_type = NULL;
3363 
3364     g_free(n->mac_table.macs);
3365     g_free(n->vlans);
3366 
3367     if (n->failover) {
3368         device_listener_unregister(&n->primary_listener);
3369     }
3370 
3371     max_queues = n->multiqueue ? n->max_queues : 1;
3372     for (i = 0; i < max_queues; i++) {
3373         virtio_net_del_queue(n, i);
3374     }
3375     /* delete also control vq */
3376     virtio_del_queue(vdev, max_queues * 2);
3377     qemu_announce_timer_del(&n->announce_timer, false);
3378     g_free(n->vqs);
3379     qemu_del_nic(n->nic);
3380     virtio_net_rsc_cleanup(n);
3381     g_free(n->rss_data.indirections_table);
3382     net_rx_pkt_uninit(n->rx_pkt);
3383     virtio_cleanup(vdev);
3384 }
3385 
3386 static void virtio_net_instance_init(Object *obj)
3387 {
3388     VirtIONet *n = VIRTIO_NET(obj);
3389 
3390     /*
3391      * The default config_size is sizeof(struct virtio_net_config).
3392      * Can be overriden with virtio_net_set_config_size.
3393      */
3394     n->config_size = sizeof(struct virtio_net_config);
3395     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3396                                   "bootindex", "/ethernet-phy@0",
3397                                   DEVICE(n));
3398 }
3399 
3400 static int virtio_net_pre_save(void *opaque)
3401 {
3402     VirtIONet *n = opaque;
3403 
3404     /* At this point, backend must be stopped, otherwise
3405      * it might keep writing to memory. */
3406     assert(!n->vhost_started);
3407 
3408     return 0;
3409 }
3410 
3411 static bool primary_unplug_pending(void *opaque)
3412 {
3413     DeviceState *dev = opaque;
3414     DeviceState *primary;
3415     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3416     VirtIONet *n = VIRTIO_NET(vdev);
3417 
3418     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3419         return false;
3420     }
3421     primary = failover_find_primary_device(n);
3422     return primary ? primary->pending_deleted_event : false;
3423 }
3424 
3425 static bool dev_unplug_pending(void *opaque)
3426 {
3427     DeviceState *dev = opaque;
3428     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3429 
3430     return vdc->primary_unplug_pending(dev);
3431 }
3432 
/*
 * Top-level migration description of the virtio-net device.  Only
 * VIRTIO_NET_VM_VERSION is accepted.  The single field is the generic
 * VMSTATE_VIRTIO_DEVICE entry; pre_save asserts that vhost is stopped and
 * dev_unplug_pending reports a still-pending failover primary unplug.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3444 
/*
 * User-configurable properties of the virtio-net device.
 *
 * The DEFINE_PROP_BIT64 entries each toggle one feature bit in
 * host_features (last argument is the default); the remaining entries are
 * NIC settings and tunables stored in net_conf and related fields.
 */
static Property virtio_net_properties[] = {
    /* Feature bits that are offered by default */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Feature bits that are off unless explicitly requested */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    /* TX deferral: "tx" selects timer or bottom half (see add_queue) */
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    /* Queue sizes are validated in virtio_net_device_realize() */
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    /* A non-zero host_mtu makes realize offer VIRTIO_NET_F_MTU */
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Setting speed or duplex makes realize offer SPEED_DUPLEX */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3511 
3512 static void virtio_net_class_init(ObjectClass *klass, void *data)
3513 {
3514     DeviceClass *dc = DEVICE_CLASS(klass);
3515     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3516 
3517     device_class_set_props(dc, virtio_net_properties);
3518     dc->vmsd = &vmstate_virtio_net;
3519     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3520     vdc->realize = virtio_net_device_realize;
3521     vdc->unrealize = virtio_net_device_unrealize;
3522     vdc->get_config = virtio_net_get_config;
3523     vdc->set_config = virtio_net_set_config;
3524     vdc->get_features = virtio_net_get_features;
3525     vdc->set_features = virtio_net_set_features;
3526     vdc->bad_features = virtio_net_bad_features;
3527     vdc->reset = virtio_net_reset;
3528     vdc->set_status = virtio_net_set_status;
3529     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3530     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3531     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3532     vdc->post_load = virtio_net_post_load_virtio;
3533     vdc->vmsd = &vmstate_virtio_net_device;
3534     vdc->primary_unplug_pending = primary_unplug_pending;
3535 }
3536 
3537 static const TypeInfo virtio_net_info = {
3538     .name = TYPE_VIRTIO_NET,
3539     .parent = TYPE_VIRTIO_DEVICE,
3540     .instance_size = sizeof(VirtIONet),
3541     .instance_init = virtio_net_instance_init,
3542     .class_init = virtio_net_class_init,
3543 };
3544 
3545 static void virtio_register_types(void)
3546 {
3547     type_register_static(&virtio_net_info);
3548 }
3549 
3550 type_init(virtio_register_types)
3551