xref: /openbmc/qemu/hw/net/virtio-net.c (revision 7acafcfa)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
48 #define VIRTIO_NET_VM_VERSION    11
49 
50 #define MAC_TABLE_ENTRIES    64
51 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
52 
53 /* previously fixed value */
54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
56 
57 /* for now, only allow larger queues; with virtio-1, guest can downsize */
58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
60 
61 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
62 
63 #define VIRTIO_NET_TCP_FLAG         0x3F
64 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
65 
66 /* IPv4 max payload, 16 bits in the header */
67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
69 
70 /* header length value in ip header without option */
71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
72 
73 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
75 
76 /* Purge coalesced packets timer interval, This value affects the performance
77    a lot, and should be tuned carefully, '300000'(300us) is the recommended
78    value to pass the WHQL test, '50000' can gain 2x netperf throughput with
79    tso/gso/gro 'off'. */
80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
81 
82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
83                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
/*
 * Maps each config-space-related feature bit to the end offset of the
 * virtio_net_config field(s) it exposes; used to size the config space
 * presented to the guest.  Terminated by an all-zero sentinel entry.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
/* Map a virtqueue index to its queue-pair index (each pair is RX + TX). */
static int vq2q(int queue_index)
{
    int pair = queue_index / 2;

    return pair;
}
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126     VirtIONet *n = VIRTIO_NET(vdev);
127     struct virtio_net_config netcfg;
128 
129     int ret = 0;
130     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
131     virtio_stw_p(vdev, &netcfg.status, n->status);
132     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
133     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
134     memcpy(netcfg.mac, n->mac, ETH_ALEN);
135     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
136     netcfg.duplex = n->net_conf.duplex;
137     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
138     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
139                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
140                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
141     virtio_stl_p(vdev, &netcfg.supported_hash_types,
142                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
143     memcpy(config, &netcfg, n->config_size);
144 
145     NetClientState *nc = qemu_get_queue(n->nic);
146     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
147         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
148                              n->config_size);
149     if (ret != -1) {
150         memcpy(config, &netcfg, n->config_size);
151     }
152     }
153 }
154 
155 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
156 {
157     VirtIONet *n = VIRTIO_NET(vdev);
158     struct virtio_net_config netcfg = {};
159 
160     memcpy(&netcfg, config, n->config_size);
161 
162     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
163         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
164         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
165         memcpy(n->mac, netcfg.mac, ETH_ALEN);
166         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
167     }
168 
169     NetClientState *nc = qemu_get_queue(n->nic);
170     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
171         vhost_net_set_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
172                                0, n->config_size,
173                         VHOST_SET_CONFIG_TYPE_MASTER);
174       }
175 }
176 
177 static bool virtio_net_started(VirtIONet *n, uint8_t status)
178 {
179     VirtIODevice *vdev = VIRTIO_DEVICE(n);
180     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
181         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
182 }
183 
184 static void virtio_net_announce_notify(VirtIONet *net)
185 {
186     VirtIODevice *vdev = VIRTIO_DEVICE(net);
187     trace_virtio_net_announce_notify();
188 
189     net->status |= VIRTIO_NET_S_ANNOUNCE;
190     virtio_notify_config(vdev);
191 }
192 
193 static void virtio_net_announce_timer(void *opaque)
194 {
195     VirtIONet *n = opaque;
196     trace_virtio_net_announce_timer(n->announce_timer.round);
197 
198     n->announce_timer.round--;
199     virtio_net_announce_notify(n);
200 }
201 
202 static void virtio_net_announce(NetClientState *nc)
203 {
204     VirtIONet *n = qemu_get_nic_opaque(nc);
205     VirtIODevice *vdev = VIRTIO_DEVICE(n);
206 
207     /*
208      * Make sure the virtio migration announcement timer isn't running
209      * If it is, let it trigger announcement so that we do not cause
210      * confusion.
211      */
212     if (n->announce_timer.round) {
213         return;
214     }
215 
216     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
217         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
218             virtio_net_announce_notify(n);
219     }
220 }
221 
/*
 * Bring the vhost backend's running state in line with the device state
 * implied by @status and the peer's link state.  Idempotent: returns early
 * when the backend is already where it should be.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    /* No vhost backend attached: nothing to manage. */
    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Desired state already matches the current started/stopped state. */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /*
         * The core had to fall back to swapping vnet headers itself, so
         * the backend can't be used directly; stay on userspace virtio.
         */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Propagate a negotiated MTU to the backend before starting. */
        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Mark started before the call; reset below if starting fails. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
279 
280 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
281                                           NetClientState *peer,
282                                           bool enable)
283 {
284     if (virtio_is_big_endian(vdev)) {
285         return qemu_set_vnet_be(peer, enable);
286     } else {
287         return qemu_set_vnet_le(peer, enable);
288     }
289 }
290 
291 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
292                                        int queues, bool enable)
293 {
294     int i;
295 
296     for (i = 0; i < queues; i++) {
297         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
298             enable) {
299             while (--i >= 0) {
300                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
301             }
302 
303             return true;
304         }
305     }
306 
307     return false;
308 }
309 
/*
 * Configure backend vnet-header endianness on device start, and restore
 * the default on stop.  Sets n->needs_vnet_hdr_swap when the backend
 * cannot handle the requested endianness itself.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}
332 
333 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
334 {
335     unsigned int dropped = virtqueue_drop_all(vq);
336     if (dropped) {
337         virtio_notify(vdev, vq);
338     }
339 }
340 
/*
 * VirtIODevice::set_status hook: propagate @status to the endianness
 * handling, the vhost backend, and each queue's pending-TX machinery
 * (timer or bottom half).
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond curr_queues (or all but 0 without multiqueue)
         * are treated as stopped. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* "started" means driven by userspace virtio, not by vhost. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        /* Only queues with TX work pending need timer/bh adjustment. */
        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm the deferred TX flush for this queue. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
397 
398 static void virtio_net_set_link_status(NetClientState *nc)
399 {
400     VirtIONet *n = qemu_get_nic_opaque(nc);
401     VirtIODevice *vdev = VIRTIO_DEVICE(n);
402     uint16_t old_status = n->status;
403 
404     if (nc->link_down)
405         n->status &= ~VIRTIO_NET_S_LINK_UP;
406     else
407         n->status |= VIRTIO_NET_S_LINK_UP;
408 
409     if (n->status != old_status)
410         virtio_notify_config(vdev);
411 
412     virtio_net_set_status(vdev, vdev->status);
413 }
414 
415 static void rxfilter_notify(NetClientState *nc)
416 {
417     VirtIONet *n = qemu_get_nic_opaque(nc);
418 
419     if (nc->rxfilter_notify_enabled) {
420         char *path = object_get_canonical_path(OBJECT(n->qdev));
421         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
422                                               n->netclient_name, path);
423         g_free(path);
424 
425         /* disable event notification to avoid events flooding */
426         nc->rxfilter_notify_enabled = 0;
427     }
428 }
429 
430 static intList *get_vlan_table(VirtIONet *n)
431 {
432     intList *list, *entry;
433     int i, j;
434 
435     list = NULL;
436     for (i = 0; i < MAX_VLAN >> 5; i++) {
437         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
438             if (n->vlans[i] & (1U << j)) {
439                 entry = g_malloc0(sizeof(*entry));
440                 entry->value = (i << 5) + j;
441                 entry->next = list;
442                 list = entry;
443             }
444         }
445     }
446 
447     return list;
448 }
449 
/*
 * NetClientInfo::query_rx_filter hook: snapshot the device's RX filter
 * state (promiscuous mode, unicast/multicast policy, MAC and VLAN tables)
 * into a freshly-allocated RxFilterInfo.  Re-arms rxfilter_notify() so the
 * next filter change raises an event again.  Caller owns the result.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * NOTE(review): this copies n->nobcast straight into "broadcast_allowed";
     * given the field names it looks inverted — confirm against the QMP
     * schema before changing.
     */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* MAC table entries [0, first_multi) are the unicast addresses. */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    /* Entries [first_multi, in_use) are the multicast addresses. */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
516 
/*
 * VirtIODevice::reset hook: return the device to its power-on state —
 * promiscuous single-queue mode, announce machinery stopped, MAC/VLAN
 * filters cleared, MAC restored from the configured address, and any
 * in-flight TX purged.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
555 
556 static void peer_test_vnet_hdr(VirtIONet *n)
557 {
558     NetClientState *nc = qemu_get_queue(n->nic);
559     if (!nc->peer) {
560         return;
561     }
562 
563     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
564 }
565 
/* Cached answer from peer_test_vnet_hdr(): does the peer do vnet headers? */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
570 
571 static int peer_has_ufo(VirtIONet *n)
572 {
573     if (!peer_has_vnet_hdr(n))
574         return 0;
575 
576     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
577 
578     return n->has_ufo;
579 }
580 
/*
 * Recompute the guest-visible vnet header length from the negotiated
 * features (mergeable RX buffers, VIRTIO 1.0, hash reporting) and, where
 * the peer can match it, make the host header length identical so no
 * header conversion is needed on the data path.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* Modern header; the larger v1_hash layout carries hash reports. */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        /* Legacy header; size depends only on mergeable RX buffers. */
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
610 
611 static int virtio_net_max_tx_queue_size(VirtIONet *n)
612 {
613     NetClientState *peer = n->nic_conf.peers.ncs[0];
614 
615     /*
616      * Backends other than vhost-user don't support max queue size.
617      */
618     if (!peer) {
619         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
620     }
621 
622     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
623         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
624     }
625 
626     return VIRTQUEUE_MAX_SIZE;
627 }
628 
629 static int peer_attach(VirtIONet *n, int index)
630 {
631     NetClientState *nc = qemu_get_subqueue(n->nic, index);
632 
633     if (!nc->peer) {
634         return 0;
635     }
636 
637     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
638         vhost_set_vring_enable(nc->peer, 1);
639     }
640 
641     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
642         return 0;
643     }
644 
645     if (n->max_queues == 1) {
646         return 0;
647     }
648 
649     return tap_enable(nc->peer);
650 }
651 
652 static int peer_detach(VirtIONet *n, int index)
653 {
654     NetClientState *nc = qemu_get_subqueue(n->nic, index);
655 
656     if (!nc->peer) {
657         return 0;
658     }
659 
660     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
661         vhost_set_vring_enable(nc->peer, 0);
662     }
663 
664     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
665         return 0;
666     }
667 
668     return tap_disable(nc->peer);
669 }
670 
671 static void virtio_net_set_queues(VirtIONet *n)
672 {
673     int i;
674     int r;
675 
676     if (n->nic->peer_deleted) {
677         return;
678     }
679 
680     for (i = 0; i < n->max_queues; i++) {
681         if (i < n->curr_queues) {
682             r = peer_attach(n, i);
683             assert(!r);
684         } else {
685             r = peer_detach(n, i);
686             assert(!r);
687         }
688     }
689 }
690 
691 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
692 
/*
 * VirtIODevice::get_features hook: advertise the device's feature set,
 * masked by what the peer can actually do (vnet headers, UFO) and filtered
 * through the vhost backend when one is present.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* Without vnet headers no offload or hash-report features can work. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Without a vhost backend the feature set is final here. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS and hash reporting are done in QEMU, not offered via vhost. */
    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally re-offer MTU even when the backend dropped it. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
739 
740 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
741 {
742     uint64_t features = 0;
743 
744     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
745      * but also these: */
746     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
747     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
748     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
749     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
750     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
751 
752     return features;
753 }
754 
755 static void virtio_net_apply_guest_offloads(VirtIONet *n)
756 {
757     qemu_set_offload(qemu_get_queue(n->nic)->peer,
758             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
759             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
760             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
761             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
762             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
763 }
764 
765 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
766 {
767     static const uint64_t guest_offloads_mask =
768         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
769         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
770         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
771         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
772         (1ULL << VIRTIO_NET_F_GUEST_UFO);
773 
774     return guest_offloads_mask & features;
775 }
776 
777 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
778 {
779     VirtIODevice *vdev = VIRTIO_DEVICE(n);
780     return virtio_net_guest_offloads_by_features(vdev->guest_features);
781 }
782 
/*
 * Hot-plug the failover primary device (identified earlier via its
 * failover_pair_id) if it is not already present.  On any accumulated
 * error, propagate it to @errp.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    /* Already plugged: nothing to do. */
    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            /*
             * NOTE(review): err can only be set here if qdev_device_add()
             * both returned a device and reported an error — unusual;
             * confirm whether this unplug path is actually reachable.
             */
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");

            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
            "sure primary device has parameter"
            " failover_pair_id=<virtio-net-id>\n");
}
    /* Propagate whatever error survived the above. */
    if (err) {
        error_propagate(errp, err);
    }
}
816 
817 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
818 {
819     VirtIONet *n = opaque;
820     int ret = 0;
821 
822     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
823 
824     if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
825         n->primary_device_id = g_strdup(opts->id);
826         ret = 1;
827     }
828 
829     return ret;
830 }
831 
832 static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
833 {
834     DeviceState *dev = NULL;
835     Error *err = NULL;
836 
837     if (qemu_opts_foreach(qemu_find_opts("device"),
838                          is_my_primary, n, &err)) {
839         if (err) {
840             error_propagate(errp, err);
841             return NULL;
842         }
843         if (n->primary_device_id) {
844             dev = qdev_find_recursive(sysbus_get_default(),
845                     n->primary_device_id);
846         } else {
847             error_setg(errp, "Primary device id not found");
848             return NULL;
849         }
850     }
851     return dev;
852 }
853 
854 
855 
856 static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
857                                                     DeviceState *dev,
858                                                     Error **errp)
859 {
860     DeviceState *prim_dev = NULL;
861     Error *err = NULL;
862 
863     prim_dev = virtio_net_find_primary(n, &err);
864     if (prim_dev) {
865         n->primary_device_id = g_strdup(prim_dev->id);
866         n->primary_device_opts = prim_dev->opts;
867     } else {
868         if (err) {
869             error_propagate(errp, err);
870         }
871     }
872 
873     return prim_dev;
874 }
875 
/*
 * VirtIODevice::set_features hook: act on the feature set the guest
 * accepted — multiqueue, header layout, RSC/RSS state, guest offloads,
 * vhost ack, VLAN filtering default, and failover-primary plugging.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Drop MTU unless the backend negotiated it (or bypass is enabled). */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC (receive coalescing) needs both RSC_EXT and the matching TSO. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Let every vhost-backed queue acknowledge the negotiated features. */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* With CTRL_VLAN start with all VLANs filtered; without it, pass all. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        /* First attempt; on failure, resolve the primary and retry once. */
        failover_add_primary(n, &err);
        if (err) {
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}
948 
949 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
950                                      struct iovec *iov, unsigned int iov_cnt)
951 {
952     uint8_t on;
953     size_t s;
954     NetClientState *nc = qemu_get_queue(n->nic);
955 
956     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
957     if (s != sizeof(on)) {
958         return VIRTIO_NET_ERR;
959     }
960 
961     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
962         n->promisc = on;
963     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
964         n->allmulti = on;
965     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
966         n->alluni = on;
967     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
968         n->nomulti = on;
969     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
970         n->nouni = on;
971     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
972         n->nobcast = on;
973     } else {
974         return VIRTIO_NET_ERR;
975     }
976 
977     rxfilter_notify(nc);
978 
979     return VIRTIO_NET_OK;
980 }
981 
982 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
983                                      struct iovec *iov, unsigned int iov_cnt)
984 {
985     VirtIODevice *vdev = VIRTIO_DEVICE(n);
986     uint64_t offloads;
987     size_t s;
988 
989     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
990         return VIRTIO_NET_ERR;
991     }
992 
993     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
994     if (s != sizeof(offloads)) {
995         return VIRTIO_NET_ERR;
996     }
997 
998     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
999         uint64_t supported_offloads;
1000 
1001         offloads = virtio_ldq_p(vdev, &offloads);
1002 
1003         if (!n->has_vnet_hdr) {
1004             return VIRTIO_NET_ERR;
1005         }
1006 
1007         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1008             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1009         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1010             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1011         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1012 
1013         supported_offloads = virtio_net_supported_guest_offloads(n);
1014         if (offloads & ~supported_offloads) {
1015             return VIRTIO_NET_ERR;
1016         }
1017 
1018         n->curr_guest_offloads = offloads;
1019         virtio_net_apply_guest_offloads(n);
1020 
1021         return VIRTIO_NET_OK;
1022     } else {
1023         return VIRTIO_NET_ERR;
1024     }
1025 }
1026 
/*
 * Handle a VIRTIO_NET_CTRL_MAC command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: replace the device's primary MAC with the
 * 6-byte address in the buffer.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: replace the whole MAC filter table.  The
 * buffer carries two virtio_net_ctrl_mac sections back to back: first the
 * unicast entries, then the multicast entries.  If either section exceeds
 * the MAC_TABLE_ENTRIES capacity, the corresponding overflow flag is set
 * and the device falls back to accepting that whole class of addresses.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR on a malformed buffer
 * or an unknown command.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* Exactly one MAC address, nothing more, nothing less */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    /* Build the new table in a scratch buffer; commit only on success */
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast section: 32-bit entry count followed by the entries --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* The claimed entry count must fit in what the guest actually sent */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many unicast entries: accept all unicast instead */
        uni_overflow = 1;
    }

    /* Skip past the unicast entries even when they overflowed the table */
    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* --- multicast section: same layout, must consume the rest exactly --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many multicast entries: accept all multicast instead */
        multi_overflow = 1;
    }

    /* Commit the fully-validated table to the device state */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1122 
1123 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1124                                         struct iovec *iov, unsigned int iov_cnt)
1125 {
1126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1127     uint16_t vid;
1128     size_t s;
1129     NetClientState *nc = qemu_get_queue(n->nic);
1130 
1131     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1132     vid = virtio_lduw_p(vdev, &vid);
1133     if (s != sizeof(vid)) {
1134         return VIRTIO_NET_ERR;
1135     }
1136 
1137     if (vid >= MAX_VLAN)
1138         return VIRTIO_NET_ERR;
1139 
1140     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1141         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1142     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1143         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1144     else
1145         return VIRTIO_NET_ERR;
1146 
1147     rxfilter_notify(nc);
1148 
1149     return VIRTIO_NET_OK;
1150 }
1151 
1152 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1153                                       struct iovec *iov, unsigned int iov_cnt)
1154 {
1155     trace_virtio_net_handle_announce(n->announce_timer.round);
1156     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1157         n->status & VIRTIO_NET_S_ANNOUNCE) {
1158         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1159         if (n->announce_timer.round) {
1160             qemu_announce_timer_step(&n->announce_timer);
1161         }
1162         return VIRTIO_NET_OK;
1163     } else {
1164         return VIRTIO_NET_ERR;
1165     }
1166 }
1167 
1168 static void virtio_net_disable_rss(VirtIONet *n)
1169 {
1170     if (n->rss_data.enabled) {
1171         trace_virtio_net_rss_disable();
1172     }
1173     n->rss_data.enabled = false;
1174 }
1175 
1176 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1177                                       struct iovec *iov,
1178                                       unsigned int iov_cnt,
1179                                       bool do_rss)
1180 {
1181     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1182     struct virtio_net_rss_config cfg;
1183     size_t s, offset = 0, size_get;
1184     uint16_t queues, i;
1185     struct {
1186         uint16_t us;
1187         uint8_t b;
1188     } QEMU_PACKED temp;
1189     const char *err_msg = "";
1190     uint32_t err_value = 0;
1191 
1192     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1193         err_msg = "RSS is not negotiated";
1194         goto error;
1195     }
1196     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1197         err_msg = "Hash report is not negotiated";
1198         goto error;
1199     }
1200     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1201     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1202     if (s != size_get) {
1203         err_msg = "Short command buffer";
1204         err_value = (uint32_t)s;
1205         goto error;
1206     }
1207     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1208     n->rss_data.indirections_len =
1209         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1210     n->rss_data.indirections_len++;
1211     if (!do_rss) {
1212         n->rss_data.indirections_len = 1;
1213     }
1214     if (!is_power_of_2(n->rss_data.indirections_len)) {
1215         err_msg = "Invalid size of indirection table";
1216         err_value = n->rss_data.indirections_len;
1217         goto error;
1218     }
1219     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1220         err_msg = "Too large indirection table";
1221         err_value = n->rss_data.indirections_len;
1222         goto error;
1223     }
1224     n->rss_data.default_queue = do_rss ?
1225         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1226     if (n->rss_data.default_queue >= n->max_queues) {
1227         err_msg = "Invalid default queue";
1228         err_value = n->rss_data.default_queue;
1229         goto error;
1230     }
1231     offset += size_get;
1232     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1233     g_free(n->rss_data.indirections_table);
1234     n->rss_data.indirections_table = g_malloc(size_get);
1235     if (!n->rss_data.indirections_table) {
1236         err_msg = "Can't allocate indirections table";
1237         err_value = n->rss_data.indirections_len;
1238         goto error;
1239     }
1240     s = iov_to_buf(iov, iov_cnt, offset,
1241                    n->rss_data.indirections_table, size_get);
1242     if (s != size_get) {
1243         err_msg = "Short indirection table buffer";
1244         err_value = (uint32_t)s;
1245         goto error;
1246     }
1247     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1248         uint16_t val = n->rss_data.indirections_table[i];
1249         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1250     }
1251     offset += size_get;
1252     size_get = sizeof(temp);
1253     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1254     if (s != size_get) {
1255         err_msg = "Can't get queues";
1256         err_value = (uint32_t)s;
1257         goto error;
1258     }
1259     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1260     if (queues == 0 || queues > n->max_queues) {
1261         err_msg = "Invalid number of queues";
1262         err_value = queues;
1263         goto error;
1264     }
1265     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1266         err_msg = "Invalid key size";
1267         err_value = temp.b;
1268         goto error;
1269     }
1270     if (!temp.b && n->rss_data.hash_types) {
1271         err_msg = "No key provided";
1272         err_value = 0;
1273         goto error;
1274     }
1275     if (!temp.b && !n->rss_data.hash_types) {
1276         virtio_net_disable_rss(n);
1277         return queues;
1278     }
1279     offset += size_get;
1280     size_get = temp.b;
1281     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1282     if (s != size_get) {
1283         err_msg = "Can get key buffer";
1284         err_value = (uint32_t)s;
1285         goto error;
1286     }
1287     n->rss_data.enabled = true;
1288     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1289                                 n->rss_data.indirections_len,
1290                                 temp.b);
1291     return queues;
1292 error:
1293     trace_virtio_net_rss_error(err_msg, err_value);
1294     virtio_net_disable_rss(n);
1295     return 0;
1296 }
1297 
/*
 * Handle a VIRTIO_NET_CTRL_MQ command: hash config, RSS config, or a
 * plain VQ-pairs count.  All three paths end by (re)setting the number
 * of active queue pairs; RSS state is always reset first so a failed
 * or non-RSS command leaves RSS disabled.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash-report only: current queue count is kept, no range check */
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        /* Full RSS: queue count comes from the RSS config structure */
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Validate the requested count against spec limits and device config */
    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
1342 
/*
 * Control virtqueue handler: pop each request, dispatch it to the
 * per-class handler based on the virtio_net_ctrl_hdr, then write the
 * one-byte status ack into the request's in-buffer and push it back.
 *
 * A request without room for the header or the status ack is a fatal
 * guest error (virtio_error) and stops processing.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        /*
         * Work on a copy of the out iovec: iov_discard_front modifies the
         * array, and elem->out_sg must stay intact for virtqueue_push.
         * iov2 keeps the original pointer for g_free.
         */
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        /* Ack the command by writing the status byte into the in-buffer */
        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
1395 
1396 /* RX */
1397 
/*
 * RX virtqueue kick handler: the guest has posted new receive buffers,
 * so retry any packets the peer queued while we had no room.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}
1405 
1406 static bool virtio_net_can_receive(NetClientState *nc)
1407 {
1408     VirtIONet *n = qemu_get_nic_opaque(nc);
1409     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1410     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1411 
1412     if (!vdev->vm_running) {
1413         return false;
1414     }
1415 
1416     if (nc->queue_index >= n->curr_queues) {
1417         return false;
1418     }
1419 
1420     if (!virtio_queue_ready(q->rx_vq) ||
1421         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1422         return false;
1423     }
1424 
1425     return true;
1426 }
1427 
/*
 * Check whether the RX virtqueue has room for a packet of @bufsize
 * bytes (only relevant for mergeable buffers; otherwise emptiness is
 * the criterion).  Returns 1 and leaves guest notifications disabled
 * when buffers are available; returns 0 with notifications enabled so
 * the guest will kick us when it posts more buffers.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    /* Buffers exist; suppress guest kicks while we drain packets */
    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1450 
/*
 * Byte-swap the multi-byte fields of a virtio-net header between host
 * and target endianness (no-op when they already match).
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1458 
1459 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1460  * it never finds out that the packets don't have valid checksums.  This
1461  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1462  * fix this with Xen but it hasn't appeared in an upstream release of
1463  * dhclient yet.
1464  *
1465  * To avoid breaking existing guests, we catch udp packets and add
1466  * checksums.  This is terrible but it's better than hacking the guest
1467  * kernels.
1468  *
1469  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1470  * we should provide a mechanism to disable it to avoid polluting the host
1471  * cache.
1472  */
1473 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1474                                         uint8_t *buf, size_t size)
1475 {
1476     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1477         (size > 27 && size < 1500) && /* normal sized MTU */
1478         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1479         (buf[23] == 17) && /* ip.protocol == UDP */
1480         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1481         net_checksum_calculate(buf, size);
1482         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1483     }
1484 }
1485 
/*
 * Write the virtio-net header for an incoming packet into the guest's
 * in-buffers.  With a vnet-header backend the header already sits at the
 * front of @buf (after the dhclient workaround and optional byte-swap);
 * otherwise a zeroed GSO_NONE header is synthesized.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        /* May patch the UDP checksum in place — see the comment above */
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1507 
1508 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1509 {
1510     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1511     static const uint8_t vlan[] = {0x81, 0x00};
1512     uint8_t *ptr = (uint8_t *)buf;
1513     int i;
1514 
1515     if (n->promisc)
1516         return 1;
1517 
1518     ptr += n->host_hdr_len;
1519 
1520     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1521         int vid = lduw_be_p(ptr + 14) & 0xfff;
1522         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1523             return 0;
1524     }
1525 
1526     if (ptr[0] & 1) { // multicast
1527         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1528             return !n->nobcast;
1529         } else if (n->nomulti) {
1530             return 0;
1531         } else if (n->allmulti || n->mac_table.multi_overflow) {
1532             return 1;
1533         }
1534 
1535         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1536             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1537                 return 1;
1538             }
1539         }
1540     } else { // unicast
1541         if (n->nouni) {
1542             return 0;
1543         } else if (n->alluni || n->mac_table.uni_overflow) {
1544             return 1;
1545         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1546             return 1;
1547         }
1548 
1549         for (i = 0; i < n->mac_table.first_multi; i++) {
1550             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1551                 return 1;
1552             }
1553         }
1554     }
1555 
1556     return 0;
1557 }
1558 
1559 static uint8_t virtio_net_get_hash_type(bool isip4,
1560                                         bool isip6,
1561                                         bool isudp,
1562                                         bool istcp,
1563                                         uint32_t types)
1564 {
1565     if (isip4) {
1566         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1567             return NetPktRssIpV4Tcp;
1568         }
1569         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1570             return NetPktRssIpV4Udp;
1571         }
1572         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1573             return NetPktRssIpV4;
1574         }
1575     } else if (isip6) {
1576         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1577                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1578 
1579         if (istcp && (types & mask)) {
1580             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1581                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1582         }
1583         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1584         if (isudp && (types & mask)) {
1585             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1586                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1587         }
1588         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1589         if (types & mask) {
1590             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1591                 NetPktRssIpV6Ex : NetPktRssIpV6;
1592         }
1593     }
1594     return 0xff;
1595 }
1596 
/*
 * Store the computed RSS hash and its report type into the
 * virtio_net_hdr_v1_hash header at the front of the packet buffer.
 * NOTE(review): the fields are written in host byte order with no
 * virtio_st*_p conversion — assumed fine for little-endian guests;
 * confirm for cross-endian configurations.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1604 
/*
 * Compute the RSS hash for an incoming packet, optionally store it in
 * the packet's header (hash report), and pick the destination queue via
 * the indirection table.
 *
 * Returns the index of the queue the packet should be redirected to, or
 * -1 when it should stay on the current queue (no redirect, or it
 * already matches).
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Maps NetPktRss* selector -> VIRTIO_NET_HASH_REPORT_* value */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* Fragments carry no L4 header, so L4 hash types don't apply */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No applicable hash type: report NONE, send to default queue */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so this is hash % len */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1657 
/*
 * Deliver one packet from the backend into the guest's RX virtqueue.
 *
 * Handles RSS redirection (re-entering once with @no_rss set to avoid
 * recursion), RX filtering, and splitting the packet across multiple
 * virtqueue elements when mergeable RX buffers are negotiated.
 *
 * Returns the packet size on success or on a deliberate drop, 0 when the
 * queue has no room (the caller will retry after the next guest kick),
 * and -1 on error.  Caller must hold the RCU read lock.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* RSS may steer the packet to a different subqueue */
    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered-out packets are reported as consumed, not as an error */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Copy the packet into as many virtqueue elements as needed */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /*
             * Running out of buffers mid-packet (i != 0) should be
             * impossible after the has_buffers check — treat it as a
             * device error.
             */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            /*
             * Remember where num_buffers lives so it can be patched once
             * the final buffer count is known (after the loop).
             */
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the rest of the (hash-bearing) header after mhdr */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    /* Patch the number of merged buffers into the first header */
    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
1774 
/*
 * Entry point for packet delivery: takes the RCU read lock for the
 * duration of virtio_net_receive_rcu.
 */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
1782 
/*
 * Locate the IPv4 and TCP headers of a buffered frame and fill in the
 * RSC unit descriptor (header pointers, header lengths, payload size).
 * Assumes the frame layout is guest_hdr + Ethernet + IPv4 + TCP.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL field is in 32-bit words; << 2 converts to bytes */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* TCP data offset is in the top 4 bits, in 32-bit words */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* IPv4 total length includes the IP header, so subtract both headers */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
1799 
/*
 * Parse the IPv6 and TCP headers of 'buf' (which still carries the
 * virtio-net header) and record their positions and lengths in 'unit'.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    /* IPv6 header follows the virtio-net header and the ethernet header */
    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    /* No extension-header walk: TCP is assumed to follow the fixed
     * IPv6 header directly (sanity_check6 only admits IPPROTO_TCP) */
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    /* TCP data offset in words; ">> 10" = ">> 12" plus "<< 2" */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6,
       the ip header is excluded from the ipv6 payload length */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
1818 
/*
 * Deliver one cached segment to the guest and release it.
 * For coalesced segments the RSC fields of the virtio-net header are
 * filled in so the guest can see how many packets were merged.
 * Returns what virtio_net_do_receive() returned (0 means send failed);
 * the segment is freed in either case.
 */
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    /* seg->buf starts with the virtio-net header */
    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    /* consumed whether or not the receive succeeded */
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
1847 
1848 static void virtio_net_rsc_purge(void *opq)
1849 {
1850     VirtioNetRscSeg *seg, *rn;
1851     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1852 
1853     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1854         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1855             chain->stat.purge_failed++;
1856             continue;
1857         }
1858     }
1859 
1860     chain->stat.timer++;
1861     if (!QTAILQ_EMPTY(&chain->buffers)) {
1862         timer_mod(chain->drain_timer,
1863               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1864     }
1865 }
1866 
1867 static void virtio_net_rsc_cleanup(VirtIONet *n)
1868 {
1869     VirtioNetRscChain *chain, *rn_chain;
1870     VirtioNetRscSeg *seg, *rn_seg;
1871 
1872     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1873         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1874             QTAILQ_REMOVE(&chain->buffers, seg, next);
1875             g_free(seg->buf);
1876             g_free(seg);
1877         }
1878 
1879         timer_del(chain->drain_timer);
1880         timer_free(chain->drain_timer);
1881         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1882         g_free(chain);
1883     }
1884 }
1885 
1886 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1887                                      NetClientState *nc,
1888                                      const uint8_t *buf, size_t size)
1889 {
1890     uint16_t hdr_len;
1891     VirtioNetRscSeg *seg;
1892 
1893     hdr_len = chain->n->guest_hdr_len;
1894     seg = g_malloc(sizeof(VirtioNetRscSeg));
1895     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1896         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1897     memcpy(seg->buf, buf, size);
1898     seg->size = size;
1899     seg->packets = 1;
1900     seg->dup_ack = 0;
1901     seg->is_coalesced = 0;
1902     seg->nc = nc;
1903 
1904     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1905     chain->stat.cache++;
1906 
1907     switch (chain->proto) {
1908     case ETH_P_IP:
1909         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1910         break;
1911     case ETH_P_IPV6:
1912         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1913         break;
1914     default:
1915         g_assert_not_reached();
1916     }
1917 }
1918 
1919 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1920                                          VirtioNetRscSeg *seg,
1921                                          const uint8_t *buf,
1922                                          struct tcp_header *n_tcp,
1923                                          struct tcp_header *o_tcp)
1924 {
1925     uint32_t nack, oack;
1926     uint16_t nwin, owin;
1927 
1928     nack = htonl(n_tcp->th_ack);
1929     nwin = htons(n_tcp->th_win);
1930     oack = htonl(o_tcp->th_ack);
1931     owin = htons(o_tcp->th_win);
1932 
1933     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1934         chain->stat.ack_out_of_win++;
1935         return RSC_FINAL;
1936     } else if (nack == oack) {
1937         /* duplicated ack or window probe */
1938         if (nwin == owin) {
1939             /* duplicated ack, add dup ack count due to whql test up to 1 */
1940             chain->stat.dup_ack++;
1941             return RSC_FINAL;
1942         } else {
1943             /* Coalesce window update */
1944             o_tcp->th_win = n_tcp->th_win;
1945             chain->stat.win_update++;
1946             return RSC_COALESCE;
1947         }
1948     } else {
1949         /* pure ack, go to 'C', finalize*/
1950         chain->stat.pure_ack++;
1951         return RSC_FINAL;
1952     }
1953 }
1954 
/*
 * Try to merge the new packet (already parsed into 'n_unit') into the
 * cached segment 'seg'.  Returns RSC_COALESCE when merged (payload
 * and/or TCP fields absorbed into seg), RSC_FINAL when the cached
 * segment must be drained before this packet can be delivered.
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* start of the new packet's TCP payload */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            /* no new data: handle as ack/window-update */
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        /* would exceed the chain's maximum coalesced size: drain first */
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* carry the newest ack and window over to the cached segment */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* append the new payload to the cached buffer */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2019 
2020 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2021                                         VirtioNetRscSeg *seg,
2022                                         const uint8_t *buf, size_t size,
2023                                         VirtioNetRscUnit *unit)
2024 {
2025     struct ip_header *ip1, *ip2;
2026 
2027     ip1 = (struct ip_header *)(unit->ip);
2028     ip2 = (struct ip_header *)(seg->unit.ip);
2029     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2030         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2031         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2032         chain->stat.no_match++;
2033         return RSC_NO_MATCH;
2034     }
2035 
2036     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2037 }
2038 
2039 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2040                                         VirtioNetRscSeg *seg,
2041                                         const uint8_t *buf, size_t size,
2042                                         VirtioNetRscUnit *unit)
2043 {
2044     struct ip6_header *ip1, *ip2;
2045 
2046     ip1 = (struct ip6_header *)(unit->ip);
2047     ip2 = (struct ip6_header *)(seg->unit.ip);
2048     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2049         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2050         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2051         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2052             chain->stat.no_match++;
2053             return RSC_NO_MATCH;
2054     }
2055 
2056     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2057 }
2058 
2059 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2060  * to prevent out of order */
2061 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2062                                          struct tcp_header *tcp)
2063 {
2064     uint16_t tcp_hdr;
2065     uint16_t tcp_flag;
2066 
2067     tcp_flag = htons(tcp->th_offset_flags);
2068     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2069     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2070     tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
2071     if (tcp_flag & TH_SYN) {
2072         chain->stat.tcp_syn++;
2073         return RSC_BYPASS;
2074     }
2075 
2076     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2077         chain->stat.tcp_ctrl_drain++;
2078         return RSC_FINAL;
2079     }
2080 
2081     if (tcp_hdr > sizeof(struct tcp_header)) {
2082         chain->stat.tcp_all_opt++;
2083         return RSC_FINAL;
2084     }
2085 
2086     return RSC_CANDIDATE;
2087 }
2088 
/*
 * Core coalescing loop: try to merge the packet into one of the chain's
 * cached segments.  On an empty cache the packet is cached and the
 * drain timer armed.  Returns the number of bytes accepted (0 signals
 * a failed delivery to the caller).
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        /* first packet of the flow: cache it and start the drain timer */
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            /* flush the matched cached segment first to keep ordering */
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* no cached segment of this flow: cache the packet as a new one */
    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2134 
/* Drain a connection's cached data before delivering a control packet,
 * to avoid out of order segments.
 * @ip_start:  byte offset of the address pair within 'buf'
 * @ip_size:   combined size of source+destination addresses to compare
 * @tcp_port:  byte offset of the TCP source/destination port pair
 */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    /* Both 16-bit ports compared with a single 32-bit load.
     * NOTE(review): this dereferences a potentially unaligned pointer —
     * relies on the host tolerating unaligned access; confirm. */
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        /* same flow found: flush its cached segment */
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    /* deliver the control packet itself after the drain */
    return virtio_net_do_receive(nc, buf, size);
}
2161 
/*
 * Check whether an IPv4 packet is eligible for coalescing.
 * Returns RSC_CANDIDATE when it is, RSC_BYPASS when it must be passed
 * through to the guest unmodified.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    /* NOTE(review): this bumps the ip_option counter even though it is
     * the version check, not the option check — verify intent. */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* total length must cover at least IP+TCP headers and must not
     * claim more bytes than the packet actually carries */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2207 
/*
 * RSC entry point for IPv4 packets: bypass, drain-then-deliver, or
 * coalesce the packet depending on sanity checks and TCP control flags.
 */
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    /* too small for vnet + eth + ip + tcp headers: plain delivery */
    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* control packet: drain this flow's cache before delivering.
         * "+ 12" is the offset of the source address within the IPv4
         * header; the port pair starts right after the IPv4 header. */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
2242 
/*
 * Check whether an IPv6 packet is eligible for coalescing.
 * Returns RSC_CANDIDATE when it is, RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* NOTE(review): ip6_un1_flow is a 32-bit field; masking with 0xF0
     * assumes the version nibble sits in its low-order byte — verify
     * this is correct for both host endiannesses.  Also, unlike the
     * IPv4 check, no stat counter is bumped on this bypass. */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* payload length must cover a TCP header and must not claim more
     * bytes than the packet actually carries */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2276 
2277 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2278                                       const uint8_t *buf, size_t size)
2279 {
2280     int32_t ret;
2281     uint16_t hdr_len;
2282     VirtioNetRscChain *chain;
2283     VirtioNetRscUnit unit;
2284 
2285     chain = (VirtioNetRscChain *)opq;
2286     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2287 
2288     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2289         + sizeof(tcp_header))) {
2290         return virtio_net_do_receive(nc, buf, size);
2291     }
2292 
2293     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2294     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2295                                                  unit.ip, buf, size)) {
2296         return virtio_net_do_receive(nc, buf, size);
2297     }
2298 
2299     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2300     if (ret == RSC_BYPASS) {
2301         return virtio_net_do_receive(nc, buf, size);
2302     } else if (ret == RSC_FINAL) {
2303         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2304                 ((hdr_len + sizeof(struct eth_header)) + 8),
2305                 VIRTIO_NET_IP6_ADDR_SIZE,
2306                 hdr_len + sizeof(struct eth_header)
2307                 + sizeof(struct ip6_header));
2308     }
2309 
2310     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2311 }
2312 
/*
 * Find the coalescing chain for 'proto' (ETH_P_IP or ETH_P_IPV6),
 * creating and registering it on first use.  Returns NULL for any
 * other protocol.
 */
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    /* only TCP over IPv4/IPv6 is a candidate for coalescing */
    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    /* reuse the existing chain for this protocol if there is one */
    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    /* first packet of this protocol: create and register a new chain */
    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
2348 
2349 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2350                                       const uint8_t *buf,
2351                                       size_t size)
2352 {
2353     uint16_t proto;
2354     VirtioNetRscChain *chain;
2355     struct eth_header *eth;
2356     VirtIONet *n;
2357 
2358     n = qemu_get_nic_opaque(nc);
2359     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2360         return virtio_net_do_receive(nc, buf, size);
2361     }
2362 
2363     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2364     proto = htons(eth->h_proto);
2365 
2366     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2367     if (chain) {
2368         chain->stat.received++;
2369         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2370             return virtio_net_rsc_receive4(chain, nc, buf, size);
2371         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2372             return virtio_net_rsc_receive6(chain, nc, buf, size);
2373         }
2374     }
2375     return virtio_net_do_receive(nc, buf, size);
2376 }
2377 
2378 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2379                                   size_t size)
2380 {
2381     VirtIONet *n = qemu_get_nic_opaque(nc);
2382     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2383         return virtio_net_rsc_receive(nc, buf, size);
2384     } else {
2385         return virtio_net_do_receive(nc, buf, size);
2386     }
2387 }
2388 
2389 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2390 
/*
 * Completion callback for an asynchronous transmit started by
 * virtio_net_flush_tx(): return the in-flight element to the guest,
 * re-enable tx notifications and resume flushing the queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* complete the element that was parked in async_tx */
    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    /* notifications were disabled while the send was pending */
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2406 
2407 /* TX */
/*
 * Flush up to n->tx_burst packets from the tx virtqueue to the backend.
 * Returns the number of packets flushed, -EBUSY when a packet is still
 * pending asynchronously (tx notifications are left disabled until
 * virtio_net_tx_complete() runs), or -EINVAL when the guest supplied a
 * malformed descriptor (the device is marked broken via virtio_error).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* a previous async send is still in flight: wait for completion */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* byte-swap the header copy and splice it in front of
                 * the remaining payload iovecs */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* payload did not fit in sg2: drop the packet */
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* backend queued the packet: park the element and stop until
             * virtio_net_tx_complete() fires */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2502 
/*
 * Guest kick handler for the timer-based tx mode: the first kick arms
 * the tx timer (batching subsequent kicks); a kick while the timer is
 * pending flushes immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* link down: discard whatever the guest queued */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* second kick before the timer fired: flush right away */
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        /* first kick: arm the timer and suppress further notifications
         * until it fires */
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
2533 
/*
 * Guest kick handler for the bottom-half tx mode: schedule the tx BH
 * and suppress notifications until the BH has flushed the queue.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* link down: discard whatever the guest queued */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* a flush is already pending */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2555 
/*
 * Tx timer callback (timer-based tx mode): re-enable notifications and
 * flush whatever the guest queued since the timer was armed.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2578 
/*
 * Tx bottom half (bh-based tx mode): flush the queue, rescheduling
 * itself while the guest keeps the queue busy, and re-enabling guest
 * notifications once the queue goes quiet.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2627 
2628 static void virtio_net_add_queue(VirtIONet *n, int index)
2629 {
2630     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2631 
2632     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2633                                            virtio_net_handle_rx);
2634 
2635     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2636         n->vqs[index].tx_vq =
2637             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2638                              virtio_net_handle_tx_timer);
2639         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2640                                               virtio_net_tx_timer,
2641                                               &n->vqs[index]);
2642     } else {
2643         n->vqs[index].tx_vq =
2644             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2645                              virtio_net_handle_tx_bh);
2646         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2647     }
2648 
2649     n->vqs[index].tx_waiting = 0;
2650     n->vqs[index].n = n;
2651 }
2652 
/*
 * Tear down the rx/tx virtqueue pair for queue 'index' along with its
 * timer or bottom half.  Virtqueue layout: index*2 is rx, index*2+1 is tx.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* drop anything still queued for this subqueue */
    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        /* timer-based tx mode */
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        /* bottom-half-based tx mode */
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2673 
2674 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2675 {
2676     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2677     int old_num_queues = virtio_get_num_queues(vdev);
2678     int new_num_queues = new_max_queues * 2 + 1;
2679     int i;
2680 
2681     assert(old_num_queues >= 3);
2682     assert(old_num_queues % 2 == 1);
2683 
2684     if (old_num_queues == new_num_queues) {
2685         return;
2686     }
2687 
2688     /*
2689      * We always need to remove and add ctrl vq if
2690      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2691      * and then we only enter one of the following two loops.
2692      */
2693     virtio_del_queue(vdev, old_num_queues - 1);
2694 
2695     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2696         /* new_num_queues < old_num_queues */
2697         virtio_net_del_queue(n, i / 2);
2698     }
2699 
2700     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2701         /* new_num_queues > old_num_queues */
2702         virtio_net_add_queue(n, i / 2);
2703     }
2704 
2705     /* add ctrl_vq last */
2706     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2707 }
2708 
2709 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2710 {
2711     int max = multiqueue ? n->max_queues : 1;
2712 
2713     n->multiqueue = multiqueue;
2714     virtio_net_change_num_queues(n, max);
2715 
2716     virtio_net_set_queues(n);
2717 }
2718 
/*
 * post_load hook of vmstate_virtio_net_device: rebuild runtime state that
 * is derived from (rather than contained in) the migrated fields —
 * header layout, MAC filter bookkeeping, per-subqueue link state, the
 * guest-announce timer and RSS tracing.  Returns 0 on success.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    /* Recompute guest header length from the negotiated features */
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        /* multicast bit is the LSB of the first address octet */
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        /* Resume a self-announce sequence that was in flight on the source */
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2788 
2789 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2790 {
2791     VirtIONet *n = VIRTIO_NET(vdev);
2792     /*
2793      * The actual needed state is now in saved_guest_offloads,
2794      * see virtio_net_post_load_device for detail.
2795      * Restore it back and apply the desired offloads.
2796      */
2797     n->curr_guest_offloads = n->saved_guest_offloads;
2798     if (peer_has_vnet_hdr(n)) {
2799         virtio_net_apply_guest_offloads(n);
2800     }
2801 
2802     return 0;
2803 }
2804 
/* tx_waiting field of a VirtIONetQueue.  Used for vqs[0] directly in
 * vmstate_virtio_net_device and, via vmstate_virtio_net_tx_waiting, for
 * the remaining queues. */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2813 
2814 static bool max_queues_gt_1(void *opaque, int version_id)
2815 {
2816     return VIRTIO_NET(opaque)->max_queues > 1;
2817 }
2818 
2819 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2820 {
2821     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2822                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2823 }
2824 
2825 static bool mac_table_fits(void *opaque, int version_id)
2826 {
2827     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2828 }
2829 
2830 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2831 {
2832     return !mac_table_fits(opaque, version_id);
2833 }
2834 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;        /* device whose state is being streamed */
    VirtIONetQueue *vqs_1;         /* points at parent->vqs[1] (entry 0 is
                                    * migrated with the main device state) */
    uint16_t        curr_queues_1; /* number of entries streamed via vqs_1 */
    uint8_t         has_ufo;       /* mirrors parent->has_ufo for migration */
    uint32_t        has_vnet_hdr;  /* mirrors parent->has_vnet_hdr */
};
2845 
2846 /* The 2nd and subsequent tx_waiting flags are loaded later than
2847  * the 1st entry in the queues and only if there's more than one
2848  * entry.  We use the tmp mechanism to calculate a temporary
2849  * pointer and count and also validate the count.
2850  */
2851 
2852 static int virtio_net_tx_waiting_pre_save(void *opaque)
2853 {
2854     struct VirtIONetMigTmp *tmp = opaque;
2855 
2856     tmp->vqs_1 = tmp->parent->vqs + 1;
2857     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2858     if (tmp->parent->curr_queues == 0) {
2859         tmp->curr_queues_1 = 0;
2860     }
2861 
2862     return 0;
2863 }
2864 
2865 static int virtio_net_tx_waiting_pre_load(void *opaque)
2866 {
2867     struct VirtIONetMigTmp *tmp = opaque;
2868 
2869     /* Reuse the pointer setup from save */
2870     virtio_net_tx_waiting_pre_save(opaque);
2871 
2872     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2873         error_report("virtio-net: curr_queues %x > max_queues %x",
2874             tmp->parent->curr_queues, tmp->parent->max_queues);
2875 
2876         return -EINVAL;
2877     }
2878 
2879     return 0; /* all good */
2880 }
2881 
/* Streams the tx_waiting flag of queues 1..curr_queues-1 through the
 * VirtIONetMigTmp pointer/count set up by the pre_save/pre_load hooks. */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2894 
2895 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2896  * flag set we need to check that we have it
2897  */
2898 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2899 {
2900     struct VirtIONetMigTmp *tmp = opaque;
2901 
2902     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2903         error_report("virtio-net: saved image requires TUN_F_UFO support");
2904         return -EINVAL;
2905     }
2906 
2907     return 0;
2908 }
2909 
2910 static int virtio_net_ufo_pre_save(void *opaque)
2911 {
2912     struct VirtIONetMigTmp *tmp = opaque;
2913 
2914     tmp->has_ufo = tmp->parent->has_ufo;
2915 
2916     return 0;
2917 }
2918 
/* Migrates the has_ufo capability flag and validates it on load. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2928 
2929 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2930  * flag set we need to check that we have it
2931  */
2932 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2933 {
2934     struct VirtIONetMigTmp *tmp = opaque;
2935 
2936     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2937         error_report("virtio-net: saved image requires vnet_hdr=on");
2938         return -EINVAL;
2939     }
2940 
2941     return 0;
2942 }
2943 
2944 static int virtio_net_vnet_pre_save(void *opaque)
2945 {
2946     struct VirtIONetMigTmp *tmp = opaque;
2947 
2948     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2949 
2950     return 0;
2951 }
2952 
/* Migrates the has_vnet_hdr capability flag and validates it on load. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2962 
2963 static bool virtio_net_rss_needed(void *opaque)
2964 {
2965     return VIRTIO_NET(opaque)->rss_data.enabled;
2966 }
2967 
/* Optional subsection carrying RSS/hash-report configuration; only
 * present in the stream when virtio_net_rss_needed() is true. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* indirections_table is (re)allocated on load to indirections_len */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
2988 
/* Main device-state description.  Field order is wire format: do not
 * reorder without bumping VIRTIO_NET_VM_VERSION. */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue 0; the rest follow via the WITH_TMP below */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* Multiqueue fields only hit the wire when max_queues > 1;
         * max_queues must match exactly between source and destination */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3044 
/* Net-layer callbacks registered for each virtio-net NIC subqueue. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3054 
3055 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3056 {
3057     VirtIONet *n = VIRTIO_NET(vdev);
3058     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3059     assert(n->vhost_started);
3060     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3061 }
3062 
3063 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3064                                            bool mask)
3065 {
3066     VirtIONet *n = VIRTIO_NET(vdev);
3067     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3068     assert(n->vhost_started);
3069     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3070                              vdev, idx, mask);
3071 }
3072 
/*
 * Compute n->config_size for the given feature set.  VIRTIO_NET_F_MAC is
 * forced on here because the config space always contains the MAC field;
 * host_features is passed by value, so the caller's copy is untouched.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
3080 
3081 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3082                                    const char *type)
3083 {
3084     /*
3085      * The name can be NULL, the netclient name will be type.x.
3086      */
3087     assert(type != NULL);
3088 
3089     g_free(n->netclient_name);
3090     g_free(n->netclient_type);
3091     n->netclient_name = g_strdup(name);
3092     n->netclient_type = g_strdup(type);
3093 }
3094 
3095 static bool failover_unplug_primary(VirtIONet *n)
3096 {
3097     HotplugHandler *hotplug_ctrl;
3098     PCIDevice *pci_dev;
3099     Error *err = NULL;
3100 
3101     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3102     if (hotplug_ctrl) {
3103         pci_dev = PCI_DEVICE(n->primary_dev);
3104         pci_dev->partially_hotplugged = true;
3105         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3106         if (err) {
3107             error_report_err(err);
3108             return false;
3109         }
3110     } else {
3111         return false;
3112     }
3113     return true;
3114 }
3115 
/*
 * Re-plug the failover primary device after a failed migration.
 * Returns true on success (including when the device was never
 * partially hotplugged, or — note — when no hotplug handler is found,
 * since err stays NULL on that path); false with *errp set on failure.
 */
static bool failover_replug_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);

    if (!pdev->partially_hotplugged) {
        return true;
    }
    /* Rebuild QemuOpts from the saved dict if we don't have them anymore */
    if (!n->primary_device_opts) {
        n->primary_device_opts = qemu_opts_from_qdict(
                qemu_find_opts("device"),
                n->primary_device_dict, errp);
        if (!n->primary_device_opts) {
            return false;
        }
    }
    n->primary_bus = n->primary_dev->parent_bus;
    if (!n->primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(n->primary_dev, n->primary_bus);
    n->primary_should_be_hidden = false;
    /* Preserve the partially-hotplugged marker across the re-plug */
    if (!qemu_opt_set_bool(n->primary_device_opts,
                           "partially_hotplugged", true, errp)) {
        return false;
    }
    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
    }

out:
    error_propagate(errp, err);
    return !err;
}
3157 
/*
 * React to migration state changes for the failover primary device:
 * unplug it when migration setup starts (it cannot be migrated), and
 * plug it back if the migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = atomic_read(&n->primary_should_be_hidden);

    /* Lazily resolve the primary device on first use */
    if (!n->primary_dev) {
        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            /* Drop its vmstate so migration doesn't try to save it */
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                    qdev_get_vmsd(n->primary_dev),
                    n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            atomic_set(&n->primary_should_be_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3192 
3193 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3194 {
3195     MigrationState *s = data;
3196     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3197     virtio_net_handle_migration_primary(n, s);
3198 }
3199 
3200 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3201             QemuOpts *device_opts)
3202 {
3203     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3204     bool match_found = false;
3205     bool hide = false;
3206 
3207     if (!device_opts) {
3208         return -1;
3209     }
3210     n->primary_device_dict = qemu_opts_to_qdict(device_opts,
3211             n->primary_device_dict);
3212     if (n->primary_device_dict) {
3213         g_free(n->standby_id);
3214         n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
3215                     "failover_pair_id"));
3216     }
3217     if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
3218         match_found = true;
3219     } else {
3220         match_found = false;
3221         hide = false;
3222         g_free(n->standby_id);
3223         n->primary_device_dict = NULL;
3224         goto out;
3225     }
3226 
3227     n->primary_device_opts = device_opts;
3228 
3229     /* primary_should_be_hidden is set during feature negotiation */
3230     hide = atomic_read(&n->primary_should_be_hidden);
3231 
3232     if (n->primary_device_dict) {
3233         g_free(n->primary_device_id);
3234         n->primary_device_id = g_strdup(qdict_get_try_str(
3235                     n->primary_device_dict, "id"));
3236         if (!n->primary_device_id) {
3237             warn_report("primary_device_id not set");
3238         }
3239     }
3240 
3241 out:
3242     if (match_found && hide) {
3243         return 1;
3244     } else if (match_found && !hide) {
3245         return 0;
3246     } else {
3247         return -1;
3248     }
3249 }
3250 
/*
 * Realize the virtio-net device: validate configuration (duplex, speed,
 * queue sizes, queue count), initialize the virtio transport, create the
 * virtqueues and the NIC backend, and set up default filter/offload state.
 * On error, sets *errp and unwinds any virtio init already done.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* SPEED_UNKNOWN is the only accepted negative value */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        /* Watch device creation and migration for the failover primary */
        n->primary_listener.should_be_hidden =
            virtio_net_primary_should_be_hidden;
        atomic_set(&n->primary_should_be_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* Each queue pair needs 2 virtqueues, plus one ctrl vq */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    /* Decide the host header length based on the peer's capabilities */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}
3400 
/*
 * Unrealize: stop the backend, free device-owned allocations, delete all
 * virtqueues (data pairs plus ctrl vq) and tear down the virtio transport.
 * Mirrors the allocations made in virtio_net_device_realize.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        g_free(n->primary_device_id);
        g_free(n->standby_id);
        qobject_unref(n->primary_device_dict);
        n->primary_device_dict = NULL;
    }

    /* Only max_queues pairs exist when multiqueue was enabled */
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3439 
/* Instance init: set the default config size and expose "bootindex". */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));
}
3453 
3454 static int virtio_net_pre_save(void *opaque)
3455 {
3456     VirtIONet *n = opaque;
3457 
3458     /* At this point, backend must be stopped, otherwise
3459      * it might keep writing to memory. */
3460     assert(!n->vhost_started);
3461 
3462     return 0;
3463 }
3464 
3465 static bool primary_unplug_pending(void *opaque)
3466 {
3467     DeviceState *dev = opaque;
3468     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3469     VirtIONet *n = VIRTIO_NET(vdev);
3470 
3471     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3472         return false;
3473     }
3474     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3475 }
3476 
3477 static bool dev_unplug_pending(void *opaque)
3478 {
3479     DeviceState *dev = opaque;
3480     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3481 
3482     return vdc->primary_unplug_pending(dev);
3483 }
3484 
/* Top-level vmstate wrapper: delegates the real work to the generic
 * virtio save/load path (which invokes vmstate_virtio_net_device). */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3496 
3497 static Property virtio_net_properties[] = {
3498     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3499                     VIRTIO_NET_F_CSUM, true),
3500     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3501                     VIRTIO_NET_F_GUEST_CSUM, true),
3502     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3503     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3504                     VIRTIO_NET_F_GUEST_TSO4, true),
3505     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3506                     VIRTIO_NET_F_GUEST_TSO6, true),
3507     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3508                     VIRTIO_NET_F_GUEST_ECN, true),
3509     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3510                     VIRTIO_NET_F_GUEST_UFO, true),
3511     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3512                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3513     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3514                     VIRTIO_NET_F_HOST_TSO4, true),
3515     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3516                     VIRTIO_NET_F_HOST_TSO6, true),
3517     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3518                     VIRTIO_NET_F_HOST_ECN, true),
3519     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3520                     VIRTIO_NET_F_HOST_UFO, true),
3521     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3522                     VIRTIO_NET_F_MRG_RXBUF, true),
3523     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3524                     VIRTIO_NET_F_STATUS, true),
3525     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3526                     VIRTIO_NET_F_CTRL_VQ, true),
3527     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3528                     VIRTIO_NET_F_CTRL_RX, true),
3529     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3530                     VIRTIO_NET_F_CTRL_VLAN, true),
3531     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3532                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3533     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3534                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3535     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3536                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3537     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3538     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3539                     VIRTIO_NET_F_RSS, false),
3540     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3541                     VIRTIO_NET_F_HASH_REPORT, false),
3542     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3543                     VIRTIO_NET_F_RSC_EXT, false),
3544     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3545                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3546     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3547     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3548                        TX_TIMER_INTERVAL),
3549     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3550     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3551     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3552                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3553     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3554                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3555     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3556     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3557                      true),
3558     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3559     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3560     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3561     DEFINE_PROP_END_OF_LIST(),
3562 };
3563 
3564 static void virtio_net_class_init(ObjectClass *klass, void *data)
3565 {
3566     DeviceClass *dc = DEVICE_CLASS(klass);
3567     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3568 
3569     device_class_set_props(dc, virtio_net_properties);
3570     dc->vmsd = &vmstate_virtio_net;
3571     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3572     vdc->realize = virtio_net_device_realize;
3573     vdc->unrealize = virtio_net_device_unrealize;
3574     vdc->get_config = virtio_net_get_config;
3575     vdc->set_config = virtio_net_set_config;
3576     vdc->get_features = virtio_net_get_features;
3577     vdc->set_features = virtio_net_set_features;
3578     vdc->bad_features = virtio_net_bad_features;
3579     vdc->reset = virtio_net_reset;
3580     vdc->set_status = virtio_net_set_status;
3581     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3582     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3583     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3584     vdc->post_load = virtio_net_post_load_virtio;
3585     vdc->vmsd = &vmstate_virtio_net_device;
3586     vdc->primary_unplug_pending = primary_unplug_pending;
3587 }
3588 
/* QOM type registration for the virtio-net device. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

/* Register the type with QOM; runs from a constructor via type_init(). */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
3603