xref: /openbmc/qemu/hw/net/virtio-net.c (revision f5e1847ba50a8d1adf66c0cf312e53c162e52487)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
#define VIRTIO_NET_VM_VERSION    11    /* migration section version number */

#define MAC_TABLE_ENTRIES    64        /* capacity of the RX MAC filter table */
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

/* TCP header field masks (flags; data-offset within the 16-bit word) */
#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval, This value affects the performance
   a lot, and should be tuned carefully, '300000'(300us) is the recommended
   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
   tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* All hash types advertised as supported in the config space */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
/*
 * Maps each config-space-affecting feature bit to the last byte of
 * struct virtio_net_config that the feature makes guest-visible; used
 * to compute the exposed config size from the negotiated features.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
/* Map a virtqueue index to its queue-pair index (each pair is RX+TX). */
static int vq2q(int vq_index)
{
    return vq_index / 2;
}
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
/*
 * Fill @config with the device's virtio-net config space.  A locally
 * built layout is used, except that when a vhost-vdpa backend is
 * attached and its config can be read, the backend's view wins.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without host RSS support only one indirection table entry is valid. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    /* Only the first config_size bytes are guest-visible. */
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
158 
159 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
160 {
161     VirtIONet *n = VIRTIO_NET(vdev);
162     struct virtio_net_config netcfg = {};
163     NetClientState *nc = qemu_get_queue(n->nic);
164 
165     memcpy(&netcfg, config, n->config_size);
166 
167     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
168         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
169         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
170         memcpy(n->mac, netcfg.mac, ETH_ALEN);
171         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
172     }
173 
174     /*
175      * Is this VDPA? No peer means not VDPA: there's no way to
176      * disconnect/reconnect a VDPA peer.
177      */
178     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
179         vhost_net_set_config(get_vhost_net(nc->peer),
180                              (uint8_t *)&netcfg, 0, n->config_size,
181                              VHOST_SET_CONFIG_TYPE_MASTER);
182       }
183 }
184 
185 static bool virtio_net_started(VirtIONet *n, uint8_t status)
186 {
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
189         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
190 }
191 
192 static void virtio_net_announce_notify(VirtIONet *net)
193 {
194     VirtIODevice *vdev = VIRTIO_DEVICE(net);
195     trace_virtio_net_announce_notify();
196 
197     net->status |= VIRTIO_NET_S_ANNOUNCE;
198     virtio_notify_config(vdev);
199 }
200 
201 static void virtio_net_announce_timer(void *opaque)
202 {
203     VirtIONet *n = opaque;
204     trace_virtio_net_announce_timer(n->announce_timer.round);
205 
206     n->announce_timer.round--;
207     virtio_net_announce_notify(n);
208 }
209 
210 static void virtio_net_announce(NetClientState *nc)
211 {
212     VirtIONet *n = qemu_get_nic_opaque(nc);
213     VirtIODevice *vdev = VIRTIO_DEVICE(n);
214 
215     /*
216      * Make sure the virtio migration announcement timer isn't running
217      * If it is, let it trigger announcement so that we do not cause
218      * confusion.
219      */
220     if (n->announce_timer.round) {
221         return;
222     }
223 
224     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
225         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
226             virtio_net_announce_notify(n);
227     }
228 }
229 
/*
 * Start or stop the vhost backend so that its running state matches
 * virtio_net_started(status) && link-up.  Falls back to userspace
 * virtio when the backend cannot honor the configuration (endianness
 * swap needed, MTU rejected, or vhost_net_start() fails).
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Nothing to do when desired and current vhost states already agree. */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set the flag before starting; cleared again on failure. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
287 
288 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
289                                           NetClientState *peer,
290                                           bool enable)
291 {
292     if (virtio_is_big_endian(vdev)) {
293         return qemu_set_vnet_be(peer, enable);
294     } else {
295         return qemu_set_vnet_le(peer, enable);
296     }
297 }
298 
299 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
300                                        int queues, bool enable)
301 {
302     int i;
303 
304     for (i = 0; i < queues; i++) {
305         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
306             enable) {
307             while (--i >= 0) {
308                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
309             }
310 
311             return true;
312         }
313     }
314 
315     return false;
316 }
317 
/*
 * Keep the backend's vnet-header endianness in sync with the device
 * status transition; sets n->needs_vnet_hdr_swap when the backend
 * cannot do the conversion itself.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}
340 
341 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
342 {
343     unsigned int dropped = virtqueue_drop_all(vq);
344     if (dropped) {
345         virtio_notify(vdev, vq);
346     }
347 }
348 
/*
 * Device status change: propagate to the endianness/vhost machinery
 * and start or quiesce the per-queue TX path (timer or bottom half).
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond curr_queues (or any but queue 0 when multiqueue
         * is off) are treated as stopped. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm whichever TX kick mechanism this queue uses. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
405 
406 static void virtio_net_set_link_status(NetClientState *nc)
407 {
408     VirtIONet *n = qemu_get_nic_opaque(nc);
409     VirtIODevice *vdev = VIRTIO_DEVICE(n);
410     uint16_t old_status = n->status;
411 
412     if (nc->link_down)
413         n->status &= ~VIRTIO_NET_S_LINK_UP;
414     else
415         n->status |= VIRTIO_NET_S_LINK_UP;
416 
417     if (n->status != old_status)
418         virtio_notify_config(vdev);
419 
420     virtio_net_set_status(vdev, vdev->status);
421 }
422 
423 static void rxfilter_notify(NetClientState *nc)
424 {
425     VirtIONet *n = qemu_get_nic_opaque(nc);
426 
427     if (nc->rxfilter_notify_enabled) {
428         char *path = object_get_canonical_path(OBJECT(n->qdev));
429         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
430                                               n->netclient_name, path);
431         g_free(path);
432 
433         /* disable event notification to avoid events flooding */
434         nc->rxfilter_notify_enabled = 0;
435     }
436 }
437 
438 static intList *get_vlan_table(VirtIONet *n)
439 {
440     intList *list, *entry;
441     int i, j;
442 
443     list = NULL;
444     for (i = 0; i < MAX_VLAN >> 5; i++) {
445         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
446             if (n->vlans[i] & (1U << j)) {
447                 entry = g_malloc0(sizeof(*entry));
448                 entry->value = (i << 5) + j;
449                 entry->next = list;
450                 list = entry;
451             }
452         }
453     }
454 
455     return list;
456 }
457 
458 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
459 {
460     VirtIONet *n = qemu_get_nic_opaque(nc);
461     VirtIODevice *vdev = VIRTIO_DEVICE(n);
462     RxFilterInfo *info;
463     strList *str_list, *entry;
464     int i;
465 
466     info = g_malloc0(sizeof(*info));
467     info->name = g_strdup(nc->name);
468     info->promiscuous = n->promisc;
469 
470     if (n->nouni) {
471         info->unicast = RX_STATE_NONE;
472     } else if (n->alluni) {
473         info->unicast = RX_STATE_ALL;
474     } else {
475         info->unicast = RX_STATE_NORMAL;
476     }
477 
478     if (n->nomulti) {
479         info->multicast = RX_STATE_NONE;
480     } else if (n->allmulti) {
481         info->multicast = RX_STATE_ALL;
482     } else {
483         info->multicast = RX_STATE_NORMAL;
484     }
485 
486     info->broadcast_allowed = n->nobcast;
487     info->multicast_overflow = n->mac_table.multi_overflow;
488     info->unicast_overflow = n->mac_table.uni_overflow;
489 
490     info->main_mac = qemu_mac_strdup_printf(n->mac);
491 
492     str_list = NULL;
493     for (i = 0; i < n->mac_table.first_multi; i++) {
494         entry = g_malloc0(sizeof(*entry));
495         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
496         entry->next = str_list;
497         str_list = entry;
498     }
499     info->unicast_table = str_list;
500 
501     str_list = NULL;
502     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
503         entry = g_malloc0(sizeof(*entry));
504         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
505         entry->next = str_list;
506         str_list = entry;
507     }
508     info->multicast_table = str_list;
509     info->vlan_table = get_vlan_table(n);
510 
511     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
512         info->vlan = RX_STATE_ALL;
513     } else if (!info->vlan_table) {
514         info->vlan = RX_STATE_NONE;
515     } else {
516         info->vlan = RX_STATE_NORMAL;
517     }
518 
519     /* enable event notification after query */
520     nc->rxfilter_notify_enabled = 1;
521 
522     return info;
523 }
524 
/*
 * Device reset: restore RX modes, queue count, announce state and the
 * MAC/VLAN filter tables to their power-on defaults, then drop any
 * in-flight TX traffic.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
563 
564 static void peer_test_vnet_hdr(VirtIONet *n)
565 {
566     NetClientState *nc = qemu_get_queue(n->nic);
567     if (!nc->peer) {
568         return;
569     }
570 
571     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
572 }
573 
/*
 * Cached result of peer_test_vnet_hdr(): non-zero when the backend
 * supports vnet headers.
 */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
578 
579 static int peer_has_ufo(VirtIONet *n)
580 {
581     if (!peer_has_vnet_hdr(n))
582         return 0;
583 
584     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
585 
586     return n->has_ufo;
587 }
588 
/*
 * Recompute the guest-visible vnet header length from the negotiated
 * MRG_RXBUF / VERSION_1 / HASH_REPORT features, and push the new
 * length down to every backend that can accept it (which then also
 * becomes the host header length).
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* virtio 1.0 headers always use the mergeable layout; hash
         * reporting extends it to the v1_hash layout. */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
618 
619 static int virtio_net_max_tx_queue_size(VirtIONet *n)
620 {
621     NetClientState *peer = n->nic_conf.peers.ncs[0];
622 
623     /*
624      * Backends other than vhost-user don't support max queue size.
625      */
626     if (!peer) {
627         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
628     }
629 
630     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
631         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
632     }
633 
634     return VIRTQUEUE_MAX_SIZE;
635 }
636 
637 static int peer_attach(VirtIONet *n, int index)
638 {
639     NetClientState *nc = qemu_get_subqueue(n->nic, index);
640 
641     if (!nc->peer) {
642         return 0;
643     }
644 
645     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
646         vhost_set_vring_enable(nc->peer, 1);
647     }
648 
649     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
650         return 0;
651     }
652 
653     if (n->max_queues == 1) {
654         return 0;
655     }
656 
657     return tap_enable(nc->peer);
658 }
659 
660 static int peer_detach(VirtIONet *n, int index)
661 {
662     NetClientState *nc = qemu_get_subqueue(n->nic, index);
663 
664     if (!nc->peer) {
665         return 0;
666     }
667 
668     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
669         vhost_set_vring_enable(nc->peer, 0);
670     }
671 
672     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
673         return 0;
674     }
675 
676     return tap_disable(nc->peer);
677 }
678 
679 static void virtio_net_set_queues(VirtIONet *n)
680 {
681     int i;
682     int r;
683 
684     if (n->nic->peer_deleted) {
685         return;
686     }
687 
688     for (i = 0; i < n->max_queues; i++) {
689         if (i < n->curr_queues) {
690             r = peer_attach(n, i);
691             assert(!r);
692         } else {
693             r = peer_detach(n, i);
694             assert(!r);
695         }
696     }
697 }
698 
699 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
700 
/*
 * Report the feature set this device offers the guest: the configured
 * host features, trimmed down to what the peer backend can actually
 * support (vnet headers, UFO, vhost feature set).
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        /* No vnet headers: no checksum/TSO offloads in either direction. */
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* With a vhost backend, RSS and hash report are always cleared. */
    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
747 
748 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
749 {
750     uint64_t features = 0;
751 
752     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
753      * but also these: */
754     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
755     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
756     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
757     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
758     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
759 
760     return features;
761 }
762 
763 static void virtio_net_apply_guest_offloads(VirtIONet *n)
764 {
765     qemu_set_offload(qemu_get_queue(n->nic)->peer,
766             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
767             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
768             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
769             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
770             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
771 }
772 
773 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
774 {
775     static const uint64_t guest_offloads_mask =
776         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
777         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
778         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
779         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
780         (1ULL << VIRTIO_NET_F_GUEST_UFO);
781 
782     return guest_offloads_mask & features;
783 }
784 
/* Guest-offload bits present in the features the guest negotiated. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
790 
/* Context handed to failover_set_primary() while scanning -device opts. */
typedef struct {
    VirtIONet *n;  /* failover virtio-net whose pair id we match against */
    char *id;      /* out: id of the matching primary device's opts */
} FailoverId;
795 
796 /**
797  * Set the id of the failover primary device
798  *
799  * @opaque: FailoverId to setup
800  * @opts: opts for device we are handling
801  * @errp: returns an error if this function fails
802  */
803 static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
804 {
805     FailoverId *fid = opaque;
806     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
807 
808     if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
809         fid->id = g_strdup(opts->id);
810         return 1;
811     }
812 
813     return 0;
814 }
815 
816 /**
817  * Find the primary device id for this failover virtio-net
818  *
819  * @n: VirtIONet device
820  * @errp: returns an error if this function fails
821  */
822 static char *failover_find_primary_device_id(VirtIONet *n)
823 {
824     Error *err = NULL;
825     FailoverId fid;
826 
827     if (!qemu_opts_foreach(qemu_find_opts("device"),
828                            failover_set_primary, &fid, &err)) {
829         return NULL;
830     }
831     return fid.id;
832 }
833 
/*
 * Hotplug the failover primary device described by the "device" opts
 * whose id equals n->primary_device_id.  No-op when a primary device
 * already exists; sets @errp when the opts cannot be found or device
 * creation fails.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    QemuOpts *opts;

    if (n->primary_dev) {
        return;
    }

    opts = qemu_opts_find(qemu_find_opts("device"), n->primary_device_id);
    if (opts) {
        n->primary_dev = qdev_device_add(opts, &err);
        if (err) {
            /* Creation failed: drop the opts again. */
            qemu_opts_del(opts);
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    error_propagate(errp, err);
}
857 
858 /**
859  * Find the primary device for this failover virtio-net
860  *
861  * @n: VirtIONet device
862  * @errp: returns an error if this function fails
863  */
864 static DeviceState *failover_find_primary_device(VirtIONet *n)
865 {
866     char *id = failover_find_primary_device_id(n);
867 
868     if (!id) {
869         return NULL;
870     }
871     n->primary_device_id = g_strdup(id);
872 
873     return qdev_find_recursive(sysbus_get_default(), n->primary_device_id);
874 }
875 
/*
 * Feature negotiation completed: propagate the accepted feature set to
 * the device state, the backends, and (for STANDBY) the failover
 * machinery.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC is only enabled together with the matching guest TSO feature. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        /* No VLAN filtering negotiated: permit every VLAN id. */
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* Retry once after (re)discovering the primary device. */
            n->primary_dev = failover_find_primary_device(n);
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}
945 
946 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
947                                      struct iovec *iov, unsigned int iov_cnt)
948 {
949     uint8_t on;
950     size_t s;
951     NetClientState *nc = qemu_get_queue(n->nic);
952 
953     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
954     if (s != sizeof(on)) {
955         return VIRTIO_NET_ERR;
956     }
957 
958     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
959         n->promisc = on;
960     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
961         n->allmulti = on;
962     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
963         n->alluni = on;
964     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
965         n->nomulti = on;
966     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
967         n->nouni = on;
968     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
969         n->nobcast = on;
970     } else {
971         return VIRTIO_NET_ERR;
972     }
973 
974     rxfilter_notify(nc);
975 
976     return VIRTIO_NET_OK;
977 }
978 
979 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
980                                      struct iovec *iov, unsigned int iov_cnt)
981 {
982     VirtIODevice *vdev = VIRTIO_DEVICE(n);
983     uint64_t offloads;
984     size_t s;
985 
986     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
987         return VIRTIO_NET_ERR;
988     }
989 
990     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
991     if (s != sizeof(offloads)) {
992         return VIRTIO_NET_ERR;
993     }
994 
995     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
996         uint64_t supported_offloads;
997 
998         offloads = virtio_ldq_p(vdev, &offloads);
999 
1000         if (!n->has_vnet_hdr) {
1001             return VIRTIO_NET_ERR;
1002         }
1003 
1004         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1005             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1006         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1007             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1008         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1009 
1010         supported_offloads = virtio_net_supported_guest_offloads(n);
1011         if (offloads & ~supported_offloads) {
1012             return VIRTIO_NET_ERR;
1013         }
1014 
1015         n->curr_guest_offloads = offloads;
1016         virtio_net_apply_guest_offloads(n);
1017 
1018         return VIRTIO_NET_OK;
1019     } else {
1020         return VIRTIO_NET_ERR;
1021     }
1022 }
1023 
/*
 * Handle VIRTIO_NET_CTRL_MAC commands:
 *  - VIRTIO_NET_CTRL_MAC_ADDR_SET: set the primary MAC address.
 *  - VIRTIO_NET_CTRL_MAC_TABLE_SET: replace the unicast and multicast
 *    filter tables (two virtio_net_ctrl_mac structures back to back:
 *    unicast first, then multicast).
 *
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* The payload must be exactly one 6-byte MAC address. */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Build the new table in a scratch buffer; commit only on success. */
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First structure: unicast addresses. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* The buffer must hold at least the advertised number of entries. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Table too large to store: fall back to accepting all unicast. */
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Second structure: multicast addresses. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast entries must consume the rest of the buffer exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Table too large to store: fall back to accepting all multicast. */
        multi_overflow = 1;
    }

    /* Commit the fully validated table to the device state. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1119 
1120 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1121                                         struct iovec *iov, unsigned int iov_cnt)
1122 {
1123     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1124     uint16_t vid;
1125     size_t s;
1126     NetClientState *nc = qemu_get_queue(n->nic);
1127 
1128     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1129     vid = virtio_lduw_p(vdev, &vid);
1130     if (s != sizeof(vid)) {
1131         return VIRTIO_NET_ERR;
1132     }
1133 
1134     if (vid >= MAX_VLAN)
1135         return VIRTIO_NET_ERR;
1136 
1137     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1138         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1139     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1140         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1141     else
1142         return VIRTIO_NET_ERR;
1143 
1144     rxfilter_notify(nc);
1145 
1146     return VIRTIO_NET_OK;
1147 }
1148 
1149 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1150                                       struct iovec *iov, unsigned int iov_cnt)
1151 {
1152     trace_virtio_net_handle_announce(n->announce_timer.round);
1153     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1154         n->status & VIRTIO_NET_S_ANNOUNCE) {
1155         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1156         if (n->announce_timer.round) {
1157             qemu_announce_timer_step(&n->announce_timer);
1158         }
1159         return VIRTIO_NET_OK;
1160     } else {
1161         return VIRTIO_NET_ERR;
1162     }
1163 }
1164 
1165 static void virtio_net_disable_rss(VirtIONet *n)
1166 {
1167     if (n->rss_data.enabled) {
1168         trace_virtio_net_rss_disable();
1169     }
1170     n->rss_data.enabled = false;
1171 }
1172 
1173 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1174                                       struct iovec *iov,
1175                                       unsigned int iov_cnt,
1176                                       bool do_rss)
1177 {
1178     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1179     struct virtio_net_rss_config cfg;
1180     size_t s, offset = 0, size_get;
1181     uint16_t queues, i;
1182     struct {
1183         uint16_t us;
1184         uint8_t b;
1185     } QEMU_PACKED temp;
1186     const char *err_msg = "";
1187     uint32_t err_value = 0;
1188 
1189     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1190         err_msg = "RSS is not negotiated";
1191         goto error;
1192     }
1193     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1194         err_msg = "Hash report is not negotiated";
1195         goto error;
1196     }
1197     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1198     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1199     if (s != size_get) {
1200         err_msg = "Short command buffer";
1201         err_value = (uint32_t)s;
1202         goto error;
1203     }
1204     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1205     n->rss_data.indirections_len =
1206         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1207     n->rss_data.indirections_len++;
1208     if (!do_rss) {
1209         n->rss_data.indirections_len = 1;
1210     }
1211     if (!is_power_of_2(n->rss_data.indirections_len)) {
1212         err_msg = "Invalid size of indirection table";
1213         err_value = n->rss_data.indirections_len;
1214         goto error;
1215     }
1216     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1217         err_msg = "Too large indirection table";
1218         err_value = n->rss_data.indirections_len;
1219         goto error;
1220     }
1221     n->rss_data.default_queue = do_rss ?
1222         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1223     if (n->rss_data.default_queue >= n->max_queues) {
1224         err_msg = "Invalid default queue";
1225         err_value = n->rss_data.default_queue;
1226         goto error;
1227     }
1228     offset += size_get;
1229     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1230     g_free(n->rss_data.indirections_table);
1231     n->rss_data.indirections_table = g_malloc(size_get);
1232     if (!n->rss_data.indirections_table) {
1233         err_msg = "Can't allocate indirections table";
1234         err_value = n->rss_data.indirections_len;
1235         goto error;
1236     }
1237     s = iov_to_buf(iov, iov_cnt, offset,
1238                    n->rss_data.indirections_table, size_get);
1239     if (s != size_get) {
1240         err_msg = "Short indirection table buffer";
1241         err_value = (uint32_t)s;
1242         goto error;
1243     }
1244     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1245         uint16_t val = n->rss_data.indirections_table[i];
1246         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1247     }
1248     offset += size_get;
1249     size_get = sizeof(temp);
1250     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1251     if (s != size_get) {
1252         err_msg = "Can't get queues";
1253         err_value = (uint32_t)s;
1254         goto error;
1255     }
1256     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1257     if (queues == 0 || queues > n->max_queues) {
1258         err_msg = "Invalid number of queues";
1259         err_value = queues;
1260         goto error;
1261     }
1262     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1263         err_msg = "Invalid key size";
1264         err_value = temp.b;
1265         goto error;
1266     }
1267     if (!temp.b && n->rss_data.hash_types) {
1268         err_msg = "No key provided";
1269         err_value = 0;
1270         goto error;
1271     }
1272     if (!temp.b && !n->rss_data.hash_types) {
1273         virtio_net_disable_rss(n);
1274         return queues;
1275     }
1276     offset += size_get;
1277     size_get = temp.b;
1278     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1279     if (s != size_get) {
1280         err_msg = "Can get key buffer";
1281         err_value = (uint32_t)s;
1282         goto error;
1283     }
1284     n->rss_data.enabled = true;
1285     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1286                                 n->rss_data.indirections_len,
1287                                 temp.b);
1288     return queues;
1289 error:
1290     trace_virtio_net_rss_error(err_msg, err_value);
1291     virtio_net_disable_rss(n);
1292     return 0;
1293 }
1294 
/*
 * Handle VIRTIO_NET_CTRL_MQ commands: hash/RSS configuration, or an
 * explicit virtqueue-pair count requested by the guest.
 *
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    /* Any MQ command invalidates the current RSS state first. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash report only: the active queue count is left untouched. */
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* queues == 0 (below the MIN) also covers a failed RSS parse above. */
    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
1339 
/*
 * Control virtqueue handler: pop each request, dispatch on the class
 * byte of the virtio_net_ctrl_hdr, and push back a one-byte status.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        /* Each request must have room for the header out and status in. */
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        /* Work on a copy of the sg list: iov_discard_front() advances the
         * iov pointer, so iov2 keeps the head for the final g_free(). */
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        /* Write the status byte back into the in (device-writable) buffer. */
        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
1392 
1393 /* RX */
1394 
1395 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1396 {
1397     VirtIONet *n = VIRTIO_NET(vdev);
1398     int queue_index = vq2q(virtio_get_queue_index(vq));
1399 
1400     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1401 }
1402 
1403 static bool virtio_net_can_receive(NetClientState *nc)
1404 {
1405     VirtIONet *n = qemu_get_nic_opaque(nc);
1406     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1407     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1408 
1409     if (!vdev->vm_running) {
1410         return false;
1411     }
1412 
1413     if (nc->queue_index >= n->curr_queues) {
1414         return false;
1415     }
1416 
1417     if (!virtio_queue_ready(q->rx_vq) ||
1418         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1419         return false;
1420     }
1421 
1422     return true;
1423 }
1424 
/*
 * Check whether the RX virtqueue can hold a packet of bufsize bytes.
 *
 * Returns 1 when buffers are available (and suppresses further guest
 * notifications), or 0 otherwise (after enabling notifications so we
 * are woken when the guest adds buffers).
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1447 
1448 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1449 {
1450     virtio_tswap16s(vdev, &hdr->hdr_len);
1451     virtio_tswap16s(vdev, &hdr->gso_size);
1452     virtio_tswap16s(vdev, &hdr->csum_start);
1453     virtio_tswap16s(vdev, &hdr->csum_offset);
1454 }
1455 
1456 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1457  * it never finds out that the packets don't have valid checksums.  This
1458  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1459  * fix this with Xen but it hasn't appeared in an upstream release of
1460  * dhclient yet.
1461  *
1462  * To avoid breaking existing guests, we catch udp packets and add
1463  * checksums.  This is terrible but it's better than hacking the guest
1464  * kernels.
1465  *
1466  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1467  * we should provide a mechanism to disable it to avoid polluting the host
1468  * cache.
1469  */
1470 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1471                                         uint8_t *buf, size_t size)
1472 {
1473     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1474         (size > 27 && size < 1500) && /* normal sized MTU */
1475         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1476         (buf[23] == 17) && /* ip.protocol == UDP */
1477         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1478         net_checksum_calculate(buf, size);
1479         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1480     }
1481 }
1482 
1483 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1484                            const void *buf, size_t size)
1485 {
1486     if (n->has_vnet_hdr) {
1487         /* FIXME this cast is evil */
1488         void *wbuf = (void *)buf;
1489         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1490                                     size - n->host_hdr_len);
1491 
1492         if (n->needs_vnet_hdr_swap) {
1493             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1494         }
1495         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1496     } else {
1497         struct virtio_net_hdr hdr = {
1498             .flags = 0,
1499             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1500         };
1501         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1502     }
1503 }
1504 
1505 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1506 {
1507     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1508     static const uint8_t vlan[] = {0x81, 0x00};
1509     uint8_t *ptr = (uint8_t *)buf;
1510     int i;
1511 
1512     if (n->promisc)
1513         return 1;
1514 
1515     ptr += n->host_hdr_len;
1516 
1517     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1518         int vid = lduw_be_p(ptr + 14) & 0xfff;
1519         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1520             return 0;
1521     }
1522 
1523     if (ptr[0] & 1) { // multicast
1524         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1525             return !n->nobcast;
1526         } else if (n->nomulti) {
1527             return 0;
1528         } else if (n->allmulti || n->mac_table.multi_overflow) {
1529             return 1;
1530         }
1531 
1532         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1533             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1534                 return 1;
1535             }
1536         }
1537     } else { // unicast
1538         if (n->nouni) {
1539             return 0;
1540         } else if (n->alluni || n->mac_table.uni_overflow) {
1541             return 1;
1542         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1543             return 1;
1544         }
1545 
1546         for (i = 0; i < n->mac_table.first_multi; i++) {
1547             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1548                 return 1;
1549             }
1550         }
1551     }
1552 
1553     return 0;
1554 }
1555 
1556 static uint8_t virtio_net_get_hash_type(bool isip4,
1557                                         bool isip6,
1558                                         bool isudp,
1559                                         bool istcp,
1560                                         uint32_t types)
1561 {
1562     if (isip4) {
1563         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1564             return NetPktRssIpV4Tcp;
1565         }
1566         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1567             return NetPktRssIpV4Udp;
1568         }
1569         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1570             return NetPktRssIpV4;
1571         }
1572     } else if (isip6) {
1573         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1574                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1575 
1576         if (istcp && (types & mask)) {
1577             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1578                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1579         }
1580         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1581         if (isudp && (types & mask)) {
1582             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1583                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1584         }
1585         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1586         if (types & mask) {
1587             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1588                 NetPktRssIpV6Ex : NetPktRssIpV6;
1589         }
1590     }
1591     return 0xff;
1592 }
1593 
1594 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1595                                    uint32_t hash)
1596 {
1597     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1598     hdr->hash_value = hash;
1599     hdr->hash_report = report;
1600 }
1601 
/*
 * Compute the RSS hash for an incoming frame and decide which RX queue
 * should receive it, optionally writing the hash into the packet's
 * virtio header (populate_hash).
 *
 * Returns the target queue index, or -1 to keep the packet on the
 * current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Maps NetPktRss* values to VIRTIO_NET_HASH_REPORT_* codes; must be
     * kept in sync with the NetPktRss* enum order. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* Fragments carry no usable L4 ports; fall back to L3-only hashing. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No configured hash type matches this packet. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so & masks the hash. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1654 
/*
 * Deliver one packet from the backend into the guest RX virtqueue.
 * Caller must hold the RCU read lock (see virtio_net_do_receive()).
 *
 * Returns the number of bytes consumed, 0 when no buffers were
 * available (the packet will be retried later), or -1 on error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* RSS may steer the packet to another queue; recurse once with
     * no_rss set so it is not re-classified there. */
    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered-out frames are silently dropped: report full size. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Copy the packet into one (or, with mergeable buffers, several)
     * guest descriptor chains. */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry mid-packet (i != 0) is an inconsistent guest
             * state and is flagged as a device error. */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives in guest memory; it
                 * is patched after the loop, once the count is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the rest of the hash-bearing host header beyond
                 * the part receive_header() already wrote. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        /* Patch the now-known buffer count into the first header. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
1771 
/* Entry point for packets from the network backend: takes the RCU read
 * lock that virtio_net_receive_rcu() requires before delegating. */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
1779 
1780 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1781                                          const uint8_t *buf,
1782                                          VirtioNetRscUnit *unit)
1783 {
1784     uint16_t ip_hdrlen;
1785     struct ip_header *ip;
1786 
1787     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1788                               + sizeof(struct eth_header));
1789     unit->ip = (void *)ip;
1790     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1791     unit->ip_plen = &ip->ip_len;
1792     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1793     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1794     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1795 }
1796 
1797 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1798                                          const uint8_t *buf,
1799                                          VirtioNetRscUnit *unit)
1800 {
1801     struct ip6_header *ip6;
1802 
1803     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1804                                  + sizeof(struct eth_header));
1805     unit->ip = ip6;
1806     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1807     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1808                                         + sizeof(struct ip6_header));
1809     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1810 
1811     /* There is a difference between payload lenght in ipv4 and v6,
1812        ip header is excluded in ipv6 */
1813     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1814 }
1815 
1816 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1817                                        VirtioNetRscSeg *seg)
1818 {
1819     int ret;
1820     struct virtio_net_hdr_v1 *h;
1821 
1822     h = (struct virtio_net_hdr_v1 *)seg->buf;
1823     h->flags = 0;
1824     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1825 
1826     if (seg->is_coalesced) {
1827         h->rsc.segments = seg->packets;
1828         h->rsc.dup_acks = seg->dup_ack;
1829         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1830         if (chain->proto == ETH_P_IP) {
1831             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1832         } else {
1833             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1834         }
1835     }
1836 
1837     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1838     QTAILQ_REMOVE(&chain->buffers, seg, next);
1839     g_free(seg->buf);
1840     g_free(seg);
1841 
1842     return ret;
1843 }
1844 
1845 static void virtio_net_rsc_purge(void *opq)
1846 {
1847     VirtioNetRscSeg *seg, *rn;
1848     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1849 
1850     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1851         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1852             chain->stat.purge_failed++;
1853             continue;
1854         }
1855     }
1856 
1857     chain->stat.timer++;
1858     if (!QTAILQ_EMPTY(&chain->buffers)) {
1859         timer_mod(chain->drain_timer,
1860               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1861     }
1862 }
1863 
1864 static void virtio_net_rsc_cleanup(VirtIONet *n)
1865 {
1866     VirtioNetRscChain *chain, *rn_chain;
1867     VirtioNetRscSeg *seg, *rn_seg;
1868 
1869     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1870         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1871             QTAILQ_REMOVE(&chain->buffers, seg, next);
1872             g_free(seg->buf);
1873             g_free(seg);
1874         }
1875 
1876         timer_del(chain->drain_timer);
1877         timer_free(chain->drain_timer);
1878         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1879         g_free(chain);
1880     }
1881 }
1882 
1883 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1884                                      NetClientState *nc,
1885                                      const uint8_t *buf, size_t size)
1886 {
1887     uint16_t hdr_len;
1888     VirtioNetRscSeg *seg;
1889 
1890     hdr_len = chain->n->guest_hdr_len;
1891     seg = g_malloc(sizeof(VirtioNetRscSeg));
1892     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1893         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1894     memcpy(seg->buf, buf, size);
1895     seg->size = size;
1896     seg->packets = 1;
1897     seg->dup_ack = 0;
1898     seg->is_coalesced = 0;
1899     seg->nc = nc;
1900 
1901     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1902     chain->stat.cache++;
1903 
1904     switch (chain->proto) {
1905     case ETH_P_IP:
1906         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1907         break;
1908     case ETH_P_IPV6:
1909         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1910         break;
1911     default:
1912         g_assert_not_reached();
1913     }
1914 }
1915 
1916 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1917                                          VirtioNetRscSeg *seg,
1918                                          const uint8_t *buf,
1919                                          struct tcp_header *n_tcp,
1920                                          struct tcp_header *o_tcp)
1921 {
1922     uint32_t nack, oack;
1923     uint16_t nwin, owin;
1924 
1925     nack = htonl(n_tcp->th_ack);
1926     nwin = htons(n_tcp->th_win);
1927     oack = htonl(o_tcp->th_ack);
1928     owin = htons(o_tcp->th_win);
1929 
1930     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1931         chain->stat.ack_out_of_win++;
1932         return RSC_FINAL;
1933     } else if (nack == oack) {
1934         /* duplicated ack or window probe */
1935         if (nwin == owin) {
1936             /* duplicated ack, add dup ack count due to whql test up to 1 */
1937             chain->stat.dup_ack++;
1938             return RSC_FINAL;
1939         } else {
1940             /* Coalesce window update */
1941             o_tcp->th_win = n_tcp->th_win;
1942             chain->stat.win_update++;
1943             return RSC_COALESCE;
1944         }
1945     } else {
1946         /* pure ack, go to 'C', finalize*/
1947         chain->stat.pure_ack++;
1948         return RSC_FINAL;
1949     }
1950 }
1951 
/*
 * Core coalescing decision for a matching flow.
 *
 * Compares the new unit's TCP sequence against the cached segment 'seg':
 *  - sequence jumped outside the window  -> RSC_FINAL (flush)
 *  - same sequence, no old payload, new payload -> append (goto coalesce)
 *  - same sequence otherwise -> delegate to virtio_net_rsc_handle_ack()
 *  - sequence gap != cached payload -> out of order, RSC_FINAL
 *  - exact continuation -> append payload to seg, RSC_COALESCE
 *
 * On coalesce, the cached IP total/payload length, TCP flags, ack and
 * window fields are updated from the new packet and the new payload bytes
 * are appended to seg->buf (which was allocated large enough by
 * virtio_net_rsc_cache_buf()).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* Start of the new packet's TCP payload. */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        /* Refuse to grow past the chain's maximum coalesced payload. */
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* Carry the newest ack and window into the cached header. */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the new payload directly after the cached data. */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2016 
2017 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2018                                         VirtioNetRscSeg *seg,
2019                                         const uint8_t *buf, size_t size,
2020                                         VirtioNetRscUnit *unit)
2021 {
2022     struct ip_header *ip1, *ip2;
2023 
2024     ip1 = (struct ip_header *)(unit->ip);
2025     ip2 = (struct ip_header *)(seg->unit.ip);
2026     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2027         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2028         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2029         chain->stat.no_match++;
2030         return RSC_NO_MATCH;
2031     }
2032 
2033     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2034 }
2035 
2036 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2037                                         VirtioNetRscSeg *seg,
2038                                         const uint8_t *buf, size_t size,
2039                                         VirtioNetRscUnit *unit)
2040 {
2041     struct ip6_header *ip1, *ip2;
2042 
2043     ip1 = (struct ip6_header *)(unit->ip);
2044     ip2 = (struct ip6_header *)(seg->unit.ip);
2045     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2046         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2047         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2048         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2049             chain->stat.no_match++;
2050             return RSC_NO_MATCH;
2051     }
2052 
2053     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2054 }
2055 
2056 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2057  * to prevent out of order */
2058 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2059                                          struct tcp_header *tcp)
2060 {
2061     uint16_t tcp_hdr;
2062     uint16_t tcp_flag;
2063 
2064     tcp_flag = htons(tcp->th_offset_flags);
2065     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2066     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2067     if (tcp_flag & TH_SYN) {
2068         chain->stat.tcp_syn++;
2069         return RSC_BYPASS;
2070     }
2071 
2072     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2073         chain->stat.tcp_ctrl_drain++;
2074         return RSC_FINAL;
2075     }
2076 
2077     if (tcp_hdr > sizeof(struct tcp_header)) {
2078         chain->stat.tcp_all_opt++;
2079         return RSC_FINAL;
2080     }
2081 
2082     return RSC_CANDIDATE;
2083 }
2084 
2085 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2086                                          NetClientState *nc,
2087                                          const uint8_t *buf, size_t size,
2088                                          VirtioNetRscUnit *unit)
2089 {
2090     int ret;
2091     VirtioNetRscSeg *seg, *nseg;
2092 
2093     if (QTAILQ_EMPTY(&chain->buffers)) {
2094         chain->stat.empty_cache++;
2095         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2096         timer_mod(chain->drain_timer,
2097               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2098         return size;
2099     }
2100 
2101     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2102         if (chain->proto == ETH_P_IP) {
2103             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2104         } else {
2105             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2106         }
2107 
2108         if (ret == RSC_FINAL) {
2109             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2110                 /* Send failed */
2111                 chain->stat.final_failed++;
2112                 return 0;
2113             }
2114 
2115             /* Send current packet */
2116             return virtio_net_do_receive(nc, buf, size);
2117         } else if (ret == RSC_NO_MATCH) {
2118             continue;
2119         } else {
2120             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2121             seg->is_coalesced = 1;
2122             return size;
2123         }
2124     }
2125 
2126     chain->stat.no_match_cache++;
2127     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2128     return size;
2129 }
2130 
2131 /* Drain a connection data, this is to avoid out of order segments */
2132 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2133                                         NetClientState *nc,
2134                                         const uint8_t *buf, size_t size,
2135                                         uint16_t ip_start, uint16_t ip_size,
2136                                         uint16_t tcp_port)
2137 {
2138     VirtioNetRscSeg *seg, *nseg;
2139     uint32_t ppair1, ppair2;
2140 
2141     ppair1 = *(uint32_t *)(buf + tcp_port);
2142     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2143         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2144         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2145             || (ppair1 != ppair2)) {
2146             continue;
2147         }
2148         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2149             chain->stat.drain_failed++;
2150         }
2151 
2152         break;
2153     }
2154 
2155     return virtio_net_do_receive(nc, buf, size);
2156 }
2157 
2158 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2159                                             struct ip_header *ip,
2160                                             const uint8_t *buf, size_t size)
2161 {
2162     uint16_t ip_len;
2163 
2164     /* Not an ipv4 packet */
2165     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2166         chain->stat.ip_option++;
2167         return RSC_BYPASS;
2168     }
2169 
2170     /* Don't handle packets with ip option */
2171     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2172         chain->stat.ip_option++;
2173         return RSC_BYPASS;
2174     }
2175 
2176     if (ip->ip_p != IPPROTO_TCP) {
2177         chain->stat.bypass_not_tcp++;
2178         return RSC_BYPASS;
2179     }
2180 
2181     /* Don't handle packets with ip fragment */
2182     if (!(htons(ip->ip_off) & IP_DF)) {
2183         chain->stat.ip_frag++;
2184         return RSC_BYPASS;
2185     }
2186 
2187     /* Don't handle packets with ecn flag */
2188     if (IPTOS_ECN(ip->ip_tos)) {
2189         chain->stat.ip_ecn++;
2190         return RSC_BYPASS;
2191     }
2192 
2193     ip_len = htons(ip->ip_len);
2194     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2195         || ip_len > (size - chain->n->guest_hdr_len -
2196                      sizeof(struct eth_header))) {
2197         chain->stat.ip_hacked++;
2198         return RSC_BYPASS;
2199     }
2200 
2201     return RSC_CANDIDATE;
2202 }
2203 
2204 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2205                                       NetClientState *nc,
2206                                       const uint8_t *buf, size_t size)
2207 {
2208     int32_t ret;
2209     uint16_t hdr_len;
2210     VirtioNetRscUnit unit;
2211 
2212     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2213 
2214     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2215         + sizeof(struct tcp_header))) {
2216         chain->stat.bypass_not_tcp++;
2217         return virtio_net_do_receive(nc, buf, size);
2218     }
2219 
2220     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2221     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2222         != RSC_CANDIDATE) {
2223         return virtio_net_do_receive(nc, buf, size);
2224     }
2225 
2226     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2227     if (ret == RSC_BYPASS) {
2228         return virtio_net_do_receive(nc, buf, size);
2229     } else if (ret == RSC_FINAL) {
2230         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2231                 ((hdr_len + sizeof(struct eth_header)) + 12),
2232                 VIRTIO_NET_IP4_ADDR_SIZE,
2233                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2234     }
2235 
2236     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2237 }
2238 
/*
 * Decide whether an IPv6 packet is eligible for coalescing: version must
 * be 6, next header must be TCP (this also excludes extension headers),
 * the payload length must be plausible, and ECN must not be set.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Version lives in the top 4 bits of the first byte of the flow word.
     * NOTE(review): masking the host-order uint32 with 0xF0 picks a byte
     * that depends on host endianness — confirm ip6_un1_flow's declared
     * type/byte order in the ip6_header definition.
     */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Payload length excludes the IPv6 header; must hold a TCP header and
     * fit within the received buffer. */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2272 
2273 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2274                                       const uint8_t *buf, size_t size)
2275 {
2276     int32_t ret;
2277     uint16_t hdr_len;
2278     VirtioNetRscChain *chain;
2279     VirtioNetRscUnit unit;
2280 
2281     chain = (VirtioNetRscChain *)opq;
2282     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2283 
2284     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2285         + sizeof(tcp_header))) {
2286         return virtio_net_do_receive(nc, buf, size);
2287     }
2288 
2289     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2290     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2291                                                  unit.ip, buf, size)) {
2292         return virtio_net_do_receive(nc, buf, size);
2293     }
2294 
2295     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2296     if (ret == RSC_BYPASS) {
2297         return virtio_net_do_receive(nc, buf, size);
2298     } else if (ret == RSC_FINAL) {
2299         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2300                 ((hdr_len + sizeof(struct eth_header)) + 8),
2301                 VIRTIO_NET_IP6_ADDR_SIZE,
2302                 hdr_len + sizeof(struct eth_header)
2303                 + sizeof(struct ip6_header));
2304     }
2305 
2306     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2307 }
2308 
2309 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2310                                                       NetClientState *nc,
2311                                                       uint16_t proto)
2312 {
2313     VirtioNetRscChain *chain;
2314 
2315     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2316         return NULL;
2317     }
2318 
2319     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2320         if (chain->proto == proto) {
2321             return chain;
2322         }
2323     }
2324 
2325     chain = g_malloc(sizeof(*chain));
2326     chain->n = n;
2327     chain->proto = proto;
2328     if (proto == (uint16_t)ETH_P_IP) {
2329         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2330         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2331     } else {
2332         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2333         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2334     }
2335     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2336                                       virtio_net_rsc_purge, chain);
2337     memset(&chain->stat, 0, sizeof(chain->stat));
2338 
2339     QTAILQ_INIT(&chain->buffers);
2340     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2341 
2342     return chain;
2343 }
2344 
2345 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2346                                       const uint8_t *buf,
2347                                       size_t size)
2348 {
2349     uint16_t proto;
2350     VirtioNetRscChain *chain;
2351     struct eth_header *eth;
2352     VirtIONet *n;
2353 
2354     n = qemu_get_nic_opaque(nc);
2355     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2356         return virtio_net_do_receive(nc, buf, size);
2357     }
2358 
2359     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2360     proto = htons(eth->h_proto);
2361 
2362     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2363     if (chain) {
2364         chain->stat.received++;
2365         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2366             return virtio_net_rsc_receive4(chain, nc, buf, size);
2367         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2368             return virtio_net_rsc_receive6(chain, nc, buf, size);
2369         }
2370     }
2371     return virtio_net_do_receive(nc, buf, size);
2372 }
2373 
2374 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2375                                   size_t size)
2376 {
2377     VirtIONet *n = qemu_get_nic_opaque(nc);
2378     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2379         return virtio_net_rsc_receive(nc, buf, size);
2380     } else {
2381         return virtio_net_do_receive(nc, buf, size);
2382     }
2383 }
2384 
2385 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2386 
/*
 * Completion callback for an asynchronous transmit started by
 * virtio_net_flush_tx(): push the saved element back to the guest, notify,
 * re-enable tx kick notifications and flush any packets queued meanwhile.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    /* Release ownership of the pending element before flushing again. */
    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2402 
2403 /* TX */
/*
 * Pop up to tx_burst elements from the tx virtqueue and hand them to the
 * network backend.
 *
 * Returns the number of packets sent, -EINVAL if the device was marked
 * broken because of a malformed element, or -EBUSY when the backend queued
 * a packet asynchronously (completion resumes via virtio_net_tx_complete).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    /* Nothing to do until the guest driver is up. */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* An async send is still in flight; wait for its completion callback. */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            /* Pull the virtio-net header out so it can be validated. */
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Byte-swap the header into local mhdr and rebuild the iov
                 * with sg2[0] pointing at the swapped copy, skipping the
                 * original header bytes in the guest buffers.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* Too many fragments to fit sg2; drop the packet. */
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /*
             * Packet queued by the backend: keep the element until
             * virtio_net_tx_complete() runs and suppress further kicks.
             */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2498 
/*
 * Guest kick handler for the "timer" tx strategy.  The first kick arms a
 * delay timer (batching subsequent packets); a kick while the timer is
 * pending flushes immediately and cancels it.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* With link down, just drop whatever the guest queued. */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* Second kick while timer pending: flush now, cancel the timer. */
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        /* First kick: arm the batching timer, suppress further kicks. */
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
2529 
/*
 * Guest kick handler for the bottom-half tx strategy: schedule the tx BH
 * once and suppress further kicks until it runs.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* With link down, just drop whatever the guest queued. */
    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A flush is already pending; nothing more to do. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2551 
/*
 * Timer callback for the "timer" tx strategy: re-enable kick notifications
 * and flush whatever the guest queued during the batching delay.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore. */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2574 
/*
 * Bottom-half callback for the BH tx strategy.  Flushes a burst of
 * packets; reschedules itself while the guest keeps the queue full, and
 * only re-enables kick notifications once a flush comes up short.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore. */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2623 
2624 static void virtio_net_add_queue(VirtIONet *n, int index)
2625 {
2626     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2627 
2628     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2629                                            virtio_net_handle_rx);
2630 
2631     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2632         n->vqs[index].tx_vq =
2633             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2634                              virtio_net_handle_tx_timer);
2635         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2636                                               virtio_net_tx_timer,
2637                                               &n->vqs[index]);
2638     } else {
2639         n->vqs[index].tx_vq =
2640             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2641                              virtio_net_handle_tx_bh);
2642         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2643     }
2644 
2645     n->vqs[index].tx_waiting = 0;
2646     n->vqs[index].n = n;
2647 }
2648 
/*
 * Remove queue pair 'index': purge any pending backend packets, delete the
 * rx virtqueue, free the tx machinery (timer or BH, whichever was set up
 * by virtio_net_add_queue), then delete the tx virtqueue.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    /* rx sits at virtqueue index*2, tx at index*2 + 1. */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2669 
/*
 * Resize the exposed virtqueue set to new_max_queues RX/TX pairs plus
 * one control vq.  Pairs beyond the new count are deleted, missing
 * ones are added; no-op when the count is already correct.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    /* Layout is always N RX/TX pairs followed by a single ctrl vq. */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2704 
2705 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2706 {
2707     int max = multiqueue ? n->max_queues : 1;
2708 
2709     n->multiqueue = multiqueue;
2710     virtio_net_change_num_queues(n, max);
2711 
2712     virtio_net_set_queues(n);
2713 }
2714 
/*
 * Post-load hook for the device section: re-derive runtime state that
 * is not (or cannot be) carried in the migration stream.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    /* Without the control feature the guest cannot have changed offloads. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Re-arm a guest announce sequence interrupted by migration. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2784 
/*
 * Post-load hook at the virtio layer, which runs after the generic
 * virtio_load has applied the negotiated features.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
2800 
/* tx_waiting field of a VirtIONetQueue (one element of the vqs array) */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2809 
2810 static bool max_queues_gt_1(void *opaque, int version_id)
2811 {
2812     return VIRTIO_NET(opaque)->max_queues > 1;
2813 }
2814 
2815 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2816 {
2817     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2818                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2819 }
2820 
2821 static bool mac_table_fits(void *opaque, int version_id)
2822 {
2823     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2824 }
2825 
2826 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2827 {
2828     return !mac_table_fits(opaque, version_id);
2829 }
2830 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;         /* device being saved/loaded */
    VirtIONetQueue *vqs_1;          /* points at vqs[1]: queues after the 1st */
    uint16_t        curr_queues_1;  /* number of entries reachable via vqs_1 */
    uint8_t         has_ufo;        /* UFO capability flag as on the wire */
    uint32_t        has_vnet_hdr;   /* vnet-header capability as on the wire */
};
2841 
2842 /* The 2nd and subsequent tx_waiting flags are loaded later than
2843  * the 1st entry in the queues and only if there's more than one
2844  * entry.  We use the tmp mechanism to calculate a temporary
2845  * pointer and count and also validate the count.
2846  */
2847 
2848 static int virtio_net_tx_waiting_pre_save(void *opaque)
2849 {
2850     struct VirtIONetMigTmp *tmp = opaque;
2851 
2852     tmp->vqs_1 = tmp->parent->vqs + 1;
2853     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2854     if (tmp->parent->curr_queues == 0) {
2855         tmp->curr_queues_1 = 0;
2856     }
2857 
2858     return 0;
2859 }
2860 
2861 static int virtio_net_tx_waiting_pre_load(void *opaque)
2862 {
2863     struct VirtIONetMigTmp *tmp = opaque;
2864 
2865     /* Reuse the pointer setup from save */
2866     virtio_net_tx_waiting_pre_save(opaque);
2867 
2868     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2869         error_report("virtio-net: curr_queues %x > max_queues %x",
2870             tmp->parent->curr_queues, tmp->parent->max_queues);
2871 
2872         return -EINVAL;
2873     }
2874 
2875     return 0; /* all good */
2876 }
2877 
/* WITH_TMP vmstate saving tx_waiting for queues 1..curr_queues-1. */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2890 
2891 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2892  * flag set we need to check that we have it
2893  */
2894 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2895 {
2896     struct VirtIONetMigTmp *tmp = opaque;
2897 
2898     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2899         error_report("virtio-net: saved image requires TUN_F_UFO support");
2900         return -EINVAL;
2901     }
2902 
2903     return 0;
2904 }
2905 
2906 static int virtio_net_ufo_pre_save(void *opaque)
2907 {
2908     struct VirtIONetMigTmp *tmp = opaque;
2909 
2910     tmp->has_ufo = tmp->parent->has_ufo;
2911 
2912     return 0;
2913 }
2914 
/* WITH_TMP vmstate carrying the single has_ufo compatibility byte. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2924 
2925 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2926  * flag set we need to check that we have it
2927  */
2928 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2929 {
2930     struct VirtIONetMigTmp *tmp = opaque;
2931 
2932     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2933         error_report("virtio-net: saved image requires vnet_hdr=on");
2934         return -EINVAL;
2935     }
2936 
2937     return 0;
2938 }
2939 
2940 static int virtio_net_vnet_pre_save(void *opaque)
2941 {
2942     struct VirtIONetMigTmp *tmp = opaque;
2943 
2944     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2945 
2946     return 0;
2947 }
2948 
/* WITH_TMP vmstate carrying the has_vnet_hdr compatibility word. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2958 
2959 static bool virtio_net_rss_needed(void *opaque)
2960 {
2961     return VIRTIO_NET(opaque)->rss_data.enabled;
2962 }
2963 
/* Optional subsection: only present in the stream when RSS is enabled. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
2984 
/*
 * Device-section vmstate.  Field order defines the on-the-wire stream
 * format; do not reorder without bumping VIRTIO_NET_VM_VERSION.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue 0 only; the rest comes via the WITH_TMP below */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* multiqueue fields only exist on the wire when max_queues > 1 */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3040 
/* Callbacks through which the net core drives this NIC frontend. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3050 
3051 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3052 {
3053     VirtIONet *n = VIRTIO_NET(vdev);
3054     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3055     assert(n->vhost_started);
3056     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3057 }
3058 
3059 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3060                                            bool mask)
3061 {
3062     VirtIONet *n = VIRTIO_NET(vdev);
3063     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3064     assert(n->vhost_started);
3065     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3066                              vdev, idx, mask);
3067 }
3068 
3069 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3070 {
3071     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3072 
3073     n->config_size = virtio_feature_get_config_size(feature_sizes,
3074                                                     host_features);
3075 }
3076 
3077 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3078                                    const char *type)
3079 {
3080     /*
3081      * The name can be NULL, the netclient name will be type.x.
3082      */
3083     assert(type != NULL);
3084 
3085     g_free(n->netclient_name);
3086     g_free(n->netclient_type);
3087     n->netclient_name = g_strdup(name);
3088     n->netclient_type = g_strdup(type);
3089 }
3090 
3091 static bool failover_unplug_primary(VirtIONet *n)
3092 {
3093     HotplugHandler *hotplug_ctrl;
3094     PCIDevice *pci_dev;
3095     Error *err = NULL;
3096 
3097     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3098     if (hotplug_ctrl) {
3099         pci_dev = PCI_DEVICE(n->primary_dev);
3100         pci_dev->partially_hotplugged = true;
3101         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3102         if (err) {
3103             error_report_err(err);
3104             return false;
3105         }
3106     } else {
3107         return false;
3108     }
3109     return true;
3110 }
3111 
3112 static bool failover_replug_primary(VirtIONet *n, Error **errp)
3113 {
3114     Error *err = NULL;
3115     HotplugHandler *hotplug_ctrl;
3116     PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3117     BusState *primary_bus;
3118 
3119     if (!pdev->partially_hotplugged) {
3120         return true;
3121     }
3122     primary_bus = n->primary_dev->parent_bus;
3123     if (!primary_bus) {
3124         error_setg(errp, "virtio_net: couldn't find primary bus");
3125         return false;
3126     }
3127     qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort);
3128     qatomic_set(&n->failover_primary_hidden, false);
3129     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3130     if (hotplug_ctrl) {
3131         hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3132         if (err) {
3133             goto out;
3134         }
3135         hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3136     }
3137 
3138 out:
3139     error_propagate(errp, err);
3140     return !err;
3141 }
3142 
/*
 * Drive the failover primary across migration: unplug it when
 * migration enters setup, replug it if migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    /* Look the primary up lazily; it may have been hotplugged later. */
    if (!n->primary_dev) {
        n->primary_dev = failover_find_primary_device(n);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            /* The guest re-enumerates the primary; don't migrate its state. */
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                               qdev_get_vmsd(n->primary_dev),
                               n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3177 
3178 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3179 {
3180     MigrationState *s = data;
3181     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3182     virtio_net_handle_migration_primary(n, s);
3183 }
3184 
3185 static bool failover_hide_primary_device(DeviceListener *listener,
3186                                          QemuOpts *device_opts)
3187 {
3188     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3189     bool hide;
3190     const char *standby_id;
3191 
3192     if (!device_opts) {
3193         return false;
3194     }
3195     standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3196     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3197         return false;
3198     }
3199 
3200     /* failover_primary_hidden is set during feature negotiation */
3201     hide = qatomic_read(&n->failover_primary_hidden);
3202     g_free(n->primary_device_id);
3203     n->primary_device_id = g_strdup(device_opts->id);
3204     return hide;
3205 }
3206 
/*
 * Realize: validate the configuration, create the virtqueues, the NIC
 * backend attachment and all runtime state of the virtio-net device.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* Only advertise VIRTIO_NET_F_MTU when a host_mtu was configured. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* SPEED_UNKNOWN is -1; anything below that is invalid. */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        /* Register the listener/notifier pair that drives failover. */
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* Each queue pair needs two virtqueues, plus one control vq. */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* Unknown tx= values fall back to the bottom-half algorithm. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    /* Probe the peer for vnet header support; offloads depend on it. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    /* Push the configured MAC down to a vhost-vdpa backend, if any. */
   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}
3361 
3362 static void virtio_net_device_unrealize(DeviceState *dev)
3363 {
3364     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3365     VirtIONet *n = VIRTIO_NET(dev);
3366     int i, max_queues;
3367 
3368     /* This will stop vhost backend if appropriate. */
3369     virtio_net_set_status(vdev, 0);
3370 
3371     g_free(n->netclient_name);
3372     n->netclient_name = NULL;
3373     g_free(n->netclient_type);
3374     n->netclient_type = NULL;
3375 
3376     g_free(n->mac_table.macs);
3377     g_free(n->vlans);
3378 
3379     if (n->failover) {
3380         device_listener_unregister(&n->primary_listener);
3381         g_free(n->primary_device_id);
3382     }
3383 
3384     max_queues = n->multiqueue ? n->max_queues : 1;
3385     for (i = 0; i < max_queues; i++) {
3386         virtio_net_del_queue(n, i);
3387     }
3388     /* delete also control vq */
3389     virtio_del_queue(vdev, max_queues * 2);
3390     qemu_announce_timer_del(&n->announce_timer, false);
3391     g_free(n->vqs);
3392     qemu_del_nic(n->nic);
3393     virtio_net_rsc_cleanup(n);
3394     g_free(n->rss_data.indirections_table);
3395     net_rx_pkt_uninit(n->rx_pkt);
3396     virtio_cleanup(vdev);
3397 }
3398 
/* QOM instance init: set defaults available before properties apply. */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));
}
3412 
/* Outer vmstate pre-save sanity check. */
static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}
3423 
3424 static bool primary_unplug_pending(void *opaque)
3425 {
3426     DeviceState *dev = opaque;
3427     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3428     VirtIONet *n = VIRTIO_NET(vdev);
3429 
3430     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3431         return false;
3432     }
3433     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3434 }
3435 
/* vmstate dev_unplug_pending hook: delegate to the virtio class hook. */
static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}
3443 
/*
 * Outer vmstate: generic virtio save/load; the device-specific fields
 * live in vmstate_virtio_net_device, reached via VMSTATE_VIRTIO_DEVICE.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3455 
/*
 * Qdev properties: feature bits (defaults are guest-visible ABI — do
 * not change without a compat machine entry) plus tunables for queue
 * sizes, tx batching, link speed/duplex and failover.
 */
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3522 
3523 static void virtio_net_class_init(ObjectClass *klass, void *data)
3524 {
3525     DeviceClass *dc = DEVICE_CLASS(klass);
3526     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3527 
3528     device_class_set_props(dc, virtio_net_properties);
3529     dc->vmsd = &vmstate_virtio_net;
3530     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3531     vdc->realize = virtio_net_device_realize;
3532     vdc->unrealize = virtio_net_device_unrealize;
3533     vdc->get_config = virtio_net_get_config;
3534     vdc->set_config = virtio_net_set_config;
3535     vdc->get_features = virtio_net_get_features;
3536     vdc->set_features = virtio_net_set_features;
3537     vdc->bad_features = virtio_net_bad_features;
3538     vdc->reset = virtio_net_reset;
3539     vdc->set_status = virtio_net_set_status;
3540     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3541     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3542     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3543     vdc->post_load = virtio_net_post_load_virtio;
3544     vdc->vmsd = &vmstate_virtio_net_device;
3545     vdc->primary_unplug_pending = primary_unplug_pending;
3546 }
3547 
3548 static const TypeInfo virtio_net_info = {
3549     .name = TYPE_VIRTIO_NET,
3550     .parent = TYPE_VIRTIO_DEVICE,
3551     .instance_size = sizeof(VirtIONet),
3552     .instance_init = virtio_net_instance_init,
3553     .class_init = virtio_net_class_init,
3554 };
3555 
/* Register the virtio-net QOM type with the type system. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/* Arrange for virtio_register_types() to run during QEMU startup. */
type_init(virtio_register_types)
3562