xref: /openbmc/qemu/hw/net/virtio-net.c (revision b15e402f)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
48 #define VIRTIO_NET_VM_VERSION    11
49 
50 #define MAC_TABLE_ENTRIES    64
51 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
52 
53 /* previously fixed value */
54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
56 
57 /* for now, only allow larger queues; with virtio-1, guest can downsize */
58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
60 
61 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
62 
63 #define VIRTIO_NET_TCP_FLAG         0x3F
64 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
65 
66 /* IPv4 max payload, 16 bits in the header */
67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
69 
70 /* header length value in ip header without option */
71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
72 
73 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
75 
/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
81 
82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
83                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
92 static VirtIOFeature feature_sizes[] = {
93     {.flags = 1ULL << VIRTIO_NET_F_MAC,
94      .end = endof(struct virtio_net_config, mac)},
95     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
96      .end = endof(struct virtio_net_config, status)},
97     {.flags = 1ULL << VIRTIO_NET_F_MQ,
98      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
99     {.flags = 1ULL << VIRTIO_NET_F_MTU,
100      .end = endof(struct virtio_net_config, mtu)},
101     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
102      .end = endof(struct virtio_net_config, duplex)},
103     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
104      .end = endof(struct virtio_net_config, supported_hash_types)},
105     {}
106 };
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
/*
 * Convert a virtqueue index into a queue index by halving it
 * (two consecutive virtqueue indices map to the same queue).
 */
static int vq2q(int queue_index)
{
    int pair = queue_index / 2;

    return pair;
}
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126     VirtIONet *n = VIRTIO_NET(vdev);
127     struct virtio_net_config netcfg;
128     NetClientState *nc = qemu_get_queue(n->nic);
129 
130     int ret = 0;
131     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
132     virtio_stw_p(vdev, &netcfg.status, n->status);
133     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
134     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
135     memcpy(netcfg.mac, n->mac, ETH_ALEN);
136     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
137     netcfg.duplex = n->net_conf.duplex;
138     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
139     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
140                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
141                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
142     virtio_stl_p(vdev, &netcfg.supported_hash_types,
143                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
144     memcpy(config, &netcfg, n->config_size);
145 
146     /*
147      * Is this VDPA? No peer means not VDPA: there's no way to
148      * disconnect/reconnect a VDPA peer.
149      */
150     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
151         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
152                                    n->config_size);
153         if (ret != -1) {
154             memcpy(config, &netcfg, n->config_size);
155         }
156     }
157 }
158 
159 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
160 {
161     VirtIONet *n = VIRTIO_NET(vdev);
162     struct virtio_net_config netcfg = {};
163     NetClientState *nc = qemu_get_queue(n->nic);
164 
165     memcpy(&netcfg, config, n->config_size);
166 
167     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
168         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
169         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
170         memcpy(n->mac, netcfg.mac, ETH_ALEN);
171         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
172     }
173 
174     /*
175      * Is this VDPA? No peer means not VDPA: there's no way to
176      * disconnect/reconnect a VDPA peer.
177      */
178     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
179         vhost_net_set_config(get_vhost_net(nc->peer),
180                              (uint8_t *)&netcfg, 0, n->config_size,
181                              VHOST_SET_CONFIG_TYPE_MASTER);
182       }
183 }
184 
185 static bool virtio_net_started(VirtIONet *n, uint8_t status)
186 {
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
189         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
190 }
191 
192 static void virtio_net_announce_notify(VirtIONet *net)
193 {
194     VirtIODevice *vdev = VIRTIO_DEVICE(net);
195     trace_virtio_net_announce_notify();
196 
197     net->status |= VIRTIO_NET_S_ANNOUNCE;
198     virtio_notify_config(vdev);
199 }
200 
201 static void virtio_net_announce_timer(void *opaque)
202 {
203     VirtIONet *n = opaque;
204     trace_virtio_net_announce_timer(n->announce_timer.round);
205 
206     n->announce_timer.round--;
207     virtio_net_announce_notify(n);
208 }
209 
210 static void virtio_net_announce(NetClientState *nc)
211 {
212     VirtIONet *n = qemu_get_nic_opaque(nc);
213     VirtIODevice *vdev = VIRTIO_DEVICE(n);
214 
215     /*
216      * Make sure the virtio migration announcement timer isn't running
217      * If it is, let it trigger announcement so that we do not cause
218      * confusion.
219      */
220     if (n->announce_timer.round) {
221         return;
222     }
223 
224     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
225         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
226             virtio_net_announce_notify(n);
227     }
228 }
229 
230 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
231 {
232     VirtIODevice *vdev = VIRTIO_DEVICE(n);
233     NetClientState *nc = qemu_get_queue(n->nic);
234     int queues = n->multiqueue ? n->max_queues : 1;
235 
236     if (!get_vhost_net(nc->peer)) {
237         return;
238     }
239 
240     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
241         !!n->vhost_started) {
242         return;
243     }
244     if (!n->vhost_started) {
245         int r, i;
246 
247         if (n->needs_vnet_hdr_swap) {
248             error_report("backend does not support %s vnet headers; "
249                          "falling back on userspace virtio",
250                          virtio_is_big_endian(vdev) ? "BE" : "LE");
251             return;
252         }
253 
254         /* Any packets outstanding? Purge them to avoid touching rings
255          * when vhost is running.
256          */
257         for (i = 0;  i < queues; i++) {
258             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
259 
260             /* Purge both directions: TX and RX. */
261             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
262             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
263         }
264 
265         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
266             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
267             if (r < 0) {
268                 error_report("%uBytes MTU not supported by the backend",
269                              n->net_conf.mtu);
270 
271                 return;
272             }
273         }
274 
275         n->vhost_started = 1;
276         r = vhost_net_start(vdev, n->nic->ncs, queues);
277         if (r < 0) {
278             error_report("unable to start vhost net: %d: "
279                          "falling back on userspace virtio", -r);
280             n->vhost_started = 0;
281         }
282     } else {
283         vhost_net_stop(vdev, n->nic->ncs, queues);
284         n->vhost_started = 0;
285     }
286 }
287 
288 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
289                                           NetClientState *peer,
290                                           bool enable)
291 {
292     if (virtio_is_big_endian(vdev)) {
293         return qemu_set_vnet_be(peer, enable);
294     } else {
295         return qemu_set_vnet_le(peer, enable);
296     }
297 }
298 
299 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
300                                        int queues, bool enable)
301 {
302     int i;
303 
304     for (i = 0; i < queues; i++) {
305         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
306             enable) {
307             while (--i >= 0) {
308                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
309             }
310 
311             return true;
312         }
313     }
314 
315     return false;
316 }
317 
318 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
319 {
320     VirtIODevice *vdev = VIRTIO_DEVICE(n);
321     int queues = n->multiqueue ? n->max_queues : 1;
322 
323     if (virtio_net_started(n, status)) {
324         /* Before using the device, we tell the network backend about the
325          * endianness to use when parsing vnet headers. If the backend
326          * can't do it, we fallback onto fixing the headers in the core
327          * virtio-net code.
328          */
329         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
330                                                             queues, true);
331     } else if (virtio_net_started(n, vdev->status)) {
332         /* After using the device, we need to reset the network backend to
333          * the default (guest native endianness), otherwise the guest may
334          * lose network connectivity if it is rebooted into a different
335          * endianness.
336          */
337         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
338     }
339 }
340 
341 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
342 {
343     unsigned int dropped = virtqueue_drop_all(vq);
344     if (dropped) {
345         virtio_notify(vdev, vq);
346     }
347 }
348 
349 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
350 {
351     VirtIONet *n = VIRTIO_NET(vdev);
352     VirtIONetQueue *q;
353     int i;
354     uint8_t queue_status;
355 
356     virtio_net_vnet_endian_status(n, status);
357     virtio_net_vhost_status(n, status);
358 
359     for (i = 0; i < n->max_queues; i++) {
360         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
361         bool queue_started;
362         q = &n->vqs[i];
363 
364         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
365             queue_status = 0;
366         } else {
367             queue_status = status;
368         }
369         queue_started =
370             virtio_net_started(n, queue_status) && !n->vhost_started;
371 
372         if (queue_started) {
373             qemu_flush_queued_packets(ncs);
374         }
375 
376         if (!q->tx_waiting) {
377             continue;
378         }
379 
380         if (queue_started) {
381             if (q->tx_timer) {
382                 timer_mod(q->tx_timer,
383                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
384             } else {
385                 qemu_bh_schedule(q->tx_bh);
386             }
387         } else {
388             if (q->tx_timer) {
389                 timer_del(q->tx_timer);
390             } else {
391                 qemu_bh_cancel(q->tx_bh);
392             }
393             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
394                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
395                 vdev->vm_running) {
396                 /* if tx is waiting we are likely have some packets in tx queue
397                  * and disabled notification */
398                 q->tx_waiting = 0;
399                 virtio_queue_set_notification(q->tx_vq, 1);
400                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
401             }
402         }
403     }
404 }
405 
406 static void virtio_net_set_link_status(NetClientState *nc)
407 {
408     VirtIONet *n = qemu_get_nic_opaque(nc);
409     VirtIODevice *vdev = VIRTIO_DEVICE(n);
410     uint16_t old_status = n->status;
411 
412     if (nc->link_down)
413         n->status &= ~VIRTIO_NET_S_LINK_UP;
414     else
415         n->status |= VIRTIO_NET_S_LINK_UP;
416 
417     if (n->status != old_status)
418         virtio_notify_config(vdev);
419 
420     virtio_net_set_status(vdev, vdev->status);
421 }
422 
423 static void rxfilter_notify(NetClientState *nc)
424 {
425     VirtIONet *n = qemu_get_nic_opaque(nc);
426 
427     if (nc->rxfilter_notify_enabled) {
428         char *path = object_get_canonical_path(OBJECT(n->qdev));
429         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
430                                               n->netclient_name, path);
431         g_free(path);
432 
433         /* disable event notification to avoid events flooding */
434         nc->rxfilter_notify_enabled = 0;
435     }
436 }
437 
438 static intList *get_vlan_table(VirtIONet *n)
439 {
440     intList *list, *entry;
441     int i, j;
442 
443     list = NULL;
444     for (i = 0; i < MAX_VLAN >> 5; i++) {
445         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
446             if (n->vlans[i] & (1U << j)) {
447                 entry = g_malloc0(sizeof(*entry));
448                 entry->value = (i << 5) + j;
449                 entry->next = list;
450                 list = entry;
451             }
452         }
453     }
454 
455     return list;
456 }
457 
458 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
459 {
460     VirtIONet *n = qemu_get_nic_opaque(nc);
461     VirtIODevice *vdev = VIRTIO_DEVICE(n);
462     RxFilterInfo *info;
463     strList *str_list, *entry;
464     int i;
465 
466     info = g_malloc0(sizeof(*info));
467     info->name = g_strdup(nc->name);
468     info->promiscuous = n->promisc;
469 
470     if (n->nouni) {
471         info->unicast = RX_STATE_NONE;
472     } else if (n->alluni) {
473         info->unicast = RX_STATE_ALL;
474     } else {
475         info->unicast = RX_STATE_NORMAL;
476     }
477 
478     if (n->nomulti) {
479         info->multicast = RX_STATE_NONE;
480     } else if (n->allmulti) {
481         info->multicast = RX_STATE_ALL;
482     } else {
483         info->multicast = RX_STATE_NORMAL;
484     }
485 
486     info->broadcast_allowed = n->nobcast;
487     info->multicast_overflow = n->mac_table.multi_overflow;
488     info->unicast_overflow = n->mac_table.uni_overflow;
489 
490     info->main_mac = qemu_mac_strdup_printf(n->mac);
491 
492     str_list = NULL;
493     for (i = 0; i < n->mac_table.first_multi; i++) {
494         entry = g_malloc0(sizeof(*entry));
495         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
496         entry->next = str_list;
497         str_list = entry;
498     }
499     info->unicast_table = str_list;
500 
501     str_list = NULL;
502     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
503         entry = g_malloc0(sizeof(*entry));
504         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
505         entry->next = str_list;
506         str_list = entry;
507     }
508     info->multicast_table = str_list;
509     info->vlan_table = get_vlan_table(n);
510 
511     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
512         info->vlan = RX_STATE_ALL;
513     } else if (!info->vlan_table) {
514         info->vlan = RX_STATE_NONE;
515     } else {
516         info->vlan = RX_STATE_NORMAL;
517     }
518 
519     /* enable event notification after query */
520     nc->rxfilter_notify_enabled = 1;
521 
522     return info;
523 }
524 
525 static void virtio_net_reset(VirtIODevice *vdev)
526 {
527     VirtIONet *n = VIRTIO_NET(vdev);
528     int i;
529 
530     /* Reset back to compatibility mode */
531     n->promisc = 1;
532     n->allmulti = 0;
533     n->alluni = 0;
534     n->nomulti = 0;
535     n->nouni = 0;
536     n->nobcast = 0;
537     /* multiqueue is disabled by default */
538     n->curr_queues = 1;
539     timer_del(n->announce_timer.tm);
540     n->announce_timer.round = 0;
541     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
542 
543     /* Flush any MAC and VLAN filter table state */
544     n->mac_table.in_use = 0;
545     n->mac_table.first_multi = 0;
546     n->mac_table.multi_overflow = 0;
547     n->mac_table.uni_overflow = 0;
548     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
549     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
550     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
551     memset(n->vlans, 0, MAX_VLAN >> 3);
552 
553     /* Flush any async TX */
554     for (i = 0;  i < n->max_queues; i++) {
555         NetClientState *nc = qemu_get_subqueue(n->nic, i);
556 
557         if (nc->peer) {
558             qemu_flush_or_purge_queued_packets(nc->peer, true);
559             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
560         }
561     }
562 }
563 
564 static void peer_test_vnet_hdr(VirtIONet *n)
565 {
566     NetClientState *nc = qemu_get_queue(n->nic);
567     if (!nc->peer) {
568         return;
569     }
570 
571     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
572 }
573 
574 static int peer_has_vnet_hdr(VirtIONet *n)
575 {
576     return n->has_vnet_hdr;
577 }
578 
579 static int peer_has_ufo(VirtIONet *n)
580 {
581     if (!peer_has_vnet_hdr(n))
582         return 0;
583 
584     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
585 
586     return n->has_ufo;
587 }
588 
589 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
590                                        int version_1, int hash_report)
591 {
592     int i;
593     NetClientState *nc;
594 
595     n->mergeable_rx_bufs = mergeable_rx_bufs;
596 
597     if (version_1) {
598         n->guest_hdr_len = hash_report ?
599             sizeof(struct virtio_net_hdr_v1_hash) :
600             sizeof(struct virtio_net_hdr_mrg_rxbuf);
601         n->rss_data.populate_hash = !!hash_report;
602     } else {
603         n->guest_hdr_len = n->mergeable_rx_bufs ?
604             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
605             sizeof(struct virtio_net_hdr);
606     }
607 
608     for (i = 0; i < n->max_queues; i++) {
609         nc = qemu_get_subqueue(n->nic, i);
610 
611         if (peer_has_vnet_hdr(n) &&
612             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
613             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
614             n->host_hdr_len = n->guest_hdr_len;
615         }
616     }
617 }
618 
619 static int virtio_net_max_tx_queue_size(VirtIONet *n)
620 {
621     NetClientState *peer = n->nic_conf.peers.ncs[0];
622 
623     /*
624      * Backends other than vhost-user don't support max queue size.
625      */
626     if (!peer) {
627         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
628     }
629 
630     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
631         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
632     }
633 
634     return VIRTQUEUE_MAX_SIZE;
635 }
636 
637 static int peer_attach(VirtIONet *n, int index)
638 {
639     NetClientState *nc = qemu_get_subqueue(n->nic, index);
640 
641     if (!nc->peer) {
642         return 0;
643     }
644 
645     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
646         vhost_set_vring_enable(nc->peer, 1);
647     }
648 
649     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
650         return 0;
651     }
652 
653     if (n->max_queues == 1) {
654         return 0;
655     }
656 
657     return tap_enable(nc->peer);
658 }
659 
660 static int peer_detach(VirtIONet *n, int index)
661 {
662     NetClientState *nc = qemu_get_subqueue(n->nic, index);
663 
664     if (!nc->peer) {
665         return 0;
666     }
667 
668     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
669         vhost_set_vring_enable(nc->peer, 0);
670     }
671 
672     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
673         return 0;
674     }
675 
676     return tap_disable(nc->peer);
677 }
678 
679 static void virtio_net_set_queues(VirtIONet *n)
680 {
681     int i;
682     int r;
683 
684     if (n->nic->peer_deleted) {
685         return;
686     }
687 
688     for (i = 0; i < n->max_queues; i++) {
689         if (i < n->curr_queues) {
690             r = peer_attach(n, i);
691             assert(!r);
692         } else {
693             r = peer_detach(n, i);
694             assert(!r);
695         }
696     }
697 }
698 
699 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
700 
701 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
702                                         Error **errp)
703 {
704     VirtIONet *n = VIRTIO_NET(vdev);
705     NetClientState *nc = qemu_get_queue(n->nic);
706 
707     /* Firstly sync all virtio-net possible supported features */
708     features |= n->host_features;
709 
710     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
711 
712     if (!peer_has_vnet_hdr(n)) {
713         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
714         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
715         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
716         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
717 
718         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
719         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
720         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
721         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
722 
723         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
724     }
725 
726     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
727         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
728         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
729     }
730 
731     if (!get_vhost_net(nc->peer)) {
732         return features;
733     }
734 
735     virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
736     virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
737     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
738     vdev->backend_features = features;
739 
740     if (n->mtu_bypass_backend &&
741             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
742         features |= (1ULL << VIRTIO_NET_F_MTU);
743     }
744 
745     return features;
746 }
747 
748 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
749 {
750     uint64_t features = 0;
751 
752     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
753      * but also these: */
754     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
755     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
756     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
757     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
758     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
759 
760     return features;
761 }
762 
763 static void virtio_net_apply_guest_offloads(VirtIONet *n)
764 {
765     qemu_set_offload(qemu_get_queue(n->nic)->peer,
766             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
767             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
768             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
769             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
770             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
771 }
772 
773 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
774 {
775     static const uint64_t guest_offloads_mask =
776         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
777         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
778         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
779         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
780         (1ULL << VIRTIO_NET_F_GUEST_UFO);
781 
782     return guest_offloads_mask & features;
783 }
784 
785 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
786 {
787     VirtIODevice *vdev = VIRTIO_DEVICE(n);
788     return virtio_net_guest_offloads_by_features(vdev->guest_features);
789 }
790 
791 static void failover_add_primary(VirtIONet *n, Error **errp)
792 {
793     Error *err = NULL;
794 
795     if (n->primary_dev) {
796         return;
797     }
798 
799     n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
800             n->primary_device_id);
801     if (n->primary_device_opts) {
802         n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
803         if (err) {
804             qemu_opts_del(n->primary_device_opts);
805         }
806         if (n->primary_dev) {
807             n->primary_bus = n->primary_dev->parent_bus;
808             if (err) {
809                 qdev_unplug(n->primary_dev, &err);
810                 qdev_set_id(n->primary_dev, "");
811 
812             }
813         }
814     } else {
815         error_setg(errp, "Primary device not found");
816         error_append_hint(errp, "Virtio-net failover will not work. Make "
817             "sure primary device has parameter"
818             " failover_pair_id=<virtio-net-id>\n");
819 }
820     if (err) {
821         error_propagate(errp, err);
822     }
823 }
824 
825 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
826 {
827     VirtIONet *n = opaque;
828     int ret = 0;
829 
830     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
831 
832     if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
833         n->primary_device_id = g_strdup(opts->id);
834         ret = 1;
835     }
836 
837     return ret;
838 }
839 
840 static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
841 {
842     DeviceState *dev = NULL;
843     Error *err = NULL;
844 
845     if (qemu_opts_foreach(qemu_find_opts("device"),
846                          is_my_primary, n, &err)) {
847         if (err) {
848             error_propagate(errp, err);
849             return NULL;
850         }
851         if (n->primary_device_id) {
852             dev = qdev_find_recursive(sysbus_get_default(),
853                     n->primary_device_id);
854         } else {
855             error_setg(errp, "Primary device id not found");
856             return NULL;
857         }
858     }
859     return dev;
860 }
861 
862 
863 
864 static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
865                                                     DeviceState *dev,
866                                                     Error **errp)
867 {
868     DeviceState *prim_dev = NULL;
869     Error *err = NULL;
870 
871     prim_dev = virtio_net_find_primary(n, &err);
872     if (prim_dev) {
873         n->primary_device_id = g_strdup(prim_dev->id);
874         n->primary_device_opts = prim_dev->opts;
875     } else {
876         if (err) {
877             error_propagate(errp, err);
878         }
879     }
880 
881     return prim_dev;
882 }
883 
884 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
885 {
886     VirtIONet *n = VIRTIO_NET(vdev);
887     Error *err = NULL;
888     int i;
889 
890     if (n->mtu_bypass_backend &&
891             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
892         features &= ~(1ULL << VIRTIO_NET_F_MTU);
893     }
894 
895     virtio_net_set_multiqueue(n,
896                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
897                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
898 
899     virtio_net_set_mrg_rx_bufs(n,
900                                virtio_has_feature(features,
901                                                   VIRTIO_NET_F_MRG_RXBUF),
902                                virtio_has_feature(features,
903                                                   VIRTIO_F_VERSION_1),
904                                virtio_has_feature(features,
905                                                   VIRTIO_NET_F_HASH_REPORT));
906 
907     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
908         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
909     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
910         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
911     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
912 
913     if (n->has_vnet_hdr) {
914         n->curr_guest_offloads =
915             virtio_net_guest_offloads_by_features(features);
916         virtio_net_apply_guest_offloads(n);
917     }
918 
919     for (i = 0;  i < n->max_queues; i++) {
920         NetClientState *nc = qemu_get_subqueue(n->nic, i);
921 
922         if (!get_vhost_net(nc->peer)) {
923             continue;
924         }
925         vhost_net_ack_features(get_vhost_net(nc->peer), features);
926     }
927 
928     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
929         memset(n->vlans, 0, MAX_VLAN >> 3);
930     } else {
931         memset(n->vlans, 0xff, MAX_VLAN >> 3);
932     }
933 
934     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
935         qapi_event_send_failover_negotiated(n->netclient_name);
936         atomic_set(&n->primary_should_be_hidden, false);
937         failover_add_primary(n, &err);
938         if (err) {
939             n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
940             if (err) {
941                 goto out_err;
942             }
943             failover_add_primary(n, &err);
944             if (err) {
945                 goto out_err;
946             }
947         }
948     }
949     return;
950 
951 out_err:
952     if (err) {
953         warn_report_err(err);
954     }
955 }
956 
957 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
958                                      struct iovec *iov, unsigned int iov_cnt)
959 {
960     uint8_t on;
961     size_t s;
962     NetClientState *nc = qemu_get_queue(n->nic);
963 
964     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
965     if (s != sizeof(on)) {
966         return VIRTIO_NET_ERR;
967     }
968 
969     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
970         n->promisc = on;
971     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
972         n->allmulti = on;
973     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
974         n->alluni = on;
975     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
976         n->nomulti = on;
977     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
978         n->nouni = on;
979     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
980         n->nobcast = on;
981     } else {
982         return VIRTIO_NET_ERR;
983     }
984 
985     rxfilter_notify(nc);
986 
987     return VIRTIO_NET_OK;
988 }
989 
990 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
991                                      struct iovec *iov, unsigned int iov_cnt)
992 {
993     VirtIODevice *vdev = VIRTIO_DEVICE(n);
994     uint64_t offloads;
995     size_t s;
996 
997     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
998         return VIRTIO_NET_ERR;
999     }
1000 
1001     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1002     if (s != sizeof(offloads)) {
1003         return VIRTIO_NET_ERR;
1004     }
1005 
1006     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1007         uint64_t supported_offloads;
1008 
1009         offloads = virtio_ldq_p(vdev, &offloads);
1010 
1011         if (!n->has_vnet_hdr) {
1012             return VIRTIO_NET_ERR;
1013         }
1014 
1015         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1016             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1017         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1018             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1019         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1020 
1021         supported_offloads = virtio_net_supported_guest_offloads(n);
1022         if (offloads & ~supported_offloads) {
1023             return VIRTIO_NET_ERR;
1024         }
1025 
1026         n->curr_guest_offloads = offloads;
1027         virtio_net_apply_guest_offloads(n);
1028 
1029         return VIRTIO_NET_OK;
1030     } else {
1031         return VIRTIO_NET_ERR;
1032     }
1033 }
1034 
1035 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1036                                  struct iovec *iov, unsigned int iov_cnt)
1037 {
1038     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1039     struct virtio_net_ctrl_mac mac_data;
1040     size_t s;
1041     NetClientState *nc = qemu_get_queue(n->nic);
1042 
1043     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1044         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1045             return VIRTIO_NET_ERR;
1046         }
1047         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1048         assert(s == sizeof(n->mac));
1049         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1050         rxfilter_notify(nc);
1051 
1052         return VIRTIO_NET_OK;
1053     }
1054 
1055     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1056         return VIRTIO_NET_ERR;
1057     }
1058 
1059     int in_use = 0;
1060     int first_multi = 0;
1061     uint8_t uni_overflow = 0;
1062     uint8_t multi_overflow = 0;
1063     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1064 
1065     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1066                    sizeof(mac_data.entries));
1067     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1068     if (s != sizeof(mac_data.entries)) {
1069         goto error;
1070     }
1071     iov_discard_front(&iov, &iov_cnt, s);
1072 
1073     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1074         goto error;
1075     }
1076 
1077     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1078         s = iov_to_buf(iov, iov_cnt, 0, macs,
1079                        mac_data.entries * ETH_ALEN);
1080         if (s != mac_data.entries * ETH_ALEN) {
1081             goto error;
1082         }
1083         in_use += mac_data.entries;
1084     } else {
1085         uni_overflow = 1;
1086     }
1087 
1088     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1089 
1090     first_multi = in_use;
1091 
1092     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1093                    sizeof(mac_data.entries));
1094     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1095     if (s != sizeof(mac_data.entries)) {
1096         goto error;
1097     }
1098 
1099     iov_discard_front(&iov, &iov_cnt, s);
1100 
1101     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1102         goto error;
1103     }
1104 
1105     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1106         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1107                        mac_data.entries * ETH_ALEN);
1108         if (s != mac_data.entries * ETH_ALEN) {
1109             goto error;
1110         }
1111         in_use += mac_data.entries;
1112     } else {
1113         multi_overflow = 1;
1114     }
1115 
1116     n->mac_table.in_use = in_use;
1117     n->mac_table.first_multi = first_multi;
1118     n->mac_table.uni_overflow = uni_overflow;
1119     n->mac_table.multi_overflow = multi_overflow;
1120     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1121     g_free(macs);
1122     rxfilter_notify(nc);
1123 
1124     return VIRTIO_NET_OK;
1125 
1126 error:
1127     g_free(macs);
1128     return VIRTIO_NET_ERR;
1129 }
1130 
1131 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1132                                         struct iovec *iov, unsigned int iov_cnt)
1133 {
1134     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1135     uint16_t vid;
1136     size_t s;
1137     NetClientState *nc = qemu_get_queue(n->nic);
1138 
1139     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1140     vid = virtio_lduw_p(vdev, &vid);
1141     if (s != sizeof(vid)) {
1142         return VIRTIO_NET_ERR;
1143     }
1144 
1145     if (vid >= MAX_VLAN)
1146         return VIRTIO_NET_ERR;
1147 
1148     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1149         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1150     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1151         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1152     else
1153         return VIRTIO_NET_ERR;
1154 
1155     rxfilter_notify(nc);
1156 
1157     return VIRTIO_NET_OK;
1158 }
1159 
1160 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1161                                       struct iovec *iov, unsigned int iov_cnt)
1162 {
1163     trace_virtio_net_handle_announce(n->announce_timer.round);
1164     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1165         n->status & VIRTIO_NET_S_ANNOUNCE) {
1166         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1167         if (n->announce_timer.round) {
1168             qemu_announce_timer_step(&n->announce_timer);
1169         }
1170         return VIRTIO_NET_OK;
1171     } else {
1172         return VIRTIO_NET_ERR;
1173     }
1174 }
1175 
1176 static void virtio_net_disable_rss(VirtIONet *n)
1177 {
1178     if (n->rss_data.enabled) {
1179         trace_virtio_net_rss_disable();
1180     }
1181     n->rss_data.enabled = false;
1182 }
1183 
1184 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1185                                       struct iovec *iov,
1186                                       unsigned int iov_cnt,
1187                                       bool do_rss)
1188 {
1189     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1190     struct virtio_net_rss_config cfg;
1191     size_t s, offset = 0, size_get;
1192     uint16_t queues, i;
1193     struct {
1194         uint16_t us;
1195         uint8_t b;
1196     } QEMU_PACKED temp;
1197     const char *err_msg = "";
1198     uint32_t err_value = 0;
1199 
1200     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1201         err_msg = "RSS is not negotiated";
1202         goto error;
1203     }
1204     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1205         err_msg = "Hash report is not negotiated";
1206         goto error;
1207     }
1208     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1209     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1210     if (s != size_get) {
1211         err_msg = "Short command buffer";
1212         err_value = (uint32_t)s;
1213         goto error;
1214     }
1215     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1216     n->rss_data.indirections_len =
1217         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1218     n->rss_data.indirections_len++;
1219     if (!do_rss) {
1220         n->rss_data.indirections_len = 1;
1221     }
1222     if (!is_power_of_2(n->rss_data.indirections_len)) {
1223         err_msg = "Invalid size of indirection table";
1224         err_value = n->rss_data.indirections_len;
1225         goto error;
1226     }
1227     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1228         err_msg = "Too large indirection table";
1229         err_value = n->rss_data.indirections_len;
1230         goto error;
1231     }
1232     n->rss_data.default_queue = do_rss ?
1233         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1234     if (n->rss_data.default_queue >= n->max_queues) {
1235         err_msg = "Invalid default queue";
1236         err_value = n->rss_data.default_queue;
1237         goto error;
1238     }
1239     offset += size_get;
1240     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1241     g_free(n->rss_data.indirections_table);
1242     n->rss_data.indirections_table = g_malloc(size_get);
1243     if (!n->rss_data.indirections_table) {
1244         err_msg = "Can't allocate indirections table";
1245         err_value = n->rss_data.indirections_len;
1246         goto error;
1247     }
1248     s = iov_to_buf(iov, iov_cnt, offset,
1249                    n->rss_data.indirections_table, size_get);
1250     if (s != size_get) {
1251         err_msg = "Short indirection table buffer";
1252         err_value = (uint32_t)s;
1253         goto error;
1254     }
1255     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1256         uint16_t val = n->rss_data.indirections_table[i];
1257         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1258     }
1259     offset += size_get;
1260     size_get = sizeof(temp);
1261     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1262     if (s != size_get) {
1263         err_msg = "Can't get queues";
1264         err_value = (uint32_t)s;
1265         goto error;
1266     }
1267     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1268     if (queues == 0 || queues > n->max_queues) {
1269         err_msg = "Invalid number of queues";
1270         err_value = queues;
1271         goto error;
1272     }
1273     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1274         err_msg = "Invalid key size";
1275         err_value = temp.b;
1276         goto error;
1277     }
1278     if (!temp.b && n->rss_data.hash_types) {
1279         err_msg = "No key provided";
1280         err_value = 0;
1281         goto error;
1282     }
1283     if (!temp.b && !n->rss_data.hash_types) {
1284         virtio_net_disable_rss(n);
1285         return queues;
1286     }
1287     offset += size_get;
1288     size_get = temp.b;
1289     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1290     if (s != size_get) {
1291         err_msg = "Can get key buffer";
1292         err_value = (uint32_t)s;
1293         goto error;
1294     }
1295     n->rss_data.enabled = true;
1296     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1297                                 n->rss_data.indirections_len,
1298                                 temp.b);
1299     return queues;
1300 error:
1301     trace_virtio_net_rss_error(err_msg, err_value);
1302     virtio_net_disable_rss(n);
1303     return 0;
1304 }
1305 
1306 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1307                                 struct iovec *iov, unsigned int iov_cnt)
1308 {
1309     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1310     uint16_t queues;
1311 
1312     virtio_net_disable_rss(n);
1313     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1314         queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1315         return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1316     }
1317     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1318         queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1319     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1320         struct virtio_net_ctrl_mq mq;
1321         size_t s;
1322         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1323             return VIRTIO_NET_ERR;
1324         }
1325         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1326         if (s != sizeof(mq)) {
1327             return VIRTIO_NET_ERR;
1328         }
1329         queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1330 
1331     } else {
1332         return VIRTIO_NET_ERR;
1333     }
1334 
1335     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1336         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1337         queues > n->max_queues ||
1338         !n->multiqueue) {
1339         return VIRTIO_NET_ERR;
1340     }
1341 
1342     n->curr_queues = queues;
1343     /* stop the backend before changing the number of queues to avoid handling a
1344      * disabled queue */
1345     virtio_net_set_status(vdev, vdev->status);
1346     virtio_net_set_queues(n);
1347 
1348     return VIRTIO_NET_OK;
1349 }
1350 
1351 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1352 {
1353     VirtIONet *n = VIRTIO_NET(vdev);
1354     struct virtio_net_ctrl_hdr ctrl;
1355     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1356     VirtQueueElement *elem;
1357     size_t s;
1358     struct iovec *iov, *iov2;
1359     unsigned int iov_cnt;
1360 
1361     for (;;) {
1362         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1363         if (!elem) {
1364             break;
1365         }
1366         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1367             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1368             virtio_error(vdev, "virtio-net ctrl missing headers");
1369             virtqueue_detach_element(vq, elem, 0);
1370             g_free(elem);
1371             break;
1372         }
1373 
1374         iov_cnt = elem->out_num;
1375         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1376         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1377         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1378         if (s != sizeof(ctrl)) {
1379             status = VIRTIO_NET_ERR;
1380         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1381             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1382         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1383             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1384         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1385             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1386         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1387             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1388         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1389             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1390         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1391             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1392         }
1393 
1394         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1395         assert(s == sizeof(status));
1396 
1397         virtqueue_push(vq, elem, sizeof(status));
1398         virtio_notify(vdev, vq);
1399         g_free(iov2);
1400         g_free(elem);
1401     }
1402 }
1403 
1404 /* RX */
1405 
1406 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1407 {
1408     VirtIONet *n = VIRTIO_NET(vdev);
1409     int queue_index = vq2q(virtio_get_queue_index(vq));
1410 
1411     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1412 }
1413 
1414 static bool virtio_net_can_receive(NetClientState *nc)
1415 {
1416     VirtIONet *n = qemu_get_nic_opaque(nc);
1417     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1418     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1419 
1420     if (!vdev->vm_running) {
1421         return false;
1422     }
1423 
1424     if (nc->queue_index >= n->curr_queues) {
1425         return false;
1426     }
1427 
1428     if (!virtio_queue_ready(q->rx_vq) ||
1429         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1430         return false;
1431     }
1432 
1433     return true;
1434 }
1435 
1436 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1437 {
1438     VirtIONet *n = q->n;
1439     if (virtio_queue_empty(q->rx_vq) ||
1440         (n->mergeable_rx_bufs &&
1441          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1442         virtio_queue_set_notification(q->rx_vq, 1);
1443 
1444         /* To avoid a race condition where the guest has made some buffers
1445          * available after the above check but before notification was
1446          * enabled, check for available buffers again.
1447          */
1448         if (virtio_queue_empty(q->rx_vq) ||
1449             (n->mergeable_rx_bufs &&
1450              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1451             return 0;
1452         }
1453     }
1454 
1455     virtio_queue_set_notification(q->rx_vq, 0);
1456     return 1;
1457 }
1458 
1459 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1460 {
1461     virtio_tswap16s(vdev, &hdr->hdr_len);
1462     virtio_tswap16s(vdev, &hdr->gso_size);
1463     virtio_tswap16s(vdev, &hdr->csum_start);
1464     virtio_tswap16s(vdev, &hdr->csum_offset);
1465 }
1466 
1467 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1468  * it never finds out that the packets don't have valid checksums.  This
1469  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1470  * fix this with Xen but it hasn't appeared in an upstream release of
1471  * dhclient yet.
1472  *
1473  * To avoid breaking existing guests, we catch udp packets and add
1474  * checksums.  This is terrible but it's better than hacking the guest
1475  * kernels.
1476  *
1477  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1478  * we should provide a mechanism to disable it to avoid polluting the host
1479  * cache.
1480  */
1481 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1482                                         uint8_t *buf, size_t size)
1483 {
1484     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1485         (size > 27 && size < 1500) && /* normal sized MTU */
1486         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1487         (buf[23] == 17) && /* ip.protocol == UDP */
1488         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1489         net_checksum_calculate(buf, size);
1490         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1491     }
1492 }
1493 
1494 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1495                            const void *buf, size_t size)
1496 {
1497     if (n->has_vnet_hdr) {
1498         /* FIXME this cast is evil */
1499         void *wbuf = (void *)buf;
1500         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1501                                     size - n->host_hdr_len);
1502 
1503         if (n->needs_vnet_hdr_swap) {
1504             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1505         }
1506         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1507     } else {
1508         struct virtio_net_hdr hdr = {
1509             .flags = 0,
1510             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1511         };
1512         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1513     }
1514 }
1515 
1516 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1517 {
1518     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1519     static const uint8_t vlan[] = {0x81, 0x00};
1520     uint8_t *ptr = (uint8_t *)buf;
1521     int i;
1522 
1523     if (n->promisc)
1524         return 1;
1525 
1526     ptr += n->host_hdr_len;
1527 
1528     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1529         int vid = lduw_be_p(ptr + 14) & 0xfff;
1530         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1531             return 0;
1532     }
1533 
1534     if (ptr[0] & 1) { // multicast
1535         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1536             return !n->nobcast;
1537         } else if (n->nomulti) {
1538             return 0;
1539         } else if (n->allmulti || n->mac_table.multi_overflow) {
1540             return 1;
1541         }
1542 
1543         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1544             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1545                 return 1;
1546             }
1547         }
1548     } else { // unicast
1549         if (n->nouni) {
1550             return 0;
1551         } else if (n->alluni || n->mac_table.uni_overflow) {
1552             return 1;
1553         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1554             return 1;
1555         }
1556 
1557         for (i = 0; i < n->mac_table.first_multi; i++) {
1558             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1559                 return 1;
1560             }
1561         }
1562     }
1563 
1564     return 0;
1565 }
1566 
1567 static uint8_t virtio_net_get_hash_type(bool isip4,
1568                                         bool isip6,
1569                                         bool isudp,
1570                                         bool istcp,
1571                                         uint32_t types)
1572 {
1573     if (isip4) {
1574         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1575             return NetPktRssIpV4Tcp;
1576         }
1577         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1578             return NetPktRssIpV4Udp;
1579         }
1580         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1581             return NetPktRssIpV4;
1582         }
1583     } else if (isip6) {
1584         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1585                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1586 
1587         if (istcp && (types & mask)) {
1588             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1589                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1590         }
1591         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1592         if (isudp && (types & mask)) {
1593             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1594                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1595         }
1596         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1597         if (types & mask) {
1598             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1599                 NetPktRssIpV6Ex : NetPktRssIpV6;
1600         }
1601     }
1602     return 0xff;
1603 }
1604 
1605 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1606                                    uint32_t hash)
1607 {
1608     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1609     hdr->hash_value = hash;
1610     hdr->hash_report = report;
1611 }
1612 
1613 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1614                                   size_t size)
1615 {
1616     VirtIONet *n = qemu_get_nic_opaque(nc);
1617     unsigned int index = nc->queue_index, new_index = index;
1618     struct NetRxPkt *pkt = n->rx_pkt;
1619     uint8_t net_hash_type;
1620     uint32_t hash;
1621     bool isip4, isip6, isudp, istcp;
1622     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1623         VIRTIO_NET_HASH_REPORT_IPv4,
1624         VIRTIO_NET_HASH_REPORT_TCPv4,
1625         VIRTIO_NET_HASH_REPORT_TCPv6,
1626         VIRTIO_NET_HASH_REPORT_IPv6,
1627         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1628         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1629         VIRTIO_NET_HASH_REPORT_UDPv4,
1630         VIRTIO_NET_HASH_REPORT_UDPv6,
1631         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1632     };
1633 
1634     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1635                              size - n->host_hdr_len);
1636     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1637     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1638         istcp = isudp = false;
1639     }
1640     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1641         istcp = isudp = false;
1642     }
1643     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1644                                              n->rss_data.hash_types);
1645     if (net_hash_type > NetPktRssIpV6UdpEx) {
1646         if (n->rss_data.populate_hash) {
1647             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1648         }
1649         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1650     }
1651 
1652     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1653 
1654     if (n->rss_data.populate_hash) {
1655         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1656     }
1657 
1658     if (n->rss_data.redirect) {
1659         new_index = hash & (n->rss_data.indirections_len - 1);
1660         new_index = n->rss_data.indirections_table[new_index];
1661     }
1662 
1663     return (index == new_index) ? -1 : new_index;
1664 }
1665 
1666 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1667                                       size_t size, bool no_rss)
1668 {
1669     VirtIONet *n = qemu_get_nic_opaque(nc);
1670     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1671     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1672     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1673     struct virtio_net_hdr_mrg_rxbuf mhdr;
1674     unsigned mhdr_cnt = 0;
1675     size_t offset, i, guest_offset;
1676 
1677     if (!virtio_net_can_receive(nc)) {
1678         return -1;
1679     }
1680 
1681     if (!no_rss && n->rss_data.enabled) {
1682         int index = virtio_net_process_rss(nc, buf, size);
1683         if (index >= 0) {
1684             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1685             return virtio_net_receive_rcu(nc2, buf, size, true);
1686         }
1687     }
1688 
1689     /* hdr_len refers to the header we supply to the guest */
1690     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1691         return 0;
1692     }
1693 
1694     if (!receive_filter(n, buf, size))
1695         return size;
1696 
1697     offset = i = 0;
1698 
1699     while (offset < size) {
1700         VirtQueueElement *elem;
1701         int len, total;
1702         const struct iovec *sg;
1703 
1704         total = 0;
1705 
1706         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1707         if (!elem) {
1708             if (i) {
1709                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1710                              "i %zd mergeable %d offset %zd, size %zd, "
1711                              "guest hdr len %zd, host hdr len %zd "
1712                              "guest features 0x%" PRIx64,
1713                              i, n->mergeable_rx_bufs, offset, size,
1714                              n->guest_hdr_len, n->host_hdr_len,
1715                              vdev->guest_features);
1716             }
1717             return -1;
1718         }
1719 
1720         if (elem->in_num < 1) {
1721             virtio_error(vdev,
1722                          "virtio-net receive queue contains no in buffers");
1723             virtqueue_detach_element(q->rx_vq, elem, 0);
1724             g_free(elem);
1725             return -1;
1726         }
1727 
1728         sg = elem->in_sg;
1729         if (i == 0) {
1730             assert(offset == 0);
1731             if (n->mergeable_rx_bufs) {
1732                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1733                                     sg, elem->in_num,
1734                                     offsetof(typeof(mhdr), num_buffers),
1735                                     sizeof(mhdr.num_buffers));
1736             }
1737 
1738             receive_header(n, sg, elem->in_num, buf, size);
1739             if (n->rss_data.populate_hash) {
1740                 offset = sizeof(mhdr);
1741                 iov_from_buf(sg, elem->in_num, offset,
1742                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1743             }
1744             offset = n->host_hdr_len;
1745             total += n->guest_hdr_len;
1746             guest_offset = n->guest_hdr_len;
1747         } else {
1748             guest_offset = 0;
1749         }
1750 
1751         /* copy in packet.  ugh */
1752         len = iov_from_buf(sg, elem->in_num, guest_offset,
1753                            buf + offset, size - offset);
1754         total += len;
1755         offset += len;
1756         /* If buffers can't be merged, at this point we
1757          * must have consumed the complete packet.
1758          * Otherwise, drop it. */
1759         if (!n->mergeable_rx_bufs && offset < size) {
1760             virtqueue_unpop(q->rx_vq, elem, total);
1761             g_free(elem);
1762             return size;
1763         }
1764 
1765         /* signal other side */
1766         virtqueue_fill(q->rx_vq, elem, total, i++);
1767         g_free(elem);
1768     }
1769 
1770     if (mhdr_cnt) {
1771         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1772         iov_from_buf(mhdr_sg, mhdr_cnt,
1773                      0,
1774                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1775     }
1776 
1777     virtqueue_flush(q->rx_vq, i);
1778     virtio_notify(vdev, q->rx_vq);
1779 
1780     return size;
1781 }
1782 
1783 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1784                                   size_t size)
1785 {
1786     RCU_READ_LOCK_GUARD();
1787 
1788     return virtio_net_receive_rcu(nc, buf, size, false);
1789 }
1790 
1791 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1792                                          const uint8_t *buf,
1793                                          VirtioNetRscUnit *unit)
1794 {
1795     uint16_t ip_hdrlen;
1796     struct ip_header *ip;
1797 
1798     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1799                               + sizeof(struct eth_header));
1800     unit->ip = (void *)ip;
1801     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1802     unit->ip_plen = &ip->ip_len;
1803     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1804     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1805     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1806 }
1807 
1808 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1809                                          const uint8_t *buf,
1810                                          VirtioNetRscUnit *unit)
1811 {
1812     struct ip6_header *ip6;
1813 
1814     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1815                                  + sizeof(struct eth_header));
1816     unit->ip = ip6;
1817     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1818     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1819                                         + sizeof(struct ip6_header));
1820     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1821 
1822     /* There is a difference between payload lenght in ipv4 and v6,
1823        ip header is excluded in ipv6 */
1824     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1825 }
1826 
1827 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1828                                        VirtioNetRscSeg *seg)
1829 {
1830     int ret;
1831     struct virtio_net_hdr_v1 *h;
1832 
1833     h = (struct virtio_net_hdr_v1 *)seg->buf;
1834     h->flags = 0;
1835     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1836 
1837     if (seg->is_coalesced) {
1838         h->rsc.segments = seg->packets;
1839         h->rsc.dup_acks = seg->dup_ack;
1840         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1841         if (chain->proto == ETH_P_IP) {
1842             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1843         } else {
1844             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1845         }
1846     }
1847 
1848     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1849     QTAILQ_REMOVE(&chain->buffers, seg, next);
1850     g_free(seg->buf);
1851     g_free(seg);
1852 
1853     return ret;
1854 }
1855 
1856 static void virtio_net_rsc_purge(void *opq)
1857 {
1858     VirtioNetRscSeg *seg, *rn;
1859     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1860 
1861     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1862         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1863             chain->stat.purge_failed++;
1864             continue;
1865         }
1866     }
1867 
1868     chain->stat.timer++;
1869     if (!QTAILQ_EMPTY(&chain->buffers)) {
1870         timer_mod(chain->drain_timer,
1871               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1872     }
1873 }
1874 
1875 static void virtio_net_rsc_cleanup(VirtIONet *n)
1876 {
1877     VirtioNetRscChain *chain, *rn_chain;
1878     VirtioNetRscSeg *seg, *rn_seg;
1879 
1880     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1881         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1882             QTAILQ_REMOVE(&chain->buffers, seg, next);
1883             g_free(seg->buf);
1884             g_free(seg);
1885         }
1886 
1887         timer_del(chain->drain_timer);
1888         timer_free(chain->drain_timer);
1889         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1890         g_free(chain);
1891     }
1892 }
1893 
1894 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1895                                      NetClientState *nc,
1896                                      const uint8_t *buf, size_t size)
1897 {
1898     uint16_t hdr_len;
1899     VirtioNetRscSeg *seg;
1900 
1901     hdr_len = chain->n->guest_hdr_len;
1902     seg = g_malloc(sizeof(VirtioNetRscSeg));
1903     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1904         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1905     memcpy(seg->buf, buf, size);
1906     seg->size = size;
1907     seg->packets = 1;
1908     seg->dup_ack = 0;
1909     seg->is_coalesced = 0;
1910     seg->nc = nc;
1911 
1912     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1913     chain->stat.cache++;
1914 
1915     switch (chain->proto) {
1916     case ETH_P_IP:
1917         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1918         break;
1919     case ETH_P_IPV6:
1920         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1921         break;
1922     default:
1923         g_assert_not_reached();
1924     }
1925 }
1926 
1927 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1928                                          VirtioNetRscSeg *seg,
1929                                          const uint8_t *buf,
1930                                          struct tcp_header *n_tcp,
1931                                          struct tcp_header *o_tcp)
1932 {
1933     uint32_t nack, oack;
1934     uint16_t nwin, owin;
1935 
1936     nack = htonl(n_tcp->th_ack);
1937     nwin = htons(n_tcp->th_win);
1938     oack = htonl(o_tcp->th_ack);
1939     owin = htons(o_tcp->th_win);
1940 
1941     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1942         chain->stat.ack_out_of_win++;
1943         return RSC_FINAL;
1944     } else if (nack == oack) {
1945         /* duplicated ack or window probe */
1946         if (nwin == owin) {
1947             /* duplicated ack, add dup ack count due to whql test up to 1 */
1948             chain->stat.dup_ack++;
1949             return RSC_FINAL;
1950         } else {
1951             /* Coalesce window update */
1952             o_tcp->th_win = n_tcp->th_win;
1953             chain->stat.win_update++;
1954             return RSC_COALESCE;
1955         }
1956     } else {
1957         /* pure ack, go to 'C', finalize*/
1958         chain->stat.pure_ack++;
1959         return RSC_FINAL;
1960     }
1961 }
1962 
1963 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1964                                             VirtioNetRscSeg *seg,
1965                                             const uint8_t *buf,
1966                                             VirtioNetRscUnit *n_unit)
1967 {
1968     void *data;
1969     uint16_t o_ip_len;
1970     uint32_t nseq, oseq;
1971     VirtioNetRscUnit *o_unit;
1972 
1973     o_unit = &seg->unit;
1974     o_ip_len = htons(*o_unit->ip_plen);
1975     nseq = htonl(n_unit->tcp->th_seq);
1976     oseq = htonl(o_unit->tcp->th_seq);
1977 
1978     /* out of order or retransmitted. */
1979     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1980         chain->stat.data_out_of_win++;
1981         return RSC_FINAL;
1982     }
1983 
1984     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1985     if (nseq == oseq) {
1986         if ((o_unit->payload == 0) && n_unit->payload) {
1987             /* From no payload to payload, normal case, not a dup ack or etc */
1988             chain->stat.data_after_pure_ack++;
1989             goto coalesce;
1990         } else {
1991             return virtio_net_rsc_handle_ack(chain, seg, buf,
1992                                              n_unit->tcp, o_unit->tcp);
1993         }
1994     } else if ((nseq - oseq) != o_unit->payload) {
1995         /* Not a consistent packet, out of order */
1996         chain->stat.data_out_of_order++;
1997         return RSC_FINAL;
1998     } else {
1999 coalesce:
2000         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2001             chain->stat.over_size++;
2002             return RSC_FINAL;
2003         }
2004 
2005         /* Here comes the right data, the payload length in v4/v6 is different,
2006            so use the field value to update and record the new data len */
2007         o_unit->payload += n_unit->payload; /* update new data len */
2008 
2009         /* update field in ip header */
2010         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2011 
2012         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2013            for windows guest, while this may change the behavior for linux
2014            guest (only if it uses RSC feature). */
2015         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2016 
2017         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2018         o_unit->tcp->th_win = n_unit->tcp->th_win;
2019 
2020         memmove(seg->buf + seg->size, data, n_unit->payload);
2021         seg->size += n_unit->payload;
2022         seg->packets++;
2023         chain->stat.coalesced++;
2024         return RSC_COALESCE;
2025     }
2026 }
2027 
2028 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2029                                         VirtioNetRscSeg *seg,
2030                                         const uint8_t *buf, size_t size,
2031                                         VirtioNetRscUnit *unit)
2032 {
2033     struct ip_header *ip1, *ip2;
2034 
2035     ip1 = (struct ip_header *)(unit->ip);
2036     ip2 = (struct ip_header *)(seg->unit.ip);
2037     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2038         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2039         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2040         chain->stat.no_match++;
2041         return RSC_NO_MATCH;
2042     }
2043 
2044     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2045 }
2046 
2047 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2048                                         VirtioNetRscSeg *seg,
2049                                         const uint8_t *buf, size_t size,
2050                                         VirtioNetRscUnit *unit)
2051 {
2052     struct ip6_header *ip1, *ip2;
2053 
2054     ip1 = (struct ip6_header *)(unit->ip);
2055     ip2 = (struct ip6_header *)(seg->unit.ip);
2056     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2057         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2058         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2059         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2060             chain->stat.no_match++;
2061             return RSC_NO_MATCH;
2062     }
2063 
2064     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2065 }
2066 
2067 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2068  * to prevent out of order */
2069 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2070                                          struct tcp_header *tcp)
2071 {
2072     uint16_t tcp_hdr;
2073     uint16_t tcp_flag;
2074 
2075     tcp_flag = htons(tcp->th_offset_flags);
2076     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2077     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2078     if (tcp_flag & TH_SYN) {
2079         chain->stat.tcp_syn++;
2080         return RSC_BYPASS;
2081     }
2082 
2083     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2084         chain->stat.tcp_ctrl_drain++;
2085         return RSC_FINAL;
2086     }
2087 
2088     if (tcp_hdr > sizeof(struct tcp_header)) {
2089         chain->stat.tcp_all_opt++;
2090         return RSC_FINAL;
2091     }
2092 
2093     return RSC_CANDIDATE;
2094 }
2095 
2096 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2097                                          NetClientState *nc,
2098                                          const uint8_t *buf, size_t size,
2099                                          VirtioNetRscUnit *unit)
2100 {
2101     int ret;
2102     VirtioNetRscSeg *seg, *nseg;
2103 
2104     if (QTAILQ_EMPTY(&chain->buffers)) {
2105         chain->stat.empty_cache++;
2106         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2107         timer_mod(chain->drain_timer,
2108               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2109         return size;
2110     }
2111 
2112     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2113         if (chain->proto == ETH_P_IP) {
2114             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2115         } else {
2116             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2117         }
2118 
2119         if (ret == RSC_FINAL) {
2120             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2121                 /* Send failed */
2122                 chain->stat.final_failed++;
2123                 return 0;
2124             }
2125 
2126             /* Send current packet */
2127             return virtio_net_do_receive(nc, buf, size);
2128         } else if (ret == RSC_NO_MATCH) {
2129             continue;
2130         } else {
2131             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2132             seg->is_coalesced = 1;
2133             return size;
2134         }
2135     }
2136 
2137     chain->stat.no_match_cache++;
2138     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2139     return size;
2140 }
2141 
2142 /* Drain a connection data, this is to avoid out of order segments */
2143 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2144                                         NetClientState *nc,
2145                                         const uint8_t *buf, size_t size,
2146                                         uint16_t ip_start, uint16_t ip_size,
2147                                         uint16_t tcp_port)
2148 {
2149     VirtioNetRscSeg *seg, *nseg;
2150     uint32_t ppair1, ppair2;
2151 
2152     ppair1 = *(uint32_t *)(buf + tcp_port);
2153     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2154         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2155         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2156             || (ppair1 != ppair2)) {
2157             continue;
2158         }
2159         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2160             chain->stat.drain_failed++;
2161         }
2162 
2163         break;
2164     }
2165 
2166     return virtio_net_do_receive(nc, buf, size);
2167 }
2168 
2169 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2170                                             struct ip_header *ip,
2171                                             const uint8_t *buf, size_t size)
2172 {
2173     uint16_t ip_len;
2174 
2175     /* Not an ipv4 packet */
2176     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2177         chain->stat.ip_option++;
2178         return RSC_BYPASS;
2179     }
2180 
2181     /* Don't handle packets with ip option */
2182     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2183         chain->stat.ip_option++;
2184         return RSC_BYPASS;
2185     }
2186 
2187     if (ip->ip_p != IPPROTO_TCP) {
2188         chain->stat.bypass_not_tcp++;
2189         return RSC_BYPASS;
2190     }
2191 
2192     /* Don't handle packets with ip fragment */
2193     if (!(htons(ip->ip_off) & IP_DF)) {
2194         chain->stat.ip_frag++;
2195         return RSC_BYPASS;
2196     }
2197 
2198     /* Don't handle packets with ecn flag */
2199     if (IPTOS_ECN(ip->ip_tos)) {
2200         chain->stat.ip_ecn++;
2201         return RSC_BYPASS;
2202     }
2203 
2204     ip_len = htons(ip->ip_len);
2205     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2206         || ip_len > (size - chain->n->guest_hdr_len -
2207                      sizeof(struct eth_header))) {
2208         chain->stat.ip_hacked++;
2209         return RSC_BYPASS;
2210     }
2211 
2212     return RSC_CANDIDATE;
2213 }
2214 
2215 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2216                                       NetClientState *nc,
2217                                       const uint8_t *buf, size_t size)
2218 {
2219     int32_t ret;
2220     uint16_t hdr_len;
2221     VirtioNetRscUnit unit;
2222 
2223     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2224 
2225     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2226         + sizeof(struct tcp_header))) {
2227         chain->stat.bypass_not_tcp++;
2228         return virtio_net_do_receive(nc, buf, size);
2229     }
2230 
2231     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2232     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2233         != RSC_CANDIDATE) {
2234         return virtio_net_do_receive(nc, buf, size);
2235     }
2236 
2237     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2238     if (ret == RSC_BYPASS) {
2239         return virtio_net_do_receive(nc, buf, size);
2240     } else if (ret == RSC_FINAL) {
2241         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2242                 ((hdr_len + sizeof(struct eth_header)) + 12),
2243                 VIRTIO_NET_IP4_ADDR_SIZE,
2244                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2245     }
2246 
2247     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2248 }
2249 
2250 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2251                                             struct ip6_header *ip6,
2252                                             const uint8_t *buf, size_t size)
2253 {
2254     uint16_t ip_len;
2255 
2256     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2257         != IP_HEADER_VERSION_6) {
2258         return RSC_BYPASS;
2259     }
2260 
2261     /* Both option and protocol is checked in this */
2262     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2263         chain->stat.bypass_not_tcp++;
2264         return RSC_BYPASS;
2265     }
2266 
2267     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2268     if (ip_len < sizeof(struct tcp_header) ||
2269         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2270                   - sizeof(struct ip6_header))) {
2271         chain->stat.ip_hacked++;
2272         return RSC_BYPASS;
2273     }
2274 
2275     /* Don't handle packets with ecn flag */
2276     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2277         chain->stat.ip_ecn++;
2278         return RSC_BYPASS;
2279     }
2280 
2281     return RSC_CANDIDATE;
2282 }
2283 
2284 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2285                                       const uint8_t *buf, size_t size)
2286 {
2287     int32_t ret;
2288     uint16_t hdr_len;
2289     VirtioNetRscChain *chain;
2290     VirtioNetRscUnit unit;
2291 
2292     chain = (VirtioNetRscChain *)opq;
2293     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2294 
2295     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2296         + sizeof(tcp_header))) {
2297         return virtio_net_do_receive(nc, buf, size);
2298     }
2299 
2300     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2301     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2302                                                  unit.ip, buf, size)) {
2303         return virtio_net_do_receive(nc, buf, size);
2304     }
2305 
2306     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2307     if (ret == RSC_BYPASS) {
2308         return virtio_net_do_receive(nc, buf, size);
2309     } else if (ret == RSC_FINAL) {
2310         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2311                 ((hdr_len + sizeof(struct eth_header)) + 8),
2312                 VIRTIO_NET_IP6_ADDR_SIZE,
2313                 hdr_len + sizeof(struct eth_header)
2314                 + sizeof(struct ip6_header));
2315     }
2316 
2317     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2318 }
2319 
2320 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2321                                                       NetClientState *nc,
2322                                                       uint16_t proto)
2323 {
2324     VirtioNetRscChain *chain;
2325 
2326     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2327         return NULL;
2328     }
2329 
2330     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2331         if (chain->proto == proto) {
2332             return chain;
2333         }
2334     }
2335 
2336     chain = g_malloc(sizeof(*chain));
2337     chain->n = n;
2338     chain->proto = proto;
2339     if (proto == (uint16_t)ETH_P_IP) {
2340         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2341         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2342     } else {
2343         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2344         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2345     }
2346     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2347                                       virtio_net_rsc_purge, chain);
2348     memset(&chain->stat, 0, sizeof(chain->stat));
2349 
2350     QTAILQ_INIT(&chain->buffers);
2351     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2352 
2353     return chain;
2354 }
2355 
2356 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2357                                       const uint8_t *buf,
2358                                       size_t size)
2359 {
2360     uint16_t proto;
2361     VirtioNetRscChain *chain;
2362     struct eth_header *eth;
2363     VirtIONet *n;
2364 
2365     n = qemu_get_nic_opaque(nc);
2366     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2367         return virtio_net_do_receive(nc, buf, size);
2368     }
2369 
2370     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2371     proto = htons(eth->h_proto);
2372 
2373     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2374     if (chain) {
2375         chain->stat.received++;
2376         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2377             return virtio_net_rsc_receive4(chain, nc, buf, size);
2378         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2379             return virtio_net_rsc_receive6(chain, nc, buf, size);
2380         }
2381     }
2382     return virtio_net_do_receive(nc, buf, size);
2383 }
2384 
2385 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2386                                   size_t size)
2387 {
2388     VirtIONet *n = qemu_get_nic_opaque(nc);
2389     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2390         return virtio_net_rsc_receive(nc, buf, size);
2391     } else {
2392         return virtio_net_do_receive(nc, buf, size);
2393     }
2394 }
2395 
2396 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2397 
2398 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2399 {
2400     VirtIONet *n = qemu_get_nic_opaque(nc);
2401     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2402     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2403 
2404     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2405     virtio_notify(vdev, q->tx_vq);
2406 
2407     g_free(q->async_tx.elem);
2408     q->async_tx.elem = NULL;
2409 
2410     virtio_queue_set_notification(q->tx_vq, 1);
2411     virtio_net_flush_tx(q);
2412 }
2413 
2414 /* TX */
2415 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2416 {
2417     VirtIONet *n = q->n;
2418     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2419     VirtQueueElement *elem;
2420     int32_t num_packets = 0;
2421     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2422     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2423         return num_packets;
2424     }
2425 
2426     if (q->async_tx.elem) {
2427         virtio_queue_set_notification(q->tx_vq, 0);
2428         return num_packets;
2429     }
2430 
2431     for (;;) {
2432         ssize_t ret;
2433         unsigned int out_num;
2434         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2435         struct virtio_net_hdr_mrg_rxbuf mhdr;
2436 
2437         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2438         if (!elem) {
2439             break;
2440         }
2441 
2442         out_num = elem->out_num;
2443         out_sg = elem->out_sg;
2444         if (out_num < 1) {
2445             virtio_error(vdev, "virtio-net header not in first element");
2446             virtqueue_detach_element(q->tx_vq, elem, 0);
2447             g_free(elem);
2448             return -EINVAL;
2449         }
2450 
2451         if (n->has_vnet_hdr) {
2452             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2453                 n->guest_hdr_len) {
2454                 virtio_error(vdev, "virtio-net header incorrect");
2455                 virtqueue_detach_element(q->tx_vq, elem, 0);
2456                 g_free(elem);
2457                 return -EINVAL;
2458             }
2459             if (n->needs_vnet_hdr_swap) {
2460                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2461                 sg2[0].iov_base = &mhdr;
2462                 sg2[0].iov_len = n->guest_hdr_len;
2463                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2464                                    out_sg, out_num,
2465                                    n->guest_hdr_len, -1);
2466                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2467                     goto drop;
2468                 }
2469                 out_num += 1;
2470                 out_sg = sg2;
2471             }
2472         }
2473         /*
2474          * If host wants to see the guest header as is, we can
2475          * pass it on unchanged. Otherwise, copy just the parts
2476          * that host is interested in.
2477          */
2478         assert(n->host_hdr_len <= n->guest_hdr_len);
2479         if (n->host_hdr_len != n->guest_hdr_len) {
2480             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2481                                        out_sg, out_num,
2482                                        0, n->host_hdr_len);
2483             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2484                              out_sg, out_num,
2485                              n->guest_hdr_len, -1);
2486             out_num = sg_num;
2487             out_sg = sg;
2488         }
2489 
2490         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2491                                       out_sg, out_num, virtio_net_tx_complete);
2492         if (ret == 0) {
2493             virtio_queue_set_notification(q->tx_vq, 0);
2494             q->async_tx.elem = elem;
2495             return -EBUSY;
2496         }
2497 
2498 drop:
2499         virtqueue_push(q->tx_vq, elem, 0);
2500         virtio_notify(vdev, q->tx_vq);
2501         g_free(elem);
2502 
2503         if (++num_packets >= n->tx_burst) {
2504             break;
2505         }
2506     }
2507     return num_packets;
2508 }
2509 
2510 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2511 {
2512     VirtIONet *n = VIRTIO_NET(vdev);
2513     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2514 
2515     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2516         virtio_net_drop_tx_queue_data(vdev, vq);
2517         return;
2518     }
2519 
2520     /* This happens when device was stopped but VCPU wasn't. */
2521     if (!vdev->vm_running) {
2522         q->tx_waiting = 1;
2523         return;
2524     }
2525 
2526     if (q->tx_waiting) {
2527         virtio_queue_set_notification(vq, 1);
2528         timer_del(q->tx_timer);
2529         q->tx_waiting = 0;
2530         if (virtio_net_flush_tx(q) == -EINVAL) {
2531             return;
2532         }
2533     } else {
2534         timer_mod(q->tx_timer,
2535                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2536         q->tx_waiting = 1;
2537         virtio_queue_set_notification(vq, 0);
2538     }
2539 }
2540 
2541 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2542 {
2543     VirtIONet *n = VIRTIO_NET(vdev);
2544     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2545 
2546     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2547         virtio_net_drop_tx_queue_data(vdev, vq);
2548         return;
2549     }
2550 
2551     if (unlikely(q->tx_waiting)) {
2552         return;
2553     }
2554     q->tx_waiting = 1;
2555     /* This happens when device was stopped but VCPU wasn't. */
2556     if (!vdev->vm_running) {
2557         return;
2558     }
2559     virtio_queue_set_notification(vq, 0);
2560     qemu_bh_schedule(q->tx_bh);
2561 }
2562 
2563 static void virtio_net_tx_timer(void *opaque)
2564 {
2565     VirtIONetQueue *q = opaque;
2566     VirtIONet *n = q->n;
2567     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2568     /* This happens when device was stopped but BH wasn't. */
2569     if (!vdev->vm_running) {
2570         /* Make sure tx waiting is set, so we'll run when restarted. */
2571         assert(q->tx_waiting);
2572         return;
2573     }
2574 
2575     q->tx_waiting = 0;
2576 
2577     /* Just in case the driver is not ready on more */
2578     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2579         return;
2580     }
2581 
2582     virtio_queue_set_notification(q->tx_vq, 1);
2583     virtio_net_flush_tx(q);
2584 }
2585 
2586 static void virtio_net_tx_bh(void *opaque)
2587 {
2588     VirtIONetQueue *q = opaque;
2589     VirtIONet *n = q->n;
2590     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2591     int32_t ret;
2592 
2593     /* This happens when device was stopped but BH wasn't. */
2594     if (!vdev->vm_running) {
2595         /* Make sure tx waiting is set, so we'll run when restarted. */
2596         assert(q->tx_waiting);
2597         return;
2598     }
2599 
2600     q->tx_waiting = 0;
2601 
2602     /* Just in case the driver is not ready on more */
2603     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2604         return;
2605     }
2606 
2607     ret = virtio_net_flush_tx(q);
2608     if (ret == -EBUSY || ret == -EINVAL) {
2609         return; /* Notification re-enable handled by tx_complete or device
2610                  * broken */
2611     }
2612 
2613     /* If we flush a full burst of packets, assume there are
2614      * more coming and immediately reschedule */
2615     if (ret >= n->tx_burst) {
2616         qemu_bh_schedule(q->tx_bh);
2617         q->tx_waiting = 1;
2618         return;
2619     }
2620 
2621     /* If less than a full burst, re-enable notification and flush
2622      * anything that may have come in while we weren't looking.  If
2623      * we find something, assume the guest is still active and reschedule */
2624     virtio_queue_set_notification(q->tx_vq, 1);
2625     ret = virtio_net_flush_tx(q);
2626     if (ret == -EINVAL) {
2627         return;
2628     } else if (ret > 0) {
2629         virtio_queue_set_notification(q->tx_vq, 0);
2630         qemu_bh_schedule(q->tx_bh);
2631         q->tx_waiting = 1;
2632     }
2633 }
2634 
2635 static void virtio_net_add_queue(VirtIONet *n, int index)
2636 {
2637     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2638 
2639     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2640                                            virtio_net_handle_rx);
2641 
2642     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2643         n->vqs[index].tx_vq =
2644             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2645                              virtio_net_handle_tx_timer);
2646         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2647                                               virtio_net_tx_timer,
2648                                               &n->vqs[index]);
2649     } else {
2650         n->vqs[index].tx_vq =
2651             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2652                              virtio_net_handle_tx_bh);
2653         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2654     }
2655 
2656     n->vqs[index].tx_waiting = 0;
2657     n->vqs[index].n = n;
2658 }
2659 
2660 static void virtio_net_del_queue(VirtIONet *n, int index)
2661 {
2662     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2663     VirtIONetQueue *q = &n->vqs[index];
2664     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2665 
2666     qemu_purge_queued_packets(nc);
2667 
2668     virtio_del_queue(vdev, index * 2);
2669     if (q->tx_timer) {
2670         timer_del(q->tx_timer);
2671         timer_free(q->tx_timer);
2672         q->tx_timer = NULL;
2673     } else {
2674         qemu_bh_delete(q->tx_bh);
2675         q->tx_bh = NULL;
2676     }
2677     q->tx_waiting = 0;
2678     virtio_del_queue(vdev, index * 2 + 1);
2679 }
2680 
2681 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2682 {
2683     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2684     int old_num_queues = virtio_get_num_queues(vdev);
2685     int new_num_queues = new_max_queues * 2 + 1;
2686     int i;
2687 
2688     assert(old_num_queues >= 3);
2689     assert(old_num_queues % 2 == 1);
2690 
2691     if (old_num_queues == new_num_queues) {
2692         return;
2693     }
2694 
2695     /*
2696      * We always need to remove and add ctrl vq if
2697      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2698      * and then we only enter one of the following two loops.
2699      */
2700     virtio_del_queue(vdev, old_num_queues - 1);
2701 
2702     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2703         /* new_num_queues < old_num_queues */
2704         virtio_net_del_queue(n, i / 2);
2705     }
2706 
2707     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2708         /* new_num_queues > old_num_queues */
2709         virtio_net_add_queue(n, i / 2);
2710     }
2711 
2712     /* add ctrl_vq last */
2713     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2714 }
2715 
2716 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2717 {
2718     int max = multiqueue ? n->max_queues : 1;
2719 
2720     n->multiqueue = multiqueue;
2721     virtio_net_change_num_queues(n, max);
2722 
2723     virtio_net_set_queues(n);
2724 }
2725 
2726 static int virtio_net_post_load_device(void *opaque, int version_id)
2727 {
2728     VirtIONet *n = opaque;
2729     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2730     int i, link_down;
2731 
2732     trace_virtio_net_post_load_device();
2733     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2734                                virtio_vdev_has_feature(vdev,
2735                                                        VIRTIO_F_VERSION_1),
2736                                virtio_vdev_has_feature(vdev,
2737                                                        VIRTIO_NET_F_HASH_REPORT));
2738 
2739     /* MAC_TABLE_ENTRIES may be different from the saved image */
2740     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2741         n->mac_table.in_use = 0;
2742     }
2743 
2744     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2745         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2746     }
2747 
2748     /*
2749      * curr_guest_offloads will be later overwritten by the
2750      * virtio_set_features_nocheck call done from the virtio_load.
2751      * Here we make sure it is preserved and restored accordingly
2752      * in the virtio_net_post_load_virtio callback.
2753      */
2754     n->saved_guest_offloads = n->curr_guest_offloads;
2755 
2756     virtio_net_set_queues(n);
2757 
2758     /* Find the first multicast entry in the saved MAC filter */
2759     for (i = 0; i < n->mac_table.in_use; i++) {
2760         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2761             break;
2762         }
2763     }
2764     n->mac_table.first_multi = i;
2765 
2766     /* nc.link_down can't be migrated, so infer link_down according
2767      * to link status bit in n->status */
2768     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2769     for (i = 0; i < n->max_queues; i++) {
2770         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2771     }
2772 
2773     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2774         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2775         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2776                                   QEMU_CLOCK_VIRTUAL,
2777                                   virtio_net_announce_timer, n);
2778         if (n->announce_timer.round) {
2779             timer_mod(n->announce_timer.tm,
2780                       qemu_clock_get_ms(n->announce_timer.type));
2781         } else {
2782             qemu_announce_timer_del(&n->announce_timer, false);
2783         }
2784     }
2785 
2786     if (n->rss_data.enabled) {
2787         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2788                                     n->rss_data.indirections_len,
2789                                     sizeof(n->rss_data.key));
2790     } else {
2791         trace_virtio_net_rss_disable();
2792     }
2793     return 0;
2794 }
2795 
2796 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2797 {
2798     VirtIONet *n = VIRTIO_NET(vdev);
2799     /*
2800      * The actual needed state is now in saved_guest_offloads,
2801      * see virtio_net_post_load_device for detail.
2802      * Restore it back and apply the desired offloads.
2803      */
2804     n->curr_guest_offloads = n->saved_guest_offloads;
2805     if (peer_has_vnet_hdr(n)) {
2806         virtio_net_apply_guest_offloads(n);
2807     }
2808 
2809     return 0;
2810 }
2811 
/*
 * Migration description of a single VirtIONetQueue: only its tx_waiting
 * field is transferred, as a 32-bit unsigned value.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2820 
2821 static bool max_queues_gt_1(void *opaque, int version_id)
2822 {
2823     return VIRTIO_NET(opaque)->max_queues > 1;
2824 }
2825 
2826 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2827 {
2828     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2829                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2830 }
2831 
2832 static bool mac_table_fits(void *opaque, int version_id)
2833 {
2834     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2835 }
2836 
2837 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2838 {
2839     return !mac_table_fits(opaque, version_id);
2840 }
2841 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* Device being migrated; presumably filled in by the VMSTATE_WITH_TMP
     * machinery before the callbacks run - TODO confirm */
    VirtIONet      *parent;
    /* parent->vqs + 1, set by virtio_net_tx_waiting_pre_save */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queues - 1 (0 when curr_queues is 0) */
    uint16_t        curr_queues_1;
    /* copy of parent->has_ufo; checked against the peer on load */
    uint8_t         has_ufo;
    /* copy of parent->has_vnet_hdr; checked against the peer on load */
    uint32_t        has_vnet_hdr;
};
2852 
2853 /* The 2nd and subsequent tx_waiting flags are loaded later than
2854  * the 1st entry in the queues and only if there's more than one
2855  * entry.  We use the tmp mechanism to calculate a temporary
2856  * pointer and count and also validate the count.
2857  */
2858 
2859 static int virtio_net_tx_waiting_pre_save(void *opaque)
2860 {
2861     struct VirtIONetMigTmp *tmp = opaque;
2862 
2863     tmp->vqs_1 = tmp->parent->vqs + 1;
2864     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2865     if (tmp->parent->curr_queues == 0) {
2866         tmp->curr_queues_1 = 0;
2867     }
2868 
2869     return 0;
2870 }
2871 
2872 static int virtio_net_tx_waiting_pre_load(void *opaque)
2873 {
2874     struct VirtIONetMigTmp *tmp = opaque;
2875 
2876     /* Reuse the pointer setup from save */
2877     virtio_net_tx_waiting_pre_save(opaque);
2878 
2879     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2880         error_report("virtio-net: curr_queues %x > max_queues %x",
2881             tmp->parent->curr_queues, tmp->parent->max_queues);
2882 
2883         return -EINVAL;
2884     }
2885 
2886     return 0; /* all good */
2887 }
2888 
/*
 * Migration description, operating on a struct VirtIONetMigTmp, for the
 * tx_waiting state of the queues after the first one: a variable-length
 * array of curr_queues_1 entries starting at vqs_1, each described by
 * vmstate_virtio_net_queue_tx_waiting.  The pre_load/pre_save hooks fill
 * in vqs_1 and curr_queues_1.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2901 
2902 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2903  * flag set we need to check that we have it
2904  */
2905 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2906 {
2907     struct VirtIONetMigTmp *tmp = opaque;
2908 
2909     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2910         error_report("virtio-net: saved image requires TUN_F_UFO support");
2911         return -EINVAL;
2912     }
2913 
2914     return 0;
2915 }
2916 
2917 static int virtio_net_ufo_pre_save(void *opaque)
2918 {
2919     struct VirtIONetMigTmp *tmp = opaque;
2920 
2921     tmp->has_ufo = tmp->parent->has_ufo;
2922 
2923     return 0;
2924 }
2925 
/*
 * Migration description, operating on a struct VirtIONetMigTmp, for the
 * has_ufo flag: one byte filled in by pre_save and validated against the
 * peer by post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2935 
2936 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2937  * flag set we need to check that we have it
2938  */
2939 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2940 {
2941     struct VirtIONetMigTmp *tmp = opaque;
2942 
2943     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2944         error_report("virtio-net: saved image requires vnet_hdr=on");
2945         return -EINVAL;
2946     }
2947 
2948     return 0;
2949 }
2950 
2951 static int virtio_net_vnet_pre_save(void *opaque)
2952 {
2953     struct VirtIONetMigTmp *tmp = opaque;
2954 
2955     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2956 
2957     return 0;
2958 }
2959 
/*
 * Migration description, operating on a struct VirtIONetMigTmp, for the
 * has_vnet_hdr flag: a 32-bit value filled in by pre_save and validated
 * against the peer by post_load.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2969 
2970 static bool virtio_net_rss_needed(void *opaque)
2971 {
2972     return VIRTIO_NET(opaque)->rss_data.enabled;
2973 }
2974 
/*
 * Optional subsection carrying the RSS configuration (rss_data).  It is
 * included only when virtio_net_rss_needed() returns true.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* indirections_len (above) gives the element count of the table */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
2995 
/*
 * Migration description of the virtio-net specific device state.
 *
 * The order of the entries in .fields defines the layout of the stream,
 * so entries must not be reordered.  virtio_net_post_load_device runs
 * once the fields have been loaded; the RSS state travels in an optional
 * subsection.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue; the others follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr: checked against the peer, see the vnet callbacks */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo: checked against the peer, see the ufo callbacks */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The next two entries are present only when max_queues > 1 */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        /* tx_waiting of the 2nd and subsequent queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3051 
/*
 * Net client description handed to qemu_new_nic() in realize: it names
 * the virtio-net callbacks the net layer uses for this NIC.
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3061 
3062 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3063 {
3064     VirtIONet *n = VIRTIO_NET(vdev);
3065     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3066     assert(n->vhost_started);
3067     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3068 }
3069 
3070 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3071                                            bool mask)
3072 {
3073     VirtIONet *n = VIRTIO_NET(vdev);
3074     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3075     assert(n->vhost_started);
3076     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3077                              vdev, idx, mask);
3078 }
3079 
3080 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3081 {
3082     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3083 
3084     n->config_size = virtio_feature_get_config_size(feature_sizes,
3085                                                     host_features);
3086 }
3087 
3088 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3089                                    const char *type)
3090 {
3091     /*
3092      * The name can be NULL, the netclient name will be type.x.
3093      */
3094     assert(type != NULL);
3095 
3096     g_free(n->netclient_name);
3097     g_free(n->netclient_type);
3098     n->netclient_name = g_strdup(name);
3099     n->netclient_type = g_strdup(type);
3100 }
3101 
3102 static bool failover_unplug_primary(VirtIONet *n)
3103 {
3104     HotplugHandler *hotplug_ctrl;
3105     PCIDevice *pci_dev;
3106     Error *err = NULL;
3107 
3108     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3109     if (hotplug_ctrl) {
3110         pci_dev = PCI_DEVICE(n->primary_dev);
3111         pci_dev->partially_hotplugged = true;
3112         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3113         if (err) {
3114             error_report_err(err);
3115             return false;
3116         }
3117     } else {
3118         return false;
3119     }
3120     return true;
3121 }
3122 
3123 static bool failover_replug_primary(VirtIONet *n, Error **errp)
3124 {
3125     Error *err = NULL;
3126     HotplugHandler *hotplug_ctrl;
3127     PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3128 
3129     if (!pdev->partially_hotplugged) {
3130         return true;
3131     }
3132     if (!n->primary_device_opts) {
3133         n->primary_device_opts = qemu_opts_from_qdict(
3134                 qemu_find_opts("device"),
3135                 n->primary_device_dict, errp);
3136         if (!n->primary_device_opts) {
3137             return false;
3138         }
3139     }
3140     n->primary_bus = n->primary_dev->parent_bus;
3141     if (!n->primary_bus) {
3142         error_setg(errp, "virtio_net: couldn't find primary bus");
3143         return false;
3144     }
3145     qdev_set_parent_bus(n->primary_dev, n->primary_bus);
3146     n->primary_should_be_hidden = false;
3147     if (!qemu_opt_set_bool(n->primary_device_opts,
3148                            "partially_hotplugged", true, errp)) {
3149         return false;
3150     }
3151     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3152     if (hotplug_ctrl) {
3153         hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3154         if (err) {
3155             goto out;
3156         }
3157         hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3158     }
3159 
3160 out:
3161     error_propagate(errp, err);
3162     return !err;
3163 }
3164 
3165 static void virtio_net_handle_migration_primary(VirtIONet *n,
3166                                                 MigrationState *s)
3167 {
3168     bool should_be_hidden;
3169     Error *err = NULL;
3170 
3171     should_be_hidden = atomic_read(&n->primary_should_be_hidden);
3172 
3173     if (!n->primary_dev) {
3174         n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
3175         if (!n->primary_dev) {
3176             return;
3177         }
3178     }
3179 
3180     if (migration_in_setup(s) && !should_be_hidden) {
3181         if (failover_unplug_primary(n)) {
3182             vmstate_unregister(VMSTATE_IF(n->primary_dev),
3183                     qdev_get_vmsd(n->primary_dev),
3184                     n->primary_dev);
3185             qapi_event_send_unplug_primary(n->primary_device_id);
3186             atomic_set(&n->primary_should_be_hidden, true);
3187         } else {
3188             warn_report("couldn't unplug primary device");
3189         }
3190     } else if (migration_has_failed(s)) {
3191         /* We already unplugged the device let's plug it back */
3192         if (!failover_replug_primary(n, &err)) {
3193             if (err) {
3194                 error_report_err(err);
3195             }
3196         }
3197     }
3198 }
3199 
3200 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3201 {
3202     MigrationState *s = data;
3203     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3204     virtio_net_handle_migration_primary(n, s);
3205 }
3206 
3207 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3208             QemuOpts *device_opts)
3209 {
3210     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3211     bool match_found = false;
3212     bool hide = false;
3213 
3214     if (!device_opts) {
3215         return -1;
3216     }
3217     n->primary_device_dict = qemu_opts_to_qdict(device_opts,
3218             n->primary_device_dict);
3219     if (n->primary_device_dict) {
3220         g_free(n->standby_id);
3221         n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
3222                     "failover_pair_id"));
3223     }
3224     if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
3225         match_found = true;
3226     } else {
3227         match_found = false;
3228         hide = false;
3229         g_free(n->standby_id);
3230         n->primary_device_dict = NULL;
3231         goto out;
3232     }
3233 
3234     n->primary_device_opts = device_opts;
3235 
3236     /* primary_should_be_hidden is set during feature negotiation */
3237     hide = atomic_read(&n->primary_should_be_hidden);
3238 
3239     if (n->primary_device_dict) {
3240         g_free(n->primary_device_id);
3241         n->primary_device_id = g_strdup(qdict_get_try_str(
3242                     n->primary_device_dict, "id"));
3243         if (!n->primary_device_id) {
3244             warn_report("primary_device_id not set");
3245         }
3246     }
3247 
3248 out:
3249     if (match_found && hide) {
3250         return 1;
3251     } else if (match_found && !hide) {
3252         return 0;
3253     } else {
3254         return -1;
3255     }
3256 }
3257 
3258 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3259 {
3260     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3261     VirtIONet *n = VIRTIO_NET(dev);
3262     NetClientState *nc;
3263     int i;
3264 
3265     if (n->net_conf.mtu) {
3266         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3267     }
3268 
3269     if (n->net_conf.duplex_str) {
3270         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3271             n->net_conf.duplex = DUPLEX_HALF;
3272         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3273             n->net_conf.duplex = DUPLEX_FULL;
3274         } else {
3275             error_setg(errp, "'duplex' must be 'half' or 'full'");
3276             return;
3277         }
3278         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3279     } else {
3280         n->net_conf.duplex = DUPLEX_UNKNOWN;
3281     }
3282 
3283     if (n->net_conf.speed < SPEED_UNKNOWN) {
3284         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3285         return;
3286     }
3287     if (n->net_conf.speed >= 0) {
3288         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3289     }
3290 
3291     if (n->failover) {
3292         n->primary_listener.should_be_hidden =
3293             virtio_net_primary_should_be_hidden;
3294         atomic_set(&n->primary_should_be_hidden, true);
3295         device_listener_register(&n->primary_listener);
3296         n->migration_state.notify = virtio_net_migration_state_notifier;
3297         add_migration_state_change_notifier(&n->migration_state);
3298         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3299     }
3300 
3301     virtio_net_set_config_size(n, n->host_features);
3302     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3303 
3304     /*
3305      * We set a lower limit on RX queue size to what it always was.
3306      * Guests that want a smaller ring can always resize it without
3307      * help from us (using virtio 1 and up).
3308      */
3309     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3310         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3311         !is_power_of_2(n->net_conf.rx_queue_size)) {
3312         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3313                    "must be a power of 2 between %d and %d.",
3314                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3315                    VIRTQUEUE_MAX_SIZE);
3316         virtio_cleanup(vdev);
3317         return;
3318     }
3319 
3320     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3321         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3322         !is_power_of_2(n->net_conf.tx_queue_size)) {
3323         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3324                    "must be a power of 2 between %d and %d",
3325                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3326                    VIRTQUEUE_MAX_SIZE);
3327         virtio_cleanup(vdev);
3328         return;
3329     }
3330 
3331     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3332     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3333         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3334                    "must be a positive integer less than %d.",
3335                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3336         virtio_cleanup(vdev);
3337         return;
3338     }
3339     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3340     n->curr_queues = 1;
3341     n->tx_timeout = n->net_conf.txtimer;
3342 
3343     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3344                        && strcmp(n->net_conf.tx, "bh")) {
3345         warn_report("virtio-net: "
3346                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3347                     n->net_conf.tx);
3348         error_printf("Defaulting to \"bh\"");
3349     }
3350 
3351     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3352                                     n->net_conf.tx_queue_size);
3353 
3354     for (i = 0; i < n->max_queues; i++) {
3355         virtio_net_add_queue(n, i);
3356     }
3357 
3358     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3359     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3360     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3361     n->status = VIRTIO_NET_S_LINK_UP;
3362     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3363                               QEMU_CLOCK_VIRTUAL,
3364                               virtio_net_announce_timer, n);
3365     n->announce_timer.round = 0;
3366 
3367     if (n->netclient_type) {
3368         /*
3369          * Happen when virtio_net_set_netclient_name has been called.
3370          */
3371         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3372                               n->netclient_type, n->netclient_name, n);
3373     } else {
3374         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3375                               object_get_typename(OBJECT(dev)), dev->id, n);
3376     }
3377 
3378     peer_test_vnet_hdr(n);
3379     if (peer_has_vnet_hdr(n)) {
3380         for (i = 0; i < n->max_queues; i++) {
3381             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3382         }
3383         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3384     } else {
3385         n->host_hdr_len = 0;
3386     }
3387 
3388     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3389 
3390     n->vqs[0].tx_waiting = 0;
3391     n->tx_burst = n->net_conf.txburst;
3392     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3393     n->promisc = 1; /* for compatibility */
3394 
3395     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3396 
3397     n->vlans = g_malloc0(MAX_VLAN >> 3);
3398 
3399     nc = qemu_get_queue(n->nic);
3400     nc->rxfilter_notify_enabled = 1;
3401 
3402     QTAILQ_INIT(&n->rsc_chains);
3403     n->qdev = dev;
3404 
3405     net_rx_pkt_init(&n->rx_pkt, false);
3406 }
3407 
3408 static void virtio_net_device_unrealize(DeviceState *dev)
3409 {
3410     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3411     VirtIONet *n = VIRTIO_NET(dev);
3412     int i, max_queues;
3413 
3414     /* This will stop vhost backend if appropriate. */
3415     virtio_net_set_status(vdev, 0);
3416 
3417     g_free(n->netclient_name);
3418     n->netclient_name = NULL;
3419     g_free(n->netclient_type);
3420     n->netclient_type = NULL;
3421 
3422     g_free(n->mac_table.macs);
3423     g_free(n->vlans);
3424 
3425     if (n->failover) {
3426         device_listener_unregister(&n->primary_listener);
3427         g_free(n->primary_device_id);
3428         g_free(n->standby_id);
3429         qobject_unref(n->primary_device_dict);
3430         n->primary_device_dict = NULL;
3431     }
3432 
3433     max_queues = n->multiqueue ? n->max_queues : 1;
3434     for (i = 0; i < max_queues; i++) {
3435         virtio_net_del_queue(n, i);
3436     }
3437     /* delete also control vq */
3438     virtio_del_queue(vdev, max_queues * 2);
3439     qemu_announce_timer_del(&n->announce_timer, false);
3440     g_free(n->vqs);
3441     qemu_del_nic(n->nic);
3442     virtio_net_rsc_cleanup(n);
3443     g_free(n->rss_data.indirections_table);
3444     net_rx_pkt_uninit(n->rx_pkt);
3445     virtio_cleanup(vdev);
3446 }
3447 
3448 static void virtio_net_instance_init(Object *obj)
3449 {
3450     VirtIONet *n = VIRTIO_NET(obj);
3451 
3452     /*
3453      * The default config_size is sizeof(struct virtio_net_config).
3454      * Can be overriden with virtio_net_set_config_size.
3455      */
3456     n->config_size = sizeof(struct virtio_net_config);
3457     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3458                                   "bootindex", "/ethernet-phy@0",
3459                                   DEVICE(n));
3460 }
3461 
3462 static int virtio_net_pre_save(void *opaque)
3463 {
3464     VirtIONet *n = opaque;
3465 
3466     /* At this point, backend must be stopped, otherwise
3467      * it might keep writing to memory. */
3468     assert(!n->vhost_started);
3469 
3470     return 0;
3471 }
3472 
3473 static bool primary_unplug_pending(void *opaque)
3474 {
3475     DeviceState *dev = opaque;
3476     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3477     VirtIONet *n = VIRTIO_NET(vdev);
3478 
3479     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3480         return false;
3481     }
3482     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3483 }
3484 
3485 static bool dev_unplug_pending(void *opaque)
3486 {
3487     DeviceState *dev = opaque;
3488     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3489 
3490     return vdc->primary_unplug_pending(dev);
3491 }
3492 
/*
 * Top-level migration description of the virtio-net device.  Its only
 * field is the generic VMSTATE_VIRTIO_DEVICE entry; pre_save asserts
 * that vhost is stopped and dev_unplug_pending reports a pending
 * failover primary unplug.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3504 
3505 static Property virtio_net_properties[] = {
3506     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3507                     VIRTIO_NET_F_CSUM, true),
3508     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3509                     VIRTIO_NET_F_GUEST_CSUM, true),
3510     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3511     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3512                     VIRTIO_NET_F_GUEST_TSO4, true),
3513     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3514                     VIRTIO_NET_F_GUEST_TSO6, true),
3515     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3516                     VIRTIO_NET_F_GUEST_ECN, true),
3517     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3518                     VIRTIO_NET_F_GUEST_UFO, true),
3519     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3520                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3521     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3522                     VIRTIO_NET_F_HOST_TSO4, true),
3523     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3524                     VIRTIO_NET_F_HOST_TSO6, true),
3525     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3526                     VIRTIO_NET_F_HOST_ECN, true),
3527     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3528                     VIRTIO_NET_F_HOST_UFO, true),
3529     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3530                     VIRTIO_NET_F_MRG_RXBUF, true),
3531     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3532                     VIRTIO_NET_F_STATUS, true),
3533     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3534                     VIRTIO_NET_F_CTRL_VQ, true),
3535     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3536                     VIRTIO_NET_F_CTRL_RX, true),
3537     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3538                     VIRTIO_NET_F_CTRL_VLAN, true),
3539     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3540                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3541     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3542                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3543     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3544                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3545     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3546     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3547                     VIRTIO_NET_F_RSS, false),
3548     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3549                     VIRTIO_NET_F_HASH_REPORT, false),
3550     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3551                     VIRTIO_NET_F_RSC_EXT, false),
3552     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3553                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3554     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3555     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3556                        TX_TIMER_INTERVAL),
3557     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3558     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3559     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3560                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3561     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3562                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3563     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3564     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3565                      true),
3566     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3567     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3568     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3569     DEFINE_PROP_END_OF_LIST(),
3570 };
3571 
3572 static void virtio_net_class_init(ObjectClass *klass, void *data)
3573 {
3574     DeviceClass *dc = DEVICE_CLASS(klass);
3575     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3576 
3577     device_class_set_props(dc, virtio_net_properties);
3578     dc->vmsd = &vmstate_virtio_net;
3579     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3580     vdc->realize = virtio_net_device_realize;
3581     vdc->unrealize = virtio_net_device_unrealize;
3582     vdc->get_config = virtio_net_get_config;
3583     vdc->set_config = virtio_net_set_config;
3584     vdc->get_features = virtio_net_get_features;
3585     vdc->set_features = virtio_net_set_features;
3586     vdc->bad_features = virtio_net_bad_features;
3587     vdc->reset = virtio_net_reset;
3588     vdc->set_status = virtio_net_set_status;
3589     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3590     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3591     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3592     vdc->post_load = virtio_net_post_load_virtio;
3593     vdc->vmsd = &vmstate_virtio_net_device;
3594     vdc->primary_unplug_pending = primary_unplug_pending;
3595 }
3596 
3597 static const TypeInfo virtio_net_info = {
3598     .name = TYPE_VIRTIO_NET,
3599     .parent = TYPE_VIRTIO_DEVICE,
3600     .instance_size = sizeof(VirtIONet),
3601     .instance_init = virtio_net_instance_init,
3602     .class_init = virtio_net_class_init,
3603 };
3604 
3605 static void virtio_register_types(void)
3606 {
3607     type_register_static(&virtio_net_info);
3608 }
3609 
3610 type_init(virtio_register_types)
3611