xref: /openbmc/qemu/hw/net/virtio-net.c (revision 10df8ff1)
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in the IP header, without options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects the performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof_field(container, field))
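
/*
 * For example, assuming the 6-byte 'mac' array is the first field of
 * struct virtio_net_config, endof(struct virtio_net_config, mac) == 6:
 * offsetof() contributes 0 and sizeof_field() the six bytes of the field
 * itself.
 */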

typedef struct VirtIOFeature {
    uint64_t flags;
    size_t end;
} VirtIOFeature;

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};
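
/*
 * A minimal sketch of how this table is consumed (illustrative only; the
 * helper name is hypothetical and unused here): the visible config size
 * is the largest 'end' among the feature bits actually offered, so a
 * device offering only VIRTIO_NET_F_MAC exposes a config space that
 * stops right after 'mac'.
 *
 *     static size_t example_net_config_size(uint64_t host_features)
 *     {
 *         size_t size = endof(struct virtio_net_config, mac);
 *         int i;
 *
 *         for (i = 0; feature_sizes[i].flags != 0; i++) {
 *             if (host_features & feature_sizes[i].flags) {
 *                 size = MAX(feature_sizes[i].end, size);
 *             }
 *         }
 *         return size;
 *     }
 */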

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
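
/*
 * vq2q() works because data virtqueues come in rx/tx pairs: virtqueue 0
 * is rx0, virtqueue 1 is tx0, virtqueue 2 is rx1, and so on, with the
 * control virtqueue placed after all data pairs. The inverse mappings
 * would be (hypothetical helpers, unused here):
 *
 *     static int q2vq_rx(int queue_index) { return queue_index * 2; }
 *     static int q2vq_tx(int queue_index) { return queue_index * 2 + 1; }
 */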

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->announce_counter--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back on fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
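
/*
 * For reference: struct virtio_net_hdr is 10 bytes, while struct
 * virtio_net_hdr_mrg_rxbuf appends a 16-bit num_buffers field for a total
 * of 12 bytes. With VIRTIO_F_VERSION_1 the larger header is always used,
 * even when mergeable rx buffers were not negotiated.
 */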

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all the features virtio-net could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

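/*
 * Per the virtio spec, the VIRTIO_NET_CTRL_MAC_TABLE_SET payload consists
 * of two back-to-back variable-length tables, unicast first and multicast
 * second. Each table is a 32-bit entry count (read with virtio_ldl_p()
 * below to honour the device byte order) followed by that many 6-byte
 * MACs. VIRTIO_NET_CTRL_MAC_ADDR_SET instead carries a single 6-byte MAC.
 */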
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
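
/*
 * The VLAN filter is a plain bitmap of MAX_VLAN bits stored in 32-bit
 * words, hence the 'vid >> 5' word index and '1U << (vid & 0x1f)' bit
 * mask above. A membership test would look like this (hypothetical
 * helper, unused here):
 *
 *     static bool example_vlan_is_set(VirtIONet *n, uint16_t vid)
 *     {
 *         return n->vlans[vid >> 5] & (1U << (vid & 0x1f));
 *     }
 */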

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_counter) {
            timer_mod(n->announce_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      self_announce_delay(n->announce_counter));
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
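
/*
 * The announce handshake runs in rounds: virtio_net_announce_timer()
 * raises VIRTIO_NET_S_ANNOUNCE and triggers a config interrupt, the guest
 * sends its gratuitous packets and acks with VIRTIO_NET_CTRL_ANNOUNCE_ACK,
 * and the handler above re-arms the timer with self_announce_delay()
 * until announce_counter runs out.
 */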

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

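/*
 * Every control-queue request uses the same wire format: a
 * virtio_net_ctrl_hdr { class, cmd } at the start of the driver-provided
 * out buffers, a class-specific payload behind it, and a single status
 * byte (VIRTIO_NET_OK / VIRTIO_NET_ERR) written back into the
 * device-writable in buffer. The loop below peels the header off with
 * iov_discard_front() and hands the remaining iovec to the per-class
 * handlers.
 */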
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
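
/*
 * The double check above is the usual lock-free pattern for virtqueue
 * notification, in sketch form:
 *
 *     if (!has_buffers()) {
 *         enable_notification();
 *         if (!has_buffers()) {   // re-check closes the race window
 *             return 0;           // wait for the guest's next kick
 *         }
 *     }
 *     disable_notification();     // plenty of buffers, no kicks needed
 */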

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

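/*
 * Reception sketch: with VIRTIO_NET_F_MRG_RXBUF one packet may span
 * several descriptor chains, one virtqueue element per chain. num_buffers
 * is only known once the whole packet has been placed, so the loop below
 * records the iovec covering that field of the first element (mhdr_sg)
 * and patches it after the copy has finished.
 */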
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
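    /*
     * th_offset_flags packs the TCP data offset in its top 4 bits, counted
     * in 32-bit words; masking with 0xF000, shifting right by 12 and then
     * multiplying by 4 collapses into the single '>> 10' below.
     */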
1372     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1373     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1374 }
1375 
1376 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1377                                          const uint8_t *buf,
1378                                          VirtioNetRscUnit *unit)
1379 {
1380     struct ip6_header *ip6;
1381 
1382     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1383                                  + sizeof(struct eth_header));
1384     unit->ip = ip6;
1385     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1386     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
1387                                         + sizeof(struct ip6_header));
1388     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1389 
1390     /* There is a difference between payload lenght in ipv4 and v6,
1391        ip header is excluded in ipv6 */
1392     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1393 }
1394 
1395 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1396                                        VirtioNetRscSeg *seg)
1397 {
1398     int ret;
1399     struct virtio_net_hdr *h;
1400 
1401     h = (struct virtio_net_hdr *)seg->buf;
1402     h->flags = 0;
1403     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1404 
1405     if (seg->is_coalesced) {
1406         *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1407         *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1408         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1409         if (chain->proto == ETH_P_IP) {
1410             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1411         } else {
1412             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1413         }
1414     }
1415 
1416     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1417     QTAILQ_REMOVE(&chain->buffers, seg, next);
1418     g_free(seg->buf);
1419     g_free(seg);
1420 
1421     return ret;
1422 }
1423 
1424 static void virtio_net_rsc_purge(void *opq)
1425 {
1426     VirtioNetRscSeg *seg, *rn;
1427     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1428 
1429     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1430         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1431             chain->stat.purge_failed++;
1432             continue;
1433         }
1434     }
1435 
1436     chain->stat.timer++;
1437     if (!QTAILQ_EMPTY(&chain->buffers)) {
1438         timer_mod(chain->drain_timer,
1439               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1440     }
1441 }
1442 
1443 static void virtio_net_rsc_cleanup(VirtIONet *n)
1444 {
1445     VirtioNetRscChain *chain, *rn_chain;
1446     VirtioNetRscSeg *seg, *rn_seg;
1447 
1448     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1449         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1450             QTAILQ_REMOVE(&chain->buffers, seg, next);
1451             g_free(seg->buf);
1452             g_free(seg);
1453         }
1454 
1455         timer_del(chain->drain_timer);
1456         timer_free(chain->drain_timer);
1457         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1458         g_free(chain);
1459     }
1460 }
1461 
1462 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1463                                      NetClientState *nc,
1464                                      const uint8_t *buf, size_t size)
1465 {
1466     uint16_t hdr_len;
1467     VirtioNetRscSeg *seg;
1468 
1469     hdr_len = chain->n->guest_hdr_len;
1470     seg = g_malloc(sizeof(VirtioNetRscSeg));
1471     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1472         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1473     memcpy(seg->buf, buf, size);
1474     seg->size = size;
1475     seg->packets = 1;
1476     seg->dup_ack = 0;
1477     seg->is_coalesced = 0;
1478     seg->nc = nc;
1479 
1480     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1481     chain->stat.cache++;
1482 
1483     switch (chain->proto) {
1484     case ETH_P_IP:
1485         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1486         break;
1487     case ETH_P_IPV6:
1488         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1489         break;
1490     default:
1491         g_assert_not_reached();
1492     }
1493 }
1494 
1495 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1496                                          VirtioNetRscSeg *seg,
1497                                          const uint8_t *buf,
1498                                          struct tcp_header *n_tcp,
1499                                          struct tcp_header *o_tcp)
1500 {
1501     uint32_t nack, oack;
1502     uint16_t nwin, owin;
1503 
1504     nack = htonl(n_tcp->th_ack);
1505     nwin = htons(n_tcp->th_win);
1506     oack = htonl(o_tcp->th_ack);
1507     owin = htons(o_tcp->th_win);
1508 
1509     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1510         chain->stat.ack_out_of_win++;
1511         return RSC_FINAL;
1512     } else if (nack == oack) {
1513         /* duplicated ack or window probe */
1514         if (nwin == owin) {
1515             /* duplicated ack, add dup ack count due to whql test up to 1 */
1516             chain->stat.dup_ack++;
1517             return RSC_FINAL;
1518         } else {
1519             /* Coalesce window update */
1520             o_tcp->th_win = n_tcp->th_win;
1521             chain->stat.win_update++;
1522             return RSC_COALESCE;
1523         }
1524     } else {
1525         /* pure ack, go to 'C', finalize*/
1526         chain->stat.pure_ack++;
1527         return RSC_FINAL;
1528     }
1529 }
1530 
1531 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1532                                             VirtioNetRscSeg *seg,
1533                                             const uint8_t *buf,
1534                                             VirtioNetRscUnit *n_unit)
1535 {
1536     void *data;
1537     uint16_t o_ip_len;
1538     uint32_t nseq, oseq;
1539     VirtioNetRscUnit *o_unit;
1540 
1541     o_unit = &seg->unit;
1542     o_ip_len = htons(*o_unit->ip_plen);
1543     nseq = htonl(n_unit->tcp->th_seq);
1544     oseq = htonl(o_unit->tcp->th_seq);
1545 
1546     /* out of order or retransmitted. */
1547     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1548         chain->stat.data_out_of_win++;
1549         return RSC_FINAL;
1550     }
1551 
1552     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1553     if (nseq == oseq) {
1554         if ((o_unit->payload == 0) && n_unit->payload) {
1555             /* From no payload to payload, normal case, not a dup ack or etc */
1556             chain->stat.data_after_pure_ack++;
1557             goto coalesce;
1558         } else {
1559             return virtio_net_rsc_handle_ack(chain, seg, buf,
1560                                              n_unit->tcp, o_unit->tcp);
1561         }
1562     } else if ((nseq - oseq) != o_unit->payload) {
1563         /* Not a consistent packet, out of order */
1564         chain->stat.data_out_of_order++;
1565         return RSC_FINAL;
1566     } else {
1567 coalesce:
1568         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1569             chain->stat.over_size++;
1570             return RSC_FINAL;
1571         }
1572 
1573         /* Here comes the right data, the payload length in v4/v6 is different,
1574            so use the field value to update and record the new data len */
1575         o_unit->payload += n_unit->payload; /* update new data len */
1576 
1577         /* update field in ip header */
1578         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1579 
1580         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
1581            for windows guest, while this may change the behavior for linux
1582            guest (only if it uses RSC feature). */
1583         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1584 
1585         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1586         o_unit->tcp->th_win = n_unit->tcp->th_win;
1587 
1588         memmove(seg->buf + seg->size, data, n_unit->payload);
1589         seg->size += n_unit->payload;
1590         seg->packets++;
1591         chain->stat.coalesced++;
1592         return RSC_COALESCE;
1593     }
1594 }
1595 
1596 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1597                                         VirtioNetRscSeg *seg,
1598                                         const uint8_t *buf, size_t size,
1599                                         VirtioNetRscUnit *unit)
1600 {
1601     struct ip_header *ip1, *ip2;
1602 
1603     ip1 = (struct ip_header *)(unit->ip);
1604     ip2 = (struct ip_header *)(seg->unit.ip);
1605     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1606         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1607         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1608         chain->stat.no_match++;
1609         return RSC_NO_MATCH;
1610     }
1611 
1612     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1613 }
1614 
1615 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1616                                         VirtioNetRscSeg *seg,
1617                                         const uint8_t *buf, size_t size,
1618                                         VirtioNetRscUnit *unit)
1619 {
1620     struct ip6_header *ip1, *ip2;
1621 
1622     ip1 = (struct ip6_header *)(unit->ip);
1623     ip2 = (struct ip6_header *)(seg->unit.ip);
1624     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1625         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1626         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1627         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1628         chain->stat.no_match++;
1629         return RSC_NO_MATCH;
1630     }
1631 
1632     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1633 }
1634 
1635 /* Packets with 'SYN' set should bypass; packets with any other control
1636  * flag are sent only after a drain, to prevent out-of-order delivery. */
1637 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1638                                          struct tcp_header *tcp)
1639 {
1640     uint16_t tcp_hdr;
1641     uint16_t tcp_flag;
1642 
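    /* th_offset_flags packs the 4-bit data offset (in 32-bit words) with the
     * flag bits.  Masking with 0xF000 and shifting right by 10 converts the
     * offset straight to bytes: (x >> 12) words * 4 bytes == x >> 10.
     * E.g. an offset of 5 words (no TCP options) yields 20 bytes. */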
1643     tcp_flag = htons(tcp->th_offset_flags);
1644     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1645     tcp_flag &= VIRTIO_NET_TCP_FLAG;
1647     if (tcp_flag & TH_SYN) {
1648         chain->stat.tcp_syn++;
1649         return RSC_BYPASS;
1650     }
1651 
1652     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1653         chain->stat.tcp_ctrl_drain++;
1654         return RSC_FINAL;
1655     }
1656 
1657     if (tcp_hdr > sizeof(struct tcp_header)) {
1658         chain->stat.tcp_all_opt++;
1659         return RSC_FINAL;
1660     }
1661 
1662     return RSC_CANDIDATE;
1663 }
1664 
1665 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1666                                          NetClientState *nc,
1667                                          const uint8_t *buf, size_t size,
1668                                          VirtioNetRscUnit *unit)
1669 {
1670     int ret;
1671     VirtioNetRscSeg *seg, *nseg;
1672 
1673     if (QTAILQ_EMPTY(&chain->buffers)) {
1674         chain->stat.empty_cache++;
1675         virtio_net_rsc_cache_buf(chain, nc, buf, size);
1676         timer_mod(chain->drain_timer,
1677               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1678         return size;
1679     }
1680 
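    /* Try the new packet against every cached segment of this chain: a
     * FINAL result flushes that segment and sends the packet unmodified,
     * a coalesce consumes the packet into the segment, and NO_MATCH moves
     * on to the next cached segment. */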
1681     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1682         if (chain->proto == ETH_P_IP) {
1683             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1684         } else {
1685             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1686         }
1687 
1688         if (ret == RSC_FINAL) {
1689             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1690                 /* Send failed */
1691                 chain->stat.final_failed++;
1692                 return 0;
1693             }
1694 
1695             /* Send current packet */
1696             return virtio_net_do_receive(nc, buf, size);
1697         } else if (ret == RSC_NO_MATCH) {
1698             continue;
1699         } else {
1700             /* Coalesced: flag it so the IPv4 checksum is recalculated on drain */
1701             seg->is_coalesced = 1;
1702             return size;
1703         }
1704     }
1705 
1706     chain->stat.no_match_cache++;
1707     virtio_net_rsc_cache_buf(chain, nc, buf, size);
1708     return size;
1709 }
1710 
1711 /* Drain a connection's cached data; this avoids out-of-order segments */
1712 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1713                                         NetClientState *nc,
1714                                         const uint8_t *buf, size_t size,
1715                                         uint16_t ip_start, uint16_t ip_size,
1716                                         uint16_t tcp_port)
1717 {
1718     VirtioNetRscSeg *seg, *nseg;
1719     uint32_t ppair1, ppair2;
1720 
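    /* 'tcp_port' is the offset of the TCP source port; loading 32 bits
     * compares the source/destination port pair in one go. */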
1721     ppair1 = *(uint32_t *)(buf + tcp_port);
1722     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1723         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1724         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1725             || (ppair1 != ppair2)) {
1726             continue;
1727         }
1728         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1729             chain->stat.drain_failed++;
1730         }
1731 
1732         break;
1733     }
1734 
1735     return virtio_net_do_receive(nc, buf, size);
1736 }
1737 
1738 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1739                                             struct ip_header *ip,
1740                                             const uint8_t *buf, size_t size)
1741 {
1742     uint16_t ip_len;
1743 
1744     /* Not an ipv4 packet */
1745     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1746         chain->stat.ip_option++;
1747         return RSC_BYPASS;
1748     }
1749 
1750     /* Don't handle packets with IP options */
1751     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1752         chain->stat.ip_option++;
1753         return RSC_BYPASS;
1754     }
1755 
1756     if (ip->ip_p != IPPROTO_TCP) {
1757         chain->stat.bypass_not_tcp++;
1758         return RSC_BYPASS;
1759     }
1760 
1761     /* Don't handle packets that may be fragmented (DF bit not set) */
1762     if (!(htons(ip->ip_off) & IP_DF)) {
1763         chain->stat.ip_frag++;
1764         return RSC_BYPASS;
1765     }
1766 
1767     /* Don't handle packets with ecn flag */
1768     if (IPTOS_ECN(ip->ip_tos)) {
1769         chain->stat.ip_ecn++;
1770         return RSC_BYPASS;
1771     }
1772 
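    /* The total length must cover at least the IP and TCP headers, and must
     * not claim more than the buffer actually holds after the virtio-net
     * and Ethernet headers. */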
1773     ip_len = htons(ip->ip_len);
1774     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1775         || ip_len > (size - chain->n->guest_hdr_len -
1776                      sizeof(struct eth_header))) {
1777         chain->stat.ip_hacked++;
1778         return RSC_BYPASS;
1779     }
1780 
1781     return RSC_CANDIDATE;
1782 }
1783 
1784 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1785                                       NetClientState *nc,
1786                                       const uint8_t *buf, size_t size)
1787 {
1788     int32_t ret;
1789     uint16_t hdr_len;
1790     VirtioNetRscUnit unit;
1791 
1792     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1793 
1794     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1795         + sizeof(struct tcp_header))) {
1796         chain->stat.bypass_not_tcp++;
1797         return virtio_net_do_receive(nc, buf, size);
1798     }
1799 
1800     virtio_net_rsc_extract_unit4(chain, buf, &unit);
1801     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1802         != RSC_CANDIDATE) {
1803         return virtio_net_do_receive(nc, buf, size);
1804     }
1805 
1806     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1807     if (ret == RSC_BYPASS) {
1808         return virtio_net_do_receive(nc, buf, size);
1809     } else if (ret == RSC_FINAL) {
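        /* ip_start points at the IPv4 source address (offset 12 in the IP
         * header), covering saddr + daddr; the port pair sits right after
         * the option-less 20-byte header checked above. */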
1810         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1811                 ((hdr_len + sizeof(struct eth_header)) + 12),
1812                 VIRTIO_NET_IP4_ADDR_SIZE,
1813                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1814     }
1815 
1816     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1817 }
1818 
1819 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1820                                             struct ip6_header *ip6,
1821                                             const uint8_t *buf, size_t size)
1822 {
1823     uint16_t ip_len;
1824 
1825     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1826         != IP_HEADER_VERSION_6) {
1827         return RSC_BYPASS;
1828     }
1829 
1830     /* Both options and protocol are checked here: the next header must be TCP */
1831     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1832         chain->stat.bypass_not_tcp++;
1833         return RSC_BYPASS;
1834     }
1835 
1836     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1837     if (ip_len < sizeof(struct tcp_header) ||
1838         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1839                   - sizeof(struct ip6_header))) {
1840         chain->stat.ip_hacked++;
1841         return RSC_BYPASS;
1842     }
1843 
1844     /* Don't handle packets with ecn flag */
1845     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1846         chain->stat.ip_ecn++;
1847         return RSC_BYPASS;
1848     }
1849 
1850     return RSC_CANDIDATE;
1851 }
1852 
1853 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1854                                       const uint8_t *buf, size_t size)
1855 {
1856     int32_t ret;
1857     uint16_t hdr_len;
1858     VirtioNetRscChain *chain;
1859     VirtioNetRscUnit unit;
1860 
1861     chain = (VirtioNetRscChain *)opq;
1862     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1863 
1864     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1865         + sizeof(struct tcp_header))) {
1866         return virtio_net_do_receive(nc, buf, size);
1867     }
1868 
1869     virtio_net_rsc_extract_unit6(chain, buf, &unit);
1870     if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
1871         != RSC_CANDIDATE) {
1872         return virtio_net_do_receive(nc, buf, size);
1873     }
1874 
1875     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1876     if (ret == RSC_BYPASS) {
1877         return virtio_net_do_receive(nc, buf, size);
1878     } else if (ret == RSC_FINAL) {
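        /* For IPv6 the source address starts at offset 8 in the header;
         * saddr + daddr span 32 bytes, and the TCP port pair follows the
         * fixed 40-byte header. */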
1879         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1880                 ((hdr_len + sizeof(struct eth_header)) + 8),
1881                 VIRTIO_NET_IP6_ADDR_SIZE,
1882                 hdr_len + sizeof(struct eth_header)
1883                 + sizeof(struct ip6_header));
1884     }
1885 
1886     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1887 }
1888 
1889 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1890                                                       NetClientState *nc,
1891                                                       uint16_t proto)
1892 {
1893     VirtioNetRscChain *chain;
1894 
1895     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1896         return NULL;
1897     }
1898 
1899     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1900         if (chain->proto == proto) {
1901             return chain;
1902         }
1903     }
1904 
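    /* No chain for this protocol yet: allocate one and seed it with the
     * protocol-specific payload ceiling and GSO type. */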
1905     chain = g_malloc(sizeof(*chain));
1906     chain->n = n;
1907     chain->proto = proto;
1908     if (proto == (uint16_t)ETH_P_IP) {
1909         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1910         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1911     } else {
1912         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1913         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1914     }
1915     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1916                                       virtio_net_rsc_purge, chain);
1917     memset(&chain->stat, 0, sizeof(chain->stat));
1918 
1919     QTAILQ_INIT(&chain->buffers);
1920     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1921 
1922     return chain;
1923 }
1924 
1925 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1926                                       const uint8_t *buf,
1927                                       size_t size)
1928 {
1929     uint16_t proto;
1930     VirtioNetRscChain *chain;
1931     struct eth_header *eth;
1932     VirtIONet *n;
1933 
1934     n = qemu_get_nic_opaque(nc);
1935     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1936         return virtio_net_do_receive(nc, buf, size);
1937     }
1938 
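    /* The Ethernet frame follows the virtio-net header in the buffer. */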
1939     eth = (struct eth_header *)(buf + n->guest_hdr_len);
1940     proto = htons(eth->h_proto);
1941 
1942     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1943     if (chain) {
1944         chain->stat.received++;
1945         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1946             return virtio_net_rsc_receive4(chain, nc, buf, size);
1947         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1948             return virtio_net_rsc_receive6(chain, nc, buf, size);
1949         }
1950     }
1951     return virtio_net_do_receive(nc, buf, size);
1952 }
1953 
1954 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1955                                   size_t size)
1956 {
1957     VirtIONet *n = qemu_get_nic_opaque(nc);
1958     if (n->rsc4_enabled || n->rsc6_enabled) {
1959         return virtio_net_rsc_receive(nc, buf, size);
1960     } else {
1961         return virtio_net_do_receive(nc, buf, size);
1962     }
1963 }
1964 
1965 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1966 
1967 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1968 {
1969     VirtIONet *n = qemu_get_nic_opaque(nc);
1970     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1971     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1972 
1973     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1974     virtio_notify(vdev, q->tx_vq);
1975 
1976     g_free(q->async_tx.elem);
1977     q->async_tx.elem = NULL;
1978 
1979     virtio_queue_set_notification(q->tx_vq, 1);
1980     virtio_net_flush_tx(q);
1981 }
1982 
1983 /* TX */
1984 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
1985 {
1986     VirtIONet *n = q->n;
1987     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1988     VirtQueueElement *elem;
1989     int32_t num_packets = 0;
1990     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
1991     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1992         return num_packets;
1993     }
1994 
1995     if (q->async_tx.elem) {
1996         virtio_queue_set_notification(q->tx_vq, 0);
1997         return num_packets;
1998     }
1999 
2000     for (;;) {
2001         ssize_t ret;
2002         unsigned int out_num;
2003         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2004         struct virtio_net_hdr_mrg_rxbuf mhdr;
2005 
2006         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2007         if (!elem) {
2008             break;
2009         }
2010 
2011         out_num = elem->out_num;
2012         out_sg = elem->out_sg;
2013         if (out_num < 1) {
2014             virtio_error(vdev, "virtio-net header not in first element");
2015             virtqueue_detach_element(q->tx_vq, elem, 0);
2016             g_free(elem);
2017             return -EINVAL;
2018         }
2019 
2020         if (n->has_vnet_hdr) {
2021             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2022                 n->guest_hdr_len) {
2023                 virtio_error(vdev, "virtio-net header incorrect");
2024                 virtqueue_detach_element(q->tx_vq, elem, 0);
2025                 g_free(elem);
2026                 return -EINVAL;
2027             }
2028             if (n->needs_vnet_hdr_swap) {
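                /* Byteswap our private copy of the header, then build a
                 * shadow iovec whose first element is that copy while the
                 * remaining elements alias the guest buffers past it. */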
2029                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2030                 sg2[0].iov_base = &mhdr;
2031                 sg2[0].iov_len = n->guest_hdr_len;
2032                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2033                                    out_sg, out_num,
2034                                    n->guest_hdr_len, -1);
2035                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2036                     goto drop;
2037                 }
2038                 out_num += 1;
2039                 out_sg = sg2;
2040             }
2041         }
2042         /*
2043          * If host wants to see the guest header as is, we can
2044          * pass it on unchanged. Otherwise, copy just the parts
2045          * that host is interested in.
2046          */
2047         assert(n->host_hdr_len <= n->guest_hdr_len);
2048         if (n->host_hdr_len != n->guest_hdr_len) {
2049             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2050                                        out_sg, out_num,
2051                                        0, n->host_hdr_len);
2052             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2053                              out_sg, out_num,
2054                              n->guest_hdr_len, -1);
2055             out_num = sg_num;
2056             out_sg = sg;
2057         }
2058 
2059         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2060                                       out_sg, out_num, virtio_net_tx_complete);
2061         if (ret == 0) {
2062             virtio_queue_set_notification(q->tx_vq, 0);
2063             q->async_tx.elem = elem;
2064             return -EBUSY;
2065         }
2066 
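        /* The packet went out synchronously (or is being dropped): complete
         * the element and count it against the burst budget. */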
2067 drop:
2068         virtqueue_push(q->tx_vq, elem, 0);
2069         virtio_notify(vdev, q->tx_vq);
2070         g_free(elem);
2071 
2072         if (++num_packets >= n->tx_burst) {
2073             break;
2074         }
2075     }
2076     return num_packets;
2077 }
2078 
2079 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2080 {
2081     VirtIONet *n = VIRTIO_NET(vdev);
2082     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2083 
2084     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2085         virtio_net_drop_tx_queue_data(vdev, vq);
2086         return;
2087     }
2088 
2089     /* This happens when device was stopped but VCPU wasn't. */
2090     if (!vdev->vm_running) {
2091         q->tx_waiting = 1;
2092         return;
2093     }
2094 
2095     if (q->tx_waiting) {
2096         virtio_queue_set_notification(vq, 1);
2097         timer_del(q->tx_timer);
2098         q->tx_waiting = 0;
2099         if (virtio_net_flush_tx(q) == -EINVAL) {
2100             return;
2101         }
2102     } else {
2103         timer_mod(q->tx_timer,
2104                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2105         q->tx_waiting = 1;
2106         virtio_queue_set_notification(vq, 0);
2107     }
2108 }
2109 
2110 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2111 {
2112     VirtIONet *n = VIRTIO_NET(vdev);
2113     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2114 
2115     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2116         virtio_net_drop_tx_queue_data(vdev, vq);
2117         return;
2118     }
2119 
2120     if (unlikely(q->tx_waiting)) {
2121         return;
2122     }
2123     q->tx_waiting = 1;
2124     /* This happens when device was stopped but VCPU wasn't. */
2125     if (!vdev->vm_running) {
2126         return;
2127     }
2128     virtio_queue_set_notification(vq, 0);
2129     qemu_bh_schedule(q->tx_bh);
2130 }
2131 
2132 static void virtio_net_tx_timer(void *opaque)
2133 {
2134     VirtIONetQueue *q = opaque;
2135     VirtIONet *n = q->n;
2136     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2137     /* This happens when device was stopped but BH wasn't. */
2138     if (!vdev->vm_running) {
2139         /* Make sure tx waiting is set, so we'll run when restarted. */
2140         assert(q->tx_waiting);
2141         return;
2142     }
2143 
2144     q->tx_waiting = 0;
2145 
2146     /* Just in case the driver is not ready any more */
2147     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2148         return;
2149     }
2150 
2151     virtio_queue_set_notification(q->tx_vq, 1);
2152     virtio_net_flush_tx(q);
2153 }
2154 
2155 static void virtio_net_tx_bh(void *opaque)
2156 {
2157     VirtIONetQueue *q = opaque;
2158     VirtIONet *n = q->n;
2159     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2160     int32_t ret;
2161 
2162     /* This happens when device was stopped but BH wasn't. */
2163     if (!vdev->vm_running) {
2164         /* Make sure tx waiting is set, so we'll run when restarted. */
2165         assert(q->tx_waiting);
2166         return;
2167     }
2168 
2169     q->tx_waiting = 0;
2170 
2171     /* Just in case the driver is not ready any more */
2172     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2173         return;
2174     }
2175 
2176     ret = virtio_net_flush_tx(q);
2177     if (ret == -EBUSY || ret == -EINVAL) {
2178         /* Notification re-enable handled by tx_complete, or device broken */
2179         return;
2180     }
2181 
2182     /* If we flush a full burst of packets, assume there are
2183      * more coming and immediately reschedule */
2184     if (ret >= n->tx_burst) {
2185         qemu_bh_schedule(q->tx_bh);
2186         q->tx_waiting = 1;
2187         return;
2188     }
2189 
2190     /* If less than a full burst, re-enable notification and flush
2191      * anything that may have come in while we weren't looking.  If
2192      * we find something, assume the guest is still active and reschedule */
2193     virtio_queue_set_notification(q->tx_vq, 1);
2194     ret = virtio_net_flush_tx(q);
2195     if (ret == -EINVAL) {
2196         return;
2197     } else if (ret > 0) {
2198         virtio_queue_set_notification(q->tx_vq, 0);
2199         qemu_bh_schedule(q->tx_bh);
2200         q->tx_waiting = 1;
2201     }
2202 }
2203 
2204 static void virtio_net_add_queue(VirtIONet *n, int index)
2205 {
2206     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2207 
2208     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2209                                            virtio_net_handle_rx);
2210 
2211     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2212         n->vqs[index].tx_vq =
2213             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2214                              virtio_net_handle_tx_timer);
2215         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2216                                               virtio_net_tx_timer,
2217                                               &n->vqs[index]);
2218     } else {
2219         n->vqs[index].tx_vq =
2220             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2221                              virtio_net_handle_tx_bh);
2222         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2223     }
2224 
2225     n->vqs[index].tx_waiting = 0;
2226     n->vqs[index].n = n;
2227 }
2228 
2229 static void virtio_net_del_queue(VirtIONet *n, int index)
2230 {
2231     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2232     VirtIONetQueue *q = &n->vqs[index];
2233     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2234 
2235     qemu_purge_queued_packets(nc);
2236 
2237     virtio_del_queue(vdev, index * 2);
2238     if (q->tx_timer) {
2239         timer_del(q->tx_timer);
2240         timer_free(q->tx_timer);
2241         q->tx_timer = NULL;
2242     } else {
2243         qemu_bh_delete(q->tx_bh);
2244         q->tx_bh = NULL;
2245     }
2246     q->tx_waiting = 0;
2247     virtio_del_queue(vdev, index * 2 + 1);
2248 }
2249 
2250 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2251 {
2252     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2253     int old_num_queues = virtio_get_num_queues(vdev);
2254     int new_num_queues = new_max_queues * 2 + 1;
2255     int i;
2256 
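    /* Virtqueues are laid out as [rx0, tx0, rx1, tx1, ..., ctrl], so a
     * device with N queue pairs exposes 2 * N + 1 virtqueues. */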
2257     assert(old_num_queues >= 3);
2258     assert(old_num_queues % 2 == 1);
2259 
2260     if (old_num_queues == new_num_queues) {
2261         return;
2262     }
2263 
2264     /*
2265      * We always need to remove and add ctrl vq if
2266      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2267      * and then we only enter one of the following two loops.
2268      */
2269     virtio_del_queue(vdev, old_num_queues - 1);
2270 
2271     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2272         /* new_num_queues < old_num_queues */
2273         virtio_net_del_queue(n, i / 2);
2274     }
2275 
2276     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2277         /* new_num_queues > old_num_queues */
2278         virtio_net_add_queue(n, i / 2);
2279     }
2280 
2281     /* add ctrl_vq last */
2282     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2283 }
2284 
2285 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2286 {
2287     int max = multiqueue ? n->max_queues : 1;
2288 
2289     n->multiqueue = multiqueue;
2290     virtio_net_change_num_queues(n, max);
2291 
2292     virtio_net_set_queues(n);
2293 }
2294 
2295 static int virtio_net_post_load_device(void *opaque, int version_id)
2296 {
2297     VirtIONet *n = opaque;
2298     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2299     int i, link_down;
2300 
2301     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2302                                virtio_vdev_has_feature(vdev,
2303                                                        VIRTIO_F_VERSION_1));
2304 
2305     /* MAC_TABLE_ENTRIES may be different from the saved image */
2306     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2307         n->mac_table.in_use = 0;
2308     }
2309 
2310     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2311         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2312     }
2313 
2314     if (peer_has_vnet_hdr(n)) {
2315         virtio_net_apply_guest_offloads(n);
2316     }
2317 
2318     virtio_net_set_queues(n);
2319 
2320     /* Find the first multicast entry in the saved MAC filter */
2321     for (i = 0; i < n->mac_table.in_use; i++) {
2322         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2323             break;
2324         }
2325     }
2326     n->mac_table.first_multi = i;
2327 
2328     /* nc.link_down can't be migrated, so infer link_down according
2329      * to link status bit in n->status */
2330     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2331     for (i = 0; i < n->max_queues; i++) {
2332         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2333     }
2334 
2335     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2336         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2337         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
2338         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
2339     }
2340 
2341     return 0;
2342 }
2343 
2344 /* tx_waiting field of a VirtIONetQueue */
2345 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2346     .name = "virtio-net-queue-tx_waiting",
2347     .fields = (VMStateField[]) {
2348         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2349         VMSTATE_END_OF_LIST()
2350     },
2351 };
2352 
2353 static bool max_queues_gt_1(void *opaque, int version_id)
2354 {
2355     return VIRTIO_NET(opaque)->max_queues > 1;
2356 }
2357 
2358 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2359 {
2360     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2361                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2362 }
2363 
2364 static bool mac_table_fits(void *opaque, int version_id)
2365 {
2366     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2367 }
2368 
2369 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2370 {
2371     return !mac_table_fits(opaque, version_id);
2372 }
2373 
2374 /* This temporary type is shared by all the WITH_TMP methods
2375  * although only some fields are used by each.
2376  */
2377 struct VirtIONetMigTmp {
2378     VirtIONet      *parent;
2379     VirtIONetQueue *vqs_1;
2380     uint16_t        curr_queues_1;
2381     uint8_t         has_ufo;
2382     uint32_t        has_vnet_hdr;
2383 };
2384 
2385 /* The 2nd and subsequent tx_waiting flags are loaded later than the
2386  * 1st entry in the queues, and only if there's more than one entry.
2387  * We use the tmp mechanism to calculate a temporary pointer and count,
2388  * and also to validate the count.
2389  */
2390 
2391 static int virtio_net_tx_waiting_pre_save(void *opaque)
2392 {
2393     struct VirtIONetMigTmp *tmp = opaque;
2394 
2395     tmp->vqs_1 = tmp->parent->vqs + 1;
2396     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2397     if (tmp->parent->curr_queues == 0) {
2398         tmp->curr_queues_1 = 0;
2399     }
2400 
2401     return 0;
2402 }
2403 
2404 static int virtio_net_tx_waiting_pre_load(void *opaque)
2405 {
2406     struct VirtIONetMigTmp *tmp = opaque;
2407 
2408     /* Reuse the pointer setup from save */
2409     virtio_net_tx_waiting_pre_save(opaque);
2410 
2411     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2412         error_report("virtio-net: curr_queues %x > max_queues %x",
2413             tmp->parent->curr_queues, tmp->parent->max_queues);
2414 
2415         return -EINVAL;
2416     }
2417 
2418     return 0; /* all good */
2419 }
2420 
2421 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2422     .name      = "virtio-net-tx_waiting",
2423     .pre_load  = virtio_net_tx_waiting_pre_load,
2424     .pre_save  = virtio_net_tx_waiting_pre_save,
2425     .fields    = (VMStateField[]) {
2426         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2427                                      curr_queues_1,
2428                                      vmstate_virtio_net_queue_tx_waiting,
2429                                      struct VirtIONetQueue),
2430         VMSTATE_END_OF_LIST()
2431     },
2432 };
2433 
2434 /* The 'has_ufo' flag is only tested; if the incoming stream has it set,
2435  * we must check that our peer supports UFO as well
2436  */
2437 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2438 {
2439     struct VirtIONetMigTmp *tmp = opaque;
2440 
2441     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2442         error_report("virtio-net: saved image requires TUN_F_UFO support");
2443         return -EINVAL;
2444     }
2445 
2446     return 0;
2447 }
2448 
2449 static int virtio_net_ufo_pre_save(void *opaque)
2450 {
2451     struct VirtIONetMigTmp *tmp = opaque;
2452 
2453     tmp->has_ufo = tmp->parent->has_ufo;
2454 
2455     return 0;
2456 }
2457 
2458 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2459     .name      = "virtio-net-ufo",
2460     .post_load = virtio_net_ufo_post_load,
2461     .pre_save  = virtio_net_ufo_pre_save,
2462     .fields    = (VMStateField[]) {
2463         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2464         VMSTATE_END_OF_LIST()
2465     },
2466 };
2467 
2468 /* The 'has_vnet_hdr' flag is only tested; if the incoming stream has it
2469  * set, we must check that our peer supports the vnet header as well
2470  */
2471 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2472 {
2473     struct VirtIONetMigTmp *tmp = opaque;
2474 
2475     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2476         error_report("virtio-net: saved image requires vnet_hdr=on");
2477         return -EINVAL;
2478     }
2479 
2480     return 0;
2481 }
2482 
2483 static int virtio_net_vnet_pre_save(void *opaque)
2484 {
2485     struct VirtIONetMigTmp *tmp = opaque;
2486 
2487     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2488 
2489     return 0;
2490 }
2491 
2492 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2493     .name      = "virtio-net-vnet",
2494     .post_load = virtio_net_vnet_post_load,
2495     .pre_save  = virtio_net_vnet_pre_save,
2496     .fields    = (VMStateField[]) {
2497         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2498         VMSTATE_END_OF_LIST()
2499     },
2500 };
2501 
2502 static const VMStateDescription vmstate_virtio_net_device = {
2503     .name = "virtio-net-device",
2504     .version_id = VIRTIO_NET_VM_VERSION,
2505     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2506     .post_load = virtio_net_post_load_device,
2507     .fields = (VMStateField[]) {
2508         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2509         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2510                                vmstate_virtio_net_queue_tx_waiting,
2511                                VirtIONetQueue),
2512         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2513         VMSTATE_UINT16(status, VirtIONet),
2514         VMSTATE_UINT8(promisc, VirtIONet),
2515         VMSTATE_UINT8(allmulti, VirtIONet),
2516         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2517 
2518         /* Guarded pair: if it fits we load it, else we throw it away
2519          * - can happen if the source has a larger MAC table; post-load
2520          *   sets flags in this case.
2521          */
2522         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2523                                 0, mac_table_fits, mac_table.in_use,
2524                                  ETH_ALEN),
2525         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2526                                      mac_table.in_use, ETH_ALEN),
2527 
2528         /* Note: This is an array of uint32's that's always been saved as a
2529          * buffer; hold onto your endiannesses; it's actually used as a bitmap
2530          * but based on the uint.
2531          */
2532         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2533         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2534                          vmstate_virtio_net_has_vnet),
2535         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2536         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2537         VMSTATE_UINT8(alluni, VirtIONet),
2538         VMSTATE_UINT8(nomulti, VirtIONet),
2539         VMSTATE_UINT8(nouni, VirtIONet),
2540         VMSTATE_UINT8(nobcast, VirtIONet),
2541         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2542                          vmstate_virtio_net_has_ufo),
2543         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2544                             vmstate_info_uint16_equal, uint16_t),
2545         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2546         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2547                          vmstate_virtio_net_tx_waiting),
2548         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2549                             has_ctrl_guest_offloads),
2550         VMSTATE_END_OF_LIST()
2551     },
2552 };
2553 
2554 static NetClientInfo net_virtio_info = {
2555     .type = NET_CLIENT_DRIVER_NIC,
2556     .size = sizeof(NICState),
2557     .can_receive = virtio_net_can_receive,
2558     .receive = virtio_net_receive,
2559     .link_status_changed = virtio_net_set_link_status,
2560     .query_rx_filter = virtio_net_query_rxfilter,
2561 };
2562 
2563 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2564 {
2565     VirtIONet *n = VIRTIO_NET(vdev);
2566     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2567     assert(n->vhost_started);
2568     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2569 }
2570 
2571 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2572                                            bool mask)
2573 {
2574     VirtIONet *n = VIRTIO_NET(vdev);
2575     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2576     assert(n->vhost_started);
2577     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2578                              vdev, idx, mask);
2579 }
2580 
2581 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2582 {
2583     int i, config_size = 0;
2584     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2585 
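    /* feature_sizes[] maps each config-affecting feature to the end offset
     * of its field; the config size is the largest end offset among the
     * offered features. */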
2586     for (i = 0; feature_sizes[i].flags != 0; i++) {
2587         if (host_features & feature_sizes[i].flags) {
2588             config_size = MAX(feature_sizes[i].end, config_size);
2589         }
2590     }
2591     n->config_size = config_size;
2592 }
2593 
2594 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2595                                    const char *type)
2596 {
2597     /*
2598      * The name may be NULL; in that case the netclient name will be type.x.
2599      */
2600     assert(type != NULL);
2601 
2602     g_free(n->netclient_name);
2603     g_free(n->netclient_type);
2604     n->netclient_name = g_strdup(name);
2605     n->netclient_type = g_strdup(type);
2606 }
2607 
2608 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2609 {
2610     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2611     VirtIONet *n = VIRTIO_NET(dev);
2612     NetClientState *nc;
2613     int i;
2614 
2615     if (n->net_conf.mtu) {
2616         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2617     }
2618 
2619     if (n->net_conf.duplex_str) {
2620         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2621             n->net_conf.duplex = DUPLEX_HALF;
2622         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2623             n->net_conf.duplex = DUPLEX_FULL;
2624         } else {
2625             error_setg(errp, "'duplex' must be 'half' or 'full'");
2626         }
2627         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2628     } else {
2629         n->net_conf.duplex = DUPLEX_UNKNOWN;
2630     }
2631 
2632     if (n->net_conf.speed < SPEED_UNKNOWN) {
2633         error_setg(errp, "'speed' must be between 0 and INT_MAX");
2634     } else if (n->net_conf.speed >= 0) {
2635         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2636     }
2637 
2638     virtio_net_set_config_size(n, n->host_features);
2639     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2640 
2641     /*
2642      * We set a lower limit on RX queue size to what it always was.
2643      * Guests that want a smaller ring can always resize it without
2644      * help from us (using virtio 1 and up).
2645      */
2646     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2647         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2648         !is_power_of_2(n->net_conf.rx_queue_size)) {
2649         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2650                    "must be a power of 2 between %d and %d.",
2651                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2652                    VIRTQUEUE_MAX_SIZE);
2653         virtio_cleanup(vdev);
2654         return;
2655     }
2656 
2657     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2658         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2659         !is_power_of_2(n->net_conf.tx_queue_size)) {
2660         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2661                    "must be a power of 2 between %d and %d",
2662                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2663                    VIRTQUEUE_MAX_SIZE);
2664         virtio_cleanup(vdev);
2665         return;
2666     }
2667 
2668     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
2669     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
2670         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
2671                    "must be a positive integer less than %d.",
2672                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
2673         virtio_cleanup(vdev);
2674         return;
2675     }
2676     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
2677     n->curr_queues = 1;
2678     n->tx_timeout = n->net_conf.txtimer;
2679 
2680     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2681                        && strcmp(n->net_conf.tx, "bh")) {
2682         warn_report("virtio-net: "
2683                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2684                     n->net_conf.tx);
2685         error_printf("Defaulting to \"bh\"");
2686     }
2687 
2688     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2689                                     n->net_conf.tx_queue_size);
2690 
2691     for (i = 0; i < n->max_queues; i++) {
2692         virtio_net_add_queue(n, i);
2693     }
2694 
2695     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2696     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2697     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
2698     n->status = VIRTIO_NET_S_LINK_UP;
2699     n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
2700                                      virtio_net_announce_timer, n);
2701 
2702     if (n->netclient_type) {
2703         /*
2704          * This happens when virtio_net_set_netclient_name has been called.
2705          */
2706         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2707                               n->netclient_type, n->netclient_name, n);
2708     } else {
2709         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2710                               object_get_typename(OBJECT(dev)), dev->id, n);
2711     }
2712 
2713     peer_test_vnet_hdr(n);
2714     if (peer_has_vnet_hdr(n)) {
2715         for (i = 0; i < n->max_queues; i++) {
2716             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
2717         }
2718         n->host_hdr_len = sizeof(struct virtio_net_hdr);
2719     } else {
2720         n->host_hdr_len = 0;
2721     }
2722 
2723     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
2724 
2725     n->vqs[0].tx_waiting = 0;
2726     n->tx_burst = n->net_conf.txburst;
2727     virtio_net_set_mrg_rx_bufs(n, 0, 0);
2728     n->promisc = 1; /* for compatibility */
2729 
2730     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
2731 
2732     n->vlans = g_malloc0(MAX_VLAN >> 3);
2733 
2734     nc = qemu_get_queue(n->nic);
2735     nc->rxfilter_notify_enabled = 1;
2736 
2737     QTAILQ_INIT(&n->rsc_chains);
2738     n->qdev = dev;
2739 }
2740 
2741 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2742 {
2743     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2744     VirtIONet *n = VIRTIO_NET(dev);
2745     int i, max_queues;
2746 
2747     /* This will stop vhost backend if appropriate. */
2748     virtio_net_set_status(vdev, 0);
2749 
2750     g_free(n->netclient_name);
2751     n->netclient_name = NULL;
2752     g_free(n->netclient_type);
2753     n->netclient_type = NULL;
2754 
2755     g_free(n->mac_table.macs);
2756     g_free(n->vlans);
2757 
2758     max_queues = n->multiqueue ? n->max_queues : 1;
2759     for (i = 0; i < max_queues; i++) {
2760         virtio_net_del_queue(n, i);
2761     }
2762 
2763     timer_del(n->announce_timer);
2764     timer_free(n->announce_timer);
2765     g_free(n->vqs);
2766     qemu_del_nic(n->nic);
2767     virtio_net_rsc_cleanup(n);
2768     virtio_cleanup(vdev);
2769 }
2770 
2771 static void virtio_net_instance_init(Object *obj)
2772 {
2773     VirtIONet *n = VIRTIO_NET(obj);
2774 
2775     /*
2776      * The default config_size is sizeof(struct virtio_net_config).
2777      * Can be overriden with virtio_net_set_config_size.
2778      */
2779     n->config_size = sizeof(struct virtio_net_config);
2780     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2781                                   "bootindex", "/ethernet-phy@0",
2782                                   DEVICE(n), NULL);
2783 }
2784 
2785 static int virtio_net_pre_save(void *opaque)
2786 {
2787     VirtIONet *n = opaque;
2788 
2789     /* At this point the backend must be stopped; otherwise
2790      * it might keep writing to memory. */
2791     assert(!n->vhost_started);
2792 
2793     return 0;
2794 }
2795 
2796 static const VMStateDescription vmstate_virtio_net = {
2797     .name = "virtio-net",
2798     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2799     .version_id = VIRTIO_NET_VM_VERSION,
2800     .fields = (VMStateField[]) {
2801         VMSTATE_VIRTIO_DEVICE,
2802         VMSTATE_END_OF_LIST()
2803     },
2804     .pre_save = virtio_net_pre_save,
2805 };
2806 
2807 static Property virtio_net_properties[] = {
2808     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2809                     VIRTIO_NET_F_CSUM, true),
2810     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
2811                     VIRTIO_NET_F_GUEST_CSUM, true),
2812     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2813     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
2814                     VIRTIO_NET_F_GUEST_TSO4, true),
2815     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
2816                     VIRTIO_NET_F_GUEST_TSO6, true),
2817     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
2818                     VIRTIO_NET_F_GUEST_ECN, true),
2819     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
2820                     VIRTIO_NET_F_GUEST_UFO, true),
2821     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
2822                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2823     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
2824                     VIRTIO_NET_F_HOST_TSO4, true),
2825     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
2826                     VIRTIO_NET_F_HOST_TSO6, true),
2827     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
2828                     VIRTIO_NET_F_HOST_ECN, true),
2829     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
2830                     VIRTIO_NET_F_HOST_UFO, true),
2831     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
2832                     VIRTIO_NET_F_MRG_RXBUF, true),
2833     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
2834                     VIRTIO_NET_F_STATUS, true),
2835     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
2836                     VIRTIO_NET_F_CTRL_VQ, true),
2837     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
2838                     VIRTIO_NET_F_CTRL_RX, true),
2839     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
2840                     VIRTIO_NET_F_CTRL_VLAN, true),
2841     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
2842                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2843     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
2844                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2845     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
2846                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2847     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2848     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2849                     VIRTIO_NET_F_RSC_EXT, false),
2850     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2851                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
2852     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2853     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2854                        TX_TIMER_INTERVAL),
2855     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2856     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2857     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2858                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2859     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2860                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
2861     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2862     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2863                      true),
2864     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2865     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
2866     DEFINE_PROP_END_OF_LIST(),
2867 };
2868 
2869 static void virtio_net_class_init(ObjectClass *klass, void *data)
2870 {
2871     DeviceClass *dc = DEVICE_CLASS(klass);
2872     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2873 
2874     dc->props = virtio_net_properties;
2875     dc->vmsd = &vmstate_virtio_net;
2876     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2877     vdc->realize = virtio_net_device_realize;
2878     vdc->unrealize = virtio_net_device_unrealize;
2879     vdc->get_config = virtio_net_get_config;
2880     vdc->set_config = virtio_net_set_config;
2881     vdc->get_features = virtio_net_get_features;
2882     vdc->set_features = virtio_net_set_features;
2883     vdc->bad_features = virtio_net_bad_features;
2884     vdc->reset = virtio_net_reset;
2885     vdc->set_status = virtio_net_set_status;
2886     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2887     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2888     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2889     vdc->vmsd = &vmstate_virtio_net_device;
2890 }
2891 
2892 static const TypeInfo virtio_net_info = {
2893     .name = TYPE_VIRTIO_NET,
2894     .parent = TYPE_VIRTIO_DEVICE,
2895     .instance_size = sizeof(VirtIONet),
2896     .instance_init = virtio_net_instance_init,
2897     .class_init = virtio_net_class_init,
2898 };
2899 
2900 static void virtio_register_types(void)
2901 {
2902     type_register_static(&virtio_net_info);
2903 }
2904 
2905 type_init(virtio_register_types)
2906