xref: /openbmc/qemu/hw/net/virtio-net.c (revision 5b76dd13)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/iov.h"
16 #include "hw/virtio/virtio.h"
17 #include "net/net.h"
18 #include "net/checksum.h"
19 #include "net/tap.h"
20 #include "qemu/error-report.h"
21 #include "qemu/timer.h"
22 #include "hw/virtio/virtio-net.h"
23 #include "net/vhost_net.h"
24 #include "hw/virtio/virtio-bus.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-events-net.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "migration/misc.h"
29 #include "standard-headers/linux/ethtool.h"
30 
31 #define VIRTIO_NET_VM_VERSION    11
32 
33 #define MAC_TABLE_ENTRIES    64
34 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
35 
36 /* previously a fixed value */
37 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
38 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
39 
40 /* for now, only allow larger queues; with virtio-1, guest can downsize */
41 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
42 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
43 
44 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
45 
46 #define VIRTIO_NET_TCP_FLAG         0x3F
47 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
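/*
 * Both masks apply to TCP th_offset_flags once byte-swapped with htons()
 * (which doubles as ntohs() in this file): 0x3F covers the
 * FIN/SYN/RST/PSH/ACK/URG flag bits, and 0xF000 is the data-offset nibble.
 * The data offset counts 32-bit words, i.e. ((flags & 0xF000) >> 12) * 4
 * bytes, which the RSC code below folds into a single ">> 10".  Worked
 * example: th_offset_flags of 0x7018 gives (0x7000 >> 10) = 28 header bytes.
 */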
48 
49 /* IPv4 max payload, 16 bits in the header */
50 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
51 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
52 
53 /* header length value (in 32-bit words) in the IP header, without options */
54 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
55 
56 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
57 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
58 
59 /* Purge coalesced packets timer interval.  This value affects performance
60    a lot and should be tuned carefully: '300000' (300us) is the recommended
61    value to pass the WHQL test, while '50000' can gain 2x netperf throughput
62    with tso/gso/gro 'off'. */
63 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
64 
65 /* temporary until the standard headers include it */
66 #if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
67 
68 #define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
69 #define VIRTIO_NET_F_RSC_EXT       61
70 
71 static inline __virtio16 *virtio_net_rsc_ext_num_packets(
72     struct virtio_net_hdr *hdr)
73 {
74     return &hdr->csum_start;
75 }
76 
77 static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
78     struct virtio_net_hdr *hdr)
79 {
80     return &hdr->csum_offset;
81 }
82 
83 #endif
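/*
 * When VIRTIO_NET_HDR_F_RSC_INFO is set, csum_start/csum_offset do not carry
 * checksum data: the two accessors above alias those fields to the number of
 * coalesced packets and of duplicated ACKs.  In this device the two uses
 * never overlap, since the RSC drain path rewrites hdr->flags outright
 * before filling them in.
 */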
84 
85 static VirtIOFeature feature_sizes[] = {
86     {.flags = 1ULL << VIRTIO_NET_F_MAC,
87      .end = virtio_endof(struct virtio_net_config, mac)},
88     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
89      .end = virtio_endof(struct virtio_net_config, status)},
90     {.flags = 1ULL << VIRTIO_NET_F_MQ,
91      .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
92     {.flags = 1ULL << VIRTIO_NET_F_MTU,
93      .end = virtio_endof(struct virtio_net_config, mtu)},
94     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
95      .end = virtio_endof(struct virtio_net_config, duplex)},
96     {}
97 };
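/*
 * Each entry extends the visible config space up to '.end' when its feature
 * bit is offered; the realize path (later in this file) derives
 * n->config_size from this table, which is why virtio_net_get_config()
 * below only copies n->config_size bytes.  E.g. offering just
 * VIRTIO_NET_F_MAC and VIRTIO_NET_F_STATUS ends the config space right
 * after the 'status' field.
 */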
98 
99 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
100 {
101     VirtIONet *n = qemu_get_nic_opaque(nc);
102 
103     return &n->vqs[nc->queue_index];
104 }
105 
106 static int vq2q(int queue_index)
107 {
108     return queue_index / 2;
109 }
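/*
 * Virtqueues are laid out in RX/TX pairs (rx0, tx0, rx1, tx1, ...) with the
 * control queue last, so a virtqueue index maps to its queue pair by
 * dividing by two: e.g. vq index 5 is the TX side of pair 2.
 */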
110 
111 /* TODO
112  * - we could suppress RX interrupt if we were so inclined.
113  */
114 
115 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
116 {
117     VirtIONet *n = VIRTIO_NET(vdev);
118     struct virtio_net_config netcfg;
119 
120     virtio_stw_p(vdev, &netcfg.status, n->status);
121     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
122     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
123     memcpy(netcfg.mac, n->mac, ETH_ALEN);
124     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
125     netcfg.duplex = n->net_conf.duplex;
126     memcpy(config, &netcfg, n->config_size);
127 }
128 
129 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
130 {
131     VirtIONet *n = VIRTIO_NET(vdev);
132     struct virtio_net_config netcfg = {};
133 
134     memcpy(&netcfg, config, n->config_size);
135 
136     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
137         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
138         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
139         memcpy(n->mac, netcfg.mac, ETH_ALEN);
140         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
141     }
142 }
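/*
 * Note the guard above: a config-space MAC write is only honoured for legacy
 * drivers.  With VIRTIO_F_VERSION_1 the device config space is read-only for
 * the driver, and with VIRTIO_NET_F_CTRL_MAC_ADDR the MAC must be changed
 * through the control virtqueue instead.
 */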
143 
144 static bool virtio_net_started(VirtIONet *n, uint8_t status)
145 {
146     VirtIODevice *vdev = VIRTIO_DEVICE(n);
147     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
148         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
149 }
150 
151 static void virtio_net_announce_timer(void *opaque)
152 {
153     VirtIONet *n = opaque;
154     VirtIODevice *vdev = VIRTIO_DEVICE(n);
155 
156     n->announce_counter--;
157     n->status |= VIRTIO_NET_S_ANNOUNCE;
158     virtio_notify_config(vdev);
159 }
160 
161 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
162 {
163     VirtIODevice *vdev = VIRTIO_DEVICE(n);
164     NetClientState *nc = qemu_get_queue(n->nic);
165     int queues = n->multiqueue ? n->max_queues : 1;
166 
167     if (!get_vhost_net(nc->peer)) {
168         return;
169     }
170 
171     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
172         !!n->vhost_started) {
173         return;
174     }
175     if (!n->vhost_started) {
176         int r, i;
177 
178         if (n->needs_vnet_hdr_swap) {
179             error_report("backend does not support %s vnet headers; "
180                          "falling back on userspace virtio",
181                          virtio_is_big_endian(vdev) ? "BE" : "LE");
182             return;
183         }
184 
185         /* Any packets outstanding? Purge them to avoid touching rings
186          * when vhost is running.
187          */
188         for (i = 0;  i < queues; i++) {
189             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
190 
191             /* Purge both directions: TX and RX. */
192             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
193             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
194         }
195 
196         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
197             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
198             if (r < 0) {
199                 error_report("%uBytes MTU not supported by the backend",
200                              n->net_conf.mtu);
201 
202                 return;
203             }
204         }
205 
206         n->vhost_started = 1;
207         r = vhost_net_start(vdev, n->nic->ncs, queues);
208         if (r < 0) {
209             error_report("unable to start vhost net: %d: "
210                          "falling back on userspace virtio", -r);
211             n->vhost_started = 0;
212         }
213     } else {
214         vhost_net_stop(vdev, n->nic->ncs, queues);
215         n->vhost_started = 0;
216     }
217 }
218 
219 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
220                                           NetClientState *peer,
221                                           bool enable)
222 {
223     if (virtio_is_big_endian(vdev)) {
224         return qemu_set_vnet_be(peer, enable);
225     } else {
226         return qemu_set_vnet_le(peer, enable);
227     }
228 }
229 
230 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
231                                        int queues, bool enable)
232 {
233     int i;
234 
235     for (i = 0; i < queues; i++) {
236         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
237             enable) {
238             while (--i >= 0) {
239                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
240             }
241 
242             return true;
243         }
244     }
245 
246     return false;
247 }
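/*
 * The helper above returns true when enabling failed on some queue, i.e. the
 * backend cannot parse vnet headers in the guest's endianness; the caller
 * records this in n->needs_vnet_hdr_swap and virtio-net then byte-swaps the
 * headers itself (see virtio_net_hdr_swap()).
 */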
248 
249 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
250 {
251     VirtIODevice *vdev = VIRTIO_DEVICE(n);
252     int queues = n->multiqueue ? n->max_queues : 1;
253 
254     if (virtio_net_started(n, status)) {
255         /* Before using the device, we tell the network backend about the
256          * endianness to use when parsing vnet headers. If the backend
257  * can't do it, we fall back on fixing the headers in the core
258          * virtio-net code.
259          */
260         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
261                                                             queues, true);
262     } else if (virtio_net_started(n, vdev->status)) {
263         /* After using the device, we need to reset the network backend to
264          * the default (guest native endianness), otherwise the guest may
265          * lose network connectivity if it is rebooted into a different
266          * endianness.
267          */
268         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
269     }
270 }
271 
272 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
273 {
274     unsigned int dropped = virtqueue_drop_all(vq);
275     if (dropped) {
276         virtio_notify(vdev, vq);
277     }
278 }
279 
280 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
281 {
282     VirtIONet *n = VIRTIO_NET(vdev);
283     VirtIONetQueue *q;
284     int i;
285     uint8_t queue_status;
286 
287     virtio_net_vnet_endian_status(n, status);
288     virtio_net_vhost_status(n, status);
289 
290     for (i = 0; i < n->max_queues; i++) {
291         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
292         bool queue_started;
293         q = &n->vqs[i];
294 
295         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
296             queue_status = 0;
297         } else {
298             queue_status = status;
299         }
300         queue_started =
301             virtio_net_started(n, queue_status) && !n->vhost_started;
302 
303         if (queue_started) {
304             qemu_flush_queued_packets(ncs);
305         }
306 
307         if (!q->tx_waiting) {
308             continue;
309         }
310 
311         if (queue_started) {
312             if (q->tx_timer) {
313                 timer_mod(q->tx_timer,
314                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
315             } else {
316                 qemu_bh_schedule(q->tx_bh);
317             }
318         } else {
319             if (q->tx_timer) {
320                 timer_del(q->tx_timer);
321             } else {
322                 qemu_bh_cancel(q->tx_bh);
323             }
324             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
325                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
326                 vdev->vm_running) {
327                 /* If tx is waiting, we likely have some packets in the tx
328                  * queue and have disabled notification */
329                 q->tx_waiting = 0;
330                 virtio_queue_set_notification(q->tx_vq, 1);
331                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
332             }
333         }
334     }
335 }
336 
337 static void virtio_net_set_link_status(NetClientState *nc)
338 {
339     VirtIONet *n = qemu_get_nic_opaque(nc);
340     VirtIODevice *vdev = VIRTIO_DEVICE(n);
341     uint16_t old_status = n->status;
342 
343     if (nc->link_down)
344         n->status &= ~VIRTIO_NET_S_LINK_UP;
345     else
346         n->status |= VIRTIO_NET_S_LINK_UP;
347 
348     if (n->status != old_status)
349         virtio_notify_config(vdev);
350 
351     virtio_net_set_status(vdev, vdev->status);
352 }
353 
354 static void rxfilter_notify(NetClientState *nc)
355 {
356     VirtIONet *n = qemu_get_nic_opaque(nc);
357 
358     if (nc->rxfilter_notify_enabled) {
359         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
360         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
361                                               n->netclient_name, path);
362         g_free(path);
363 
364         /* disable event notification to avoid event flooding */
365         nc->rxfilter_notify_enabled = 0;
366     }
367 }
368 
369 static intList *get_vlan_table(VirtIONet *n)
370 {
371     intList *list, *entry;
372     int i, j;
373 
374     list = NULL;
375     for (i = 0; i < MAX_VLAN >> 5; i++) {
376         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
377             if (n->vlans[i] & (1U << j)) {
378                 entry = g_malloc0(sizeof(*entry));
379                 entry->value = (i << 5) + j;
380                 entry->next = list;
381                 list = entry;
382             }
383         }
384     }
385 
386     return list;
387 }
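/*
 * n->vlans is a MAX_VLAN-bit (4096-bit) bitmap held in 128 32-bit words:
 * VID v lives in word v >> 5, bit v & 0x1f, so e.g. VID 100 is word 3,
 * bit 4.  The loop above walks the set bits back into a list of VIDs for
 * the rx-filter query.
 */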
388 
389 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
390 {
391     VirtIONet *n = qemu_get_nic_opaque(nc);
392     VirtIODevice *vdev = VIRTIO_DEVICE(n);
393     RxFilterInfo *info;
394     strList *str_list, *entry;
395     int i;
396 
397     info = g_malloc0(sizeof(*info));
398     info->name = g_strdup(nc->name);
399     info->promiscuous = n->promisc;
400 
401     if (n->nouni) {
402         info->unicast = RX_STATE_NONE;
403     } else if (n->alluni) {
404         info->unicast = RX_STATE_ALL;
405     } else {
406         info->unicast = RX_STATE_NORMAL;
407     }
408 
409     if (n->nomulti) {
410         info->multicast = RX_STATE_NONE;
411     } else if (n->allmulti) {
412         info->multicast = RX_STATE_ALL;
413     } else {
414         info->multicast = RX_STATE_NORMAL;
415     }
416 
417     info->broadcast_allowed = !n->nobcast; /* nobcast set means broadcast is filtered out */
418     info->multicast_overflow = n->mac_table.multi_overflow;
419     info->unicast_overflow = n->mac_table.uni_overflow;
420 
421     info->main_mac = qemu_mac_strdup_printf(n->mac);
422 
423     str_list = NULL;
424     for (i = 0; i < n->mac_table.first_multi; i++) {
425         entry = g_malloc0(sizeof(*entry));
426         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
427         entry->next = str_list;
428         str_list = entry;
429     }
430     info->unicast_table = str_list;
431 
432     str_list = NULL;
433     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
434         entry = g_malloc0(sizeof(*entry));
435         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
436         entry->next = str_list;
437         str_list = entry;
438     }
439     info->multicast_table = str_list;
440     info->vlan_table = get_vlan_table(n);
441 
442     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
443         info->vlan = RX_STATE_ALL;
444     } else if (!info->vlan_table) {
445         info->vlan = RX_STATE_NONE;
446     } else {
447         info->vlan = RX_STATE_NORMAL;
448     }
449 
450     /* enable event notification after query */
451     nc->rxfilter_notify_enabled = 1;
452 
453     return info;
454 }
455 
456 static void virtio_net_reset(VirtIODevice *vdev)
457 {
458     VirtIONet *n = VIRTIO_NET(vdev);
459     int i;
460 
461     /* Reset back to compatibility mode */
462     n->promisc = 1;
463     n->allmulti = 0;
464     n->alluni = 0;
465     n->nomulti = 0;
466     n->nouni = 0;
467     n->nobcast = 0;
468     /* multiqueue is disabled by default */
469     n->curr_queues = 1;
470     timer_del(n->announce_timer);
471     n->announce_counter = 0;
472     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
473 
474     /* Flush any MAC and VLAN filter table state */
475     n->mac_table.in_use = 0;
476     n->mac_table.first_multi = 0;
477     n->mac_table.multi_overflow = 0;
478     n->mac_table.uni_overflow = 0;
479     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
480     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
481     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
482     memset(n->vlans, 0, MAX_VLAN >> 3);
483 
484     /* Flush any async TX */
485     for (i = 0;  i < n->max_queues; i++) {
486         NetClientState *nc = qemu_get_subqueue(n->nic, i);
487 
488         if (nc->peer) {
489             qemu_flush_or_purge_queued_packets(nc->peer, true);
490             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
491         }
492     }
493 }
494 
495 static void peer_test_vnet_hdr(VirtIONet *n)
496 {
497     NetClientState *nc = qemu_get_queue(n->nic);
498     if (!nc->peer) {
499         return;
500     }
501 
502     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
503 }
504 
505 static int peer_has_vnet_hdr(VirtIONet *n)
506 {
507     return n->has_vnet_hdr;
508 }
509 
510 static int peer_has_ufo(VirtIONet *n)
511 {
512     if (!peer_has_vnet_hdr(n))
513         return 0;
514 
515     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
516 
517     return n->has_ufo;
518 }
519 
520 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
521                                        int version_1)
522 {
523     int i;
524     NetClientState *nc;
525 
526     n->mergeable_rx_bufs = mergeable_rx_bufs;
527 
528     if (version_1) {
529         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
530     } else {
531         n->guest_hdr_len = n->mergeable_rx_bufs ?
532             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
533             sizeof(struct virtio_net_hdr);
534     }
535 
536     for (i = 0; i < n->max_queues; i++) {
537         nc = qemu_get_subqueue(n->nic, i);
538 
539         if (peer_has_vnet_hdr(n) &&
540             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
541             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
542             n->host_hdr_len = n->guest_hdr_len;
543         }
544     }
545 }
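/*
 * The guest header is either the 10-byte struct virtio_net_hdr or the
 * 12-byte struct virtio_net_hdr_mrg_rxbuf (the same header plus a 16-bit
 * num_buffers field).  With VIRTIO_F_VERSION_1 the 12-byte layout is always
 * used, even if mergeable RX buffers were not negotiated.
 */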
546 
547 static int virtio_net_max_tx_queue_size(VirtIONet *n)
548 {
549     NetClientState *peer = n->nic_conf.peers.ncs[0];
550 
551     /*
552      * Backends other than vhost-user don't support max queue size.
553      */
554     if (!peer) {
555         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
556     }
557 
558     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
559         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
560     }
561 
562     return VIRTQUEUE_MAX_SIZE;
563 }
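/*
 * In other words, only a vhost-user backend may exceed the historical
 * 256-entry TX default, up to VIRTQUEUE_MAX_SIZE (1024); the other backends
 * do not support configuring the queue size.
 */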
564 
565 static int peer_attach(VirtIONet *n, int index)
566 {
567     NetClientState *nc = qemu_get_subqueue(n->nic, index);
568 
569     if (!nc->peer) {
570         return 0;
571     }
572 
573     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
574         vhost_set_vring_enable(nc->peer, 1);
575     }
576 
577     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
578         return 0;
579     }
580 
581     if (n->max_queues == 1) {
582         return 0;
583     }
584 
585     return tap_enable(nc->peer);
586 }
587 
588 static int peer_detach(VirtIONet *n, int index)
589 {
590     NetClientState *nc = qemu_get_subqueue(n->nic, index);
591 
592     if (!nc->peer) {
593         return 0;
594     }
595 
596     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
597         vhost_set_vring_enable(nc->peer, 0);
598     }
599 
600     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
601         return 0;
602     }
603 
604     return tap_disable(nc->peer);
605 }
606 
607 static void virtio_net_set_queues(VirtIONet *n)
608 {
609     int i;
610     int r;
611 
612     if (n->nic->peer_deleted) {
613         return;
614     }
615 
616     for (i = 0; i < n->max_queues; i++) {
617         if (i < n->curr_queues) {
618             r = peer_attach(n, i);
619             assert(!r);
620         } else {
621             r = peer_detach(n, i);
622             assert(!r);
623         }
624     }
625 }
626 
627 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
628 
629 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
630                                         Error **errp)
631 {
632     VirtIONet *n = VIRTIO_NET(vdev);
633     NetClientState *nc = qemu_get_queue(n->nic);
634 
635     /* First, sync all features that virtio-net could possibly support */
636     features |= n->host_features;
637 
638     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
639 
640     if (!peer_has_vnet_hdr(n)) {
641         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
642         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
643         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
644         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
645 
646         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
647         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
648         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
649         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
650     }
651 
652     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
653         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
654         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
655     }
656 
657     if (!get_vhost_net(nc->peer)) {
658         return features;
659     }
660 
661     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
662     vdev->backend_features = features;
663 
664     if (n->mtu_bypass_backend &&
665             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
666         features |= (1ULL << VIRTIO_NET_F_MTU);
667     }
668 
669     return features;
670 }
671 
672 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
673 {
674     uint64_t features = 0;
675 
676     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
677      * but also these: */
678     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
679     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
680     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
681     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
682     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
683 
684     return features;
685 }
686 
687 static void virtio_net_apply_guest_offloads(VirtIONet *n)
688 {
689     qemu_set_offload(qemu_get_queue(n->nic)->peer,
690             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
691             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
692             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
693             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
694             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
695 }
696 
697 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
698 {
699     static const uint64_t guest_offloads_mask =
700         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
701         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
702         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
703         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
704         (1ULL << VIRTIO_NET_F_GUEST_UFO);
705 
706     return guest_offloads_mask & features;
707 }
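/*
 * Taking 'features' as uint32_t is harmless here: all of the guest offload
 * bits (GUEST_CSUM=1, GUEST_TSO4=7, GUEST_TSO6=8, GUEST_ECN=9, GUEST_UFO=10)
 * sit in the low 32 bits of the 64-bit feature word.
 */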
708 
709 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
710 {
711     VirtIODevice *vdev = VIRTIO_DEVICE(n);
712     return virtio_net_guest_offloads_by_features(vdev->guest_features);
713 }
714 
715 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
716 {
717     VirtIONet *n = VIRTIO_NET(vdev);
718     int i;
719 
720     if (n->mtu_bypass_backend &&
721             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
722         features &= ~(1ULL << VIRTIO_NET_F_MTU);
723     }
724 
725     virtio_net_set_multiqueue(n,
726                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
727 
728     virtio_net_set_mrg_rx_bufs(n,
729                                virtio_has_feature(features,
730                                                   VIRTIO_NET_F_MRG_RXBUF),
731                                virtio_has_feature(features,
732                                                   VIRTIO_F_VERSION_1));
733 
734     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
735         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
736     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
737         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
738 
739     if (n->has_vnet_hdr) {
740         n->curr_guest_offloads =
741             virtio_net_guest_offloads_by_features(features);
742         virtio_net_apply_guest_offloads(n);
743     }
744 
745     for (i = 0;  i < n->max_queues; i++) {
746         NetClientState *nc = qemu_get_subqueue(n->nic, i);
747 
748         if (!get_vhost_net(nc->peer)) {
749             continue;
750         }
751         vhost_net_ack_features(get_vhost_net(nc->peer), features);
752     }
753 
754     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
755         memset(n->vlans, 0, MAX_VLAN >> 3);
756     } else {
757         memset(n->vlans, 0xff, MAX_VLAN >> 3);
758     }
759 }
760 
761 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
762                                      struct iovec *iov, unsigned int iov_cnt)
763 {
764     uint8_t on;
765     size_t s;
766     NetClientState *nc = qemu_get_queue(n->nic);
767 
768     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
769     if (s != sizeof(on)) {
770         return VIRTIO_NET_ERR;
771     }
772 
773     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
774         n->promisc = on;
775     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
776         n->allmulti = on;
777     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
778         n->alluni = on;
779     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
780         n->nomulti = on;
781     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
782         n->nouni = on;
783     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
784         n->nobcast = on;
785     } else {
786         return VIRTIO_NET_ERR;
787     }
788 
789     rxfilter_notify(nc);
790 
791     return VIRTIO_NET_OK;
792 }
793 
794 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
795                                      struct iovec *iov, unsigned int iov_cnt)
796 {
797     VirtIODevice *vdev = VIRTIO_DEVICE(n);
798     uint64_t offloads;
799     size_t s;
800 
801     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
802         return VIRTIO_NET_ERR;
803     }
804 
805     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
806     if (s != sizeof(offloads)) {
807         return VIRTIO_NET_ERR;
808     }
809 
810     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
811         uint64_t supported_offloads;
812 
813         offloads = virtio_ldq_p(vdev, &offloads);
814 
815         if (!n->has_vnet_hdr) {
816             return VIRTIO_NET_ERR;
817         }
818 
819         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
820             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
821         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
822             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
823         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
824 
825         supported_offloads = virtio_net_supported_guest_offloads(n);
826         if (offloads & ~supported_offloads) {
827             return VIRTIO_NET_ERR;
828         }
829 
830         n->curr_guest_offloads = offloads;
831         virtio_net_apply_guest_offloads(n);
832 
833         return VIRTIO_NET_OK;
834     } else {
835         return VIRTIO_NET_ERR;
836     }
837 }
838 
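/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks back
 * to back, unicast addresses first, then multicast: each block is a 32-bit
 * entry count (in virtio endianness) followed by that many 6-byte MACs.
 * That is exactly the order the parser below consumes them in.
 */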
839 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
840                                  struct iovec *iov, unsigned int iov_cnt)
841 {
842     VirtIODevice *vdev = VIRTIO_DEVICE(n);
843     struct virtio_net_ctrl_mac mac_data;
844     size_t s;
845     NetClientState *nc = qemu_get_queue(n->nic);
846 
847     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
848         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
849             return VIRTIO_NET_ERR;
850         }
851         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
852         assert(s == sizeof(n->mac));
853         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
854         rxfilter_notify(nc);
855 
856         return VIRTIO_NET_OK;
857     }
858 
859     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
860         return VIRTIO_NET_ERR;
861     }
862 
863     int in_use = 0;
864     int first_multi = 0;
865     uint8_t uni_overflow = 0;
866     uint8_t multi_overflow = 0;
867     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
868 
869     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
870                    sizeof(mac_data.entries));
871     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
872     if (s != sizeof(mac_data.entries)) {
873         goto error;
874     }
875     iov_discard_front(&iov, &iov_cnt, s);
876 
877     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
878         goto error;
879     }
880 
881     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
882         s = iov_to_buf(iov, iov_cnt, 0, macs,
883                        mac_data.entries * ETH_ALEN);
884         if (s != mac_data.entries * ETH_ALEN) {
885             goto error;
886         }
887         in_use += mac_data.entries;
888     } else {
889         uni_overflow = 1;
890     }
891 
892     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
893 
894     first_multi = in_use;
895 
896     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
897                    sizeof(mac_data.entries));
898     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
899     if (s != sizeof(mac_data.entries)) {
900         goto error;
901     }
902 
903     iov_discard_front(&iov, &iov_cnt, s);
904 
905     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
906         goto error;
907     }
908 
909     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
910         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
911                        mac_data.entries * ETH_ALEN);
912         if (s != mac_data.entries * ETH_ALEN) {
913             goto error;
914         }
915         in_use += mac_data.entries;
916     } else {
917         multi_overflow = 1;
918     }
919 
920     n->mac_table.in_use = in_use;
921     n->mac_table.first_multi = first_multi;
922     n->mac_table.uni_overflow = uni_overflow;
923     n->mac_table.multi_overflow = multi_overflow;
924     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
925     g_free(macs);
926     rxfilter_notify(nc);
927 
928     return VIRTIO_NET_OK;
929 
930 error:
931     g_free(macs);
932     return VIRTIO_NET_ERR;
933 }
934 
935 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
936                                         struct iovec *iov, unsigned int iov_cnt)
937 {
938     VirtIODevice *vdev = VIRTIO_DEVICE(n);
939     uint16_t vid;
940     size_t s;
941     NetClientState *nc = qemu_get_queue(n->nic);
942 
943     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
944     vid = virtio_lduw_p(vdev, &vid);
945     if (s != sizeof(vid)) {
946         return VIRTIO_NET_ERR;
947     }
948 
949     if (vid >= MAX_VLAN)
950         return VIRTIO_NET_ERR;
951 
952     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
953         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
954     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
955         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
956     else
957         return VIRTIO_NET_ERR;
958 
959     rxfilter_notify(nc);
960 
961     return VIRTIO_NET_OK;
962 }
963 
964 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
965                                       struct iovec *iov, unsigned int iov_cnt)
966 {
967     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
968         n->status & VIRTIO_NET_S_ANNOUNCE) {
969         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
970         if (n->announce_counter) {
971             timer_mod(n->announce_timer,
972                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
973                       self_announce_delay(n->announce_counter));
974         }
975         return VIRTIO_NET_OK;
976     } else {
977         return VIRTIO_NET_ERR;
978     }
979 }
980 
981 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
982                                 struct iovec *iov, unsigned int iov_cnt)
983 {
984     VirtIODevice *vdev = VIRTIO_DEVICE(n);
985     struct virtio_net_ctrl_mq mq;
986     size_t s;
987     uint16_t queues;
988 
989     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
990     if (s != sizeof(mq)) {
991         return VIRTIO_NET_ERR;
992     }
993 
994     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
995         return VIRTIO_NET_ERR;
996     }
997 
998     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
999 
1000     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1001         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1002         queues > n->max_queues ||
1003         !n->multiqueue) {
1004         return VIRTIO_NET_ERR;
1005     }
1006 
1007     n->curr_queues = queues;
1008     /* stop the backend before changing the number of queues to avoid handling a
1009      * disabled queue */
1010     virtio_net_set_status(vdev, vdev->status);
1011     virtio_net_set_queues(n);
1012 
1013     return VIRTIO_NET_OK;
1014 }
1015 
1016 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1017 {
1018     VirtIONet *n = VIRTIO_NET(vdev);
1019     struct virtio_net_ctrl_hdr ctrl;
1020     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1021     VirtQueueElement *elem;
1022     size_t s;
1023     struct iovec *iov, *iov2;
1024     unsigned int iov_cnt;
1025 
1026     for (;;) {
1027         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1028         if (!elem) {
1029             break;
1030         }
1031         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1032             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1033             virtio_error(vdev, "virtio-net ctrl missing headers");
1034             virtqueue_detach_element(vq, elem, 0);
1035             g_free(elem);
1036             break;
1037         }
1038 
1039         iov_cnt = elem->out_num;
1040         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1041         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1042         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1043         if (s != sizeof(ctrl)) {
1044             status = VIRTIO_NET_ERR;
1045         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1046             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1047         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1048             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1049         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1050             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1051         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1052             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1053         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1054             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1055         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1056             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1057         }
1058 
1059         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1060         assert(s == sizeof(status));
1061 
1062         virtqueue_push(vq, elem, sizeof(status));
1063         virtio_notify(vdev, vq);
1064         g_free(iov2);
1065         g_free(elem);
1066     }
1067 }
1068 
1069 /* RX */
1070 
1071 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1072 {
1073     VirtIONet *n = VIRTIO_NET(vdev);
1074     int queue_index = vq2q(virtio_get_queue_index(vq));
1075 
1076     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1077 }
1078 
1079 static int virtio_net_can_receive(NetClientState *nc)
1080 {
1081     VirtIONet *n = qemu_get_nic_opaque(nc);
1082     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1083     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1084 
1085     if (!vdev->vm_running) {
1086         return 0;
1087     }
1088 
1089     if (nc->queue_index >= n->curr_queues) {
1090         return 0;
1091     }
1092 
1093     if (!virtio_queue_ready(q->rx_vq) ||
1094         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1095         return 0;
1096     }
1097 
1098     return 1;
1099 }
1100 
1101 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1102 {
1103     VirtIONet *n = q->n;
1104     if (virtio_queue_empty(q->rx_vq) ||
1105         (n->mergeable_rx_bufs &&
1106          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1107         virtio_queue_set_notification(q->rx_vq, 1);
1108 
1109         /* To avoid a race condition where the guest has made some buffers
1110          * available after the above check but before notification was
1111          * enabled, check for available buffers again.
1112          */
1113         if (virtio_queue_empty(q->rx_vq) ||
1114             (n->mergeable_rx_bufs &&
1115              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1116             return 0;
1117         }
1118     }
1119 
1120     virtio_queue_set_notification(q->rx_vq, 0);
1121     return 1;
1122 }
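/*
 * This is the usual notification-suppression pattern: while the guest keeps
 * us supplied with buffers we leave rx notifications disabled, and only when
 * the ring looks too empty do we re-enable them, re-checking afterwards to
 * close the race with a guest that refilled in between.
 */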
1123 
1124 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1125 {
1126     virtio_tswap16s(vdev, &hdr->hdr_len);
1127     virtio_tswap16s(vdev, &hdr->gso_size);
1128     virtio_tswap16s(vdev, &hdr->csum_start);
1129     virtio_tswap16s(vdev, &hdr->csum_offset);
1130 }
1131 
1132 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1133  * it never finds out that the packets don't have valid checksums.  This
1134  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1135  * fix this with Xen but it hasn't appeared in an upstream release of
1136  * dhclient yet.
1137  *
1138  * To avoid breaking existing guests, we catch udp packets and add
1139  * checksums.  This is terrible but it's better than hacking the guest
1140  * kernels.
1141  *
1142  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1143  * we should provide a mechanism to disable it to avoid polluting the host
1144  * cache.
1145  */
1146 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1147                                         uint8_t *buf, size_t size)
1148 {
1149     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1150         (size > 27 && size < 1500) && /* normal sized MTU */
1151         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1152         (buf[23] == 17) && /* ip.protocol == UDP */
1153         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1154         net_checksum_calculate(buf, size);
1155         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1156     }
1157 }
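/*
 * The magic offsets above assume an untagged frame with a 20-byte IPv4
 * header: bytes 12-13 are the ethertype (0x0800), byte 23 is 14 + 9, the
 * IPv4 protocol field (17 = UDP), and bytes 34-35 are 14 + 20, the UDP
 * source port (67 = bootps, i.e. a reply from a DHCP server).
 */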
1158 
1159 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1160                            const void *buf, size_t size)
1161 {
1162     if (n->has_vnet_hdr) {
1163         /* FIXME this cast is evil */
1164         void *wbuf = (void *)buf;
1165         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1166                                     size - n->host_hdr_len);
1167 
1168         if (n->needs_vnet_hdr_swap) {
1169             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1170         }
1171         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1172     } else {
1173         struct virtio_net_hdr hdr = {
1174             .flags = 0,
1175             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1176         };
1177         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1178     }
1179 }
1180 
1181 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1182 {
1183     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1184     static const uint8_t vlan[] = {0x81, 0x00};
1185     uint8_t *ptr = (uint8_t *)buf;
1186     int i;
1187 
1188     if (n->promisc)
1189         return 1;
1190 
1191     ptr += n->host_hdr_len;
1192 
1193     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1194         int vid = lduw_be_p(ptr + 14) & 0xfff;
1195         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1196             return 0;
1197     }
1198 
1199     if (ptr[0] & 1) { /* multicast: I/G bit set in the destination MAC */
1200         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1201             return !n->nobcast;
1202         } else if (n->nomulti) {
1203             return 0;
1204         } else if (n->allmulti || n->mac_table.multi_overflow) {
1205             return 1;
1206         }
1207 
1208         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1209             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1210                 return 1;
1211             }
1212         }
1213     } else { /* unicast */
1214         if (n->nouni) {
1215             return 0;
1216         } else if (n->alluni || n->mac_table.uni_overflow) {
1217             return 1;
1218         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1219             return 1;
1220         }
1221 
1222         for (i = 0; i < n->mac_table.first_multi; i++) {
1223             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1224                 return 1;
1225             }
1226         }
1227     }
1228 
1229     return 0;
1230 }
1231 
1232 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1233                                       size_t size)
1234 {
1235     VirtIONet *n = qemu_get_nic_opaque(nc);
1236     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1237     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1238     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1239     struct virtio_net_hdr_mrg_rxbuf mhdr;
1240     unsigned mhdr_cnt = 0;
1241     size_t offset, i, guest_offset;
1242 
1243     if (!virtio_net_can_receive(nc)) {
1244         return -1;
1245     }
1246 
1247     /* hdr_len refers to the header we supply to the guest */
1248     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1249         return 0;
1250     }
1251 
1252     if (!receive_filter(n, buf, size))
1253         return size;
1254 
1255     offset = i = 0;
1256 
1257     while (offset < size) {
1258         VirtQueueElement *elem;
1259         int len, total;
1260         const struct iovec *sg;
1261 
1262         total = 0;
1263 
1264         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1265         if (!elem) {
1266             if (i) {
1267                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1268                              "i %zd mergeable %d offset %zd, size %zd, "
1269                              "guest hdr len %zd, host hdr len %zd "
1270                              "guest features 0x%" PRIx64,
1271                              i, n->mergeable_rx_bufs, offset, size,
1272                              n->guest_hdr_len, n->host_hdr_len,
1273                              vdev->guest_features);
1274             }
1275             return -1;
1276         }
1277 
1278         if (elem->in_num < 1) {
1279             virtio_error(vdev,
1280                          "virtio-net receive queue contains no in buffers");
1281             virtqueue_detach_element(q->rx_vq, elem, 0);
1282             g_free(elem);
1283             return -1;
1284         }
1285 
1286         sg = elem->in_sg;
1287         if (i == 0) {
1288             assert(offset == 0);
1289             if (n->mergeable_rx_bufs) {
1290                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1291                                     sg, elem->in_num,
1292                                     offsetof(typeof(mhdr), num_buffers),
1293                                     sizeof(mhdr.num_buffers));
1294             }
1295 
1296             receive_header(n, sg, elem->in_num, buf, size);
1297             offset = n->host_hdr_len;
1298             total += n->guest_hdr_len;
1299             guest_offset = n->guest_hdr_len;
1300         } else {
1301             guest_offset = 0;
1302         }
1303 
1304         /* copy in packet.  ugh */
1305         len = iov_from_buf(sg, elem->in_num, guest_offset,
1306                            buf + offset, size - offset);
1307         total += len;
1308         offset += len;
1309         /* If buffers can't be merged, at this point we
1310          * must have consumed the complete packet.
1311          * Otherwise, drop it. */
1312         if (!n->mergeable_rx_bufs && offset < size) {
1313             virtqueue_unpop(q->rx_vq, elem, total);
1314             g_free(elem);
1315             return size;
1316         }
1317 
1318         /* signal other side */
1319         virtqueue_fill(q->rx_vq, elem, total, i++);
1320         g_free(elem);
1321     }
1322 
1323     if (mhdr_cnt) {
1324         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1325         iov_from_buf(mhdr_sg, mhdr_cnt,
1326                      0,
1327                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1328     }
1329 
1330     virtqueue_flush(q->rx_vq, i);
1331     virtio_notify(vdev, q->rx_vq);
1332 
1333     return size;
1334 }
1335 
1336 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1337                                   size_t size)
1338 {
1339     ssize_t r;
1340 
1341     rcu_read_lock();
1342     r = virtio_net_receive_rcu(nc, buf, size);
1343     rcu_read_unlock();
1344     return r;
1345 }
1346 
1347 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1348                                          const uint8_t *buf,
1349                                          VirtioNetRscUnit *unit)
1350 {
1351     uint16_t ip_hdrlen;
1352     struct ip_header *ip;
1353 
1354     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1355                               + sizeof(struct eth_header));
1356     unit->ip = (void *)ip;
1357     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1358     unit->ip_plen = &ip->ip_len;
1359     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1360     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1361     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1362 }
1363 
1364 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1365                                          const uint8_t *buf,
1366                                          VirtioNetRscUnit *unit)
1367 {
1368     struct ip6_header *ip6;
1369 
1370     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1371                                  + sizeof(struct eth_header));
1372     unit->ip = ip6;
1373     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1374     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1375                                         + sizeof(struct ip6_header));
1376     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1377 
1378     /* There is a difference between the payload length in IPv4 and IPv6:
1379        the IP header is excluded from the payload length in IPv6 */
1380     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1381 }
1382 
1383 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1384                                        VirtioNetRscSeg *seg)
1385 {
1386     int ret;
1387     struct virtio_net_hdr *h;
1388 
1389     h = (struct virtio_net_hdr *)seg->buf;
1390     h->flags = 0;
1391     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1392 
1393     if (seg->is_coalesced) {
1394         *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1395         *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1396         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1397         if (chain->proto == ETH_P_IP) {
1398             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1399         } else {
1400             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1401         }
1402     }
1403 
1404     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1405     QTAILQ_REMOVE(&chain->buffers, seg, next);
1406     g_free(seg->buf);
1407     g_free(seg);
1408 
1409     return ret;
1410 }
1411 
1412 static void virtio_net_rsc_purge(void *opq)
1413 {
1414     VirtioNetRscSeg *seg, *rn;
1415     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1416 
1417     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1418         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1419             chain->stat.purge_failed++;
1420             continue;
1421         }
1422     }
1423 
1424     chain->stat.timer++;
1425     if (!QTAILQ_EMPTY(&chain->buffers)) {
1426         timer_mod(chain->drain_timer,
1427               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1428     }
1429 }
1430 
1431 static void virtio_net_rsc_cleanup(VirtIONet *n)
1432 {
1433     VirtioNetRscChain *chain, *rn_chain;
1434     VirtioNetRscSeg *seg, *rn_seg;
1435 
1436     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1437         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1438             QTAILQ_REMOVE(&chain->buffers, seg, next);
1439             g_free(seg->buf);
1440             g_free(seg);
1441         }
1442 
1443         timer_del(chain->drain_timer);
1444         timer_free(chain->drain_timer);
1445         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1446         g_free(chain);
1447     }
1448 }
1449 
1450 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1451                                      NetClientState *nc,
1452                                      const uint8_t *buf, size_t size)
1453 {
1454     uint16_t hdr_len;
1455     VirtioNetRscSeg *seg;
1456 
1457     hdr_len = chain->n->guest_hdr_len;
1458     seg = g_malloc(sizeof(VirtioNetRscSeg));
1459     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1460         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1461     memcpy(seg->buf, buf, size);
1462     seg->size = size;
1463     seg->packets = 1;
1464     seg->dup_ack = 0;
1465     seg->is_coalesced = 0;
1466     seg->nc = nc;
1467 
1468     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1469     chain->stat.cache++;
1470 
1471     switch (chain->proto) {
1472     case ETH_P_IP:
1473         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1474         break;
1475     case ETH_P_IPV6:
1476         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1477         break;
1478     default:
1479         g_assert_not_reached();
1480     }
1481 }
1482 
1483 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1484                                          VirtioNetRscSeg *seg,
1485                                          const uint8_t *buf,
1486                                          struct tcp_header *n_tcp,
1487                                          struct tcp_header *o_tcp)
1488 {
1489     uint32_t nack, oack;
1490     uint16_t nwin, owin;
1491 
1492     nack = htonl(n_tcp->th_ack);
1493     nwin = htons(n_tcp->th_win);
1494     oack = htonl(o_tcp->th_ack);
1495     owin = htons(o_tcp->th_win);
1496 
1497     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1498         chain->stat.ack_out_of_win++;
1499         return RSC_FINAL;
1500     } else if (nack == oack) {
1501         /* duplicated ack or window probe */
1502         if (nwin == owin) {
1503             /* duplicated ack; bump the dup-ack count (the WHQL test allows up to 1) */
1504             chain->stat.dup_ack++;
1505             return RSC_FINAL;
1506         } else {
1507             /* Coalesce window update */
1508             o_tcp->th_win = n_tcp->th_win;
1509             chain->stat.win_update++;
1510             return RSC_COALESCE;
1511         }
1512     } else {
1513         /* pure ack, go to 'C', finalize */
1514         chain->stat.pure_ack++;
1515         return RSC_FINAL;
1516     }
1517 }
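/*
 * The (nack - oack) comparison above relies on unsigned wraparound:
 * ACK/sequence numbers are modulo 2^32, so the uint32_t difference is
 * "in window" exactly when it is below VIRTIO_NET_MAX_TCP_PAYLOAD.  The
 * data-coalescing path below applies the same trick to th_seq.
 */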
1518 
1519 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1520                                             VirtioNetRscSeg *seg,
1521                                             const uint8_t *buf,
1522                                             VirtioNetRscUnit *n_unit)
1523 {
1524     void *data;
1525     uint16_t o_ip_len;
1526     uint32_t nseq, oseq;
1527     VirtioNetRscUnit *o_unit;
1528 
1529     o_unit = &seg->unit;
1530     o_ip_len = htons(*o_unit->ip_plen);
1531     nseq = htonl(n_unit->tcp->th_seq);
1532     oseq = htonl(o_unit->tcp->th_seq);
1533 
1534     /* out of order or retransmitted. */
1535     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1536         chain->stat.data_out_of_win++;
1537         return RSC_FINAL;
1538     }
1539 
1540     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1541     if (nseq == oseq) {
1542         if ((o_unit->payload == 0) && n_unit->payload) {
1543             /* From no payload to payload: the normal case, not a dup ack etc. */
1544             chain->stat.data_after_pure_ack++;
1545             goto coalesce;
1546         } else {
1547             return virtio_net_rsc_handle_ack(chain, seg, buf,
1548                                              n_unit->tcp, o_unit->tcp);
1549         }
1550     } else if ((nseq - oseq) != o_unit->payload) {
1551         /* Not a consistent packet, out of order */
1552         chain->stat.data_out_of_order++;
1553         return RSC_FINAL;
1554     } else {
1555 coalesce:
1556         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1557             chain->stat.over_size++;
1558             return RSC_FINAL;
1559         }
1560 
1561         /* Here comes the right data: the payload length field differs between
1562            v4 and v6, so use the field value to update and record the new data len */
1563         o_unit->payload += n_unit->payload; /* update new data len */
1564 
1565         /* update field in ip header */
1566         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1567 
1568         /* Carry the 'PUSH' flag over: the WHQL test guide says 'PUSH' can be
1569            coalesced for Windows guests, while this may change the behavior for
1570            Linux guests (only if they use the RSC feature). */
1571         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1572 
1573         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1574         o_unit->tcp->th_win = n_unit->tcp->th_win;
1575 
1576         memmove(seg->buf + seg->size, data, n_unit->payload);
1577         seg->size += n_unit->payload;
1578         seg->packets++;
1579         chain->stat.coalesced++;
1580         return RSC_COALESCE;
1581     }
1582 }
1583 
1584 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1585                                         VirtioNetRscSeg *seg,
1586                                         const uint8_t *buf, size_t size,
1587                                         VirtioNetRscUnit *unit)
1588 {
1589     struct ip_header *ip1, *ip2;
1590 
1591     ip1 = (struct ip_header *)(unit->ip);
1592     ip2 = (struct ip_header *)(seg->unit.ip);
1593     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1594         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1595         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1596         chain->stat.no_match++;
1597         return RSC_NO_MATCH;
1598     }
1599 
1600     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1601 }
1602 
1603 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1604                                         VirtioNetRscSeg *seg,
1605                                         const uint8_t *buf, size_t size,
1606                                         VirtioNetRscUnit *unit)
1607 {
1608     struct ip6_header *ip1, *ip2;
1609 
1610     ip1 = (struct ip6_header *)(unit->ip);
1611     ip2 = (struct ip6_header *)(seg->unit.ip);
1612     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1613         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1614         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1615         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1616             chain->stat.no_match++;
1617             return RSC_NO_MATCH;
1618     }
1619 
1620     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1621 }
1622 
1623 /* Packets with 'SYN' should bypass; packets with any other control flag
1624  * should be sent only after a drain, to prevent reordering */
1625 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1626                                          struct tcp_header *tcp)
1627 {
1628     uint16_t tcp_hdr;
1629     uint16_t tcp_flag;
1630 
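         /*
          * The TCP data offset sits in the high nibble of th_offset_flags,
          * counted in 32-bit words; an option-less header has offset 5, so
          * (0x5000 & VIRTIO_NET_TCP_HDR_LENGTH) >> 10 == 20 bytes, which is
          * exactly sizeof(struct tcp_header).
          */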
1631     tcp_flag = htons(tcp->th_offset_flags);
1632     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1633     tcp_flag &= VIRTIO_NET_TCP_FLAG;
1635     if (tcp_flag & TH_SYN) {
1636         chain->stat.tcp_syn++;
1637         return RSC_BYPASS;
1638     }
1639 
1640     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1641         chain->stat.tcp_ctrl_drain++;
1642         return RSC_FINAL;
1643     }
1644 
1645     if (tcp_hdr > sizeof(struct tcp_header)) {
1646         chain->stat.tcp_all_opt++;
1647         return RSC_FINAL;
1648     }
1649 
1650     return RSC_CANDIDATE;
1651 }
1652 
1653 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1654                                          NetClientState *nc,
1655                                          const uint8_t *buf, size_t size,
1656                                          VirtioNetRscUnit *unit)
1657 {
1658     int ret;
1659     VirtioNetRscSeg *seg, *nseg;
1660 
1661     if (QTAILQ_EMPTY(&chain->buffers)) {
1662         chain->stat.empty_cache++;
1663         virtio_net_rsc_cache_buf(chain, nc, buf, size);
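             /*
              * Arm the purge timer so a lone cached segment is still
              * delivered after rsc_timeout ns even if no follow-up arrives.
              */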
1664         timer_mod(chain->drain_timer,
1665               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1666         return size;
1667     }
1668 
1669     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1670         if (chain->proto == ETH_P_IP) {
1671             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1672         } else {
1673             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1674         }
1675 
1676         if (ret == RSC_FINAL) {
1677             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1678                 /* Send failed */
1679                 chain->stat.final_failed++;
1680                 return 0;
1681             }
1682 
1683             /* Send current packet */
1684             return virtio_net_do_receive(nc, buf, size);
1685         } else if (ret == RSC_NO_MATCH) {
1686             continue;
1687         } else {
1688             /* Coalesced: flag it so the IPv4 checksum is recalculated */
1689             seg->is_coalesced = 1;
1690             return size;
1691         }
1692     }
1693 
1694     chain->stat.no_match_cache++;
1695     virtio_net_rsc_cache_buf(chain, nc, buf, size);
1696     return size;
1697 }
1698 
1699 /* Drain a connection's cached data to avoid out-of-order segments */
1700 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1701                                         NetClientState *nc,
1702                                         const uint8_t *buf, size_t size,
1703                                         uint16_t ip_start, uint16_t ip_size,
1704                                         uint16_t tcp_port)
1705 {
1706     VirtioNetRscSeg *seg, *nseg;
1707     uint32_t ppair1, ppair2;
1708 
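         /*
          * th_sport and th_dport are the first two 16-bit fields of the TCP
          * header, so one 32-bit load at 'tcp_port' covers both ports; byte
          * order is irrelevant for an equality test.
          */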
1709     ppair1 = *(uint32_t *)(buf + tcp_port);
1710     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1711         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1712         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1713             || (ppair1 != ppair2)) {
1714             continue;
1715         }
1716         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1717             chain->stat.drain_failed++;
1718         }
1719 
1720         break;
1721     }
1722 
1723     return virtio_net_do_receive(nc, buf, size);
1724 }
1725 
1726 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1727                                             struct ip_header *ip,
1728                                             const uint8_t *buf, size_t size)
1729 {
1730     uint16_t ip_len;
1731 
1732     /* Not an ipv4 packet */
1733     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1734         chain->stat.ip_option++;
1735         return RSC_BYPASS;
1736     }
1737 
1738     /* Don't handle packets with ip option */
1739     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1740         chain->stat.ip_option++;
1741         return RSC_BYPASS;
1742     }
1743 
1744     if (ip->ip_p != IPPROTO_TCP) {
1745         chain->stat.bypass_not_tcp++;
1746         return RSC_BYPASS;
1747     }
1748 
1749     /* Don't handle potentially fragmented packets (DF not set) */
1750     if (!(htons(ip->ip_off) & IP_DF)) {
1751         chain->stat.ip_frag++;
1752         return RSC_BYPASS;
1753     }
1754 
1755     /* Don't handle packets with ecn flag */
1756     if (IPTOS_ECN(ip->ip_tos)) {
1757         chain->stat.ip_ecn++;
1758         return RSC_BYPASS;
1759     }
1760 
1761     ip_len = htons(ip->ip_len);
1762     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1763         || ip_len > (size - chain->n->guest_hdr_len -
1764                      sizeof(struct eth_header))) {
1765         chain->stat.ip_hacked++;
1766         return RSC_BYPASS;
1767     }
1768 
1769     return RSC_CANDIDATE;
1770 }
1771 
1772 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1773                                       NetClientState *nc,
1774                                       const uint8_t *buf, size_t size)
1775 {
1776     int32_t ret;
1777     uint16_t hdr_len;
1778     VirtioNetRscUnit unit;
1779 
1780     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1781 
1782     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1783         + sizeof(struct tcp_header))) {
1784         chain->stat.bypass_not_tcp++;
1785         return virtio_net_do_receive(nc, buf, size);
1786     }
1787 
1788     virtio_net_rsc_extract_unit4(chain, buf, &unit);
1789     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1790         != RSC_CANDIDATE) {
1791         return virtio_net_do_receive(nc, buf, size);
1792     }
1793 
1794     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1795     if (ret == RSC_BYPASS) {
1796         return virtio_net_do_receive(nc, buf, size);
1797     } else if (ret == RSC_FINAL) {
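             /*
              * ip_start points at the IPv4 source address, 12 bytes into the
              * IP header; saddr plus daddr span VIRTIO_NET_IP4_ADDR_SIZE
              * bytes.  The last argument is the offset of the TCP header,
              * whose first four bytes hold the port pair.
              */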
1798         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1799                 ((hdr_len + sizeof(struct eth_header)) + 12),
1800                 VIRTIO_NET_IP4_ADDR_SIZE,
1801                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1802     }
1803 
1804     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1805 }
1806 
1807 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1808                                             struct ip6_header *ip6,
1809                                             const uint8_t *buf, size_t size)
1810 {
1811     uint16_t ip_len;
1812 
1813     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1814         != IP_HEADER_VERSION_6) {
1815         return RSC_BYPASS;
1816     }
1817 
1818     /* Extension headers and protocol are both checked: only plain TCP passes */
1819     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1820         chain->stat.bypass_not_tcp++;
1821         return RSC_BYPASS;
1822     }
1823 
1824     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1825     if (ip_len < sizeof(struct tcp_header) ||
1826         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1827                   - sizeof(struct ip6_header))) {
1828         chain->stat.ip_hacked++;
1829         return RSC_BYPASS;
1830     }
1831 
1832     /* Don't handle packets with ecn flag */
1833     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1834         chain->stat.ip_ecn++;
1835         return RSC_BYPASS;
1836     }
1837 
1838     return RSC_CANDIDATE;
1839 }
1840 
1841 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1842                                       const uint8_t *buf, size_t size)
1843 {
1844     int32_t ret;
1845     uint16_t hdr_len;
1846     VirtioNetRscChain *chain;
1847     VirtioNetRscUnit unit;
1848 
1849     chain = (VirtioNetRscChain *)opq;
1850     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1851 
1852     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1853         + sizeof(struct tcp_header))) {
1854         return virtio_net_do_receive(nc, buf, size);
1855     }
1856 
1857     virtio_net_rsc_extract_unit6(chain, buf, &unit);
1858     if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
1859         != RSC_CANDIDATE) {
1860         return virtio_net_do_receive(nc, buf, size);
1861     }
1862 
1863     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1864     if (ret == RSC_BYPASS) {
1865         return virtio_net_do_receive(nc, buf, size);
1866     } else if (ret == RSC_FINAL) {
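             /*
              * ip_start points at the IPv6 source address, 8 bytes into the
              * IPv6 header; the two addresses span VIRTIO_NET_IP6_ADDR_SIZE
              * bytes.
              */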
1867         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1868                 ((hdr_len + sizeof(struct eth_header)) + 8),
1869                 VIRTIO_NET_IP6_ADDR_SIZE,
1870                 hdr_len + sizeof(struct eth_header)
1871                 + sizeof(struct ip6_header));
1872     }
1873 
1874     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1875 }
1876 
1877 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1878                                                       NetClientState *nc,
1879                                                       uint16_t proto)
1880 {
1881     VirtioNetRscChain *chain;
1882 
1883     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1884         return NULL;
1885     }
1886 
1887     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1888         if (chain->proto == proto) {
1889             return chain;
1890         }
1891     }
1892 
1893     chain = g_malloc(sizeof(*chain));
1894     chain->n = n;
1895     chain->proto = proto;
1896     if (proto == (uint16_t)ETH_P_IP) {
1897         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1898         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1899     } else {
1900         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1901         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1902     }
1903     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1904                                       virtio_net_rsc_purge, chain);
1905     memset(&chain->stat, 0, sizeof(chain->stat));
1906 
1907     QTAILQ_INIT(&chain->buffers);
1908     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1909 
1910     return chain;
1911 }
1912 
1913 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1914                                       const uint8_t *buf,
1915                                       size_t size)
1916 {
1917     uint16_t proto;
1918     VirtioNetRscChain *chain;
1919     struct eth_header *eth;
1920     VirtIONet *n;
1921 
1922     n = qemu_get_nic_opaque(nc);
1923     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1924         return virtio_net_do_receive(nc, buf, size);
1925     }
1926 
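         /*
          * Note: the length check above uses host_hdr_len while the Ethernet
          * header is located via guest_hdr_len; the RSC path effectively
          * assumes the two header lengths match.
          */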
1927     eth = (struct eth_header *)(buf + n->guest_hdr_len);
1928     proto = htons(eth->h_proto);
1929 
1930     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1931     if (chain) {
1932         chain->stat.received++;
1933         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1934             return virtio_net_rsc_receive4(chain, nc, buf, size);
1935         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1936             return virtio_net_rsc_receive6(chain, nc, buf, size);
1937         }
1938     }
1939     return virtio_net_do_receive(nc, buf, size);
1940 }
1941 
1942 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1943                                   size_t size)
1944 {
1945     VirtIONet *n = qemu_get_nic_opaque(nc);
1946     if (n->rsc4_enabled || n->rsc6_enabled) {
1947         return virtio_net_rsc_receive(nc, buf, size);
1948     } else {
1949         return virtio_net_do_receive(nc, buf, size);
1950     }
1951 }
1952 
1953 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1954 
1955 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1956 {
1957     VirtIONet *n = qemu_get_nic_opaque(nc);
1958     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1959     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1960 
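         /*
          * Completion callback for qemu_sendv_packet_async(): push the
          * element parked in async_tx, re-enable guest notifications and
          * resume flushing the queue.
          */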
1961     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1962     virtio_notify(vdev, q->tx_vq);
1963 
1964     g_free(q->async_tx.elem);
1965     q->async_tx.elem = NULL;
1966 
1967     virtio_queue_set_notification(q->tx_vq, 1);
1968     virtio_net_flush_tx(q);
1969 }
1970 
1971 /* TX */
1972 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
1973 {
1974     VirtIONet *n = q->n;
1975     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1976     VirtQueueElement *elem;
1977     int32_t num_packets = 0;
1978     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
1979     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1980         return num_packets;
1981     }
1982 
1983     if (q->async_tx.elem) {
1984         virtio_queue_set_notification(q->tx_vq, 0);
1985         return num_packets;
1986     }
1987 
1988     for (;;) {
1989         ssize_t ret;
1990         unsigned int out_num;
1991         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
1992         struct virtio_net_hdr_mrg_rxbuf mhdr;
1993 
1994         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
1995         if (!elem) {
1996             break;
1997         }
1998 
1999         out_num = elem->out_num;
2000         out_sg = elem->out_sg;
2001         if (out_num < 1) {
2002             virtio_error(vdev, "virtio-net header not in first element");
2003             virtqueue_detach_element(q->tx_vq, elem, 0);
2004             g_free(elem);
2005             return -EINVAL;
2006         }
2007 
2008         if (n->has_vnet_hdr) {
2009             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2010                 n->guest_hdr_len) {
2011                 virtio_error(vdev, "virtio-net header incorrect");
2012                 virtqueue_detach_element(q->tx_vq, elem, 0);
2013                 g_free(elem);
2014                 return -EINVAL;
2015             }
2016             if (n->needs_vnet_hdr_swap) {
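                     /*
                      * Build an iovec whose first element points at the
                      * byte-swapped copy of the header and whose remaining
                      * elements alias the guest buffers starting right after
                      * the guest header.
                      */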
2017                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2018                 sg2[0].iov_base = &mhdr;
2019                 sg2[0].iov_len = n->guest_hdr_len;
2020                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2021                                    out_sg, out_num,
2022                                    n->guest_hdr_len, -1);
2023                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2024                     goto drop;
2025                 }
2026                 out_num += 1;
2027                 out_sg = sg2;
2028             }
2029         }
2030         /*
2031          * If host wants to see the guest header as is, we can
2032          * pass it on unchanged. Otherwise, copy just the parts
2033          * that host is interested in.
2034          */
2035         assert(n->host_hdr_len <= n->guest_hdr_len);
2036         if (n->host_hdr_len != n->guest_hdr_len) {
2037             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2038                                        out_sg, out_num,
2039                                        0, n->host_hdr_len);
2040             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2041                              out_sg, out_num,
2042                              n->guest_hdr_len, -1);
2043             out_num = sg_num;
2044             out_sg = sg;
2045         }
2046 
2047         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2048                                       out_sg, out_num, virtio_net_tx_complete);
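             /*
              * A return of 0 means the backend queued the packet; park the
              * element until virtio_net_tx_complete() runs and take no tx
              * notifications in the meantime.
              */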
2049         if (ret == 0) {
2050             virtio_queue_set_notification(q->tx_vq, 0);
2051             q->async_tx.elem = elem;
2052             return -EBUSY;
2053         }
2054 
2055 drop:
2056         virtqueue_push(q->tx_vq, elem, 0);
2057         virtio_notify(vdev, q->tx_vq);
2058         g_free(elem);
2059 
2060         if (++num_packets >= n->tx_burst) {
2061             break;
2062         }
2063     }
2064     return num_packets;
2065 }
2066 
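     /*
      * Two tx mitigation strategies, selected with tx=timer|bh (see
      * virtio_net_add_queue): the timer variant batches guest kicks and
      * flushes once tx_timeout ns have passed, while the bottom-half
      * variant flushes from a BH scheduled directly on each kick.
      */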
2067 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2068 {
2069     VirtIONet *n = VIRTIO_NET(vdev);
2070     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2071 
2072     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2073         virtio_net_drop_tx_queue_data(vdev, vq);
2074         return;
2075     }
2076 
2077     /* This happens when device was stopped but VCPU wasn't. */
2078     if (!vdev->vm_running) {
2079         q->tx_waiting = 1;
2080         return;
2081     }
2082 
2083     if (q->tx_waiting) {
2084         virtio_queue_set_notification(vq, 1);
2085         timer_del(q->tx_timer);
2086         q->tx_waiting = 0;
2087         if (virtio_net_flush_tx(q) == -EINVAL) {
2088             return;
2089         }
2090     } else {
2091         timer_mod(q->tx_timer,
2092                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2093         q->tx_waiting = 1;
2094         virtio_queue_set_notification(vq, 0);
2095     }
2096 }
2097 
2098 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2099 {
2100     VirtIONet *n = VIRTIO_NET(vdev);
2101     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2102 
2103     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2104         virtio_net_drop_tx_queue_data(vdev, vq);
2105         return;
2106     }
2107 
2108     if (unlikely(q->tx_waiting)) {
2109         return;
2110     }
2111     q->tx_waiting = 1;
2112     /* This happens when device was stopped but VCPU wasn't. */
2113     if (!vdev->vm_running) {
2114         return;
2115     }
2116     virtio_queue_set_notification(vq, 0);
2117     qemu_bh_schedule(q->tx_bh);
2118 }
2119 
2120 static void virtio_net_tx_timer(void *opaque)
2121 {
2122     VirtIONetQueue *q = opaque;
2123     VirtIONet *n = q->n;
2124     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2125     /* This happens when device was stopped but BH wasn't. */
2126     if (!vdev->vm_running) {
2127         /* Make sure tx waiting is set, so we'll run when restarted. */
2128         assert(q->tx_waiting);
2129         return;
2130     }
2131 
2132     q->tx_waiting = 0;
2133 
2134     /* Just in case the driver is not ready any more */
2135     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2136         return;
2137     }
2138 
2139     virtio_queue_set_notification(q->tx_vq, 1);
2140     virtio_net_flush_tx(q);
2141 }
2142 
2143 static void virtio_net_tx_bh(void *opaque)
2144 {
2145     VirtIONetQueue *q = opaque;
2146     VirtIONet *n = q->n;
2147     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2148     int32_t ret;
2149 
2150     /* This happens when device was stopped but BH wasn't. */
2151     if (!vdev->vm_running) {
2152         /* Make sure tx waiting is set, so we'll run when restarted. */
2153         assert(q->tx_waiting);
2154         return;
2155     }
2156 
2157     q->tx_waiting = 0;
2158 
2159     /* Just in case the driver is not ready any more */
2160     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2161         return;
2162     }
2163 
2164     ret = virtio_net_flush_tx(q);
2165     if (ret == -EBUSY || ret == -EINVAL) {
2166         return; /* Notification re-enable handled by tx_complete or device
2167                  * broken */
2168     }
2169 
2170     /* If we flush a full burst of packets, assume there are
2171      * more coming and immediately reschedule */
2172     if (ret >= n->tx_burst) {
2173         qemu_bh_schedule(q->tx_bh);
2174         q->tx_waiting = 1;
2175         return;
2176     }
2177 
2178     /* If less than a full burst, re-enable notification and flush
2179      * anything that may have come in while we weren't looking.  If
2180      * we find something, assume the guest is still active and reschedule */
2181     virtio_queue_set_notification(q->tx_vq, 1);
2182     ret = virtio_net_flush_tx(q);
2183     if (ret == -EINVAL) {
2184         return;
2185     } else if (ret > 0) {
2186         virtio_queue_set_notification(q->tx_vq, 0);
2187         qemu_bh_schedule(q->tx_bh);
2188         q->tx_waiting = 1;
2189     }
2190 }
2191 
2192 static void virtio_net_add_queue(VirtIONet *n, int index)
2193 {
2194     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2195 
2196     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2197                                            virtio_net_handle_rx);
2198 
2199     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2200         n->vqs[index].tx_vq =
2201             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2202                              virtio_net_handle_tx_timer);
2203         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2204                                               virtio_net_tx_timer,
2205                                               &n->vqs[index]);
2206     } else {
2207         n->vqs[index].tx_vq =
2208             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2209                              virtio_net_handle_tx_bh);
2210         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2211     }
2212 
2213     n->vqs[index].tx_waiting = 0;
2214     n->vqs[index].n = n;
2215 }
2216 
2217 static void virtio_net_del_queue(VirtIONet *n, int index)
2218 {
2219     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2220     VirtIONetQueue *q = &n->vqs[index];
2221     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2222 
2223     qemu_purge_queued_packets(nc);
2224 
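         /* rx lives at vq index * 2, its tx partner at index * 2 + 1 */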
2225     virtio_del_queue(vdev, index * 2);
2226     if (q->tx_timer) {
2227         timer_del(q->tx_timer);
2228         timer_free(q->tx_timer);
2229         q->tx_timer = NULL;
2230     } else {
2231         qemu_bh_delete(q->tx_bh);
2232         q->tx_bh = NULL;
2233     }
2234     q->tx_waiting = 0;
2235     virtio_del_queue(vdev, index * 2 + 1);
2236 }
2237 
2238 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2239 {
2240     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2241     int old_num_queues = virtio_get_num_queues(vdev);
2242     int new_num_queues = new_max_queues * 2 + 1;
2243     int i;
2244 
2245     assert(old_num_queues >= 3);
2246     assert(old_num_queues % 2 == 1);
2247 
2248     if (old_num_queues == new_num_queues) {
2249         return;
2250     }
2251 
2252     /*
2253      * We always need to remove and add ctrl vq if
2254      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2255      * and then we only enter one of the following two loops.
2256      */
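         /*
          * Example: growing from 2 queue pairs (5 vqs: rx0,tx0,rx1,tx1,ctrl)
          * to 3 pairs (7 vqs) deletes ctrl at index 4, adds the rx2/tx2 pair
          * at indexes 4/5 in the second loop, then re-adds ctrl at index 6.
          */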
2257     virtio_del_queue(vdev, old_num_queues - 1);
2258 
2259     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2260         /* new_num_queues < old_num_queues */
2261         virtio_net_del_queue(n, i / 2);
2262     }
2263 
2264     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2265         /* new_num_queues > old_num_queues */
2266         virtio_net_add_queue(n, i / 2);
2267     }
2268 
2269     /* add ctrl_vq last */
2270     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2271 }
2272 
2273 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2274 {
2275     int max = multiqueue ? n->max_queues : 1;
2276 
2277     n->multiqueue = multiqueue;
2278     virtio_net_change_num_queues(n, max);
2279 
2280     virtio_net_set_queues(n);
2281 }
2282 
2283 static int virtio_net_post_load_device(void *opaque, int version_id)
2284 {
2285     VirtIONet *n = opaque;
2286     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2287     int i, link_down;
2288 
2289     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2290                                virtio_vdev_has_feature(vdev,
2291                                                        VIRTIO_F_VERSION_1));
2292 
2293     /* MAC_TABLE_ENTRIES may be different from the saved image */
2294     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2295         n->mac_table.in_use = 0;
2296     }
2297 
2298     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2299         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2300     }
2301 
2302     if (peer_has_vnet_hdr(n)) {
2303         virtio_net_apply_guest_offloads(n);
2304     }
2305 
2306     virtio_net_set_queues(n);
2307 
2308     /* Find the first multicast entry in the saved MAC filter */
2309     for (i = 0; i < n->mac_table.in_use; i++) {
2310         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2311             break;
2312         }
2313     }
2314     n->mac_table.first_multi = i;
2315 
2316     /* nc.link_down can't be migrated, so infer link_down according
2317      * to link status bit in n->status */
2318     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2319     for (i = 0; i < n->max_queues; i++) {
2320         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2321     }
2322 
2323     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2324         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2325         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
2326         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
2327     }
2328 
2329     return 0;
2330 }
2331 
2332 /* tx_waiting field of a VirtIONetQueue */
2333 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2334     .name = "virtio-net-queue-tx_waiting",
2335     .fields = (VMStateField[]) {
2336         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2337         VMSTATE_END_OF_LIST()
2338     },
2339 };
2340 
2341 static bool max_queues_gt_1(void *opaque, int version_id)
2342 {
2343     return VIRTIO_NET(opaque)->max_queues > 1;
2344 }
2345 
2346 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2347 {
2348     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2349                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2350 }
2351 
2352 static bool mac_table_fits(void *opaque, int version_id)
2353 {
2354     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2355 }
2356 
2357 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2358 {
2359     return !mac_table_fits(opaque, version_id);
2360 }
2361 
2362 /* This temporary type is shared by all the WITH_TMP methods
2363  * although only some fields are used by each.
2364  */
2365 struct VirtIONetMigTmp {
2366     VirtIONet      *parent;
2367     VirtIONetQueue *vqs_1;
2368     uint16_t        curr_queues_1;
2369     uint8_t         has_ufo;
2370     uint32_t        has_vnet_hdr;
2371 };
2372 
2373 /* The 2nd and subsequent tx_waiting flags are loaded later than
2374  * the 1st entry in the queues and only if there's more than one
2375  * entry.  We use the tmp mechanism to calculate a temporary
2376  * pointer and count and also validate the count.
2377  */
2378 
2379 static int virtio_net_tx_waiting_pre_save(void *opaque)
2380 {
2381     struct VirtIONetMigTmp *tmp = opaque;
2382 
2383     tmp->vqs_1 = tmp->parent->vqs + 1;
2384     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2385     if (tmp->parent->curr_queues == 0) {
2386         tmp->curr_queues_1 = 0;
2387     }
2388 
2389     return 0;
2390 }
2391 
2392 static int virtio_net_tx_waiting_pre_load(void *opaque)
2393 {
2394     struct VirtIONetMigTmp *tmp = opaque;
2395 
2396     /* Reuse the pointer setup from save */
2397     virtio_net_tx_waiting_pre_save(opaque);
2398 
2399     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2400         error_report("virtio-net: curr_queues %x > max_queues %x",
2401             tmp->parent->curr_queues, tmp->parent->max_queues);
2402 
2403         return -EINVAL;
2404     }
2405 
2406     return 0; /* all good */
2407 }
2408 
2409 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2410     .name      = "virtio-net-tx_waiting",
2411     .pre_load  = virtio_net_tx_waiting_pre_load,
2412     .pre_save  = virtio_net_tx_waiting_pre_save,
2413     .fields    = (VMStateField[]) {
2414         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2415                                      curr_queues_1,
2416                                      vmstate_virtio_net_queue_tx_waiting,
2417                                      struct VirtIONetQueue),
2418         VMSTATE_END_OF_LIST()
2419     },
2420 };
2421 
2422 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2423  * flag set we need to check that we have it
2424  */
2425 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2426 {
2427     struct VirtIONetMigTmp *tmp = opaque;
2428 
2429     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2430         error_report("virtio-net: saved image requires TUN_F_UFO support");
2431         return -EINVAL;
2432     }
2433 
2434     return 0;
2435 }
2436 
2437 static int virtio_net_ufo_pre_save(void *opaque)
2438 {
2439     struct VirtIONetMigTmp *tmp = opaque;
2440 
2441     tmp->has_ufo = tmp->parent->has_ufo;
2442 
2443     return 0;
2444 }
2445 
2446 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2447     .name      = "virtio-net-ufo",
2448     .post_load = virtio_net_ufo_post_load,
2449     .pre_save  = virtio_net_ufo_pre_save,
2450     .fields    = (VMStateField[]) {
2451         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2452         VMSTATE_END_OF_LIST()
2453     },
2454 };
2455 
2456 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2457  * flag set we need to check that we have it
2458  */
2459 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2460 {
2461     struct VirtIONetMigTmp *tmp = opaque;
2462 
2463     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2464         error_report("virtio-net: saved image requires vnet_hdr=on");
2465         return -EINVAL;
2466     }
2467 
2468     return 0;
2469 }
2470 
2471 static int virtio_net_vnet_pre_save(void *opaque)
2472 {
2473     struct VirtIONetMigTmp *tmp = opaque;
2474 
2475     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2476 
2477     return 0;
2478 }
2479 
2480 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2481     .name      = "virtio-net-vnet",
2482     .post_load = virtio_net_vnet_post_load,
2483     .pre_save  = virtio_net_vnet_pre_save,
2484     .fields    = (VMStateField[]) {
2485         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2486         VMSTATE_END_OF_LIST()
2487     },
2488 };
2489 
2490 static const VMStateDescription vmstate_virtio_net_device = {
2491     .name = "virtio-net-device",
2492     .version_id = VIRTIO_NET_VM_VERSION,
2493     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2494     .post_load = virtio_net_post_load_device,
2495     .fields = (VMStateField[]) {
2496         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2497         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2498                                vmstate_virtio_net_queue_tx_waiting,
2499                                VirtIONetQueue),
2500         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2501         VMSTATE_UINT16(status, VirtIONet),
2502         VMSTATE_UINT8(promisc, VirtIONet),
2503         VMSTATE_UINT8(allmulti, VirtIONet),
2504         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2505 
2506         /* Guarded pair: if it fits we load it, else we throw it away
2507          * - this can happen if the source has a larger MAC table; post-load
2508          * sets flags in this case.
2509          */
2510         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2511                                 0, mac_table_fits, mac_table.in_use,
2512                                  ETH_ALEN),
2513         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2514                                      mac_table.in_use, ETH_ALEN),
2515 
2516         /* Note: This is an array of uint32's that's always been saved as a
2517          * buffer; hold onto your endiannesses; it's actually used as a bitmap
2518          * but backed by those uint32 words.
2519          */
2520         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2521         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2522                          vmstate_virtio_net_has_vnet),
2523         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2524         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2525         VMSTATE_UINT8(alluni, VirtIONet),
2526         VMSTATE_UINT8(nomulti, VirtIONet),
2527         VMSTATE_UINT8(nouni, VirtIONet),
2528         VMSTATE_UINT8(nobcast, VirtIONet),
2529         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2530                          vmstate_virtio_net_has_ufo),
2531         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2532                             vmstate_info_uint16_equal, uint16_t),
2533         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2534         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2535                          vmstate_virtio_net_tx_waiting),
2536         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2537                             has_ctrl_guest_offloads),
2538         VMSTATE_END_OF_LIST()
2539     },
2540 };
2541 
2542 static NetClientInfo net_virtio_info = {
2543     .type = NET_CLIENT_DRIVER_NIC,
2544     .size = sizeof(NICState),
2545     .can_receive = virtio_net_can_receive,
2546     .receive = virtio_net_receive,
2547     .link_status_changed = virtio_net_set_link_status,
2548     .query_rx_filter = virtio_net_query_rxfilter,
2549 };
2550 
2551 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2552 {
2553     VirtIONet *n = VIRTIO_NET(vdev);
2554     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2555     assert(n->vhost_started);
2556     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2557 }
2558 
2559 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2560                                            bool mask)
2561 {
2562     VirtIONet *n = VIRTIO_NET(vdev);
2563     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2564     assert(n->vhost_started);
2565     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2566                              vdev, idx, mask);
2567 }
2568 
2569 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2570 {
2571     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2572 
2573     n->config_size = virtio_feature_get_config_size(feature_sizes,
2574                                                     host_features);
2575 }
2576 
2577 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2578                                    const char *type)
2579 {
2580     /*
2581      * The name can be NULL; in that case the netclient name will be type.x.
2582      */
2583     assert(type != NULL);
2584 
2585     g_free(n->netclient_name);
2586     g_free(n->netclient_type);
2587     n->netclient_name = g_strdup(name);
2588     n->netclient_type = g_strdup(type);
2589 }
2590 
2591 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2592 {
2593     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2594     VirtIONet *n = VIRTIO_NET(dev);
2595     NetClientState *nc;
2596     int i;
2597 
2598     if (n->net_conf.mtu) {
2599         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2600     }
2601 
2602     if (n->net_conf.duplex_str) {
2603         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2604             n->net_conf.duplex = DUPLEX_HALF;
2605         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2606             n->net_conf.duplex = DUPLEX_FULL;
2607         } else {
2608             error_setg(errp, "'duplex' must be 'half' or 'full'");
2609         }
2610         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2611     } else {
2612         n->net_conf.duplex = DUPLEX_UNKNOWN;
2613     }
2614 
2615     if (n->net_conf.speed < SPEED_UNKNOWN) {
2616         error_setg(errp, "'speed' must be between 0 and INT_MAX");
2617     } else if (n->net_conf.speed >= 0) {
2618         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2619     }
2620 
2621     virtio_net_set_config_size(n, n->host_features);
2622     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2623 
2624     /*
2625      * We set a lower limit on RX queue size to what it always was.
2626      * Guests that want a smaller ring can always resize it without
2627      * help from us (using virtio 1 and up).
2628      */
2629     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2630         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2631         !is_power_of_2(n->net_conf.rx_queue_size)) {
2632         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2633                    "must be a power of 2 between %d and %d.",
2634                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2635                    VIRTQUEUE_MAX_SIZE);
2636         virtio_cleanup(vdev);
2637         return;
2638     }
2639 
2640     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2641         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2642         !is_power_of_2(n->net_conf.tx_queue_size)) {
2643         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2644                    "must be a power of 2 between %d and %d",
2645                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2646                    VIRTQUEUE_MAX_SIZE);
2647         virtio_cleanup(vdev);
2648         return;
2649     }
2650 
2651     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
2652     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
2653         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
2654                    "must be a positive integer less than %d.",
2655                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
2656         virtio_cleanup(vdev);
2657         return;
2658     }
2659     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
2660     n->curr_queues = 1;
2661     n->tx_timeout = n->net_conf.txtimer;
2662 
2663     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2664                        && strcmp(n->net_conf.tx, "bh")) {
2665         warn_report("virtio-net: "
2666                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2667                     n->net_conf.tx);
2668         error_printf("Defaulting to \"bh\"");
2669     }
2670 
2671     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2672                                     n->net_conf.tx_queue_size);
2673 
2674     for (i = 0; i < n->max_queues; i++) {
2675         virtio_net_add_queue(n, i);
2676     }
2677 
2678     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2679     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2680     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
2681     n->status = VIRTIO_NET_S_LINK_UP;
2682     n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
2683                                      virtio_net_announce_timer, n);
2684 
2685     if (n->netclient_type) {
2686         /*
2687          * Happens when virtio_net_set_netclient_name has been called.
2688          */
2689         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2690                               n->netclient_type, n->netclient_name, n);
2691     } else {
2692         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2693                               object_get_typename(OBJECT(dev)), dev->id, n);
2694     }
2695 
2696     peer_test_vnet_hdr(n);
2697     if (peer_has_vnet_hdr(n)) {
2698         for (i = 0; i < n->max_queues; i++) {
2699             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
2700         }
2701         n->host_hdr_len = sizeof(struct virtio_net_hdr);
2702     } else {
2703         n->host_hdr_len = 0;
2704     }
2705 
2706     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
2707 
2708     n->vqs[0].tx_waiting = 0;
2709     n->tx_burst = n->net_conf.txburst;
2710     virtio_net_set_mrg_rx_bufs(n, 0, 0);
2711     n->promisc = 1; /* for compatibility */
2712 
2713     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
2714 
2715     n->vlans = g_malloc0(MAX_VLAN >> 3);
2716 
2717     nc = qemu_get_queue(n->nic);
2718     nc->rxfilter_notify_enabled = 1;
2719 
2720     QTAILQ_INIT(&n->rsc_chains);
2721     n->qdev = dev;
2722 }
2723 
2724 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2725 {
2726     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2727     VirtIONet *n = VIRTIO_NET(dev);
2728     int i, max_queues;
2729 
2730     /* This will stop vhost backend if appropriate. */
2731     virtio_net_set_status(vdev, 0);
2732 
2733     g_free(n->netclient_name);
2734     n->netclient_name = NULL;
2735     g_free(n->netclient_type);
2736     n->netclient_type = NULL;
2737 
2738     g_free(n->mac_table.macs);
2739     g_free(n->vlans);
2740 
2741     max_queues = n->multiqueue ? n->max_queues : 1;
2742     for (i = 0; i < max_queues; i++) {
2743         virtio_net_del_queue(n, i);
2744     }
2745 
2746     timer_del(n->announce_timer);
2747     timer_free(n->announce_timer);
2748     g_free(n->vqs);
2749     qemu_del_nic(n->nic);
2750     virtio_net_rsc_cleanup(n);
2751     virtio_cleanup(vdev);
2752 }
2753 
2754 static void virtio_net_instance_init(Object *obj)
2755 {
2756     VirtIONet *n = VIRTIO_NET(obj);
2757 
2758     /*
2759      * The default config_size is sizeof(struct virtio_net_config).
2760      * Can be overridden with virtio_net_set_config_size.
2761      */
2762     n->config_size = sizeof(struct virtio_net_config);
2763     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2764                                   "bootindex", "/ethernet-phy@0",
2765                                   DEVICE(n), NULL);
2766 }
2767 
2768 static int virtio_net_pre_save(void *opaque)
2769 {
2770     VirtIONet *n = opaque;
2771 
2772     /* At this point, backend must be stopped, otherwise
2773      * it might keep writing to memory. */
2774     assert(!n->vhost_started);
2775 
2776     return 0;
2777 }
2778 
2779 static const VMStateDescription vmstate_virtio_net = {
2780     .name = "virtio-net",
2781     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2782     .version_id = VIRTIO_NET_VM_VERSION,
2783     .fields = (VMStateField[]) {
2784         VMSTATE_VIRTIO_DEVICE,
2785         VMSTATE_END_OF_LIST()
2786     },
2787     .pre_save = virtio_net_pre_save,
2788 };
2789 
2790 static Property virtio_net_properties[] = {
2791     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2792                     VIRTIO_NET_F_CSUM, true),
2793     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
2794                     VIRTIO_NET_F_GUEST_CSUM, true),
2795     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2796     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
2797                     VIRTIO_NET_F_GUEST_TSO4, true),
2798     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
2799                     VIRTIO_NET_F_GUEST_TSO6, true),
2800     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
2801                     VIRTIO_NET_F_GUEST_ECN, true),
2802     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
2803                     VIRTIO_NET_F_GUEST_UFO, true),
2804     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
2805                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2806     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
2807                     VIRTIO_NET_F_HOST_TSO4, true),
2808     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
2809                     VIRTIO_NET_F_HOST_TSO6, true),
2810     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
2811                     VIRTIO_NET_F_HOST_ECN, true),
2812     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
2813                     VIRTIO_NET_F_HOST_UFO, true),
2814     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
2815                     VIRTIO_NET_F_MRG_RXBUF, true),
2816     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
2817                     VIRTIO_NET_F_STATUS, true),
2818     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
2819                     VIRTIO_NET_F_CTRL_VQ, true),
2820     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
2821                     VIRTIO_NET_F_CTRL_RX, true),
2822     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
2823                     VIRTIO_NET_F_CTRL_VLAN, true),
2824     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
2825                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2826     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
2827                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2828     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
2829                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2830     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2831     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2832                     VIRTIO_NET_F_RSC_EXT, false),
2833     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2834                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
2835     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2836     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2837                        TX_TIMER_INTERVAL),
2838     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2839     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2840     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2841                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2842     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2843                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
2844     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2845     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2846                      true),
2847     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2848     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
2849     DEFINE_PROP_END_OF_LIST(),
2850 };
2851 
2852 static void virtio_net_class_init(ObjectClass *klass, void *data)
2853 {
2854     DeviceClass *dc = DEVICE_CLASS(klass);
2855     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2856 
2857     dc->props = virtio_net_properties;
2858     dc->vmsd = &vmstate_virtio_net;
2859     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2860     vdc->realize = virtio_net_device_realize;
2861     vdc->unrealize = virtio_net_device_unrealize;
2862     vdc->get_config = virtio_net_get_config;
2863     vdc->set_config = virtio_net_set_config;
2864     vdc->get_features = virtio_net_get_features;
2865     vdc->set_features = virtio_net_set_features;
2866     vdc->bad_features = virtio_net_bad_features;
2867     vdc->reset = virtio_net_reset;
2868     vdc->set_status = virtio_net_set_status;
2869     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2870     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2871     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2872     vdc->vmsd = &vmstate_virtio_net_device;
2873 }
2874 
2875 static const TypeInfo virtio_net_info = {
2876     .name = TYPE_VIRTIO_NET,
2877     .parent = TYPE_VIRTIO_DEVICE,
2878     .instance_size = sizeof(VirtIONet),
2879     .instance_init = virtio_net_instance_init,
2880     .class_init = virtio_net_class_init,
2881 };
2882 
2883 static void virtio_register_types(void)
2884 {
2885     type_register_static(&virtio_net_info);
2886 }
2887 
2888 type_init(virtio_register_types)
2889