1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/iov.h"
16 #include "hw/virtio/virtio.h"
17 #include "net/net.h"
18 #include "net/checksum.h"
19 #include "net/tap.h"
20 #include "qemu/error-report.h"
21 #include "qemu/timer.h"
22 #include "hw/virtio/virtio-net.h"
23 #include "net/vhost_net.h"
24 #include "net/announce.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "qapi/error.h"
27 #include "qapi/qapi-events-net.h"
28 #include "hw/virtio/virtio-access.h"
29 #include "migration/misc.h"
30 #include "standard-headers/linux/ethtool.h"
31 #include "trace.h"
32 
33 #define VIRTIO_NET_VM_VERSION    11
34 
35 #define MAC_TABLE_ENTRIES    64
36 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
37 
38 /* previously fixed value */
39 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
40 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
41 
42 /* for now, only allow larger queues; with virtio-1, guest can downsize */
43 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
44 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
45 
46 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
47 
48 #define VIRTIO_NET_TCP_FLAG         0x3F
49 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
50 
51 /* IPv4 max payload, 16 bits in the header */
52 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
53 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
54 
55 /* header length value (in 32-bit words) of an IPv4 header without options */
56 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
57 
58 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
59 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
60 
61 /* Purge coalesced packets timer interval.  This value affects performance
62    significantly and should be tuned carefully: '300000' (300us) is the
63    recommended value for passing the WHQL test, while '50000' can double
64    netperf throughput with tso/gso/gro 'off'. */
65 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
66 
67 /* temporary until the standard headers include these definitions */
68 #if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
69 
70 #define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
71 #define VIRTIO_NET_F_RSC_EXT       61
72 
73 static inline __virtio16 *virtio_net_rsc_ext_num_packets(
74     struct virtio_net_hdr *hdr)
75 {
76     return &hdr->csum_start;
77 }
78 
79 static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
80     struct virtio_net_hdr *hdr)
81 {
82     return &hdr->csum_offset;
83 }
84 
85 #endif
86 
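/*
 * Map each config-space-extending feature bit to the end offset of the last
 * virtio_net_config field that feature makes visible; the negotiated feature
 * set thus determines how many bytes of config space the guest may access.
 */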
87 static VirtIOFeature feature_sizes[] = {
88     {.flags = 1ULL << VIRTIO_NET_F_MAC,
89      .end = virtio_endof(struct virtio_net_config, mac)},
90     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
91      .end = virtio_endof(struct virtio_net_config, status)},
92     {.flags = 1ULL << VIRTIO_NET_F_MQ,
93      .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
94     {.flags = 1ULL << VIRTIO_NET_F_MTU,
95      .end = virtio_endof(struct virtio_net_config, mtu)},
96     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
97      .end = virtio_endof(struct virtio_net_config, duplex)},
98     {}
99 };
100 
101 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
102 {
103     VirtIONet *n = qemu_get_nic_opaque(nc);
104 
105     return &n->vqs[nc->queue_index];
106 }
107 
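/*
 * Virtqueues are laid out in pairs (RX at index 2*q, TX at 2*q + 1, with the
 * control vq last), so dividing a vq index by two recovers its queue pair.
 */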
108 static int vq2q(int queue_index)
109 {
110     return queue_index / 2;
111 }
112 
113 /* TODO
114  * - we could suppress RX interrupt if we were so inclined.
115  */
116 
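/* Fill the guest-visible config space.  Only the first n->config_size bytes,
 * as determined by the negotiated features (see feature_sizes above), are
 * copied out to the guest. */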
117 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
118 {
119     VirtIONet *n = VIRTIO_NET(vdev);
120     struct virtio_net_config netcfg;
121 
122     virtio_stw_p(vdev, &netcfg.status, n->status);
123     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
124     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
125     memcpy(netcfg.mac, n->mac, ETH_ALEN);
126     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
127     netcfg.duplex = n->net_conf.duplex;
128     memcpy(config, &netcfg, n->config_size);
129 }
130 
131 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
132 {
133     VirtIONet *n = VIRTIO_NET(vdev);
134     struct virtio_net_config netcfg = {};
135 
136     memcpy(&netcfg, config, n->config_size);
137 
138     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
139         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
140         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
141         memcpy(n->mac, netcfg.mac, ETH_ALEN);
142         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
143     }
144 }
145 
146 static bool virtio_net_started(VirtIONet *n, uint8_t status)
147 {
148     VirtIODevice *vdev = VIRTIO_DEVICE(n);
149     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
150         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
151 }
152 
153 static void virtio_net_announce_timer(void *opaque)
154 {
155     VirtIONet *n = opaque;
156     VirtIODevice *vdev = VIRTIO_DEVICE(n);
157     trace_virtio_net_announce_timer(n->announce_timer.round);
158 
159     n->announce_timer.round--;
160     n->status |= VIRTIO_NET_S_ANNOUNCE;
161     virtio_notify_config(vdev);
162 }
163 
164 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
165 {
166     VirtIODevice *vdev = VIRTIO_DEVICE(n);
167     NetClientState *nc = qemu_get_queue(n->nic);
168     int queues = n->multiqueue ? n->max_queues : 1;
169 
170     if (!get_vhost_net(nc->peer)) {
171         return;
172     }
173 
174     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
175         !!n->vhost_started) {
176         return;
177     }
178     if (!n->vhost_started) {
179         int r, i;
180 
181         if (n->needs_vnet_hdr_swap) {
182             error_report("backend does not support %s vnet headers; "
183                          "falling back on userspace virtio",
184                          virtio_is_big_endian(vdev) ? "BE" : "LE");
185             return;
186         }
187 
188         /* Any packets outstanding? Purge them to avoid touching rings
189          * when vhost is running.
190          */
191         for (i = 0;  i < queues; i++) {
192             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
193 
194             /* Purge both directions: TX and RX. */
195             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
196             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
197         }
198 
199         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
200             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
201             if (r < 0) {
202                 error_report("MTU of %u bytes is not supported by the backend",
203                              n->net_conf.mtu);
204 
205                 return;
206             }
207         }
208 
209         n->vhost_started = 1;
210         r = vhost_net_start(vdev, n->nic->ncs, queues);
211         if (r < 0) {
212             error_report("unable to start vhost net: %d: "
213                          "falling back on userspace virtio", -r);
214             n->vhost_started = 0;
215         }
216     } else {
217         vhost_net_stop(vdev, n->nic->ncs, queues);
218         n->vhost_started = 0;
219     }
220 }
221 
222 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
223                                           NetClientState *peer,
224                                           bool enable)
225 {
226     if (virtio_is_big_endian(vdev)) {
227         return qemu_set_vnet_be(peer, enable);
228     } else {
229         return qemu_set_vnet_le(peer, enable);
230     }
231 }
232 
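/* Returns true if some peer refused the requested endianness (after rolling
 * the others back), in which case virtio-net must swap vnet header fields
 * itself; see needs_vnet_hdr_swap. */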
233 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
234                                        int queues, bool enable)
235 {
236     int i;
237 
238     for (i = 0; i < queues; i++) {
239         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
240             enable) {
241             while (--i >= 0) {
242                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
243             }
244 
245             return true;
246         }
247     }
248 
249     return false;
250 }
251 
252 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
253 {
254     VirtIODevice *vdev = VIRTIO_DEVICE(n);
255     int queues = n->multiqueue ? n->max_queues : 1;
256 
257     if (virtio_net_started(n, status)) {
258         /* Before using the device, we tell the network backend about the
259          * endianness to use when parsing vnet headers. If the backend
260          * can't do it, we fall back to fixing the headers in the core
261          * virtio-net code.
262          */
263         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
264                                                             queues, true);
265     } else if (virtio_net_started(n, vdev->status)) {
266         /* After using the device, we need to reset the network backend to
267          * the default (guest native endianness), otherwise the guest may
268          * lose network connectivity if it is rebooted into a different
269          * endianness.
270          */
271         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
272     }
273 }
274 
275 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
276 {
277     unsigned int dropped = virtqueue_drop_all(vq);
278     if (dropped) {
279         virtio_notify(vdev, vq);
280     }
281 }
282 
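/* Propagate a device status change to the vhost backend and to each queue's
 * TX timer or bottom half; queues outside curr_queues are treated as
 * stopped. */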
283 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
284 {
285     VirtIONet *n = VIRTIO_NET(vdev);
286     VirtIONetQueue *q;
287     int i;
288     uint8_t queue_status;
289 
290     virtio_net_vnet_endian_status(n, status);
291     virtio_net_vhost_status(n, status);
292 
293     for (i = 0; i < n->max_queues; i++) {
294         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
295         bool queue_started;
296         q = &n->vqs[i];
297 
298         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
299             queue_status = 0;
300         } else {
301             queue_status = status;
302         }
303         queue_started =
304             virtio_net_started(n, queue_status) && !n->vhost_started;
305 
306         if (queue_started) {
307             qemu_flush_queued_packets(ncs);
308         }
309 
310         if (!q->tx_waiting) {
311             continue;
312         }
313 
314         if (queue_started) {
315             if (q->tx_timer) {
316                 timer_mod(q->tx_timer,
317                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
318             } else {
319                 qemu_bh_schedule(q->tx_bh);
320             }
321         } else {
322             if (q->tx_timer) {
323                 timer_del(q->tx_timer);
324             } else {
325                 qemu_bh_cancel(q->tx_bh);
326             }
327             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
328                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
329                 vdev->vm_running) {
330                 /* if tx is waiting, we likely have packets in the tx queue
331                  * and notification disabled */
332                 q->tx_waiting = 0;
333                 virtio_queue_set_notification(q->tx_vq, 1);
334                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
335             }
336         }
337     }
338 }
339 
340 static void virtio_net_set_link_status(NetClientState *nc)
341 {
342     VirtIONet *n = qemu_get_nic_opaque(nc);
343     VirtIODevice *vdev = VIRTIO_DEVICE(n);
344     uint16_t old_status = n->status;
345 
346     if (nc->link_down)
347         n->status &= ~VIRTIO_NET_S_LINK_UP;
348     else
349         n->status |= VIRTIO_NET_S_LINK_UP;
350 
351     if (n->status != old_status)
352         virtio_notify_config(vdev);
353 
354     virtio_net_set_status(vdev, vdev->status);
355 }
356 
357 static void rxfilter_notify(NetClientState *nc)
358 {
359     VirtIONet *n = qemu_get_nic_opaque(nc);
360 
361     if (nc->rxfilter_notify_enabled) {
362         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
363         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
364                                               n->netclient_name, path);
365         g_free(path);
366 
367         /* disable event notification to avoid events flooding */
368         nc->rxfilter_notify_enabled = 0;
369     }
370 }
371 
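/* Convert the packed VLAN bitmap (one bit per VLAN id, 32 ids per word) into
 * a QAPI intList for the rx-filter query; (i << 5) + j undoes the
 * vid >> 5 / vid & 0x1f packing used by the VLAN table commands below. */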
372 static intList *get_vlan_table(VirtIONet *n)
373 {
374     intList *list, *entry;
375     int i, j;
376 
377     list = NULL;
378     for (i = 0; i < MAX_VLAN >> 5; i++) {
379         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
380             if (n->vlans[i] & (1U << j)) {
381                 entry = g_malloc0(sizeof(*entry));
382                 entry->value = (i << 5) + j;
383                 entry->next = list;
384                 list = entry;
385             }
386         }
387     }
388 
389     return list;
390 }
391 
392 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
393 {
394     VirtIONet *n = qemu_get_nic_opaque(nc);
395     VirtIODevice *vdev = VIRTIO_DEVICE(n);
396     RxFilterInfo *info;
397     strList *str_list, *entry;
398     int i;
399 
400     info = g_malloc0(sizeof(*info));
401     info->name = g_strdup(nc->name);
402     info->promiscuous = n->promisc;
403 
404     if (n->nouni) {
405         info->unicast = RX_STATE_NONE;
406     } else if (n->alluni) {
407         info->unicast = RX_STATE_ALL;
408     } else {
409         info->unicast = RX_STATE_NORMAL;
410     }
411 
412     if (n->nomulti) {
413         info->multicast = RX_STATE_NONE;
414     } else if (n->allmulti) {
415         info->multicast = RX_STATE_ALL;
416     } else {
417         info->multicast = RX_STATE_NORMAL;
418     }
419 
420     info->broadcast_allowed = n->nobcast;
421     info->multicast_overflow = n->mac_table.multi_overflow;
422     info->unicast_overflow = n->mac_table.uni_overflow;
423 
424     info->main_mac = qemu_mac_strdup_printf(n->mac);
425 
426     str_list = NULL;
427     for (i = 0; i < n->mac_table.first_multi; i++) {
428         entry = g_malloc0(sizeof(*entry));
429         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
430         entry->next = str_list;
431         str_list = entry;
432     }
433     info->unicast_table = str_list;
434 
435     str_list = NULL;
436     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
437         entry = g_malloc0(sizeof(*entry));
438         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
439         entry->next = str_list;
440         str_list = entry;
441     }
442     info->multicast_table = str_list;
443     info->vlan_table = get_vlan_table(n);
444 
445     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
446         info->vlan = RX_STATE_ALL;
447     } else if (!info->vlan_table) {
448         info->vlan = RX_STATE_NONE;
449     } else {
450         info->vlan = RX_STATE_NORMAL;
451     }
452 
453     /* enable event notification after query */
454     nc->rxfilter_notify_enabled = 1;
455 
456     return info;
457 }
458 
459 static void virtio_net_reset(VirtIODevice *vdev)
460 {
461     VirtIONet *n = VIRTIO_NET(vdev);
462     int i;
463 
464     /* Reset back to compatibility mode */
465     n->promisc = 1;
466     n->allmulti = 0;
467     n->alluni = 0;
468     n->nomulti = 0;
469     n->nouni = 0;
470     n->nobcast = 0;
471     /* multiqueue is disabled by default */
472     n->curr_queues = 1;
473     timer_del(n->announce_timer.tm);
474     n->announce_timer.round = 0;
475     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
476 
477     /* Flush any MAC and VLAN filter table state */
478     n->mac_table.in_use = 0;
479     n->mac_table.first_multi = 0;
480     n->mac_table.multi_overflow = 0;
481     n->mac_table.uni_overflow = 0;
482     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
483     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
484     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
485     memset(n->vlans, 0, MAX_VLAN >> 3);
486 
487     /* Flush any async TX */
488     for (i = 0;  i < n->max_queues; i++) {
489         NetClientState *nc = qemu_get_subqueue(n->nic, i);
490 
491         if (nc->peer) {
492             qemu_flush_or_purge_queued_packets(nc->peer, true);
493             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
494         }
495     }
496 }
497 
498 static void peer_test_vnet_hdr(VirtIONet *n)
499 {
500     NetClientState *nc = qemu_get_queue(n->nic);
501     if (!nc->peer) {
502         return;
503     }
504 
505     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
506 }
507 
508 static int peer_has_vnet_hdr(VirtIONet *n)
509 {
510     return n->has_vnet_hdr;
511 }
512 
513 static int peer_has_ufo(VirtIONet *n)
514 {
515     if (!peer_has_vnet_hdr(n))
516         return 0;
517 
518     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
519 
520     return n->has_ufo;
521 }
522 
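/* With VIRTIO_F_VERSION_1 the guest header is always the 12-byte
 * virtio_net_hdr_mrg_rxbuf layout; legacy devices use the 10-byte
 * virtio_net_hdr unless mergeable RX buffers were negotiated. */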
523 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
524                                        int version_1)
525 {
526     int i;
527     NetClientState *nc;
528 
529     n->mergeable_rx_bufs = mergeable_rx_bufs;
530 
531     if (version_1) {
532         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
533     } else {
534         n->guest_hdr_len = n->mergeable_rx_bufs ?
535             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
536             sizeof(struct virtio_net_hdr);
537     }
538 
539     for (i = 0; i < n->max_queues; i++) {
540         nc = qemu_get_subqueue(n->nic, i);
541 
542         if (peer_has_vnet_hdr(n) &&
543             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
544             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
545             n->host_hdr_len = n->guest_hdr_len;
546         }
547     }
548 }
549 
550 static int virtio_net_max_tx_queue_size(VirtIONet *n)
551 {
552     NetClientState *peer = n->nic_conf.peers.ncs[0];
553 
554     /*
555      * Backends other than vhost-user don't support max queue size.
556      */
557     if (!peer) {
558         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
559     }
560 
561     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
562         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
563     }
564 
565     return VIRTQUEUE_MAX_SIZE;
566 }
567 
568 static int peer_attach(VirtIONet *n, int index)
569 {
570     NetClientState *nc = qemu_get_subqueue(n->nic, index);
571 
572     if (!nc->peer) {
573         return 0;
574     }
575 
576     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
577         vhost_set_vring_enable(nc->peer, 1);
578     }
579 
580     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
581         return 0;
582     }
583 
584     if (n->max_queues == 1) {
585         return 0;
586     }
587 
588     return tap_enable(nc->peer);
589 }
590 
591 static int peer_detach(VirtIONet *n, int index)
592 {
593     NetClientState *nc = qemu_get_subqueue(n->nic, index);
594 
595     if (!nc->peer) {
596         return 0;
597     }
598 
599     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
600         vhost_set_vring_enable(nc->peer, 0);
601     }
602 
603     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
604         return 0;
605     }
606 
607     return tap_disable(nc->peer);
608 }
609 
610 static void virtio_net_set_queues(VirtIONet *n)
611 {
612     int i;
613     int r;
614 
615     if (n->nic->peer_deleted) {
616         return;
617     }
618 
619     for (i = 0; i < n->max_queues; i++) {
620         if (i < n->curr_queues) {
621             r = peer_attach(n, i);
622             assert(!r);
623         } else {
624             r = peer_detach(n, i);
625             assert(!r);
626         }
627     }
628 }
629 
630 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
631 
632 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
633                                         Error **errp)
634 {
635     VirtIONet *n = VIRTIO_NET(vdev);
636     NetClientState *nc = qemu_get_queue(n->nic);
637 
638     /* First, merge in all of the features this virtio-net device supports */
639     features |= n->host_features;
640 
641     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
642 
643     if (!peer_has_vnet_hdr(n)) {
644         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
645         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
646         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
647         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
648 
649         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
650         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
651         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
652         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
653     }
654 
655     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
656         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
657         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
658     }
659 
660     if (!get_vhost_net(nc->peer)) {
661         return features;
662     }
663 
664     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
665     vdev->backend_features = features;
666 
667     if (n->mtu_bypass_backend &&
668             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
669         features |= (1ULL << VIRTIO_NET_F_MTU);
670     }
671 
672     return features;
673 }
674 
675 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
676 {
677     uint64_t features = 0;
678 
679     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
680      * but also these: */
681     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
682     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
683     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
684     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
685     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
686 
687     return features;
688 }
689 
690 static void virtio_net_apply_guest_offloads(VirtIONet *n)
691 {
692     qemu_set_offload(qemu_get_queue(n->nic)->peer,
693             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
694             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
695             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
696             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
697             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
698 }
699 
700 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
701 {
702     static const uint64_t guest_offloads_mask =
703         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
704         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
705         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
706         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
707         (1ULL << VIRTIO_NET_F_GUEST_UFO);
708 
709     return guest_offloads_mask & features;
710 }
711 
712 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
713 {
714     VirtIODevice *vdev = VIRTIO_DEVICE(n);
715     return virtio_net_guest_offloads_by_features(vdev->guest_features);
716 }
717 
718 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
719 {
720     VirtIONet *n = VIRTIO_NET(vdev);
721     int i;
722 
723     if (n->mtu_bypass_backend &&
724             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
725         features &= ~(1ULL << VIRTIO_NET_F_MTU);
726     }
727 
728     virtio_net_set_multiqueue(n,
729                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
730 
731     virtio_net_set_mrg_rx_bufs(n,
732                                virtio_has_feature(features,
733                                                   VIRTIO_NET_F_MRG_RXBUF),
734                                virtio_has_feature(features,
735                                                   VIRTIO_F_VERSION_1));
736 
737     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
738         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
739     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
740         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
741 
742     if (n->has_vnet_hdr) {
743         n->curr_guest_offloads =
744             virtio_net_guest_offloads_by_features(features);
745         virtio_net_apply_guest_offloads(n);
746     }
747 
748     for (i = 0;  i < n->max_queues; i++) {
749         NetClientState *nc = qemu_get_subqueue(n->nic, i);
750 
751         if (!get_vhost_net(nc->peer)) {
752             continue;
753         }
754         vhost_net_ack_features(get_vhost_net(nc->peer), features);
755     }
756 
757     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
758         memset(n->vlans, 0, MAX_VLAN >> 3);
759     } else {
760         memset(n->vlans, 0xff, MAX_VLAN >> 3);
761     }
762 }
763 
764 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
765                                      struct iovec *iov, unsigned int iov_cnt)
766 {
767     uint8_t on;
768     size_t s;
769     NetClientState *nc = qemu_get_queue(n->nic);
770 
771     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
772     if (s != sizeof(on)) {
773         return VIRTIO_NET_ERR;
774     }
775 
776     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
777         n->promisc = on;
778     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
779         n->allmulti = on;
780     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
781         n->alluni = on;
782     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
783         n->nomulti = on;
784     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
785         n->nouni = on;
786     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
787         n->nobcast = on;
788     } else {
789         return VIRTIO_NET_ERR;
790     }
791 
792     rxfilter_notify(nc);
793 
794     return VIRTIO_NET_OK;
795 }
796 
797 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
798                                      struct iovec *iov, unsigned int iov_cnt)
799 {
800     VirtIODevice *vdev = VIRTIO_DEVICE(n);
801     uint64_t offloads;
802     size_t s;
803 
804     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
805         return VIRTIO_NET_ERR;
806     }
807 
808     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
809     if (s != sizeof(offloads)) {
810         return VIRTIO_NET_ERR;
811     }
812 
813     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
814         uint64_t supported_offloads;
815 
816         offloads = virtio_ldq_p(vdev, &offloads);
817 
818         if (!n->has_vnet_hdr) {
819             return VIRTIO_NET_ERR;
820         }
821 
822         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
823             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
824         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
825             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
826         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
827 
828         supported_offloads = virtio_net_supported_guest_offloads(n);
829         if (offloads & ~supported_offloads) {
830             return VIRTIO_NET_ERR;
831         }
832 
833         n->curr_guest_offloads = offloads;
834         virtio_net_apply_guest_offloads(n);
835 
836         return VIRTIO_NET_OK;
837     } else {
838         return VIRTIO_NET_ERR;
839     }
840 }
841 
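/* VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac tables back
 * to back (unicast first, then multicast), each a 32-bit entry count followed
 * by that many 6-byte MAC addresses. */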
842 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
843                                  struct iovec *iov, unsigned int iov_cnt)
844 {
845     VirtIODevice *vdev = VIRTIO_DEVICE(n);
846     struct virtio_net_ctrl_mac mac_data;
847     size_t s;
848     NetClientState *nc = qemu_get_queue(n->nic);
849 
850     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
851         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
852             return VIRTIO_NET_ERR;
853         }
854         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
855         assert(s == sizeof(n->mac));
856         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
857         rxfilter_notify(nc);
858 
859         return VIRTIO_NET_OK;
860     }
861 
862     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
863         return VIRTIO_NET_ERR;
864     }
865 
866     int in_use = 0;
867     int first_multi = 0;
868     uint8_t uni_overflow = 0;
869     uint8_t multi_overflow = 0;
870     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
871 
872     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
873                    sizeof(mac_data.entries));
874     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
875     if (s != sizeof(mac_data.entries)) {
876         goto error;
877     }
878     iov_discard_front(&iov, &iov_cnt, s);
879 
880     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
881         goto error;
882     }
883 
884     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
885         s = iov_to_buf(iov, iov_cnt, 0, macs,
886                        mac_data.entries * ETH_ALEN);
887         if (s != mac_data.entries * ETH_ALEN) {
888             goto error;
889         }
890         in_use += mac_data.entries;
891     } else {
892         uni_overflow = 1;
893     }
894 
895     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
896 
897     first_multi = in_use;
898 
899     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
900                    sizeof(mac_data.entries));
901     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
902     if (s != sizeof(mac_data.entries)) {
903         goto error;
904     }
905 
906     iov_discard_front(&iov, &iov_cnt, s);
907 
908     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
909         goto error;
910     }
911 
912     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
913         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
914                        mac_data.entries * ETH_ALEN);
915         if (s != mac_data.entries * ETH_ALEN) {
916             goto error;
917         }
918         in_use += mac_data.entries;
919     } else {
920         multi_overflow = 1;
921     }
922 
923     n->mac_table.in_use = in_use;
924     n->mac_table.first_multi = first_multi;
925     n->mac_table.uni_overflow = uni_overflow;
926     n->mac_table.multi_overflow = multi_overflow;
927     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
928     g_free(macs);
929     rxfilter_notify(nc);
930 
931     return VIRTIO_NET_OK;
932 
933 error:
934     g_free(macs);
935     return VIRTIO_NET_ERR;
936 }
937 
938 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
939                                         struct iovec *iov, unsigned int iov_cnt)
940 {
941     VirtIODevice *vdev = VIRTIO_DEVICE(n);
942     uint16_t vid;
943     size_t s;
944     NetClientState *nc = qemu_get_queue(n->nic);
945 
946     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
947     vid = virtio_lduw_p(vdev, &vid);
948     if (s != sizeof(vid)) {
949         return VIRTIO_NET_ERR;
950     }
951 
952     if (vid >= MAX_VLAN)
953         return VIRTIO_NET_ERR;
954 
955     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
956         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
957     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
958         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
959     else
960         return VIRTIO_NET_ERR;
961 
962     rxfilter_notify(nc);
963 
964     return VIRTIO_NET_OK;
965 }
966 
967 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
968                                       struct iovec *iov, unsigned int iov_cnt)
969 {
970     trace_virtio_net_handle_announce(n->announce_timer.round);
971     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
972         n->status & VIRTIO_NET_S_ANNOUNCE) {
973         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
974         if (n->announce_timer.round) {
975             qemu_announce_timer_step(&n->announce_timer);
976         }
977         return VIRTIO_NET_OK;
978     } else {
979         return VIRTIO_NET_ERR;
980     }
981 }
982 
983 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
984                                 struct iovec *iov, unsigned int iov_cnt)
985 {
986     VirtIODevice *vdev = VIRTIO_DEVICE(n);
987     struct virtio_net_ctrl_mq mq;
988     size_t s;
989     uint16_t queues;
990 
991     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
992     if (s != sizeof(mq)) {
993         return VIRTIO_NET_ERR;
994     }
995 
996     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
997         return VIRTIO_NET_ERR;
998     }
999 
1000     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1001 
1002     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1003         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1004         queues > n->max_queues ||
1005         !n->multiqueue) {
1006         return VIRTIO_NET_ERR;
1007     }
1008 
1009     n->curr_queues = queues;
1010     /* stop the backend before changing the number of queues to avoid handling a
1011      * disabled queue */
1012     virtio_net_set_status(vdev, vdev->status);
1013     virtio_net_set_queues(n);
1014 
1015     return VIRTIO_NET_OK;
1016 }
1017 
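/* A control-queue request consists of out descriptors holding a
 * virtio_net_ctrl_hdr { u8 class; u8 cmd; } plus command-specific data, and
 * in descriptors receiving a single virtio_net_ctrl_ack status byte. */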
1018 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1019 {
1020     VirtIONet *n = VIRTIO_NET(vdev);
1021     struct virtio_net_ctrl_hdr ctrl;
1022     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1023     VirtQueueElement *elem;
1024     size_t s;
1025     struct iovec *iov, *iov2;
1026     unsigned int iov_cnt;
1027 
1028     for (;;) {
1029         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1030         if (!elem) {
1031             break;
1032         }
1033         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1034             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1035             virtio_error(vdev, "virtio-net ctrl missing headers");
1036             virtqueue_detach_element(vq, elem, 0);
1037             g_free(elem);
1038             break;
1039         }
1040 
1041         iov_cnt = elem->out_num;
1042         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1043         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1044         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1045         if (s != sizeof(ctrl)) {
1046             status = VIRTIO_NET_ERR;
1047         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1048             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1049         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1050             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1051         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1052             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1053         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1054             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1055         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1056             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1057         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1058             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1059         }
1060 
1061         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1062         assert(s == sizeof(status));
1063 
1064         virtqueue_push(vq, elem, sizeof(status));
1065         virtio_notify(vdev, vq);
1066         g_free(iov2);
1067         g_free(elem);
1068     }
1069 }
1070 
1071 /* RX */
1072 
1073 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1074 {
1075     VirtIONet *n = VIRTIO_NET(vdev);
1076     int queue_index = vq2q(virtio_get_queue_index(vq));
1077 
1078     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1079 }
1080 
1081 static int virtio_net_can_receive(NetClientState *nc)
1082 {
1083     VirtIONet *n = qemu_get_nic_opaque(nc);
1084     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1085     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1086 
1087     if (!vdev->vm_running) {
1088         return 0;
1089     }
1090 
1091     if (nc->queue_index >= n->curr_queues) {
1092         return 0;
1093     }
1094 
1095     if (!virtio_queue_ready(q->rx_vq) ||
1096         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1097         return 0;
1098     }
1099 
1100     return 1;
1101 }
1102 
1103 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1104 {
1105     VirtIONet *n = q->n;
1106     if (virtio_queue_empty(q->rx_vq) ||
1107         (n->mergeable_rx_bufs &&
1108          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1109         virtio_queue_set_notification(q->rx_vq, 1);
1110 
1111         /* To avoid a race condition where the guest has made some buffers
1112          * available after the above check but before notification was
1113          * enabled, check for available buffers again.
1114          */
1115         if (virtio_queue_empty(q->rx_vq) ||
1116             (n->mergeable_rx_bufs &&
1117              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1118             return 0;
1119         }
1120     }
1121 
1122     virtio_queue_set_notification(q->rx_vq, 0);
1123     return 1;
1124 }
1125 
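/* Byte-swap the multi-byte virtio_net_hdr fields when the backend could not
 * be switched to the guest's vnet header endianness (needs_vnet_hdr_swap). */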
1126 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1127 {
1128     virtio_tswap16s(vdev, &hdr->hdr_len);
1129     virtio_tswap16s(vdev, &hdr->gso_size);
1130     virtio_tswap16s(vdev, &hdr->csum_start);
1131     virtio_tswap16s(vdev, &hdr->csum_offset);
1132 }
1133 
1134 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1135  * it never finds out that the packets don't have valid checksums.  This
1136  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1137  * fix this with Xen but it hasn't appeared in an upstream release of
1138  * dhclient yet.
1139  *
1140  * To avoid breaking existing guests, we catch udp packets and add
1141  * checksums.  This is terrible but it's better than hacking the guest
1142  * kernels.
1143  *
1144  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1145  * we should provide a mechanism to disable it to avoid polluting the host
1146  * cache.
1147  */
1148 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1149                                         uint8_t *buf, size_t size)
1150 {
1151     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1152         (size > 27 && size < 1500) && /* normal sized MTU */
1153         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1154         (buf[23] == 17) && /* ip.protocol == UDP */
1155         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1156         net_checksum_calculate(buf, size);
1157         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1158     }
1159 }
1160 
1161 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1162                            const void *buf, size_t size)
1163 {
1164     if (n->has_vnet_hdr) {
1165         /* FIXME this cast is evil */
1166         void *wbuf = (void *)buf;
1167         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1168                                     size - n->host_hdr_len);
1169 
1170         if (n->needs_vnet_hdr_swap) {
1171             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1172         }
1173         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1174     } else {
1175         struct virtio_net_hdr hdr = {
1176             .flags = 0,
1177             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1178         };
1179         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1180     }
1181 }
1182 
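/* Apply the rx-mode flags and the VLAN and MAC filter tables to an incoming
 * packet; returns 1 to accept the packet and 0 to silently drop it. */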
1183 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1184 {
1185     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1186     static const uint8_t vlan[] = {0x81, 0x00};
1187     uint8_t *ptr = (uint8_t *)buf;
1188     int i;
1189 
1190     if (n->promisc)
1191         return 1;
1192 
1193     ptr += n->host_hdr_len;
1194 
1195     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1196         int vid = lduw_be_p(ptr + 14) & 0xfff;
1197         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1198             return 0;
1199     }
1200 
1201     if (ptr[0] & 1) { /* multicast */
1202         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1203             return !n->nobcast;
1204         } else if (n->nomulti) {
1205             return 0;
1206         } else if (n->allmulti || n->mac_table.multi_overflow) {
1207             return 1;
1208         }
1209 
1210         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1211             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1212                 return 1;
1213             }
1214         }
1215     } else { /* unicast */
1216         if (n->nouni) {
1217             return 0;
1218         } else if (n->alluni || n->mac_table.uni_overflow) {
1219             return 1;
1220         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1221             return 1;
1222         }
1223 
1224         for (i = 0; i < n->mac_table.first_multi; i++) {
1225             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1226                 return 1;
1227             }
1228         }
1229     }
1230 
1231     return 0;
1232 }
1233 
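/* Copy one packet into guest RX buffers.  With mergeable RX buffers a packet
 * may span several descriptor chains; num_buffers is patched into the first
 * header once the count is known.  Returns size on success (or when the
 * packet is dropped by the filter), 0 to retry when buffers appear, and -1
 * on error. */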
1234 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1235                                       size_t size)
1236 {
1237     VirtIONet *n = qemu_get_nic_opaque(nc);
1238     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1239     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1240     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1241     struct virtio_net_hdr_mrg_rxbuf mhdr;
1242     unsigned mhdr_cnt = 0;
1243     size_t offset, i, guest_offset;
1244 
1245     if (!virtio_net_can_receive(nc)) {
1246         return -1;
1247     }
1248 
1249     /* hdr_len refers to the header we supply to the guest */
1250     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1251         return 0;
1252     }
1253 
1254     if (!receive_filter(n, buf, size))
1255         return size;
1256 
1257     offset = i = 0;
1258 
1259     while (offset < size) {
1260         VirtQueueElement *elem;
1261         int len, total;
1262         const struct iovec *sg;
1263 
1264         total = 0;
1265 
1266         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1267         if (!elem) {
1268             if (i) {
1269                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1270                              "i %zd mergeable %d offset %zd, size %zd, "
1271                              "guest hdr len %zd, host hdr len %zd "
1272                              "guest features 0x%" PRIx64,
1273                              i, n->mergeable_rx_bufs, offset, size,
1274                              n->guest_hdr_len, n->host_hdr_len,
1275                              vdev->guest_features);
1276             }
1277             return -1;
1278         }
1279 
1280         if (elem->in_num < 1) {
1281             virtio_error(vdev,
1282                          "virtio-net receive queue contains no in buffers");
1283             virtqueue_detach_element(q->rx_vq, elem, 0);
1284             g_free(elem);
1285             return -1;
1286         }
1287 
1288         sg = elem->in_sg;
1289         if (i == 0) {
1290             assert(offset == 0);
1291             if (n->mergeable_rx_bufs) {
1292                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1293                                     sg, elem->in_num,
1294                                     offsetof(typeof(mhdr), num_buffers),
1295                                     sizeof(mhdr.num_buffers));
1296             }
1297 
1298             receive_header(n, sg, elem->in_num, buf, size);
1299             offset = n->host_hdr_len;
1300             total += n->guest_hdr_len;
1301             guest_offset = n->guest_hdr_len;
1302         } else {
1303             guest_offset = 0;
1304         }
1305 
1306         /* copy in packet.  ugh */
1307         len = iov_from_buf(sg, elem->in_num, guest_offset,
1308                            buf + offset, size - offset);
1309         total += len;
1310         offset += len;
1311         /* If buffers can't be merged, at this point we
1312          * must have consumed the complete packet.
1313          * Otherwise, drop it. */
1314         if (!n->mergeable_rx_bufs && offset < size) {
1315             virtqueue_unpop(q->rx_vq, elem, total);
1316             g_free(elem);
1317             return size;
1318         }
1319 
1320         /* signal other side */
1321         virtqueue_fill(q->rx_vq, elem, total, i++);
1322         g_free(elem);
1323     }
1324 
1325     if (mhdr_cnt) {
1326         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1327         iov_from_buf(mhdr_sg, mhdr_cnt,
1328                      0,
1329                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1330     }
1331 
1332     virtqueue_flush(q->rx_vq, i);
1333     virtio_notify(vdev, q->rx_vq);
1334 
1335     return size;
1336 }
1337 
1338 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1339                                   size_t size)
1340 {
1341     ssize_t r;
1342 
1343     rcu_read_lock();
1344     r = virtio_net_receive_rcu(nc, buf, size);
1345     rcu_read_unlock();
1346     return r;
1347 }
1348 
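/* Locate the IP and TCP headers of a packet under RSC consideration.  The
 * TCP data offset sits in the top 4 bits of th_offset_flags and counts
 * 32-bit words, so (flags & 0xF000) >> 10 (i.e. >> 12 for the field, << 2
 * for words-to-bytes) yields the TCP header length in bytes. */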
1349 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1350                                          const uint8_t *buf,
1351                                          VirtioNetRscUnit *unit)
1352 {
1353     uint16_t ip_hdrlen;
1354     struct ip_header *ip;
1355 
1356     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1357                               + sizeof(struct eth_header));
1358     unit->ip = (void *)ip;
1359     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1360     unit->ip_plen = &ip->ip_len;
1361     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1362     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1363     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1364 }
1365 
1366 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1367                                          const uint8_t *buf,
1368                                          VirtioNetRscUnit *unit)
1369 {
1370     struct ip6_header *ip6;
1371 
1372     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1373                                  + sizeof(struct eth_header));
1374     unit->ip = ip6;
1375     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1376     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1377                                         + sizeof(struct ip6_header));
1378     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1379 
1380     /* The payload length differs between IPv4 and IPv6: in IPv6 the IP
1381        header is not counted in the payload length field */
1382     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1383 }
1384 
1385 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1386                                        VirtioNetRscSeg *seg)
1387 {
1388     int ret;
1389     struct virtio_net_hdr *h;
1390 
1391     h = (struct virtio_net_hdr *)seg->buf;
1392     h->flags = 0;
1393     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1394 
1395     if (seg->is_coalesced) {
1396         *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1397         *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1398         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1399         if (chain->proto == ETH_P_IP) {
1400             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1401         } else {
1402             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1403         }
1404     }
1405 
1406     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1407     QTAILQ_REMOVE(&chain->buffers, seg, next);
1408     g_free(seg->buf);
1409     g_free(seg);
1410 
1411     return ret;
1412 }
1413 
1414 static void virtio_net_rsc_purge(void *opq)
1415 {
1416     VirtioNetRscSeg *seg, *rn;
1417     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1418 
1419     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1420         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1421             chain->stat.purge_failed++;
1422             continue;
1423         }
1424     }
1425 
1426     chain->stat.timer++;
1427     if (!QTAILQ_EMPTY(&chain->buffers)) {
1428         timer_mod(chain->drain_timer,
1429               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1430     }
1431 }
1432 
1433 static void virtio_net_rsc_cleanup(VirtIONet *n)
1434 {
1435     VirtioNetRscChain *chain, *rn_chain;
1436     VirtioNetRscSeg *seg, *rn_seg;
1437 
1438     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1439         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1440             QTAILQ_REMOVE(&chain->buffers, seg, next);
1441             g_free(seg->buf);
1442             g_free(seg);
1443         }
1444 
1445         timer_del(chain->drain_timer);
1446         timer_free(chain->drain_timer);
1447         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1448         g_free(chain);
1449     }
1450 }
1451 
1452 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1453                                      NetClientState *nc,
1454                                      const uint8_t *buf, size_t size)
1455 {
1456     uint16_t hdr_len;
1457     VirtioNetRscSeg *seg;
1458 
1459     hdr_len = chain->n->guest_hdr_len;
1460     seg = g_malloc(sizeof(VirtioNetRscSeg));
1461     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1462         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1463     memcpy(seg->buf, buf, size);
1464     seg->size = size;
1465     seg->packets = 1;
1466     seg->dup_ack = 0;
1467     seg->is_coalesced = 0;
1468     seg->nc = nc;
1469 
1470     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1471     chain->stat.cache++;
1472 
1473     switch (chain->proto) {
1474     case ETH_P_IP:
1475         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1476         break;
1477     case ETH_P_IPV6:
1478         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1479         break;
1480     default:
1481         g_assert_not_reached();
1482     }
1483 }
1484 
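/* Handle a segment whose sequence number matches the cached one: duplicate
 * acks and pure acks finalize the chain, while a bare window update is
 * folded into the cached segment. */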
1485 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1486                                          VirtioNetRscSeg *seg,
1487                                          const uint8_t *buf,
1488                                          struct tcp_header *n_tcp,
1489                                          struct tcp_header *o_tcp)
1490 {
1491     uint32_t nack, oack;
1492     uint16_t nwin, owin;
1493 
1494     nack = htonl(n_tcp->th_ack);
1495     nwin = htons(n_tcp->th_win);
1496     oack = htonl(o_tcp->th_ack);
1497     owin = htons(o_tcp->th_win);
1498 
1499     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1500         chain->stat.ack_out_of_win++;
1501         return RSC_FINAL;
1502     } else if (nack == oack) {
1503         /* duplicated ack or window probe */
1504         if (nwin == owin) {
1505             /* duplicate ack: bump the dup-ack count (the WHQL test expects up to 1) */
1506             chain->stat.dup_ack++;
1507             return RSC_FINAL;
1508         } else {
1509             /* Coalesce window update */
1510             o_tcp->th_win = n_tcp->th_win;
1511             chain->stat.win_update++;
1512             return RSC_COALESCE;
1513         }
1514     } else {
1515             /* pure ack, go to 'C', finalize */
1516         chain->stat.pure_ack++;
1517         return RSC_FINAL;
1518     }
1519 }
1520 
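/* Try to append an in-order segment's payload to a cached one.  Because
 * sequence numbers wrap, the unsigned difference nseq - oseq must equal the
 * cached payload size for the data to be contiguous; anything else either
 * finalizes the chain or is handled as an ack. */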
1521 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1522                                             VirtioNetRscSeg *seg,
1523                                             const uint8_t *buf,
1524                                             VirtioNetRscUnit *n_unit)
1525 {
1526     void *data;
1527     uint16_t o_ip_len;
1528     uint32_t nseq, oseq;
1529     VirtioNetRscUnit *o_unit;
1530 
1531     o_unit = &seg->unit;
1532     o_ip_len = htons(*o_unit->ip_plen);
1533     nseq = htonl(n_unit->tcp->th_seq);
1534     oseq = htonl(o_unit->tcp->th_seq);
1535 
1536     /* out of order or retransmitted. */
1537     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1538         chain->stat.data_out_of_win++;
1539         return RSC_FINAL;
1540     }
1541 
1542     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1543     if (nseq == oseq) {
1544         if ((o_unit->payload == 0) && n_unit->payload) {
1545             /* Going from no payload to payload: normal case, not a dup ack etc. */
1546             chain->stat.data_after_pure_ack++;
1547             goto coalesce;
1548         } else {
1549             return virtio_net_rsc_handle_ack(chain, seg, buf,
1550                                              n_unit->tcp, o_unit->tcp);
1551         }
1552     } else if ((nseq - oseq) != o_unit->payload) {
1553         /* Not a consistent packet, out of order */
1554         chain->stat.data_out_of_order++;
1555         return RSC_FINAL;
1556     } else {
1557 coalesce:
1558         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1559             chain->stat.over_size++;
1560             return RSC_FINAL;
1561         }
1562 
1563         /* The data is in order.  The payload length field differs between
1564            v4/v6, so use its stored value to update and record the new data length */
1565         o_unit->payload += n_unit->payload; /* update new data len */
1566 
1567         /* update field in ip header */
1568         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1569 
1570         /* Take over the 'PUSH' flag: the WHQL test guide says 'PUSH' can be
1571            coalesced for Windows guests, though this may change the behavior of
1572            Linux guests (only if they use the RSC feature). */
1573         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1574 
1575         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1576         o_unit->tcp->th_win = n_unit->tcp->th_win;
1577 
1578         memmove(seg->buf + seg->size, data, n_unit->payload);
1579         seg->size += n_unit->payload;
1580         seg->packets++;
1581         chain->stat.coalesced++;
1582         return RSC_COALESCE;
1583     }
1584 }
1585 
1586 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1587                                         VirtioNetRscSeg *seg,
1588                                         const uint8_t *buf, size_t size,
1589                                         VirtioNetRscUnit *unit)
1590 {
1591     struct ip_header *ip1, *ip2;
1592 
1593     ip1 = (struct ip_header *)(unit->ip);
1594     ip2 = (struct ip_header *)(seg->unit.ip);
1595     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1596         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1597         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1598         chain->stat.no_match++;
1599         return RSC_NO_MATCH;
1600     }
1601 
1602     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1603 }
1604 
1605 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1606                                         VirtioNetRscSeg *seg,
1607                                         const uint8_t *buf, size_t size,
1608                                         VirtioNetRscUnit *unit)
1609 {
1610     struct ip6_header *ip1, *ip2;
1611 
1612     ip1 = (struct ip6_header *)(unit->ip);
1613     ip2 = (struct ip6_header *)(seg->unit.ip);
1614     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1615         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1616         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1617         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1618         chain->stat.no_match++;
1619         return RSC_NO_MATCH;
1620     }
1621 
1622     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1623 }
1624 
1625 /* Packets with the 'SYN' flag should bypass; other control flags should be
1626  * sent only after the chain is drained, to prevent out-of-order delivery */
1627 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1628                                          struct tcp_header *tcp)
1629 {
1630     uint16_t tcp_hdr;
1631     uint16_t tcp_flag;
1632 
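    /* The data offset lives in the top 4 bits of th_offset_flags and counts
     * 32-bit words; ((flags & 0xF000) >> 12) * 4 bytes folds into a single
     * shift by 10, e.g. 0x5000 >> 10 == 20, an option-less TCP header */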
1633     tcp_flag = htons(tcp->th_offset_flags);
1634     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1635     tcp_flag &= VIRTIO_NET_TCP_FLAG;
1637     if (tcp_flag & TH_SYN) {
1638         chain->stat.tcp_syn++;
1639         return RSC_BYPASS;
1640     }
1641 
1642     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1643         chain->stat.tcp_ctrl_drain++;
1644         return RSC_FINAL;
1645     }
1646 
1647     if (tcp_hdr > sizeof(struct tcp_header)) {
1648         chain->stat.tcp_all_opt++;
1649         return RSC_FINAL;
1650     }
1651 
1652     return RSC_CANDIDATE;
1653 }
1654 
1655 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1656                                          NetClientState *nc,
1657                                          const uint8_t *buf, size_t size,
1658                                          VirtioNetRscUnit *unit)
1659 {
1660     int ret;
1661     VirtioNetRscSeg *seg, *nseg;
1662 
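    /* First packet for this chain: cache it and arm the purge timer so a
     * lone segment still gets delivered if nothing ever coalesces with it */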
1663     if (QTAILQ_EMPTY(&chain->buffers)) {
1664         chain->stat.empty_cache++;
1665         virtio_net_rsc_cache_buf(chain, nc, buf, size);
1666         timer_mod(chain->drain_timer,
1667               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1668         return size;
1669     }
1670 
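    /* Walk the cached segments looking for the same flow; the first match
     * decides: either the new packet is coalesced into it, or the cached
     * segment is drained and the new packet delivered on its own */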
1671     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1672         if (chain->proto == ETH_P_IP) {
1673             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1674         } else {
1675             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1676         }
1677 
1678         if (ret == RSC_FINAL) {
1679             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1680                 /* Send failed */
1681                 chain->stat.final_failed++;
1682                 return 0;
1683             }
1684 
1685             /* Send current packet */
1686             return virtio_net_do_receive(nc, buf, size);
1687         } else if (ret == RSC_NO_MATCH) {
1688             continue;
1689         } else {
1690             /* Coalesced: mark the segment so the IPv4 checksum is recalculated */
1691             seg->is_coalesced = 1;
1692             return size;
1693         }
1694     }
1695 
1696     chain->stat.no_match_cache++;
1697     virtio_net_rsc_cache_buf(chain, nc, buf, size);
1698     return size;
1699 }
1700 
1701 /* Drain a connection's cached data; this avoids out-of-order segments */
1702 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1703                                         NetClientState *nc,
1704                                         const uint8_t *buf, size_t size,
1705                                         uint16_t ip_start, uint16_t ip_size,
1706                                         uint16_t tcp_port)
1707 {
1708     VirtioNetRscSeg *seg, *nseg;
1709     uint32_t ppair1, ppair2;
1710 
1711     ppair1 = *(uint32_t *)(buf + tcp_port);
1712     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1713         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1714         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1715             || (ppair1 != ppair2)) {
1716             continue;
1717         }
1718         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1719             chain->stat.drain_failed++;
1720         }
1721 
1722         break;
1723     }
1724 
1725     return virtio_net_do_receive(nc, buf, size);
1726 }
1727 
1728 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1729                                             struct ip_header *ip,
1730                                             const uint8_t *buf, size_t size)
1731 {
1732     uint16_t ip_len;
1733 
1734     /* Not an ipv4 packet */
1735     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1736         chain->stat.ip_option++;
1737         return RSC_BYPASS;
1738     }
1739 
1740     /* Don't handle packets with ip option */
1741     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1742         chain->stat.ip_option++;
1743         return RSC_BYPASS;
1744     }
1745 
1746     if (ip->ip_p != IPPROTO_TCP) {
1747         chain->stat.bypass_not_tcp++;
1748         return RSC_BYPASS;
1749     }
1750 
1751     /* Don't handle packets that may be fragmented (DF bit not set) */
1752     if (!(htons(ip->ip_off) & IP_DF)) {
1753         chain->stat.ip_frag++;
1754         return RSC_BYPASS;
1755     }
1756 
1757     /* Don't handle packets with ecn flag */
1758     if (IPTOS_ECN(ip->ip_tos)) {
1759         chain->stat.ip_ecn++;
1760         return RSC_BYPASS;
1761     }
1762 
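    /* The IPv4 total length must cover at least the minimal TCP/IP headers
     * and must not claim more bytes than actually follow the virtio and
     * Ethernet headers in this buffer */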
1763     ip_len = htons(ip->ip_len);
1764     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1765         || ip_len > (size - chain->n->guest_hdr_len -
1766                      sizeof(struct eth_header))) {
1767         chain->stat.ip_hacked++;
1768         return RSC_BYPASS;
1769     }
1770 
1771     return RSC_CANDIDATE;
1772 }
1773 
1774 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1775                                       NetClientState *nc,
1776                                       const uint8_t *buf, size_t size)
1777 {
1778     int32_t ret;
1779     uint16_t hdr_len;
1780     VirtioNetRscUnit unit;
1781 
1782     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1783 
1784     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1785         + sizeof(struct tcp_header))) {
1786         chain->stat.bypass_not_tcp++;
1787         return virtio_net_do_receive(nc, buf, size);
1788     }
1789 
1790     virtio_net_rsc_extract_unit4(chain, buf, &unit);
1791     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1792         != RSC_CANDIDATE) {
1793         return virtio_net_do_receive(nc, buf, size);
1794     }
1795 
1796     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1797     if (ret == RSC_BYPASS) {
1798         return virtio_net_do_receive(nc, buf, size);
1799     } else if (ret == RSC_FINAL) {
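        /* A control segment finalizes the flow, so drain any cached data
         * for this connection first; 12 is the offset of ip_src in the
         * IPv4 header, hence ip_start/ip_size span the saddr + daddr pair */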
1800         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1801                 ((hdr_len + sizeof(struct eth_header)) + 12),
1802                 VIRTIO_NET_IP4_ADDR_SIZE,
1803                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1804     }
1805 
1806     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1807 }
1808 
1809 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1810                                             struct ip6_header *ip6,
1811                                             const uint8_t *buf, size_t size)
1812 {
1813     uint16_t ip_len;
1814 
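    /* Not an IPv6 packet */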
1815     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1816         != IP_HEADER_VERSION_6) {
1817         return RSC_BYPASS;
1818     }
1819 
1820     /* Both options and the protocol are checked here (next header != TCP) */
1821     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1822         chain->stat.bypass_not_tcp++;
1823         return RSC_BYPASS;
1824     }
1825 
1826     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1827     if (ip_len < sizeof(struct tcp_header) ||
1828         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1829                   - sizeof(struct ip6_header))) {
1830         chain->stat.ip_hacked++;
1831         return RSC_BYPASS;
1832     }
1833 
1834     /* Don't handle packets with ecn flag */
1835     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1836         chain->stat.ip_ecn++;
1837         return RSC_BYPASS;
1838     }
1839 
1840     return RSC_CANDIDATE;
1841 }
1842 
1843 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1844                                       const uint8_t *buf, size_t size)
1845 {
1846     int32_t ret;
1847     uint16_t hdr_len;
1848     VirtioNetRscChain *chain;
1849     VirtioNetRscUnit unit;
1850 
1851     chain = (VirtioNetRscChain *)opq;
1852     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1853 
1854     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1855         + sizeof(struct tcp_header))) {
1856         return virtio_net_do_receive(nc, buf, size);
1857     }
1858 
1859     virtio_net_rsc_extract_unit6(chain, buf, &unit);
1860     if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
1861         != RSC_CANDIDATE) {
1862         return virtio_net_do_receive(nc, buf, size);
1863     }
1864 
1865     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1866     if (ret == RSC_BYPASS) {
1867         return virtio_net_do_receive(nc, buf, size);
1868     } else if (ret == RSC_FINAL) {
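        /* As in the v4 path; 8 is the offset of ip6_src in the IPv6 header,
         * so ip_start/ip_size span the 32-byte saddr + daddr pair */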
1869         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1870                 ((hdr_len + sizeof(struct eth_header)) + 8),
1871                 VIRTIO_NET_IP6_ADDR_SIZE,
1872                 hdr_len + sizeof(struct eth_header)
1873                 + sizeof(struct ip6_header));
1874     }
1875 
1876     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1877 }
1878 
1879 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1880                                                       NetClientState *nc,
1881                                                       uint16_t proto)
1882 {
1883     VirtioNetRscChain *chain;
1884 
1885     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1886         return NULL;
1887     }
1888 
1889     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1890         if (chain->proto == proto) {
1891             return chain;
1892         }
1893     }
1894 
1895     chain = g_malloc(sizeof(*chain));
1896     chain->n = n;
1897     chain->proto = proto;
1898     if (proto == (uint16_t)ETH_P_IP) {
1899         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1900         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1901     } else {
1902         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1903         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1904     }
1905     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1906                                       virtio_net_rsc_purge, chain);
1907     memset(&chain->stat, 0, sizeof(chain->stat));
1908 
1909     QTAILQ_INIT(&chain->buffers);
1910     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1911 
1912     return chain;
1913 }
1914 
1915 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1916                                       const uint8_t *buf,
1917                                       size_t size)
1918 {
1919     uint16_t proto;
1920     VirtioNetRscChain *chain;
1921     struct eth_header *eth;
1922     VirtIONet *n;
1923 
1924     n = qemu_get_nic_opaque(nc);
1925     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1926         return virtio_net_do_receive(nc, buf, size);
1927     }
1928 
1929     eth = (struct eth_header *)(buf + n->guest_hdr_len);
1930     proto = htons(eth->h_proto);
1931 
1932     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1933     if (chain) {
1934         chain->stat.received++;
1935         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1936             return virtio_net_rsc_receive4(chain, nc, buf, size);
1937         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1938             return virtio_net_rsc_receive6(chain, nc, buf, size);
1939         }
1940     }
1941     return virtio_net_do_receive(nc, buf, size);
1942 }
1943 
1944 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1945                                   size_t size)
1946 {
1947     VirtIONet *n = qemu_get_nic_opaque(nc);
1948     if ((n->rsc4_enabled || n->rsc6_enabled)) {
1949         return virtio_net_rsc_receive(nc, buf, size);
1950     } else {
1951         return virtio_net_do_receive(nc, buf, size);
1952     }
1953 }
1954 
1955 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1956 
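/* Completion callback for packets sent asynchronously: retire the element
 * left pending in async_tx, re-enable tx notifications and resume flushing
 * the queue */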
1957 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1958 {
1959     VirtIONet *n = qemu_get_nic_opaque(nc);
1960     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1961     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1962 
1963     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1964     virtio_notify(vdev, q->tx_vq);
1965 
1966     g_free(q->async_tx.elem);
1967     q->async_tx.elem = NULL;
1968 
1969     virtio_queue_set_notification(q->tx_vq, 1);
1970     virtio_net_flush_tx(q);
1971 }
1972 
1973 /* TX */
1974 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
1975 {
1976     VirtIONet *n = q->n;
1977     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1978     VirtQueueElement *elem;
1979     int32_t num_packets = 0;
1980     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
1981     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1982         return num_packets;
1983     }
1984 
1985     if (q->async_tx.elem) {
1986         virtio_queue_set_notification(q->tx_vq, 0);
1987         return num_packets;
1988     }
1989 
1990     for (;;) {
1991         ssize_t ret;
1992         unsigned int out_num;
1993         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
1994         struct virtio_net_hdr_mrg_rxbuf mhdr;
1995 
1996         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
1997         if (!elem) {
1998             break;
1999         }
2000 
2001         out_num = elem->out_num;
2002         out_sg = elem->out_sg;
2003         if (out_num < 1) {
2004             virtio_error(vdev, "virtio-net header not in first element");
2005             virtqueue_detach_element(q->tx_vq, elem, 0);
2006             g_free(elem);
2007             return -EINVAL;
2008         }
2009 
2010         if (n->has_vnet_hdr) {
2011             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2012                 n->guest_hdr_len) {
2013                 virtio_error(vdev, "virtio-net header incorrect");
2014                 virtqueue_detach_element(q->tx_vq, elem, 0);
2015                 g_free(elem);
2016                 return -EINVAL;
2017             }
2018             if (n->needs_vnet_hdr_swap) {
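                /* Put the byte-swapped header copy into sg2[0] and chain
                 * the remaining guest buffers after it, skipping the
                 * original header bytes; if iov_copy uses every one of the
                 * VIRTQUEUE_MAX_SIZE entries, the element is too
                 * fragmented to prepend to, so drop it */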
2019                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2020                 sg2[0].iov_base = &mhdr;
2021                 sg2[0].iov_len = n->guest_hdr_len;
2022                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2023                                    out_sg, out_num,
2024                                    n->guest_hdr_len, -1);
2025                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2026                     goto drop;
2027                 }
2028                 out_num += 1;
2029                 out_sg = sg2;
2030             }
2031         }
2032         /*
2033          * If host wants to see the guest header as is, we can
2034          * pass it on unchanged. Otherwise, copy just the parts
2035          * that host is interested in.
2036          */
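        /* For example, a guest using the 12-byte virtio_net_hdr_mrg_rxbuf
         * with a host expecting the 10-byte virtio_net_hdr gets bytes 0..9
         * copied, then everything from byte 12 onwards. */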
2037         assert(n->host_hdr_len <= n->guest_hdr_len);
2038         if (n->host_hdr_len != n->guest_hdr_len) {
2039             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2040                                        out_sg, out_num,
2041                                        0, n->host_hdr_len);
2042             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2043                              out_sg, out_num,
2044                              n->guest_hdr_len, -1);
2045             out_num = sg_num;
2046             out_sg = sg;
2047         }
2048 
2049         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2050                                       out_sg, out_num, virtio_net_tx_complete);
2051         if (ret == 0) {
2052             virtio_queue_set_notification(q->tx_vq, 0);
2053             q->async_tx.elem = elem;
2054             return -EBUSY;
2055         }
2056 
2057 drop:
2058         virtqueue_push(q->tx_vq, elem, 0);
2059         virtio_notify(vdev, q->tx_vq);
2060         g_free(elem);
2061 
2062         if (++num_packets >= n->tx_burst) {
2063             break;
2064         }
2065     }
2066     return num_packets;
2067 }
2068 
2069 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2070 {
2071     VirtIONet *n = VIRTIO_NET(vdev);
2072     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2073 
2074     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2075         virtio_net_drop_tx_queue_data(vdev, vq);
2076         return;
2077     }
2078 
2079     /* This happens when device was stopped but VCPU wasn't. */
2080     if (!vdev->vm_running) {
2081         q->tx_waiting = 1;
2082         return;
2083     }
2084 
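    /* Timer-based tx mitigation: the first kick only arms the timer and
     * masks further notifications; a second kick while the timer is still
     * pending flushes immediately and re-enables notifications */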
2085     if (q->tx_waiting) {
2086         virtio_queue_set_notification(vq, 1);
2087         timer_del(q->tx_timer);
2088         q->tx_waiting = 0;
2089         if (virtio_net_flush_tx(q) == -EINVAL) {
2090             return;
2091         }
2092     } else {
2093         timer_mod(q->tx_timer,
2094                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2095         q->tx_waiting = 1;
2096         virtio_queue_set_notification(vq, 0);
2097     }
2098 }
2099 
2100 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2101 {
2102     VirtIONet *n = VIRTIO_NET(vdev);
2103     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2104 
2105     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2106         virtio_net_drop_tx_queue_data(vdev, vq);
2107         return;
2108     }
2109 
2110     if (unlikely(q->tx_waiting)) {
2111         return;
2112     }
2113     q->tx_waiting = 1;
2114     /* This happens when device was stopped but VCPU wasn't. */
2115     if (!vdev->vm_running) {
2116         return;
2117     }
2118     virtio_queue_set_notification(vq, 0);
2119     qemu_bh_schedule(q->tx_bh);
2120 }
2121 
2122 static void virtio_net_tx_timer(void *opaque)
2123 {
2124     VirtIONetQueue *q = opaque;
2125     VirtIONet *n = q->n;
2126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2127     /* This happens when the device was stopped but the timer wasn't. */
2128     if (!vdev->vm_running) {
2129         /* Make sure tx waiting is set, so we'll run when restarted. */
2130         assert(q->tx_waiting);
2131         return;
2132     }
2133 
2134     q->tx_waiting = 0;
2135 
2136     /* Just in case the driver is not ready anymore */
2137     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2138         return;
2139     }
2140 
2141     virtio_queue_set_notification(q->tx_vq, 1);
2142     virtio_net_flush_tx(q);
2143 }
2144 
2145 static void virtio_net_tx_bh(void *opaque)
2146 {
2147     VirtIONetQueue *q = opaque;
2148     VirtIONet *n = q->n;
2149     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2150     int32_t ret;
2151 
2152     /* This happens when device was stopped but BH wasn't. */
2153     if (!vdev->vm_running) {
2154         /* Make sure tx waiting is set, so we'll run when restarted. */
2155         assert(q->tx_waiting);
2156         return;
2157     }
2158 
2159     q->tx_waiting = 0;
2160 
2161     /* Just in case the driver is not ready anymore */
2162     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2163         return;
2164     }
2165 
2166     ret = virtio_net_flush_tx(q);
2167     if (ret == -EBUSY || ret == -EINVAL) {
2168         return; /* Notification re-enable handled by tx_complete or device
2169                  * broken */
2170     }
2171 
2172     /* If we flush a full burst of packets, assume there are
2173      * more coming and immediately reschedule */
2174     if (ret >= n->tx_burst) {
2175         qemu_bh_schedule(q->tx_bh);
2176         q->tx_waiting = 1;
2177         return;
2178     }
2179 
2180     /* If less than a full burst, re-enable notification and flush
2181      * anything that may have come in while we weren't looking.  If
2182      * we find something, assume the guest is still active and reschedule */
2183     virtio_queue_set_notification(q->tx_vq, 1);
2184     ret = virtio_net_flush_tx(q);
2185     if (ret == -EINVAL) {
2186         return;
2187     } else if (ret > 0) {
2188         virtio_queue_set_notification(q->tx_vq, 0);
2189         qemu_bh_schedule(q->tx_bh);
2190         q->tx_waiting = 1;
2191     }
2192 }
2193 
2194 static void virtio_net_add_queue(VirtIONet *n, int index)
2195 {
2196     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2197 
2198     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2199                                            virtio_net_handle_rx);
2200 
2201     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2202         n->vqs[index].tx_vq =
2203             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2204                              virtio_net_handle_tx_timer);
2205         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2206                                               virtio_net_tx_timer,
2207                                               &n->vqs[index]);
2208     } else {
2209         n->vqs[index].tx_vq =
2210             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2211                              virtio_net_handle_tx_bh);
2212         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2213     }
2214 
2215     n->vqs[index].tx_waiting = 0;
2216     n->vqs[index].n = n;
2217 }
2218 
2219 static void virtio_net_del_queue(VirtIONet *n, int index)
2220 {
2221     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2222     VirtIONetQueue *q = &n->vqs[index];
2223     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2224 
2225     qemu_purge_queued_packets(nc);
2226 
2227     virtio_del_queue(vdev, index * 2);
2228     if (q->tx_timer) {
2229         timer_del(q->tx_timer);
2230         timer_free(q->tx_timer);
2231         q->tx_timer = NULL;
2232     } else {
2233         qemu_bh_delete(q->tx_bh);
2234         q->tx_bh = NULL;
2235     }
2236     q->tx_waiting = 0;
2237     virtio_del_queue(vdev, index * 2 + 1);
2238 }
2239 
2240 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2241 {
2242     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2243     int old_num_queues = virtio_get_num_queues(vdev);
2244     int new_num_queues = new_max_queues * 2 + 1;
2245     int i;
2246 
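    /* Virtqueues come in rx/tx pairs (rx at index 2 * i, tx at 2 * i + 1)
     * with the control queue last, hence 2 * pairs + 1 queues in total */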
2247     assert(old_num_queues >= 3);
2248     assert(old_num_queues % 2 == 1);
2249 
2250     if (old_num_queues == new_num_queues) {
2251         return;
2252     }
2253 
2254     /*
2255      * We always need to remove and add ctrl vq if
2256      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2257      * and then we only enter one of the following two loops.
2258      */
2259     virtio_del_queue(vdev, old_num_queues - 1);
2260 
2261     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2262         /* new_num_queues < old_num_queues */
2263         virtio_net_del_queue(n, i / 2);
2264     }
2265 
2266     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2267         /* new_num_queues > old_num_queues */
2268         virtio_net_add_queue(n, i / 2);
2269     }
2270 
2271     /* add ctrl_vq last */
2272     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2273 }
2274 
2275 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2276 {
2277     int max = multiqueue ? n->max_queues : 1;
2278 
2279     n->multiqueue = multiqueue;
2280     virtio_net_change_num_queues(n, max);
2281 
2282     virtio_net_set_queues(n);
2283 }
2284 
2285 static int virtio_net_post_load_device(void *opaque, int version_id)
2286 {
2287     VirtIONet *n = opaque;
2288     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2289     int i, link_down;
2290 
2291     trace_virtio_net_post_load_device();
2292     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2293                                virtio_vdev_has_feature(vdev,
2294                                                        VIRTIO_F_VERSION_1));
2295 
2296     /* MAC_TABLE_ENTRIES may be different from the saved image */
2297     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2298         n->mac_table.in_use = 0;
2299     }
2300 
2301     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2302         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2303     }
2304 
2305     if (peer_has_vnet_hdr(n)) {
2306         virtio_net_apply_guest_offloads(n);
2307     }
2308 
2309     virtio_net_set_queues(n);
2310 
2311     /* Find the first multicast entry in the saved MAC filter */
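    /* (an Ethernet address is multicast if the least-significant bit of
     *  its first octet is set) */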
2312     for (i = 0; i < n->mac_table.in_use; i++) {
2313         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2314             break;
2315         }
2316     }
2317     n->mac_table.first_multi = i;
2318 
2319     /* nc.link_down can't be migrated, so infer link_down from the
2320      * link status bit in n->status */
2321     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2322     for (i = 0; i < n->max_queues; i++) {
2323         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2324     }
2325 
2326     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2327         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2328         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2329                                   QEMU_CLOCK_VIRTUAL,
2330                                   virtio_net_announce_timer, n);
2331         if (n->announce_timer.round) {
2332             timer_mod(n->announce_timer.tm,
2333                       qemu_clock_get_ms(n->announce_timer.type));
2334         } else {
2335             qemu_announce_timer_del(&n->announce_timer);
2336         }
2337     }
2338 
2339     return 0;
2340 }
2341 
2342 /* tx_waiting field of a VirtIONetQueue */
2343 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2344     .name = "virtio-net-queue-tx_waiting",
2345     .fields = (VMStateField[]) {
2346         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2347         VMSTATE_END_OF_LIST()
2348     },
2349 };
2350 
2351 static bool max_queues_gt_1(void *opaque, int version_id)
2352 {
2353     return VIRTIO_NET(opaque)->max_queues > 1;
2354 }
2355 
2356 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2357 {
2358     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2359                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2360 }
2361 
2362 static bool mac_table_fits(void *opaque, int version_id)
2363 {
2364     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2365 }
2366 
2367 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2368 {
2369     return !mac_table_fits(opaque, version_id);
2370 }
2371 
2372 /* This temporary type is shared by all the WITH_TMP methods
2373  * although only some fields are used by each.
2374  */
2375 struct VirtIONetMigTmp {
2376     VirtIONet      *parent;
2377     VirtIONetQueue *vqs_1;
2378     uint16_t        curr_queues_1;
2379     uint8_t         has_ufo;
2380     uint32_t        has_vnet_hdr;
2381 };
2382 
2383 /* The 2nd and subsequent tx_waiting flags are loaded later than
2384  * the 1st entry in the queues and only if there's more than one
2385  * entry.  We use the tmp mechanism to calculate a temporary
2386  * pointer and count and also validate the count.
2387  */
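/* For example, with curr_queues == 4, vqs_1 points at vqs[1] and
 * curr_queues_1 == 3, so entries vqs[1..3] are handled by this section */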
2388 
2389 static int virtio_net_tx_waiting_pre_save(void *opaque)
2390 {
2391     struct VirtIONetMigTmp *tmp = opaque;
2392 
2393     tmp->vqs_1 = tmp->parent->vqs + 1;
2394     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2395     if (tmp->parent->curr_queues == 0) {
2396         tmp->curr_queues_1 = 0;
2397     }
2398 
2399     return 0;
2400 }
2401 
2402 static int virtio_net_tx_waiting_pre_load(void *opaque)
2403 {
2404     struct VirtIONetMigTmp *tmp = opaque;
2405 
2406     /* Reuse the pointer setup from save */
2407     virtio_net_tx_waiting_pre_save(opaque);
2408 
2409     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2410         error_report("virtio-net: curr_queues %x > max_queues %x",
2411             tmp->parent->curr_queues, tmp->parent->max_queues);
2412 
2413         return -EINVAL;
2414     }
2415 
2416     return 0; /* all good */
2417 }
2418 
2419 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2420     .name      = "virtio-net-tx_waiting",
2421     .pre_load  = virtio_net_tx_waiting_pre_load,
2422     .pre_save  = virtio_net_tx_waiting_pre_save,
2423     .fields    = (VMStateField[]) {
2424         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2425                                      curr_queues_1,
2426                                      vmstate_virtio_net_queue_tx_waiting,
2427                                      struct VirtIONetQueue),
2428         VMSTATE_END_OF_LIST()
2429     },
2430 };
2431 
2432 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2433  * flag set we need to check that we have it
2434  */
2435 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2436 {
2437     struct VirtIONetMigTmp *tmp = opaque;
2438 
2439     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2440         error_report("virtio-net: saved image requires TUN_F_UFO support");
2441         return -EINVAL;
2442     }
2443 
2444     return 0;
2445 }
2446 
2447 static int virtio_net_ufo_pre_save(void *opaque)
2448 {
2449     struct VirtIONetMigTmp *tmp = opaque;
2450 
2451     tmp->has_ufo = tmp->parent->has_ufo;
2452 
2453     return 0;
2454 }
2455 
2456 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2457     .name      = "virtio-net-ufo",
2458     .post_load = virtio_net_ufo_post_load,
2459     .pre_save  = virtio_net_ufo_pre_save,
2460     .fields    = (VMStateField[]) {
2461         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2462         VMSTATE_END_OF_LIST()
2463     },
2464 };
2465 
2466 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2467  * flag set we need to check that we have it
2468  */
2469 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2470 {
2471     struct VirtIONetMigTmp *tmp = opaque;
2472 
2473     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2474         error_report("virtio-net: saved image requires vnet_hdr=on");
2475         return -EINVAL;
2476     }
2477 
2478     return 0;
2479 }
2480 
2481 static int virtio_net_vnet_pre_save(void *opaque)
2482 {
2483     struct VirtIONetMigTmp *tmp = opaque;
2484 
2485     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2486 
2487     return 0;
2488 }
2489 
2490 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2491     .name      = "virtio-net-vnet",
2492     .post_load = virtio_net_vnet_post_load,
2493     .pre_save  = virtio_net_vnet_pre_save,
2494     .fields    = (VMStateField[]) {
2495         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2496         VMSTATE_END_OF_LIST()
2497     },
2498 };
2499 
2500 static const VMStateDescription vmstate_virtio_net_device = {
2501     .name = "virtio-net-device",
2502     .version_id = VIRTIO_NET_VM_VERSION,
2503     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2504     .post_load = virtio_net_post_load_device,
2505     .fields = (VMStateField[]) {
2506         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2507         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2508                                vmstate_virtio_net_queue_tx_waiting,
2509                                VirtIONetQueue),
2510         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2511         VMSTATE_UINT16(status, VirtIONet),
2512         VMSTATE_UINT8(promisc, VirtIONet),
2513         VMSTATE_UINT8(allmulti, VirtIONet),
2514         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2515 
2516         /* Guarded pair: If it fits we load it, else we throw it away
2517          * - this can happen if the source has a larger MAC table; post-load
2518          * sets flags in this case.
2519          */
2520         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2521                                 0, mac_table_fits, mac_table.in_use,
2522                                  ETH_ALEN),
2523         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2524                                      mac_table.in_use, ETH_ALEN),
2525 
2526         /* Note: This is an array of uint32's that's always been saved as a
2527          * buffer; hold onto your endiannesses; it's actually used as a bitmap
2528          * of uint32-sized words.
2529          */
2530         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2531         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2532                          vmstate_virtio_net_has_vnet),
2533         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2534         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2535         VMSTATE_UINT8(alluni, VirtIONet),
2536         VMSTATE_UINT8(nomulti, VirtIONet),
2537         VMSTATE_UINT8(nouni, VirtIONet),
2538         VMSTATE_UINT8(nobcast, VirtIONet),
2539         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2540                          vmstate_virtio_net_has_ufo),
2541         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2542                             vmstate_info_uint16_equal, uint16_t),
2543         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2544         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2545                          vmstate_virtio_net_tx_waiting),
2546         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2547                             has_ctrl_guest_offloads),
2548         VMSTATE_END_OF_LIST()
2549     },
2550 };
2551 
2552 static NetClientInfo net_virtio_info = {
2553     .type = NET_CLIENT_DRIVER_NIC,
2554     .size = sizeof(NICState),
2555     .can_receive = virtio_net_can_receive,
2556     .receive = virtio_net_receive,
2557     .link_status_changed = virtio_net_set_link_status,
2558     .query_rx_filter = virtio_net_query_rxfilter,
2559 };
2560 
2561 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2562 {
2563     VirtIONet *n = VIRTIO_NET(vdev);
2564     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2565     assert(n->vhost_started);
2566     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2567 }
2568 
2569 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2570                                            bool mask)
2571 {
2572     VirtIONet *n = VIRTIO_NET(vdev);
2573     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2574     assert(n->vhost_started);
2575     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2576                              vdev, idx, mask);
2577 }
2578 
2579 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2580 {
2581     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2582 
2583     n->config_size = virtio_feature_get_config_size(feature_sizes,
2584                                                     host_features);
2585 }
2586 
2587 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2588                                    const char *type)
2589 {
2590     /*
2591      * The name can be NULL; in that case the netclient name will be type.x.
2592      */
2593     assert(type != NULL);
2594 
2595     g_free(n->netclient_name);
2596     g_free(n->netclient_type);
2597     n->netclient_name = g_strdup(name);
2598     n->netclient_type = g_strdup(type);
2599 }
2600 
2601 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2602 {
2603     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2604     VirtIONet *n = VIRTIO_NET(dev);
2605     NetClientState *nc;
2606     int i;
2607 
2608     if (n->net_conf.mtu) {
2609         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2610     }
2611 
2612     if (n->net_conf.duplex_str) {
2613         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2614             n->net_conf.duplex = DUPLEX_HALF;
2615         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2616             n->net_conf.duplex = DUPLEX_FULL;
2617         } else {
2618             error_setg(errp, "'duplex' must be 'half' or 'full'");
2619         }
2620         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2621     } else {
2622         n->net_conf.duplex = DUPLEX_UNKNOWN;
2623     }
2624 
2625     if (n->net_conf.speed < SPEED_UNKNOWN) {
2626         error_setg(errp, "'speed' must be between 0 and INT_MAX");
2627     } else if (n->net_conf.speed >= 0) {
2628         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2629     }
2630 
2631     virtio_net_set_config_size(n, n->host_features);
2632     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2633 
2634     /*
2635      * We set a lower limit on RX queue size to what it always was.
2636      * Guests that want a smaller ring can always resize it without
2637      * help from us (using virtio 1 and up).
2638      */
2639     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2640         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2641         !is_power_of_2(n->net_conf.rx_queue_size)) {
2642         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2643                    "must be a power of 2 between %d and %d.",
2644                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2645                    VIRTQUEUE_MAX_SIZE);
2646         virtio_cleanup(vdev);
2647         return;
2648     }
2649 
2650     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2651         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2652         !is_power_of_2(n->net_conf.tx_queue_size)) {
2653         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2654                    "must be a power of 2 between %d and %d",
2655                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2656                    VIRTQUEUE_MAX_SIZE);
2657         virtio_cleanup(vdev);
2658         return;
2659     }
2660 
2661     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
2662     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
2663         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
2664                    "must be a positive integer less than %d.",
2665                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
2666         virtio_cleanup(vdev);
2667         return;
2668     }
2669     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
2670     n->curr_queues = 1;
2671     n->tx_timeout = n->net_conf.txtimer;
2672 
2673     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2674                        && strcmp(n->net_conf.tx, "bh")) {
2675         warn_report("virtio-net: "
2676                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2677                     n->net_conf.tx);
2678         error_printf("Defaulting to \"bh\"");
2679     }
2680 
2681     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2682                                     n->net_conf.tx_queue_size);
2683 
2684     for (i = 0; i < n->max_queues; i++) {
2685         virtio_net_add_queue(n, i);
2686     }
2687 
2688     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2689     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2690     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
2691     n->status = VIRTIO_NET_S_LINK_UP;
2692     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2693                               QEMU_CLOCK_VIRTUAL,
2694                               virtio_net_announce_timer, n);
2695 
2696     if (n->netclient_type) {
2697         /*
2698          * This happens when virtio_net_set_netclient_name has been called.
2699          */
2700         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2701                               n->netclient_type, n->netclient_name, n);
2702     } else {
2703         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2704                               object_get_typename(OBJECT(dev)), dev->id, n);
2705     }
2706 
2707     peer_test_vnet_hdr(n);
2708     if (peer_has_vnet_hdr(n)) {
2709         for (i = 0; i < n->max_queues; i++) {
2710             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
2711         }
2712         n->host_hdr_len = sizeof(struct virtio_net_hdr);
2713     } else {
2714         n->host_hdr_len = 0;
2715     }
2716 
2717     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
2718 
2719     n->vqs[0].tx_waiting = 0;
2720     n->tx_burst = n->net_conf.txburst;
2721     virtio_net_set_mrg_rx_bufs(n, 0, 0);
2722     n->promisc = 1; /* for compatibility */
2723 
2724     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
2725 
2726     n->vlans = g_malloc0(MAX_VLAN >> 3);
2727 
2728     nc = qemu_get_queue(n->nic);
2729     nc->rxfilter_notify_enabled = 1;
2730 
2731     QTAILQ_INIT(&n->rsc_chains);
2732     n->qdev = dev;
2733 }
2734 
2735 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2736 {
2737     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2738     VirtIONet *n = VIRTIO_NET(dev);
2739     int i, max_queues;
2740 
2741     /* This will stop vhost backend if appropriate. */
2742     virtio_net_set_status(vdev, 0);
2743 
2744     g_free(n->netclient_name);
2745     n->netclient_name = NULL;
2746     g_free(n->netclient_type);
2747     n->netclient_type = NULL;
2748 
2749     g_free(n->mac_table.macs);
2750     g_free(n->vlans);
2751 
2752     max_queues = n->multiqueue ? n->max_queues : 1;
2753     for (i = 0; i < max_queues; i++) {
2754         virtio_net_del_queue(n, i);
2755     }
2756 
2757     qemu_announce_timer_del(&n->announce_timer);
2758     g_free(n->vqs);
2759     qemu_del_nic(n->nic);
2760     virtio_net_rsc_cleanup(n);
2761     virtio_cleanup(vdev);
2762 }
2763 
2764 static void virtio_net_instance_init(Object *obj)
2765 {
2766     VirtIONet *n = VIRTIO_NET(obj);
2767 
2768     /*
2769      * The default config_size is sizeof(struct virtio_net_config).
2770      * It can be overridden with virtio_net_set_config_size.
2771      */
2772     n->config_size = sizeof(struct virtio_net_config);
2773     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2774                                   "bootindex", "/ethernet-phy@0",
2775                                   DEVICE(n), NULL);
2776 }
2777 
2778 static int virtio_net_pre_save(void *opaque)
2779 {
2780     VirtIONet *n = opaque;
2781 
2782     /* At this point the backend must be stopped, otherwise
2783      * it might keep writing to memory. */
2784     assert(!n->vhost_started);
2785 
2786     return 0;
2787 }
2788 
2789 static const VMStateDescription vmstate_virtio_net = {
2790     .name = "virtio-net",
2791     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2792     .version_id = VIRTIO_NET_VM_VERSION,
2793     .fields = (VMStateField[]) {
2794         VMSTATE_VIRTIO_DEVICE,
2795         VMSTATE_END_OF_LIST()
2796     },
2797     .pre_save = virtio_net_pre_save,
2798 };
2799 
2800 static Property virtio_net_properties[] = {
2801     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2802                     VIRTIO_NET_F_CSUM, true),
2803     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
2804                     VIRTIO_NET_F_GUEST_CSUM, true),
2805     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2806     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
2807                     VIRTIO_NET_F_GUEST_TSO4, true),
2808     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
2809                     VIRTIO_NET_F_GUEST_TSO6, true),
2810     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
2811                     VIRTIO_NET_F_GUEST_ECN, true),
2812     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
2813                     VIRTIO_NET_F_GUEST_UFO, true),
2814     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
2815                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2816     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
2817                     VIRTIO_NET_F_HOST_TSO4, true),
2818     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
2819                     VIRTIO_NET_F_HOST_TSO6, true),
2820     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
2821                     VIRTIO_NET_F_HOST_ECN, true),
2822     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
2823                     VIRTIO_NET_F_HOST_UFO, true),
2824     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
2825                     VIRTIO_NET_F_MRG_RXBUF, true),
2826     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
2827                     VIRTIO_NET_F_STATUS, true),
2828     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
2829                     VIRTIO_NET_F_CTRL_VQ, true),
2830     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
2831                     VIRTIO_NET_F_CTRL_RX, true),
2832     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
2833                     VIRTIO_NET_F_CTRL_VLAN, true),
2834     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
2835                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2836     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
2837                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2838     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
2839                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2840     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2841     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2842                     VIRTIO_NET_F_RSC_EXT, false),
2843     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2844                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
2845     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2846     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2847                        TX_TIMER_INTERVAL),
2848     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2849     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2850     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2851                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2852     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2853                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
2854     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2855     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2856                      true),
2857     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2858     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
2859     DEFINE_PROP_END_OF_LIST(),
2860 };
2861 
2862 static void virtio_net_class_init(ObjectClass *klass, void *data)
2863 {
2864     DeviceClass *dc = DEVICE_CLASS(klass);
2865     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2866 
2867     dc->props = virtio_net_properties;
2868     dc->vmsd = &vmstate_virtio_net;
2869     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2870     vdc->realize = virtio_net_device_realize;
2871     vdc->unrealize = virtio_net_device_unrealize;
2872     vdc->get_config = virtio_net_get_config;
2873     vdc->set_config = virtio_net_set_config;
2874     vdc->get_features = virtio_net_get_features;
2875     vdc->set_features = virtio_net_set_features;
2876     vdc->bad_features = virtio_net_bad_features;
2877     vdc->reset = virtio_net_reset;
2878     vdc->set_status = virtio_net_set_status;
2879     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2880     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2881     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2882     vdc->vmsd = &vmstate_virtio_net_device;
2883 }
2884 
2885 static const TypeInfo virtio_net_info = {
2886     .name = TYPE_VIRTIO_NET,
2887     .parent = TYPE_VIRTIO_DEVICE,
2888     .instance_size = sizeof(VirtIONet),
2889     .instance_init = virtio_net_instance_init,
2890     .class_init = virtio_net_class_init,
2891 };
2892 
2893 static void virtio_register_types(void)
2894 {
2895     type_register_static(&virtio_net_info);
2896 }
2897 
2898 type_init(virtio_register_types)
2899