xref: /openbmc/qemu/hw/net/virtio-net.c (revision a2e2d7fc46fd8be875035d9bb5c64788389f65c2)
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))
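/*
 * Worked example (layout per the virtio spec, where mac[ETH_ALEN] is the
 * first config field): endof(struct virtio_net_config, mac) evaluates to
 * offsetof() == 0 plus sizeof(mac) == 6, i.e. 6 bytes.
 */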

typedef struct VirtIOFeature {
    uint64_t flags;
    size_t end;
} VirtIOFeature;

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};
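/*
 * This table drives the computation of n->config_size: config space is
 * exposed up to the largest .end whose feature bit is offered, so a
 * driver that was never offered a feature never sees its config fields.
 */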

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
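/*
 * Virtqueues are laid out in rx/tx pairs: index 2*N is pair N's RX queue
 * and 2*N + 1 its TX queue, with the control queue (if any) last, so
 * vq2q() maps a virtqueue index back to its queue-pair index.
 */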

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}
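/*
 * The config-space MAC is writable only for legacy drivers: once
 * VIRTIO_F_VERSION_1 or VIRTIO_NET_F_CTRL_MAC_ADDR is negotiated the
 * field is read-only and MAC changes go through the control queue
 * (VIRTIO_NET_CTRL_MAC_ADDR_SET) instead.
 */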

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->announce_counter--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
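/*
 * Setting VIRTIO_NET_S_ANNOUNCE asks the guest to send gratuitous
 * packets (e.g. after migration); the guest acks with
 * VIRTIO_NET_CTRL_ANNOUNCE_ACK, which re-arms this timer until
 * announce_counter runs out (see virtio_net_handle_announce() below).
 */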

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("MTU of %u bytes is not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
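/*
 * Returns true when the backend cannot be switched to the requested
 * endianness, i.e. virtio-net itself must byte-swap vnet headers; queues
 * already switched are rolled back so all peers stay consistent.
 */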

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path,
                                              &error_abort);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}
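/*
 * n->vlans is a MAX_VLAN-bit bitmap stored as 32-bit words: VLAN id v
 * lives at bit (v & 0x1f) of word (v >> 5), hence the (i << 5) + j
 * reconstruction above.
 */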

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* nobcast set means broadcast reception is disabled, so invert it */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Only a vhost-user backend can use a TX queue size above the
     * default; every other backend is capped at the default size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}
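/*
 * Without VIRTIO_NET_F_CTRL_VLAN the guest cannot program the VLAN
 * filter, so every bit is set (accept all VLANs) rather than left clear
 * (reject all); with the feature the table starts empty and the guest
 * populates it via VIRTIO_NET_CTRL_VLAN_ADD.
 */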

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
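/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two back-to-back tables, unicast
 * first and multicast second, each a 32-bit entry count followed by that
 * many 6-byte MACs; overflowing either half simply sets the matching
 * *_overflow flag (making receive_filter() pass that traffic class)
 * instead of failing the command.
 */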

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_counter) {
            timer_mod(n->announce_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      self_announce_delay(n->announce_counter));
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg,
                              sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
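/*
 * Each control message is framed as driver-to-device buffers holding
 * struct virtio_net_ctrl_hdr plus command data, followed by a
 * device-to-driver buffer receiving the one-byte ack written above; the
 * out_sg copy (iov2) exists because iov_discard_front() modifies the
 * iovec array it is given.
 */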

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
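/*
 * The fixed offsets above assume an untagged Ethernet frame carrying
 * IPv4 with a 20-byte header: bytes 12-13 are the ethertype, byte 23
 * the IP protocol field (14 + 9), and bytes 34-35 the UDP source port
 * (14 + 20), with 67 being the well-known bootps port.
 */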

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
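/*
 * With VIRTIO_NET_F_MRG_RXBUF a packet may span several descriptor
 * chains; mhdr_sg records where the first header's num_buffers field
 * lives so it can be patched above once the final chain count i is
 * known.
 */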

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1];
        struct iovec *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
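/*
 * Return value summary: >= 0 is the number of packets flushed (capped at
 * n->tx_burst), -EBUSY means an async send is in flight and the element
 * is parked in q->async_tx, and -EINVAL means the device was marked
 * broken.
 */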

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
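/*
 * Two TX mitigation strategies exist, selected by the tx= property:
 * "timer" batches guest kicks for tx_timeout ns before flushing, while
 * the default "bh" defers the flush to a bottom half and re-enables
 * notifications only once a burst drains (see the handlers above and
 * virtio_net_add_queue() below).
 */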

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
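/*
 * new_num_queues is 2 * pairs + 1 because of the fixed rx/tx pair
 * layout with the ctrl vq last; tearing the ctrl vq down first and
 * re-adding it last keeps it at the end while only the tail of the
 * virtqueue array grows or shrinks.
 */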
1614 
1615 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
1616 {
1617     int max = multiqueue ? n->max_queues : 1;
1618 
1619     n->multiqueue = multiqueue;
1620     virtio_net_change_num_queues(n, max);
1621 
1622     virtio_net_set_queues(n);
1623 }
1624 
1625 static int virtio_net_post_load_device(void *opaque, int version_id)
1626 {
1627     VirtIONet *n = opaque;
1628     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1629     int i, link_down;
1630 
1631     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
1632                                virtio_vdev_has_feature(vdev,
1633                                                        VIRTIO_F_VERSION_1));
1634 
1635     /* MAC_TABLE_ENTRIES may be different from the saved image */
1636     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
1637         n->mac_table.in_use = 0;
1638     }
1639 
1640     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1641         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
1642     }
1643 
1644     if (peer_has_vnet_hdr(n)) {
1645         virtio_net_apply_guest_offloads(n);
1646     }
1647 
1648     virtio_net_set_queues(n);
1649 
1650     /* Find the first multicast entry in the saved MAC filter */
1651     for (i = 0; i < n->mac_table.in_use; i++) {
1652         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
1653             break;
1654         }
1655     }
1656     n->mac_table.first_multi = i;
1657 
1658     /* nc.link_down can't be migrated, so infer link_down according
1659      * to link status bit in n->status */
1660     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
1661     for (i = 0; i < n->max_queues; i++) {
1662         qemu_get_subqueue(n->nic, i)->link_down = link_down;
1663     }
1664 
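         /* After migration the guest should re-announce itself (e.g. with a
          * gratuitous ARP) so that switches relearn its location; arm the
          * announce timer to prod it SELF_ANNOUNCE_ROUNDS times. */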
1665     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
1666         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
1667         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
1668         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
1669     }
1670 
1671     return 0;
1672 }
1673 
1674 /* tx_waiting field of a VirtIONetQueue */
1675 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
1676     .name = "virtio-net-queue-tx_waiting",
1677     .fields = (VMStateField[]) {
1678         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
1679         VMSTATE_END_OF_LIST()
1680     },
1681 };
1682 
1683 static bool max_queues_gt_1(void *opaque, int version_id)
1684 {
1685     return VIRTIO_NET(opaque)->max_queues > 1;
1686 }
1687 
1688 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
1689 {
1690     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
1691                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
1692 }
1693 
1694 static bool mac_table_fits(void *opaque, int version_id)
1695 {
1696     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
1697 }
1698 
1699 static bool mac_table_doesnt_fit(void *opaque, int version_id)
1700 {
1701     return !mac_table_fits(opaque, version_id);
1702 }
1703 
1704 /* This temporary type is shared by all the WITH_TMP methods
1705  * although only some fields are used by each.
1706  */
1707 struct VirtIONetMigTmp {
1708     VirtIONet      *parent;
1709     VirtIONetQueue *vqs_1;
1710     uint16_t        curr_queues_1;
1711     uint8_t         has_ufo;
1712     uint32_t        has_vnet_hdr;
1713 };
1714 
1715 /* The 2nd and subsequent tx_waiting flags are loaded later than
1716  * the 1st entry in the queues and only if there's more than one
1717  * entry.  We use the tmp mechanism to calculate a temporary
1718  * pointer and count and also validate the count.
1719  */
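     /* E.g. with curr_queues == 3, vqs_1 points at vqs[1] and curr_queues_1
      * is 2, so exactly the tx_waiting flags of queues 1 and 2 travel in
      * this sub-section.
      */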
1720 
1721 static int virtio_net_tx_waiting_pre_save(void *opaque)
1722 {
1723     struct VirtIONetMigTmp *tmp = opaque;
1724 
1725     tmp->vqs_1 = tmp->parent->vqs + 1;
1726     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
1727     if (tmp->parent->curr_queues == 0) {
1728         tmp->curr_queues_1 = 0;
1729     }
1730 
1731     return 0;
1732 }
1733 
1734 static int virtio_net_tx_waiting_pre_load(void *opaque)
1735 {
1736     struct VirtIONetMigTmp *tmp = opaque;
1737 
1738     /* Reuse the pointer setup from save */
1739     virtio_net_tx_waiting_pre_save(opaque);
1740 
1741     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
1742         error_report("virtio-net: curr_queues %x > max_queues %x",
1743                      tmp->parent->curr_queues, tmp->parent->max_queues);
1744 
1745         return -EINVAL;
1746     }
1747 
1748     return 0; /* all good */
1749 }
1750 
1751 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
1752     .name      = "virtio-net-tx_waiting",
1753     .pre_load  = virtio_net_tx_waiting_pre_load,
1754     .pre_save  = virtio_net_tx_waiting_pre_save,
1755     .fields    = (VMStateField[]) {
1756         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
1757                                      curr_queues_1,
1758                                      vmstate_virtio_net_queue_tx_waiting,
1759                                      struct VirtIONetQueue),
1760         VMSTATE_END_OF_LIST()
1761     },
1762 };
1763 
1764 /* the 'has_ufo' flag is just tested; if the incoming stream has the
1765  * flag set we need to check that we have it
1766  */
1767 static int virtio_net_ufo_post_load(void *opaque, int version_id)
1768 {
1769     struct VirtIONetMigTmp *tmp = opaque;
1770 
1771     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
1772         error_report("virtio-net: saved image requires TUN_F_UFO support");
1773         return -EINVAL;
1774     }
1775 
1776     return 0;
1777 }
1778 
1779 static int virtio_net_ufo_pre_save(void *opaque)
1780 {
1781     struct VirtIONetMigTmp *tmp = opaque;
1782 
1783     tmp->has_ufo = tmp->parent->has_ufo;
1784 
1785     return 0;
1786 }
1787 
1788 static const VMStateDescription vmstate_virtio_net_has_ufo = {
1789     .name      = "virtio-net-ufo",
1790     .post_load = virtio_net_ufo_post_load,
1791     .pre_save  = virtio_net_ufo_pre_save,
1792     .fields    = (VMStateField[]) {
1793         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
1794         VMSTATE_END_OF_LIST()
1795     },
1796 };
1797 
1798 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
1799  * flag set we need to check that we have it
1800  */
1801 static int virtio_net_vnet_post_load(void *opaque, int version_id)
1802 {
1803     struct VirtIONetMigTmp *tmp = opaque;
1804 
1805     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
1806         error_report("virtio-net: saved image requires vnet_hdr=on");
1807         return -EINVAL;
1808     }
1809 
1810     return 0;
1811 }
1812 
1813 static int virtio_net_vnet_pre_save(void *opaque)
1814 {
1815     struct VirtIONetMigTmp *tmp = opaque;
1816 
1817     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
1818 
1819     return 0;
1820 }
1821 
1822 static const VMStateDescription vmstate_virtio_net_has_vnet = {
1823     .name      = "virtio-net-vnet",
1824     .post_load = virtio_net_vnet_post_load,
1825     .pre_save  = virtio_net_vnet_pre_save,
1826     .fields    = (VMStateField[]) {
1827         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
1828         VMSTATE_END_OF_LIST()
1829     },
1830 };
1831 
1832 static const VMStateDescription vmstate_virtio_net_device = {
1833     .name = "virtio-net-device",
1834     .version_id = VIRTIO_NET_VM_VERSION,
1835     .minimum_version_id = VIRTIO_NET_VM_VERSION,
1836     .post_load = virtio_net_post_load_device,
1837     .fields = (VMStateField[]) {
1838         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
1839         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
1840                                vmstate_virtio_net_queue_tx_waiting,
1841                                VirtIONetQueue),
1842         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
1843         VMSTATE_UINT16(status, VirtIONet),
1844         VMSTATE_UINT8(promisc, VirtIONet),
1845         VMSTATE_UINT8(allmulti, VirtIONet),
1846         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
1847 
1848         /* Guarded pair: if it fits we load it, else we throw it away
1849          * - can happen if the source has a larger MAC table; post-load
1850          * sets flags in this case.
1851          */
1852         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
1853                                  0, mac_table_fits, mac_table.in_use,
1854                                  ETH_ALEN),
1855         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
1856                                      mac_table.in_use, ETH_ALEN),
1857 
1858         /* Note: this is an array of uint32_t that has always been saved
1859          * as a raw buffer, so mind the endianness: it is actually used
1860          * as a bitmap, addressed as uint32_t words.
1861          */
1862         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
1863         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
1864                          vmstate_virtio_net_has_vnet),
1865         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
1866         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
1867         VMSTATE_UINT8(alluni, VirtIONet),
1868         VMSTATE_UINT8(nomulti, VirtIONet),
1869         VMSTATE_UINT8(nouni, VirtIONet),
1870         VMSTATE_UINT8(nobcast, VirtIONet),
1871         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
1872                          vmstate_virtio_net_has_ufo),
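             /* uint16_equal: the incoming value must match our own
              * max_queues or the load fails; these two fields are only
              * sent when there is more than one queue. */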
1873         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
1874                             vmstate_info_uint16_equal, uint16_t),
1875         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
1876         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
1877                          vmstate_virtio_net_tx_waiting),
1878         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
1879                             has_ctrl_guest_offloads),
1880         VMSTATE_END_OF_LIST()
1881     },
1882 };
1883 
1884 static NetClientInfo net_virtio_info = {
1885     .type = NET_CLIENT_DRIVER_NIC,
1886     .size = sizeof(NICState),
1887     .can_receive = virtio_net_can_receive,
1888     .receive = virtio_net_receive,
1889     .link_status_changed = virtio_net_set_link_status,
1890     .query_rx_filter = virtio_net_query_rxfilter,
1891 };
1892 
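     /* The guest notifier hooks below are only exercised while vhost is
      * driving the queues, hence the vhost_started asserts. */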
1893 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1894 {
1895     VirtIONet *n = VIRTIO_NET(vdev);
1896     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1897     assert(n->vhost_started);
1898     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
1899 }
1900 
1901 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1902                                            bool mask)
1903 {
1904     VirtIONet *n = VIRTIO_NET(vdev);
1905     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1906     assert(n->vhost_started);
1907     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
1908                              vdev, idx, mask);
1909 }
1910 
1911 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
1912 {
1913     int i, config_size = 0;
1914     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
1915 
1916     for (i = 0; feature_sizes[i].flags != 0; i++) {
1917         if (host_features & feature_sizes[i].flags) {
1918             config_size = MAX(feature_sizes[i].end, config_size);
1919         }
1920     }
1921     n->config_size = config_size;
1922 }
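
     /*
      * Example: with only VIRTIO_NET_F_MAC and VIRTIO_NET_F_STATUS set,
      * config_size ends up as endof(struct virtio_net_config, status): the
      * config space is cut off after the last field that any set feature
      * needs.  VIRTIO_NET_F_MAC is forced on above, so the mac field is
      * always present.
      */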
1923 
1924 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
1925                                    const char *type)
1926 {
1927     /*
1928      * The name can be NULL; in that case the netclient name will be type.x.
1929      */
1930     assert(type != NULL);
1931 
1932     g_free(n->netclient_name);
1933     g_free(n->netclient_type);
1934     n->netclient_name = g_strdup(name);
1935     n->netclient_type = g_strdup(type);
1936 }
1937 
1938 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
1939 {
1940     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1941     VirtIONet *n = VIRTIO_NET(dev);
1942     NetClientState *nc;
1943     int i;
1944 
1945     if (n->net_conf.mtu) {
1946         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
1947     }
1948 
1949     if (n->net_conf.duplex_str) {
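             /* Comparing 5 bytes includes the terminating NUL, so only the
              * exact strings "half" and "full" match. */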
1950         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
1951             n->net_conf.duplex = DUPLEX_HALF;
1952         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
1953             n->net_conf.duplex = DUPLEX_FULL;
1954         } else {
1955             error_setg(errp, "'duplex' must be 'half' or 'full'");
1956         }
1957         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
1958     } else {
1959         n->net_conf.duplex = DUPLEX_UNKNOWN;
1960     }
1961 
1962     if (n->net_conf.speed < SPEED_UNKNOWN) {
1963         error_setg(errp, "'speed' must be between 0 and INT_MAX");
1964     } else if (n->net_conf.speed >= 0) {
1965         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
1966     }
1967 
1968     virtio_net_set_config_size(n, n->host_features);
1969     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
1970 
1971     /*
1972      * We set a lower limit on RX queue size to what it always was.
1973      * Guests that want a smaller ring can always resize it without
1974      * help from us (using virtio 1 and up).
1975      */
1976     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
1977         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
1978         !is_power_of_2(n->net_conf.rx_queue_size)) {
1979         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
1980                    "must be a power of 2 between %d and %d.",
1981                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
1982                    VIRTQUEUE_MAX_SIZE);
1983         virtio_cleanup(vdev);
1984         return;
1985     }
1986 
1987     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
1988         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
1989         !is_power_of_2(n->net_conf.tx_queue_size)) {
1990         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
1991                    "must be a power of 2 between %d and %d",
1992                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
1993                    VIRTQUEUE_MAX_SIZE);
1994         virtio_cleanup(vdev);
1995         return;
1996     }
1997 
1998     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
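         /* Each queue pair needs an RX and a TX vq, plus the single ctrl vq. */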
1999     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
2000         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
2001                    "must be a positive integer no greater than %d.",
2002                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
2003         virtio_cleanup(vdev);
2004         return;
2005     }
2006     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
2007     n->curr_queues = 1;
2008     n->tx_timeout = n->net_conf.txtimer;
2009 
2010     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2011                        && strcmp(n->net_conf.tx, "bh")) {
2012         error_report("virtio-net: "
2013                      "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2014                      n->net_conf.tx);
2015         error_report("Defaulting to \"bh\"");
2016     }
2017 
2018     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2019                                     n->net_conf.tx_queue_size);
2020 
2021     for (i = 0; i < n->max_queues; i++) {
2022         virtio_net_add_queue(n, i);
2023     }
2024 
2025     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2026     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2027     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
2028     n->status = VIRTIO_NET_S_LINK_UP;
2029     n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
2030                                      virtio_net_announce_timer, n);
2031 
2032     if (n->netclient_type) {
2033         /*
2034          * This happens when virtio_net_set_netclient_name has been called.
2035          */
2036         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2037                               n->netclient_type, n->netclient_name, n);
2038     } else {
2039         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2040                               object_get_typename(OBJECT(dev)), dev->id, n);
2041     }
2042 
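         /* If the backend (e.g. tap) can pass a virtio_net_hdr in front of
          * each packet, enable that on every subqueue; otherwise the device
          * emulation builds/strips the header itself and host_hdr_len
          * stays 0. */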
2043     peer_test_vnet_hdr(n);
2044     if (peer_has_vnet_hdr(n)) {
2045         for (i = 0; i < n->max_queues; i++) {
2046             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
2047         }
2048         n->host_hdr_len = sizeof(struct virtio_net_hdr);
2049     } else {
2050         n->host_hdr_len = 0;
2051     }
2052 
2053     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
2054 
2055     n->vqs[0].tx_waiting = 0;
2056     n->tx_burst = n->net_conf.txburst;
2057     virtio_net_set_mrg_rx_bufs(n, 0, 0);
2058     n->promisc = 1; /* for compatibility */
2059 
2060     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
2061 
2062     n->vlans = g_malloc0(MAX_VLAN >> 3);
2063 
2064     nc = qemu_get_queue(n->nic);
2065     nc->rxfilter_notify_enabled = 1;
2066 
2067     n->qdev = dev;
2068 }
2069 
2070 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2071 {
2072     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2073     VirtIONet *n = VIRTIO_NET(dev);
2074     int i, max_queues;
2075 
2076     /* This will stop vhost backend if appropriate. */
2077     virtio_net_set_status(vdev, 0);
2078 
2079     g_free(n->netclient_name);
2080     n->netclient_name = NULL;
2081     g_free(n->netclient_type);
2082     n->netclient_type = NULL;
2083 
2084     g_free(n->mac_table.macs);
2085     g_free(n->vlans);
2086 
2087     max_queues = n->multiqueue ? n->max_queues : 1;
2088     for (i = 0; i < max_queues; i++) {
2089         virtio_net_del_queue(n, i);
2090     }
2091 
2092     timer_del(n->announce_timer);
2093     timer_free(n->announce_timer);
2094     g_free(n->vqs);
2095     qemu_del_nic(n->nic);
2096     virtio_cleanup(vdev);
2097 }
2098 
2099 static void virtio_net_instance_init(Object *obj)
2100 {
2101     VirtIONet *n = VIRTIO_NET(obj);
2102 
2103     /*
2104      * The default config_size is sizeof(struct virtio_net_config).
2105      * It can be overridden with virtio_net_set_config_size.
2106      */
2107     n->config_size = sizeof(struct virtio_net_config);
2108     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2109                                   "bootindex", "/ethernet-phy@0",
2110                                   DEVICE(n), NULL);
2111 }
2112 
2113 static int virtio_net_pre_save(void *opaque)
2114 {
2115     VirtIONet *n = opaque;
2116 
2117     /* At this point the backend must be stopped, otherwise
2118      * it might keep writing to guest memory. */
2119     assert(!n->vhost_started);
2120 
2121     return 0;
2122 }
2123 
2124 static const VMStateDescription vmstate_virtio_net = {
2125     .name = "virtio-net",
2126     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2127     .version_id = VIRTIO_NET_VM_VERSION,
2128     .fields = (VMStateField[]) {
2129         VMSTATE_VIRTIO_DEVICE,
2130         VMSTATE_END_OF_LIST()
2131     },
2132     .pre_save = virtio_net_pre_save,
2133 };
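
     /*
      * vmstate_virtio_net (dc->vmsd) is only the outer wrapper: its
      * VMSTATE_VIRTIO_DEVICE entry saves the common virtio state and then
      * pulls in vmstate_virtio_net_device (vdc->vmsd, set in class_init
      * below) for the net-specific fields.
      */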
2134 
2135 static Property virtio_net_properties[] = {
2136     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2137                     VIRTIO_NET_F_CSUM, true),
2138     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
2139                     VIRTIO_NET_F_GUEST_CSUM, true),
2140     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2141     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
2142                     VIRTIO_NET_F_GUEST_TSO4, true),
2143     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
2144                     VIRTIO_NET_F_GUEST_TSO6, true),
2145     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
2146                     VIRTIO_NET_F_GUEST_ECN, true),
2147     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
2148                     VIRTIO_NET_F_GUEST_UFO, true),
2149     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
2150                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2151     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
2152                     VIRTIO_NET_F_HOST_TSO4, true),
2153     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
2154                     VIRTIO_NET_F_HOST_TSO6, true),
2155     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
2156                     VIRTIO_NET_F_HOST_ECN, true),
2157     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
2158                     VIRTIO_NET_F_HOST_UFO, true),
2159     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
2160                     VIRTIO_NET_F_MRG_RXBUF, true),
2161     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
2162                     VIRTIO_NET_F_STATUS, true),
2163     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
2164                     VIRTIO_NET_F_CTRL_VQ, true),
2165     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
2166                     VIRTIO_NET_F_CTRL_RX, true),
2167     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
2168                     VIRTIO_NET_F_CTRL_VLAN, true),
2169     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
2170                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2171     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
2172                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2173     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
2174                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2175     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2176     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2177     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2178                        TX_TIMER_INTERVAL),
2179     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2180     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2181     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2182                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2183     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2184                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
2185     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2186     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2187                      true),
2188     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2189     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
2190     DEFINE_PROP_END_OF_LIST(),
2191 };
2192 
2193 static void virtio_net_class_init(ObjectClass *klass, void *data)
2194 {
2195     DeviceClass *dc = DEVICE_CLASS(klass);
2196     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2197 
2198     dc->props = virtio_net_properties;
2199     dc->vmsd = &vmstate_virtio_net;
2200     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2201     vdc->realize = virtio_net_device_realize;
2202     vdc->unrealize = virtio_net_device_unrealize;
2203     vdc->get_config = virtio_net_get_config;
2204     vdc->set_config = virtio_net_set_config;
2205     vdc->get_features = virtio_net_get_features;
2206     vdc->set_features = virtio_net_set_features;
2207     vdc->bad_features = virtio_net_bad_features;
2208     vdc->reset = virtio_net_reset;
2209     vdc->set_status = virtio_net_set_status;
2210     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2211     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2212     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2213     vdc->vmsd = &vmstate_virtio_net_device;
2214 }
2215 
2216 static const TypeInfo virtio_net_info = {
2217     .name = TYPE_VIRTIO_NET,
2218     .parent = TYPE_VIRTIO_DEVICE,
2219     .instance_size = sizeof(VirtIONet),
2220     .instance_init = virtio_net_instance_init,
2221     .class_init = virtio_net_class_init,
2222 };
2223 
2224 static void virtio_register_types(void)
2225 {
2226     type_register_static(&virtio_net_info);
2227 }
2228 
2229 type_init(virtio_register_types)
2230