xref: /openbmc/qemu/hw/net/virtio-net.c (revision a9ded601)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/iov.h"
16 #include "hw/virtio/virtio.h"
17 #include "net/net.h"
18 #include "net/checksum.h"
19 #include "net/tap.h"
20 #include "qemu/error-report.h"
21 #include "qemu/timer.h"
22 #include "hw/virtio/virtio-net.h"
23 #include "net/vhost_net.h"
24 #include "hw/virtio/virtio-bus.h"
25 #include "qapi/qmp/qjson.h"
26 #include "qapi-event.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "migration/misc.h"
29 
30 #define VIRTIO_NET_VM_VERSION    11
31 
32 #define MAC_TABLE_ENTRIES    64
33 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
34 
35 /* previously fixed value */
36 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
37 /* for now, only allow larger queues; with virtio-1, guest can downsize */
38 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
39 
/*
 * Number of bytes from the start of 'container' through the last byte of
 * its member 'field' (i.e. the member's offset plus the member's size).
 */
#define endof(container, field) \
    (sizeof(((container *)0)->field) + offsetof(container, field))
46 
/*
 * Pairs a feature-bit mask with a byte offset into the device config
 * structure (see the feature_sizes table for how the pairs are filled in).
 */
typedef struct VirtIOFeature VirtIOFeature;

struct VirtIOFeature {
    uint32_t flags;     /* feature bit mask */
    size_t end;         /* offset just past the associated config field */
};
51 
52 static VirtIOFeature feature_sizes[] = {
53     {.flags = 1 << VIRTIO_NET_F_MAC,
54      .end = endof(struct virtio_net_config, mac)},
55     {.flags = 1 << VIRTIO_NET_F_STATUS,
56      .end = endof(struct virtio_net_config, status)},
57     {.flags = 1 << VIRTIO_NET_F_MQ,
58      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
59     {.flags = 1 << VIRTIO_NET_F_MTU,
60      .end = endof(struct virtio_net_config, mtu)},
61     {}
62 };
63 
64 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
65 {
66     VirtIONet *n = qemu_get_nic_opaque(nc);
67 
68     return &n->vqs[nc->queue_index];
69 }
70 
/* Map a virtqueue index onto a queue index: two virtqueues per queue. */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}
75 
76 /* TODO
77  * - we could suppress RX interrupt if we were so inclined.
78  */
79 
80 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
81 {
82     VirtIONet *n = VIRTIO_NET(vdev);
83     struct virtio_net_config netcfg;
84 
85     virtio_stw_p(vdev, &netcfg.status, n->status);
86     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
87     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
88     memcpy(netcfg.mac, n->mac, ETH_ALEN);
89     memcpy(config, &netcfg, n->config_size);
90 }
91 
92 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
93 {
94     VirtIONet *n = VIRTIO_NET(vdev);
95     struct virtio_net_config netcfg = {};
96 
97     memcpy(&netcfg, config, n->config_size);
98 
99     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
100         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
101         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
102         memcpy(n->mac, netcfg.mac, ETH_ALEN);
103         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
104     }
105 }
106 
107 static bool virtio_net_started(VirtIONet *n, uint8_t status)
108 {
109     VirtIODevice *vdev = VIRTIO_DEVICE(n);
110     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
111         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
112 }
113 
114 static void virtio_net_announce_timer(void *opaque)
115 {
116     VirtIONet *n = opaque;
117     VirtIODevice *vdev = VIRTIO_DEVICE(n);
118 
119     n->announce_counter--;
120     n->status |= VIRTIO_NET_S_ANNOUNCE;
121     virtio_notify_config(vdev);
122 }
123 
124 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
125 {
126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
127     NetClientState *nc = qemu_get_queue(n->nic);
128     int queues = n->multiqueue ? n->max_queues : 1;
129 
130     if (!get_vhost_net(nc->peer)) {
131         return;
132     }
133 
134     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
135         !!n->vhost_started) {
136         return;
137     }
138     if (!n->vhost_started) {
139         int r, i;
140 
141         if (n->needs_vnet_hdr_swap) {
142             error_report("backend does not support %s vnet headers; "
143                          "falling back on userspace virtio",
144                          virtio_is_big_endian(vdev) ? "BE" : "LE");
145             return;
146         }
147 
148         /* Any packets outstanding? Purge them to avoid touching rings
149          * when vhost is running.
150          */
151         for (i = 0;  i < queues; i++) {
152             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
153 
154             /* Purge both directions: TX and RX. */
155             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
156             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
157         }
158 
159         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
160             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
161             if (r < 0) {
162                 error_report("%uBytes MTU not supported by the backend",
163                              n->net_conf.mtu);
164 
165                 return;
166             }
167         }
168 
169         n->vhost_started = 1;
170         r = vhost_net_start(vdev, n->nic->ncs, queues);
171         if (r < 0) {
172             error_report("unable to start vhost net: %d: "
173                          "falling back on userspace virtio", -r);
174             n->vhost_started = 0;
175         }
176     } else {
177         vhost_net_stop(vdev, n->nic->ncs, queues);
178         n->vhost_started = 0;
179     }
180 }
181 
182 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
183                                           NetClientState *peer,
184                                           bool enable)
185 {
186     if (virtio_is_big_endian(vdev)) {
187         return qemu_set_vnet_be(peer, enable);
188     } else {
189         return qemu_set_vnet_le(peer, enable);
190     }
191 }
192 
193 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
194                                        int queues, bool enable)
195 {
196     int i;
197 
198     for (i = 0; i < queues; i++) {
199         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
200             enable) {
201             while (--i >= 0) {
202                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
203             }
204 
205             return true;
206         }
207     }
208 
209     return false;
210 }
211 
212 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
213 {
214     VirtIODevice *vdev = VIRTIO_DEVICE(n);
215     int queues = n->multiqueue ? n->max_queues : 1;
216 
217     if (virtio_net_started(n, status)) {
218         /* Before using the device, we tell the network backend about the
219          * endianness to use when parsing vnet headers. If the backend
220          * can't do it, we fallback onto fixing the headers in the core
221          * virtio-net code.
222          */
223         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
224                                                             queues, true);
225     } else if (virtio_net_started(n, vdev->status)) {
226         /* After using the device, we need to reset the network backend to
227          * the default (guest native endianness), otherwise the guest may
228          * lose network connectivity if it is rebooted into a different
229          * endianness.
230          */
231         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
232     }
233 }
234 
235 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
236 {
237     unsigned int dropped = virtqueue_drop_all(vq);
238     if (dropped) {
239         virtio_notify(vdev, vq);
240     }
241 }
242 
243 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
244 {
245     VirtIONet *n = VIRTIO_NET(vdev);
246     VirtIONetQueue *q;
247     int i;
248     uint8_t queue_status;
249 
250     virtio_net_vnet_endian_status(n, status);
251     virtio_net_vhost_status(n, status);
252 
253     for (i = 0; i < n->max_queues; i++) {
254         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
255         bool queue_started;
256         q = &n->vqs[i];
257 
258         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
259             queue_status = 0;
260         } else {
261             queue_status = status;
262         }
263         queue_started =
264             virtio_net_started(n, queue_status) && !n->vhost_started;
265 
266         if (queue_started) {
267             qemu_flush_queued_packets(ncs);
268         }
269 
270         if (!q->tx_waiting) {
271             continue;
272         }
273 
274         if (queue_started) {
275             if (q->tx_timer) {
276                 timer_mod(q->tx_timer,
277                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
278             } else {
279                 qemu_bh_schedule(q->tx_bh);
280             }
281         } else {
282             if (q->tx_timer) {
283                 timer_del(q->tx_timer);
284             } else {
285                 qemu_bh_cancel(q->tx_bh);
286             }
287             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
288                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) {
289                 /* if tx is waiting we are likely have some packets in tx queue
290                  * and disabled notification */
291                 q->tx_waiting = 0;
292                 virtio_queue_set_notification(q->tx_vq, 1);
293                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
294             }
295         }
296     }
297 }
298 
299 static void virtio_net_set_link_status(NetClientState *nc)
300 {
301     VirtIONet *n = qemu_get_nic_opaque(nc);
302     VirtIODevice *vdev = VIRTIO_DEVICE(n);
303     uint16_t old_status = n->status;
304 
305     if (nc->link_down)
306         n->status &= ~VIRTIO_NET_S_LINK_UP;
307     else
308         n->status |= VIRTIO_NET_S_LINK_UP;
309 
310     if (n->status != old_status)
311         virtio_notify_config(vdev);
312 
313     virtio_net_set_status(vdev, vdev->status);
314 }
315 
316 static void rxfilter_notify(NetClientState *nc)
317 {
318     VirtIONet *n = qemu_get_nic_opaque(nc);
319 
320     if (nc->rxfilter_notify_enabled) {
321         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
322         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
323                                               n->netclient_name, path, &error_abort);
324         g_free(path);
325 
326         /* disable event notification to avoid events flooding */
327         nc->rxfilter_notify_enabled = 0;
328     }
329 }
330 
331 static intList *get_vlan_table(VirtIONet *n)
332 {
333     intList *list, *entry;
334     int i, j;
335 
336     list = NULL;
337     for (i = 0; i < MAX_VLAN >> 5; i++) {
338         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
339             if (n->vlans[i] & (1U << j)) {
340                 entry = g_malloc0(sizeof(*entry));
341                 entry->value = (i << 5) + j;
342                 entry->next = list;
343                 list = entry;
344             }
345         }
346     }
347 
348     return list;
349 }
350 
351 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
352 {
353     VirtIONet *n = qemu_get_nic_opaque(nc);
354     VirtIODevice *vdev = VIRTIO_DEVICE(n);
355     RxFilterInfo *info;
356     strList *str_list, *entry;
357     int i;
358 
359     info = g_malloc0(sizeof(*info));
360     info->name = g_strdup(nc->name);
361     info->promiscuous = n->promisc;
362 
363     if (n->nouni) {
364         info->unicast = RX_STATE_NONE;
365     } else if (n->alluni) {
366         info->unicast = RX_STATE_ALL;
367     } else {
368         info->unicast = RX_STATE_NORMAL;
369     }
370 
371     if (n->nomulti) {
372         info->multicast = RX_STATE_NONE;
373     } else if (n->allmulti) {
374         info->multicast = RX_STATE_ALL;
375     } else {
376         info->multicast = RX_STATE_NORMAL;
377     }
378 
379     info->broadcast_allowed = n->nobcast;
380     info->multicast_overflow = n->mac_table.multi_overflow;
381     info->unicast_overflow = n->mac_table.uni_overflow;
382 
383     info->main_mac = qemu_mac_strdup_printf(n->mac);
384 
385     str_list = NULL;
386     for (i = 0; i < n->mac_table.first_multi; i++) {
387         entry = g_malloc0(sizeof(*entry));
388         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
389         entry->next = str_list;
390         str_list = entry;
391     }
392     info->unicast_table = str_list;
393 
394     str_list = NULL;
395     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
396         entry = g_malloc0(sizeof(*entry));
397         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
398         entry->next = str_list;
399         str_list = entry;
400     }
401     info->multicast_table = str_list;
402     info->vlan_table = get_vlan_table(n);
403 
404     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
405         info->vlan = RX_STATE_ALL;
406     } else if (!info->vlan_table) {
407         info->vlan = RX_STATE_NONE;
408     } else {
409         info->vlan = RX_STATE_NORMAL;
410     }
411 
412     /* enable event notification after query */
413     nc->rxfilter_notify_enabled = 1;
414 
415     return info;
416 }
417 
418 static void virtio_net_reset(VirtIODevice *vdev)
419 {
420     VirtIONet *n = VIRTIO_NET(vdev);
421 
422     /* Reset back to compatibility mode */
423     n->promisc = 1;
424     n->allmulti = 0;
425     n->alluni = 0;
426     n->nomulti = 0;
427     n->nouni = 0;
428     n->nobcast = 0;
429     /* multiqueue is disabled by default */
430     n->curr_queues = 1;
431     timer_del(n->announce_timer);
432     n->announce_counter = 0;
433     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
434 
435     /* Flush any MAC and VLAN filter table state */
436     n->mac_table.in_use = 0;
437     n->mac_table.first_multi = 0;
438     n->mac_table.multi_overflow = 0;
439     n->mac_table.uni_overflow = 0;
440     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
441     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
442     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
443     memset(n->vlans, 0, MAX_VLAN >> 3);
444 }
445 
446 static void peer_test_vnet_hdr(VirtIONet *n)
447 {
448     NetClientState *nc = qemu_get_queue(n->nic);
449     if (!nc->peer) {
450         return;
451     }
452 
453     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
454 }
455 
456 static int peer_has_vnet_hdr(VirtIONet *n)
457 {
458     return n->has_vnet_hdr;
459 }
460 
461 static int peer_has_ufo(VirtIONet *n)
462 {
463     if (!peer_has_vnet_hdr(n))
464         return 0;
465 
466     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
467 
468     return n->has_ufo;
469 }
470 
471 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
472                                        int version_1)
473 {
474     int i;
475     NetClientState *nc;
476 
477     n->mergeable_rx_bufs = mergeable_rx_bufs;
478 
479     if (version_1) {
480         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
481     } else {
482         n->guest_hdr_len = n->mergeable_rx_bufs ?
483             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
484             sizeof(struct virtio_net_hdr);
485     }
486 
487     for (i = 0; i < n->max_queues; i++) {
488         nc = qemu_get_subqueue(n->nic, i);
489 
490         if (peer_has_vnet_hdr(n) &&
491             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
492             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
493             n->host_hdr_len = n->guest_hdr_len;
494         }
495     }
496 }
497 
498 static int peer_attach(VirtIONet *n, int index)
499 {
500     NetClientState *nc = qemu_get_subqueue(n->nic, index);
501 
502     if (!nc->peer) {
503         return 0;
504     }
505 
506     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
507         vhost_set_vring_enable(nc->peer, 1);
508     }
509 
510     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
511         return 0;
512     }
513 
514     if (n->max_queues == 1) {
515         return 0;
516     }
517 
518     return tap_enable(nc->peer);
519 }
520 
521 static int peer_detach(VirtIONet *n, int index)
522 {
523     NetClientState *nc = qemu_get_subqueue(n->nic, index);
524 
525     if (!nc->peer) {
526         return 0;
527     }
528 
529     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
530         vhost_set_vring_enable(nc->peer, 0);
531     }
532 
533     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
534         return 0;
535     }
536 
537     return tap_disable(nc->peer);
538 }
539 
540 static void virtio_net_set_queues(VirtIONet *n)
541 {
542     int i;
543     int r;
544 
545     if (n->nic->peer_deleted) {
546         return;
547     }
548 
549     for (i = 0; i < n->max_queues; i++) {
550         if (i < n->curr_queues) {
551             r = peer_attach(n, i);
552             assert(!r);
553         } else {
554             r = peer_detach(n, i);
555             assert(!r);
556         }
557     }
558 }
559 
560 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
561 
562 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
563                                         Error **errp)
564 {
565     VirtIONet *n = VIRTIO_NET(vdev);
566     NetClientState *nc = qemu_get_queue(n->nic);
567 
568     /* Firstly sync all virtio-net possible supported features */
569     features |= n->host_features;
570 
571     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
572 
573     if (!peer_has_vnet_hdr(n)) {
574         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
575         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
576         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
577         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
578 
579         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
580         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
581         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
582         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
583     }
584 
585     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
586         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
587         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
588     }
589 
590     if (!get_vhost_net(nc->peer)) {
591         return features;
592     }
593     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
594     vdev->backend_features = features;
595 
596     if (n->mtu_bypass_backend &&
597             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
598         features |= (1ULL << VIRTIO_NET_F_MTU);
599     }
600 
601     return features;
602 }
603 
604 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
605 {
606     uint64_t features = 0;
607 
608     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
609      * but also these: */
610     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
611     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
612     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
613     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
614     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
615 
616     return features;
617 }
618 
619 static void virtio_net_apply_guest_offloads(VirtIONet *n)
620 {
621     qemu_set_offload(qemu_get_queue(n->nic)->peer,
622             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
623             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
624             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
625             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
626             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
627 }
628 
629 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
630 {
631     static const uint64_t guest_offloads_mask =
632         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
633         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
634         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
635         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
636         (1ULL << VIRTIO_NET_F_GUEST_UFO);
637 
638     return guest_offloads_mask & features;
639 }
640 
641 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
642 {
643     VirtIODevice *vdev = VIRTIO_DEVICE(n);
644     return virtio_net_guest_offloads_by_features(vdev->guest_features);
645 }
646 
647 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
648 {
649     VirtIONet *n = VIRTIO_NET(vdev);
650     int i;
651 
652     if (n->mtu_bypass_backend &&
653             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
654         features &= ~(1ULL << VIRTIO_NET_F_MTU);
655     }
656 
657     virtio_net_set_multiqueue(n,
658                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
659 
660     virtio_net_set_mrg_rx_bufs(n,
661                                virtio_has_feature(features,
662                                                   VIRTIO_NET_F_MRG_RXBUF),
663                                virtio_has_feature(features,
664                                                   VIRTIO_F_VERSION_1));
665 
666     if (n->has_vnet_hdr) {
667         n->curr_guest_offloads =
668             virtio_net_guest_offloads_by_features(features);
669         virtio_net_apply_guest_offloads(n);
670     }
671 
672     for (i = 0;  i < n->max_queues; i++) {
673         NetClientState *nc = qemu_get_subqueue(n->nic, i);
674 
675         if (!get_vhost_net(nc->peer)) {
676             continue;
677         }
678         vhost_net_ack_features(get_vhost_net(nc->peer), features);
679     }
680 
681     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
682         memset(n->vlans, 0, MAX_VLAN >> 3);
683     } else {
684         memset(n->vlans, 0xff, MAX_VLAN >> 3);
685     }
686 }
687 
688 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
689                                      struct iovec *iov, unsigned int iov_cnt)
690 {
691     uint8_t on;
692     size_t s;
693     NetClientState *nc = qemu_get_queue(n->nic);
694 
695     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
696     if (s != sizeof(on)) {
697         return VIRTIO_NET_ERR;
698     }
699 
700     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
701         n->promisc = on;
702     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
703         n->allmulti = on;
704     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
705         n->alluni = on;
706     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
707         n->nomulti = on;
708     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
709         n->nouni = on;
710     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
711         n->nobcast = on;
712     } else {
713         return VIRTIO_NET_ERR;
714     }
715 
716     rxfilter_notify(nc);
717 
718     return VIRTIO_NET_OK;
719 }
720 
721 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
722                                      struct iovec *iov, unsigned int iov_cnt)
723 {
724     VirtIODevice *vdev = VIRTIO_DEVICE(n);
725     uint64_t offloads;
726     size_t s;
727 
728     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
729         return VIRTIO_NET_ERR;
730     }
731 
732     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
733     if (s != sizeof(offloads)) {
734         return VIRTIO_NET_ERR;
735     }
736 
737     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
738         uint64_t supported_offloads;
739 
740         if (!n->has_vnet_hdr) {
741             return VIRTIO_NET_ERR;
742         }
743 
744         supported_offloads = virtio_net_supported_guest_offloads(n);
745         if (offloads & ~supported_offloads) {
746             return VIRTIO_NET_ERR;
747         }
748 
749         n->curr_guest_offloads = offloads;
750         virtio_net_apply_guest_offloads(n);
751 
752         return VIRTIO_NET_OK;
753     } else {
754         return VIRTIO_NET_ERR;
755     }
756 }
757 
758 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
759                                  struct iovec *iov, unsigned int iov_cnt)
760 {
761     VirtIODevice *vdev = VIRTIO_DEVICE(n);
762     struct virtio_net_ctrl_mac mac_data;
763     size_t s;
764     NetClientState *nc = qemu_get_queue(n->nic);
765 
766     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
767         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
768             return VIRTIO_NET_ERR;
769         }
770         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
771         assert(s == sizeof(n->mac));
772         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
773         rxfilter_notify(nc);
774 
775         return VIRTIO_NET_OK;
776     }
777 
778     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
779         return VIRTIO_NET_ERR;
780     }
781 
782     int in_use = 0;
783     int first_multi = 0;
784     uint8_t uni_overflow = 0;
785     uint8_t multi_overflow = 0;
786     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
787 
788     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
789                    sizeof(mac_data.entries));
790     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
791     if (s != sizeof(mac_data.entries)) {
792         goto error;
793     }
794     iov_discard_front(&iov, &iov_cnt, s);
795 
796     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
797         goto error;
798     }
799 
800     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
801         s = iov_to_buf(iov, iov_cnt, 0, macs,
802                        mac_data.entries * ETH_ALEN);
803         if (s != mac_data.entries * ETH_ALEN) {
804             goto error;
805         }
806         in_use += mac_data.entries;
807     } else {
808         uni_overflow = 1;
809     }
810 
811     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
812 
813     first_multi = in_use;
814 
815     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
816                    sizeof(mac_data.entries));
817     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
818     if (s != sizeof(mac_data.entries)) {
819         goto error;
820     }
821 
822     iov_discard_front(&iov, &iov_cnt, s);
823 
824     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
825         goto error;
826     }
827 
828     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
829         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
830                        mac_data.entries * ETH_ALEN);
831         if (s != mac_data.entries * ETH_ALEN) {
832             goto error;
833         }
834         in_use += mac_data.entries;
835     } else {
836         multi_overflow = 1;
837     }
838 
839     n->mac_table.in_use = in_use;
840     n->mac_table.first_multi = first_multi;
841     n->mac_table.uni_overflow = uni_overflow;
842     n->mac_table.multi_overflow = multi_overflow;
843     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
844     g_free(macs);
845     rxfilter_notify(nc);
846 
847     return VIRTIO_NET_OK;
848 
849 error:
850     g_free(macs);
851     return VIRTIO_NET_ERR;
852 }
853 
854 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
855                                         struct iovec *iov, unsigned int iov_cnt)
856 {
857     VirtIODevice *vdev = VIRTIO_DEVICE(n);
858     uint16_t vid;
859     size_t s;
860     NetClientState *nc = qemu_get_queue(n->nic);
861 
862     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
863     vid = virtio_lduw_p(vdev, &vid);
864     if (s != sizeof(vid)) {
865         return VIRTIO_NET_ERR;
866     }
867 
868     if (vid >= MAX_VLAN)
869         return VIRTIO_NET_ERR;
870 
871     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
872         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
873     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
874         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
875     else
876         return VIRTIO_NET_ERR;
877 
878     rxfilter_notify(nc);
879 
880     return VIRTIO_NET_OK;
881 }
882 
883 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
884                                       struct iovec *iov, unsigned int iov_cnt)
885 {
886     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
887         n->status & VIRTIO_NET_S_ANNOUNCE) {
888         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
889         if (n->announce_counter) {
890             timer_mod(n->announce_timer,
891                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
892                       self_announce_delay(n->announce_counter));
893         }
894         return VIRTIO_NET_OK;
895     } else {
896         return VIRTIO_NET_ERR;
897     }
898 }
899 
900 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
901                                 struct iovec *iov, unsigned int iov_cnt)
902 {
903     VirtIODevice *vdev = VIRTIO_DEVICE(n);
904     struct virtio_net_ctrl_mq mq;
905     size_t s;
906     uint16_t queues;
907 
908     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
909     if (s != sizeof(mq)) {
910         return VIRTIO_NET_ERR;
911     }
912 
913     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
914         return VIRTIO_NET_ERR;
915     }
916 
917     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
918 
919     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
920         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
921         queues > n->max_queues ||
922         !n->multiqueue) {
923         return VIRTIO_NET_ERR;
924     }
925 
926     n->curr_queues = queues;
927     /* stop the backend before changing the number of queues to avoid handling a
928      * disabled queue */
929     virtio_net_set_status(vdev, vdev->status);
930     virtio_net_set_queues(n);
931 
932     return VIRTIO_NET_OK;
933 }
934 
935 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
936 {
937     VirtIONet *n = VIRTIO_NET(vdev);
938     struct virtio_net_ctrl_hdr ctrl;
939     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
940     VirtQueueElement *elem;
941     size_t s;
942     struct iovec *iov, *iov2;
943     unsigned int iov_cnt;
944 
945     for (;;) {
946         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
947         if (!elem) {
948             break;
949         }
950         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
951             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
952             virtio_error(vdev, "virtio-net ctrl missing headers");
953             virtqueue_detach_element(vq, elem, 0);
954             g_free(elem);
955             break;
956         }
957 
958         iov_cnt = elem->out_num;
959         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
960         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
961         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
962         if (s != sizeof(ctrl)) {
963             status = VIRTIO_NET_ERR;
964         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
965             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
966         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
967             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
968         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
969             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
970         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
971             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
972         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
973             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
974         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
975             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
976         }
977 
978         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
979         assert(s == sizeof(status));
980 
981         virtqueue_push(vq, elem, sizeof(status));
982         virtio_notify(vdev, vq);
983         g_free(iov2);
984         g_free(elem);
985     }
986 }
987 
988 /* RX */
989 
990 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
991 {
992     VirtIONet *n = VIRTIO_NET(vdev);
993     int queue_index = vq2q(virtio_get_queue_index(vq));
994 
995     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
996 }
997 
998 static int virtio_net_can_receive(NetClientState *nc)
999 {
1000     VirtIONet *n = qemu_get_nic_opaque(nc);
1001     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1002     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1003 
1004     if (!vdev->vm_running) {
1005         return 0;
1006     }
1007 
1008     if (nc->queue_index >= n->curr_queues) {
1009         return 0;
1010     }
1011 
1012     if (!virtio_queue_ready(q->rx_vq) ||
1013         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1014         return 0;
1015     }
1016 
1017     return 1;
1018 }
1019 
1020 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1021 {
1022     VirtIONet *n = q->n;
1023     if (virtio_queue_empty(q->rx_vq) ||
1024         (n->mergeable_rx_bufs &&
1025          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1026         virtio_queue_set_notification(q->rx_vq, 1);
1027 
1028         /* To avoid a race condition where the guest has made some buffers
1029          * available after the above check but before notification was
1030          * enabled, check for available buffers again.
1031          */
1032         if (virtio_queue_empty(q->rx_vq) ||
1033             (n->mergeable_rx_bufs &&
1034              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1035             return 0;
1036         }
1037     }
1038 
1039     virtio_queue_set_notification(q->rx_vq, 0);
1040     return 1;
1041 }
1042 
1043 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1044 {
1045     virtio_tswap16s(vdev, &hdr->hdr_len);
1046     virtio_tswap16s(vdev, &hdr->gso_size);
1047     virtio_tswap16s(vdev, &hdr->csum_start);
1048     virtio_tswap16s(vdev, &hdr->csum_offset);
1049 }
1050 
1051 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1052  * it never finds out that the packets don't have valid checksums.  This
1053  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1054  * fix this with Xen but it hasn't appeared in an upstream release of
1055  * dhclient yet.
1056  *
1057  * To avoid breaking existing guests, we catch udp packets and add
1058  * checksums.  This is terrible but it's better than hacking the guest
1059  * kernels.
1060  *
1061  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1062  * we should provide a mechanism to disable it to avoid polluting the host
1063  * cache.
1064  */
1065 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1066                                         uint8_t *buf, size_t size)
1067 {
1068     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1069         (size > 27 && size < 1500) && /* normal sized MTU */
1070         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1071         (buf[23] == 17) && /* ip.protocol == UDP */
1072         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1073         net_checksum_calculate(buf, size);
1074         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1075     }
1076 }
1077 
1078 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1079                            const void *buf, size_t size)
1080 {
1081     if (n->has_vnet_hdr) {
1082         /* FIXME this cast is evil */
1083         void *wbuf = (void *)buf;
1084         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1085                                     size - n->host_hdr_len);
1086 
1087         if (n->needs_vnet_hdr_swap) {
1088             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1089         }
1090         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1091     } else {
1092         struct virtio_net_hdr hdr = {
1093             .flags = 0,
1094             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1095         };
1096         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1097     }
1098 }
1099 
1100 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1101 {
1102     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1103     static const uint8_t vlan[] = {0x81, 0x00};
1104     uint8_t *ptr = (uint8_t *)buf;
1105     int i;
1106 
1107     if (n->promisc)
1108         return 1;
1109 
1110     ptr += n->host_hdr_len;
1111 
1112     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1113         int vid = lduw_be_p(ptr + 14) & 0xfff;
1114         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1115             return 0;
1116     }
1117 
1118     if (ptr[0] & 1) { // multicast
1119         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1120             return !n->nobcast;
1121         } else if (n->nomulti) {
1122             return 0;
1123         } else if (n->allmulti || n->mac_table.multi_overflow) {
1124             return 1;
1125         }
1126 
1127         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1128             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1129                 return 1;
1130             }
1131         }
1132     } else { // unicast
1133         if (n->nouni) {
1134             return 0;
1135         } else if (n->alluni || n->mac_table.uni_overflow) {
1136             return 1;
1137         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1138             return 1;
1139         }
1140 
1141         for (i = 0; i < n->mac_table.first_multi; i++) {
1142             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1143                 return 1;
1144             }
1145         }
1146     }
1147 
1148     return 0;
1149 }
1150 
/*
 * Copy one incoming packet into the guest's RX virtqueue for @nc.
 *
 * @buf/@size describe the packet as handed over by the peer.  The header
 * seen by the guest is produced by receive_header(); payload copying
 * starts n->host_hdr_len bytes into @buf.
 *
 * Returns:
 *   -1    if virtio_net_can_receive() refuses the packet, or if the ring
 *         yields no element / an element without in buffers while copying;
 *   0     if virtio_net_has_buffers() reports insufficient buffers;
 *   size  if the packet was delivered, rejected by receive_filter(), or
 *         dropped because it did not fit into a single element while
 *         mergeable RX buffers are off.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* iovec pieces covering the num_buffers field inside the first element */
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    /* offset: bytes of buf consumed so far; i: elements filled so far */
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered packets are reported as fully consumed. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        /* total: bytes written into this element (header + payload) */
        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry after the first element is a guest error. */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            /*
             * NOTE(review): elements handed to virtqueue_fill() in earlier
             * iterations are neither flushed nor rewound on this path --
             * confirm that this is intended.
             */
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            /* First element: it also carries the header for the guest. */
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in guest memory so it
                 * can be written once the element count is known.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            /* Skip the host header in buf; reserve the guest header. */
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    /* Patch the number of elements used into the first element's header. */
    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
1254 
1255 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1256                                   size_t size)
1257 {
1258     ssize_t r;
1259 
1260     rcu_read_lock();
1261     r = virtio_net_receive_rcu(nc, buf, size);
1262     rcu_read_unlock();
1263     return r;
1264 }
1265 
1266 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1267 
1268 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1269 {
1270     VirtIONet *n = qemu_get_nic_opaque(nc);
1271     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1272     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1273 
1274     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1275     virtio_notify(vdev, q->tx_vq);
1276 
1277     g_free(q->async_tx.elem);
1278     q->async_tx.elem = NULL;
1279 
1280     virtio_queue_set_notification(q->tx_vq, 1);
1281     virtio_net_flush_tx(q);
1282 }
1283 
1284 /* TX */
/*
 * Send the packets pending on q->tx_vq to the peer.
 *
 * Returns:
 *   >= 0     number of elements completed in this call; the loop stops
 *            once n->tx_burst elements have been handled.  0 is also
 *            returned, without touching the ring, when DRIVER_OK is not
 *            set or an asynchronous send is still outstanding;
 *   -EBUSY   qemu_sendv_packet_async() returned 0: the element is parked
 *            in q->async_tx.elem until virtio_net_tx_complete() runs;
 *   -EINVAL  a malformed element was found and reported through
 *            virtio_error().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous send has not completed yet; wait for its callback. */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        /*
         * sg:  scratch iovec used when host and guest header sizes differ.
         * sg2: scratch iovec used when the header is byte-swapped; slot 0
         *      points at the swapped copy in mhdr.
         */
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            /* Read the guest header; the element must contain all of it. */
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Swap the local copy and build a new iovec: the swapped
                 * header first, then the guest data after the header.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                /*
                 * Every destination slot was used, so the copy may have
                 * been cut short: complete the element without sending.
                 */
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
		}
                out_num += 1;
                out_sg = sg2;
	    }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            /* First host_hdr_len header bytes, then everything after the
             * guest header. */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Keep the element until virtio_net_tx_complete() is called. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        /* Return the element to the guest; dropped elements count too. */
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
1379 
1380 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
1381 {
1382     VirtIONet *n = VIRTIO_NET(vdev);
1383     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1384 
1385     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1386         virtio_net_drop_tx_queue_data(vdev, vq);
1387         return;
1388     }
1389 
1390     /* This happens when device was stopped but VCPU wasn't. */
1391     if (!vdev->vm_running) {
1392         q->tx_waiting = 1;
1393         return;
1394     }
1395 
1396     if (q->tx_waiting) {
1397         virtio_queue_set_notification(vq, 1);
1398         timer_del(q->tx_timer);
1399         q->tx_waiting = 0;
1400         if (virtio_net_flush_tx(q) == -EINVAL) {
1401             return;
1402         }
1403     } else {
1404         timer_mod(q->tx_timer,
1405                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
1406         q->tx_waiting = 1;
1407         virtio_queue_set_notification(vq, 0);
1408     }
1409 }
1410 
1411 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
1412 {
1413     VirtIONet *n = VIRTIO_NET(vdev);
1414     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1415 
1416     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
1417         virtio_net_drop_tx_queue_data(vdev, vq);
1418         return;
1419     }
1420 
1421     if (unlikely(q->tx_waiting)) {
1422         return;
1423     }
1424     q->tx_waiting = 1;
1425     /* This happens when device was stopped but VCPU wasn't. */
1426     if (!vdev->vm_running) {
1427         return;
1428     }
1429     virtio_queue_set_notification(vq, 0);
1430     qemu_bh_schedule(q->tx_bh);
1431 }
1432 
1433 static void virtio_net_tx_timer(void *opaque)
1434 {
1435     VirtIONetQueue *q = opaque;
1436     VirtIONet *n = q->n;
1437     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1438     /* This happens when device was stopped but BH wasn't. */
1439     if (!vdev->vm_running) {
1440         /* Make sure tx waiting is set, so we'll run when restarted. */
1441         assert(q->tx_waiting);
1442         return;
1443     }
1444 
1445     q->tx_waiting = 0;
1446 
1447     /* Just in case the driver is not ready on more */
1448     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1449         return;
1450     }
1451 
1452     virtio_queue_set_notification(q->tx_vq, 1);
1453     virtio_net_flush_tx(q);
1454 }
1455 
1456 static void virtio_net_tx_bh(void *opaque)
1457 {
1458     VirtIONetQueue *q = opaque;
1459     VirtIONet *n = q->n;
1460     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1461     int32_t ret;
1462 
1463     /* This happens when device was stopped but BH wasn't. */
1464     if (!vdev->vm_running) {
1465         /* Make sure tx waiting is set, so we'll run when restarted. */
1466         assert(q->tx_waiting);
1467         return;
1468     }
1469 
1470     q->tx_waiting = 0;
1471 
1472     /* Just in case the driver is not ready on more */
1473     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1474         return;
1475     }
1476 
1477     ret = virtio_net_flush_tx(q);
1478     if (ret == -EBUSY || ret == -EINVAL) {
1479         return; /* Notification re-enable handled by tx_complete or device
1480                  * broken */
1481     }
1482 
1483     /* If we flush a full burst of packets, assume there are
1484      * more coming and immediately reschedule */
1485     if (ret >= n->tx_burst) {
1486         qemu_bh_schedule(q->tx_bh);
1487         q->tx_waiting = 1;
1488         return;
1489     }
1490 
1491     /* If less than a full burst, re-enable notification and flush
1492      * anything that may have come in while we weren't looking.  If
1493      * we find something, assume the guest is still active and reschedule */
1494     virtio_queue_set_notification(q->tx_vq, 1);
1495     ret = virtio_net_flush_tx(q);
1496     if (ret == -EINVAL) {
1497         return;
1498     } else if (ret > 0) {
1499         virtio_queue_set_notification(q->tx_vq, 0);
1500         qemu_bh_schedule(q->tx_bh);
1501         q->tx_waiting = 1;
1502     }
1503 }
1504 
1505 static void virtio_net_add_queue(VirtIONet *n, int index)
1506 {
1507     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1508 
1509     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
1510                                            virtio_net_handle_rx);
1511     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
1512         n->vqs[index].tx_vq =
1513             virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
1514         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
1515                                               virtio_net_tx_timer,
1516                                               &n->vqs[index]);
1517     } else {
1518         n->vqs[index].tx_vq =
1519             virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
1520         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
1521     }
1522 
1523     n->vqs[index].tx_waiting = 0;
1524     n->vqs[index].n = n;
1525 }
1526 
1527 static void virtio_net_del_queue(VirtIONet *n, int index)
1528 {
1529     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1530     VirtIONetQueue *q = &n->vqs[index];
1531     NetClientState *nc = qemu_get_subqueue(n->nic, index);
1532 
1533     qemu_purge_queued_packets(nc);
1534 
1535     virtio_del_queue(vdev, index * 2);
1536     if (q->tx_timer) {
1537         timer_del(q->tx_timer);
1538         timer_free(q->tx_timer);
1539         q->tx_timer = NULL;
1540     } else {
1541         qemu_bh_delete(q->tx_bh);
1542         q->tx_bh = NULL;
1543     }
1544     q->tx_waiting = 0;
1545     virtio_del_queue(vdev, index * 2 + 1);
1546 }
1547 
1548 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
1549 {
1550     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1551     int old_num_queues = virtio_get_num_queues(vdev);
1552     int new_num_queues = new_max_queues * 2 + 1;
1553     int i;
1554 
1555     assert(old_num_queues >= 3);
1556     assert(old_num_queues % 2 == 1);
1557 
1558     if (old_num_queues == new_num_queues) {
1559         return;
1560     }
1561 
1562     /*
1563      * We always need to remove and add ctrl vq if
1564      * old_num_queues != new_num_queues. Remove ctrl_vq first,
1565      * and then we only enter one of the following too loops.
1566      */
1567     virtio_del_queue(vdev, old_num_queues - 1);
1568 
1569     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
1570         /* new_num_queues < old_num_queues */
1571         virtio_net_del_queue(n, i / 2);
1572     }
1573 
1574     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
1575         /* new_num_queues > old_num_queues */
1576         virtio_net_add_queue(n, i / 2);
1577     }
1578 
1579     /* add ctrl_vq last */
1580     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1581 }
1582 
1583 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
1584 {
1585     int max = multiqueue ? n->max_queues : 1;
1586 
1587     n->multiqueue = multiqueue;
1588     virtio_net_change_num_queues(n, max);
1589 
1590     virtio_net_set_queues(n);
1591 }
1592 
1593 static int virtio_net_post_load_device(void *opaque, int version_id)
1594 {
1595     VirtIONet *n = opaque;
1596     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1597     int i, link_down;
1598 
1599     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
1600                                virtio_vdev_has_feature(vdev,
1601                                                        VIRTIO_F_VERSION_1));
1602 
1603     /* MAC_TABLE_ENTRIES may be different from the saved image */
1604     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
1605         n->mac_table.in_use = 0;
1606     }
1607 
1608     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1609         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
1610     }
1611 
1612     if (peer_has_vnet_hdr(n)) {
1613         virtio_net_apply_guest_offloads(n);
1614     }
1615 
1616     virtio_net_set_queues(n);
1617 
1618     /* Find the first multicast entry in the saved MAC filter */
1619     for (i = 0; i < n->mac_table.in_use; i++) {
1620         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
1621             break;
1622         }
1623     }
1624     n->mac_table.first_multi = i;
1625 
1626     /* nc.link_down can't be migrated, so infer link_down according
1627      * to link status bit in n->status */
1628     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
1629     for (i = 0; i < n->max_queues; i++) {
1630         qemu_get_subqueue(n->nic, i)->link_down = link_down;
1631     }
1632 
1633     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
1634         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
1635         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
1636         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
1637     }
1638 
1639     return 0;
1640 }
1641 
/*
 * tx_waiting field of a VirtIONetQueue.
 *
 * Migration description for a single queue: the only field listed is the
 * 32-bit tx_waiting flag.  Referenced by vmstate_virtio_net_device (first
 * queue) and vmstate_virtio_net_tx_waiting (remaining queues).
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
1650 
1651 static bool max_queues_gt_1(void *opaque, int version_id)
1652 {
1653     return VIRTIO_NET(opaque)->max_queues > 1;
1654 }
1655 
1656 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
1657 {
1658     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
1659                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
1660 }
1661 
1662 static bool mac_table_fits(void *opaque, int version_id)
1663 {
1664     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
1665 }
1666 
/*
 * Field test for vmstate_virtio_net_device: the exact negation of
 * mac_table_fits().
 */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
1671 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* device being migrated; dereferenced by every pre_save/load hook */
    VirtIONet      *parent;
    /* parent->vqs + 1, i.e. the second queue (tx_waiting sub-description) */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queues - 1, or 0 when curr_queues is 0 */
    uint16_t        curr_queues_1;
    /* copy of parent->has_ufo on save; checked against the peer on load */
    uint8_t         has_ufo;
    /* copy of parent->has_vnet_hdr on save; checked against the peer on load */
    uint32_t        has_vnet_hdr;
};
1682 
1683 /* The 2nd and subsequent tx_waiting flags are loaded later than
1684  * the 1st entry in the queues and only if there's more than one
1685  * entry.  We use the tmp mechanism to calculate a temporary
1686  * pointer and count and also validate the count.
1687  */
1688 
1689 static void virtio_net_tx_waiting_pre_save(void *opaque)
1690 {
1691     struct VirtIONetMigTmp *tmp = opaque;
1692 
1693     tmp->vqs_1 = tmp->parent->vqs + 1;
1694     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
1695     if (tmp->parent->curr_queues == 0) {
1696         tmp->curr_queues_1 = 0;
1697     }
1698 }
1699 
1700 static int virtio_net_tx_waiting_pre_load(void *opaque)
1701 {
1702     struct VirtIONetMigTmp *tmp = opaque;
1703 
1704     /* Reuse the pointer setup from save */
1705     virtio_net_tx_waiting_pre_save(opaque);
1706 
1707     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
1708         error_report("virtio-net: curr_queues %x > max_queues %x",
1709             tmp->parent->curr_queues, tmp->parent->max_queues);
1710 
1711         return -EINVAL;
1712     }
1713 
1714     return 0; /* all good */
1715 }
1716 
/*
 * Migration description for the tx_waiting flags of the queues after the
 * first one.  It operates on a struct VirtIONetMigTmp: the hooks fill in
 * vqs_1 and curr_queues_1, and the single field entry then describes
 * curr_queues_1 VirtIONetQueue entries starting at vqs_1, each using
 * vmstate_virtio_net_queue_tx_waiting.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
1729 
1730 /* the 'has_ufo' flag is just tested; if the incoming stream has the
1731  * flag set we need to check that we have it
1732  */
1733 static int virtio_net_ufo_post_load(void *opaque, int version_id)
1734 {
1735     struct VirtIONetMigTmp *tmp = opaque;
1736 
1737     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
1738         error_report("virtio-net: saved image requires TUN_F_UFO support");
1739         return -EINVAL;
1740     }
1741 
1742     return 0;
1743 }
1744 
1745 static void virtio_net_ufo_pre_save(void *opaque)
1746 {
1747     struct VirtIONetMigTmp *tmp = opaque;
1748 
1749     tmp->has_ufo = tmp->parent->has_ufo;
1750 }
1751 
/*
 * Migration description for the 8-bit has_ufo flag, carried through a
 * struct VirtIONetMigTmp: pre_save copies the flag from the device and
 * post_load validates the loaded value against the peer.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
1761 
1762 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
1763  * flag set we need to check that we have it
1764  */
1765 static int virtio_net_vnet_post_load(void *opaque, int version_id)
1766 {
1767     struct VirtIONetMigTmp *tmp = opaque;
1768 
1769     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
1770         error_report("virtio-net: saved image requires vnet_hdr=on");
1771         return -EINVAL;
1772     }
1773 
1774     return 0;
1775 }
1776 
1777 static void virtio_net_vnet_pre_save(void *opaque)
1778 {
1779     struct VirtIONetMigTmp *tmp = opaque;
1780 
1781     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
1782 }
1783 
/*
 * Migration description for the 32-bit has_vnet_hdr flag, carried through
 * a struct VirtIONetMigTmp: pre_save copies the flag from the device and
 * post_load validates the loaded value against the peer.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
1793 
/*
 * Migration description of the virtio-net device state.
 *
 * Both version_id and minimum_version_id are VIRTIO_NET_VM_VERSION.  The
 * entries below are listed in stream order, so they must not be
 * reordered.  virtio_net_post_load_device() runs after loading to rebuild
 * the state that is derived rather than transferred.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue; the others follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr: saved from the device, only validated on load */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo: saved from the device, only validated on load */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The multiqueue fields are present only when max_queues > 1 */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        /* tx_waiting of the second and subsequent queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
};
1845 
/*
 * Net client callbacks of the virtio-net NIC: receive readiness check,
 * packet reception, link status changes and RX filter queries.
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
};
1854 
1855 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1856 {
1857     VirtIONet *n = VIRTIO_NET(vdev);
1858     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1859     assert(n->vhost_started);
1860     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
1861 }
1862 
1863 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1864                                            bool mask)
1865 {
1866     VirtIONet *n = VIRTIO_NET(vdev);
1867     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1868     assert(n->vhost_started);
1869     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
1870                              vdev, idx, mask);
1871 }
1872 
1873 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
1874 {
1875     int i, config_size = 0;
1876     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
1877 
1878     for (i = 0; feature_sizes[i].flags != 0; i++) {
1879         if (host_features & feature_sizes[i].flags) {
1880             config_size = MAX(feature_sizes[i].end, config_size);
1881         }
1882     }
1883     n->config_size = config_size;
1884 }
1885 
1886 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
1887                                    const char *type)
1888 {
1889     /*
1890      * The name can be NULL, the netclient name will be type.x.
1891      */
1892     assert(type != NULL);
1893 
1894     g_free(n->netclient_name);
1895     g_free(n->netclient_type);
1896     n->netclient_name = g_strdup(name);
1897     n->netclient_type = g_strdup(type);
1898 }
1899 
1900 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
1901 {
1902     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1903     VirtIONet *n = VIRTIO_NET(dev);
1904     NetClientState *nc;
1905     int i;
1906 
1907     if (n->net_conf.mtu) {
1908         n->host_features |= (0x1 << VIRTIO_NET_F_MTU);
1909     }
1910 
1911     virtio_net_set_config_size(n, n->host_features);
1912     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
1913 
1914     /*
1915      * We set a lower limit on RX queue size to what it always was.
1916      * Guests that want a smaller ring can always resize it without
1917      * help from us (using virtio 1 and up).
1918      */
1919     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
1920         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
1921         (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
1922         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
1923                    "must be a power of 2 between %d and %d.",
1924                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
1925                    VIRTQUEUE_MAX_SIZE);
1926         virtio_cleanup(vdev);
1927         return;
1928     }
1929 
1930     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
1931     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
1932         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
1933                    "must be a positive integer less than %d.",
1934                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
1935         virtio_cleanup(vdev);
1936         return;
1937     }
1938     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
1939     n->curr_queues = 1;
1940     n->tx_timeout = n->net_conf.txtimer;
1941 
1942     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
1943                        && strcmp(n->net_conf.tx, "bh")) {
1944         error_report("virtio-net: "
1945                      "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1946                      n->net_conf.tx);
1947         error_report("Defaulting to \"bh\"");
1948     }
1949 
1950     for (i = 0; i < n->max_queues; i++) {
1951         virtio_net_add_queue(n, i);
1952     }
1953 
1954     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1955     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
1956     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
1957     n->status = VIRTIO_NET_S_LINK_UP;
1958     n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1959                                      virtio_net_announce_timer, n);
1960 
1961     if (n->netclient_type) {
1962         /*
1963          * Happen when virtio_net_set_netclient_name has been called.
1964          */
1965         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1966                               n->netclient_type, n->netclient_name, n);
1967     } else {
1968         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1969                               object_get_typename(OBJECT(dev)), dev->id, n);
1970     }
1971 
1972     peer_test_vnet_hdr(n);
1973     if (peer_has_vnet_hdr(n)) {
1974         for (i = 0; i < n->max_queues; i++) {
1975             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
1976         }
1977         n->host_hdr_len = sizeof(struct virtio_net_hdr);
1978     } else {
1979         n->host_hdr_len = 0;
1980     }
1981 
1982     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
1983 
1984     n->vqs[0].tx_waiting = 0;
1985     n->tx_burst = n->net_conf.txburst;
1986     virtio_net_set_mrg_rx_bufs(n, 0, 0);
1987     n->promisc = 1; /* for compatibility */
1988 
1989     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1990 
1991     n->vlans = g_malloc0(MAX_VLAN >> 3);
1992 
1993     nc = qemu_get_queue(n->nic);
1994     nc->rxfilter_notify_enabled = 1;
1995 
1996     n->qdev = dev;
1997 }
1998 
1999 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2000 {
2001     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2002     VirtIONet *n = VIRTIO_NET(dev);
2003     int i, max_queues;
2004 
2005     /* This will stop vhost backend if appropriate. */
2006     virtio_net_set_status(vdev, 0);
2007 
2008     g_free(n->netclient_name);
2009     n->netclient_name = NULL;
2010     g_free(n->netclient_type);
2011     n->netclient_type = NULL;
2012 
2013     g_free(n->mac_table.macs);
2014     g_free(n->vlans);
2015 
2016     max_queues = n->multiqueue ? n->max_queues : 1;
2017     for (i = 0; i < max_queues; i++) {
2018         virtio_net_del_queue(n, i);
2019     }
2020 
2021     timer_del(n->announce_timer);
2022     timer_free(n->announce_timer);
2023     g_free(n->vqs);
2024     qemu_del_nic(n->nic);
2025     virtio_cleanup(vdev);
2026 }
2027 
2028 static void virtio_net_instance_init(Object *obj)
2029 {
2030     VirtIONet *n = VIRTIO_NET(obj);
2031 
2032     /*
2033      * The default config_size is sizeof(struct virtio_net_config).
2034      * Can be overriden with virtio_net_set_config_size.
2035      */
2036     n->config_size = sizeof(struct virtio_net_config);
2037     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2038                                   "bootindex", "/ethernet-phy@0",
2039                                   DEVICE(n), NULL);
2040 }
2041 
2042 static void virtio_net_pre_save(void *opaque)
2043 {
2044     VirtIONet *n = opaque;
2045 
2046     /* At this point, backend must be stopped, otherwise
2047      * it might keep writing to memory. */
2048     assert(!n->vhost_started);
2049 }
2050 
2051 static const VMStateDescription vmstate_virtio_net = {
2052     .name = "virtio-net",
2053     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2054     .version_id = VIRTIO_NET_VM_VERSION,
2055     .fields = (VMStateField[]) {
2056         VMSTATE_VIRTIO_DEVICE,
2057         VMSTATE_END_OF_LIST()
2058     },
2059     .pre_save = virtio_net_pre_save,
2060 };
2061 
2062 static Property virtio_net_properties[] = {
2063     DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
2064     DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
2065                     VIRTIO_NET_F_GUEST_CSUM, true),
2066     DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2067     DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
2068                     VIRTIO_NET_F_GUEST_TSO4, true),
2069     DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
2070                     VIRTIO_NET_F_GUEST_TSO6, true),
2071     DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
2072                     VIRTIO_NET_F_GUEST_ECN, true),
2073     DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
2074                     VIRTIO_NET_F_GUEST_UFO, true),
2075     DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
2076                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2077     DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
2078                     VIRTIO_NET_F_HOST_TSO4, true),
2079     DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
2080                     VIRTIO_NET_F_HOST_TSO6, true),
2081     DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
2082                     VIRTIO_NET_F_HOST_ECN, true),
2083     DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
2084                     VIRTIO_NET_F_HOST_UFO, true),
2085     DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
2086                     VIRTIO_NET_F_MRG_RXBUF, true),
2087     DEFINE_PROP_BIT("status", VirtIONet, host_features,
2088                     VIRTIO_NET_F_STATUS, true),
2089     DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
2090                     VIRTIO_NET_F_CTRL_VQ, true),
2091     DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
2092                     VIRTIO_NET_F_CTRL_RX, true),
2093     DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
2094                     VIRTIO_NET_F_CTRL_VLAN, true),
2095     DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
2096                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2097     DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
2098                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2099     DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
2100                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2101     DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2102     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2103     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2104                        TX_TIMER_INTERVAL),
2105     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2106     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2107     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2108                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2109     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2110     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2111                      true),
2112     DEFINE_PROP_END_OF_LIST(),
2113 };
2114 
2115 static void virtio_net_class_init(ObjectClass *klass, void *data)
2116 {
2117     DeviceClass *dc = DEVICE_CLASS(klass);
2118     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2119 
2120     dc->props = virtio_net_properties;
2121     dc->vmsd = &vmstate_virtio_net;
2122     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2123     vdc->realize = virtio_net_device_realize;
2124     vdc->unrealize = virtio_net_device_unrealize;
2125     vdc->get_config = virtio_net_get_config;
2126     vdc->set_config = virtio_net_set_config;
2127     vdc->get_features = virtio_net_get_features;
2128     vdc->set_features = virtio_net_set_features;
2129     vdc->bad_features = virtio_net_bad_features;
2130     vdc->reset = virtio_net_reset;
2131     vdc->set_status = virtio_net_set_status;
2132     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2133     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2134     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2135     vdc->vmsd = &vmstate_virtio_net_device;
2136 }
2137 
2138 static const TypeInfo virtio_net_info = {
2139     .name = TYPE_VIRTIO_NET,
2140     .parent = TYPE_VIRTIO_DEVICE,
2141     .instance_size = sizeof(VirtIONet),
2142     .instance_init = virtio_net_instance_init,
2143     .class_init = virtio_net_class_init,
2144 };
2145 
/* Register the virtio-net QOM type described by virtio_net_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(virtio_register_types)
2152