xref: /openbmc/qemu/hw/net/virtio-net.c (revision becaeb72)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/iov.h"
15 #include "hw/virtio/virtio.h"
16 #include "net/net.h"
17 #include "net/checksum.h"
18 #include "net/tap.h"
19 #include "qemu/error-report.h"
20 #include "qemu/timer.h"
21 #include "hw/virtio/virtio-net.h"
22 #include "net/vhost_net.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "qapi/qmp/qjson.h"
25 #include "qapi-event.h"
26 #include "hw/virtio/virtio-access.h"
27 
/* NOTE(review): presumably the migration stream version -- confirm at its use */
#define VIRTIO_NET_VM_VERSION    11

/* Capacity, in entries of ETH_ALEN bytes, of the device MAC filter table */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
32 
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container', i.e. the offset of 'field' plus the size of 'field'.
 * The sizeof operand is never evaluated, so the null pointer is not
 * dereferenced.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))
39 
/* Associates a feature bit mask with the config-space extent it requires. */
typedef struct VirtIOFeature {
    uint32_t flags; /* feature bit mask, e.g. 1 << VIRTIO_NET_F_MAC */
    size_t end;     /* bytes of config space up to and including the field */
} VirtIOFeature;
44 
45 static VirtIOFeature feature_sizes[] = {
46     {.flags = 1 << VIRTIO_NET_F_MAC,
47      .end = endof(struct virtio_net_config, mac)},
48     {.flags = 1 << VIRTIO_NET_F_STATUS,
49      .end = endof(struct virtio_net_config, status)},
50     {.flags = 1 << VIRTIO_NET_F_MQ,
51      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
52     {}
53 };
54 
55 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
56 {
57     VirtIONet *n = qemu_get_nic_opaque(nc);
58 
59     return &n->vqs[nc->queue_index];
60 }
61 
/*
 * Map a virtqueue index to a queue-pair index: two consecutive
 * virtqueues share one pair.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_pair = 2;

    return queue_index / vqs_per_pair;
}
66 
67 /* TODO
68  * - we could suppress RX interrupt if we were so inclined.
69  */
70 
71 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
72 {
73     VirtIONet *n = VIRTIO_NET(vdev);
74     struct virtio_net_config netcfg;
75 
76     virtio_stw_p(vdev, &netcfg.status, n->status);
77     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
78     memcpy(netcfg.mac, n->mac, ETH_ALEN);
79     memcpy(config, &netcfg, n->config_size);
80 }
81 
82 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
83 {
84     VirtIONet *n = VIRTIO_NET(vdev);
85     struct virtio_net_config netcfg = {};
86 
87     memcpy(&netcfg, config, n->config_size);
88 
89     if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
90         !virtio_has_feature(vdev, VIRTIO_F_VERSION_1) &&
91         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
92         memcpy(n->mac, netcfg.mac, ETH_ALEN);
93         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
94     }
95 }
96 
97 static bool virtio_net_started(VirtIONet *n, uint8_t status)
98 {
99     VirtIODevice *vdev = VIRTIO_DEVICE(n);
100     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
101         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
102 }
103 
104 static void virtio_net_announce_timer(void *opaque)
105 {
106     VirtIONet *n = opaque;
107     VirtIODevice *vdev = VIRTIO_DEVICE(n);
108 
109     n->announce_counter--;
110     n->status |= VIRTIO_NET_S_ANNOUNCE;
111     virtio_notify_config(vdev);
112 }
113 
114 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
115 {
116     VirtIODevice *vdev = VIRTIO_DEVICE(n);
117     NetClientState *nc = qemu_get_queue(n->nic);
118     int queues = n->multiqueue ? n->max_queues : 1;
119 
120     if (!get_vhost_net(nc->peer)) {
121         return;
122     }
123 
124     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
125         !!n->vhost_started) {
126         return;
127     }
128     if (!n->vhost_started) {
129         int r, i;
130 
131         /* Any packets outstanding? Purge them to avoid touching rings
132          * when vhost is running.
133          */
134         for (i = 0;  i < queues; i++) {
135             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
136 
137             /* Purge both directions: TX and RX. */
138             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
139             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
140         }
141 
142         n->vhost_started = 1;
143         r = vhost_net_start(vdev, n->nic->ncs, queues);
144         if (r < 0) {
145             error_report("unable to start vhost net: %d: "
146                          "falling back on userspace virtio", -r);
147             n->vhost_started = 0;
148         }
149     } else {
150         vhost_net_stop(vdev, n->nic->ncs, queues);
151         n->vhost_started = 0;
152     }
153 }
154 
155 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
156 {
157     VirtIONet *n = VIRTIO_NET(vdev);
158     VirtIONetQueue *q;
159     int i;
160     uint8_t queue_status;
161 
162     virtio_net_vhost_status(n, status);
163 
164     for (i = 0; i < n->max_queues; i++) {
165         q = &n->vqs[i];
166 
167         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
168             queue_status = 0;
169         } else {
170             queue_status = status;
171         }
172 
173         if (!q->tx_waiting) {
174             continue;
175         }
176 
177         if (virtio_net_started(n, queue_status) && !n->vhost_started) {
178             if (q->tx_timer) {
179                 timer_mod(q->tx_timer,
180                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
181             } else {
182                 qemu_bh_schedule(q->tx_bh);
183             }
184         } else {
185             if (q->tx_timer) {
186                 timer_del(q->tx_timer);
187             } else {
188                 qemu_bh_cancel(q->tx_bh);
189             }
190         }
191     }
192 }
193 
194 static void virtio_net_set_link_status(NetClientState *nc)
195 {
196     VirtIONet *n = qemu_get_nic_opaque(nc);
197     VirtIODevice *vdev = VIRTIO_DEVICE(n);
198     uint16_t old_status = n->status;
199 
200     if (nc->link_down)
201         n->status &= ~VIRTIO_NET_S_LINK_UP;
202     else
203         n->status |= VIRTIO_NET_S_LINK_UP;
204 
205     if (n->status != old_status)
206         virtio_notify_config(vdev);
207 
208     virtio_net_set_status(vdev, vdev->status);
209 }
210 
211 static void rxfilter_notify(NetClientState *nc)
212 {
213     VirtIONet *n = qemu_get_nic_opaque(nc);
214 
215     if (nc->rxfilter_notify_enabled) {
216         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
217         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
218                                               n->netclient_name, path, &error_abort);
219         g_free(path);
220 
221         /* disable event notification to avoid events flooding */
222         nc->rxfilter_notify_enabled = 0;
223     }
224 }
225 
226 static intList *get_vlan_table(VirtIONet *n)
227 {
228     intList *list, *entry;
229     int i, j;
230 
231     list = NULL;
232     for (i = 0; i < MAX_VLAN >> 5; i++) {
233         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
234             if (n->vlans[i] & (1U << j)) {
235                 entry = g_malloc0(sizeof(*entry));
236                 entry->value = (i << 5) + j;
237                 entry->next = list;
238                 list = entry;
239             }
240         }
241     }
242 
243     return list;
244 }
245 
246 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
247 {
248     VirtIONet *n = qemu_get_nic_opaque(nc);
249     VirtIODevice *vdev = VIRTIO_DEVICE(n);
250     RxFilterInfo *info;
251     strList *str_list, *entry;
252     int i;
253 
254     info = g_malloc0(sizeof(*info));
255     info->name = g_strdup(nc->name);
256     info->promiscuous = n->promisc;
257 
258     if (n->nouni) {
259         info->unicast = RX_STATE_NONE;
260     } else if (n->alluni) {
261         info->unicast = RX_STATE_ALL;
262     } else {
263         info->unicast = RX_STATE_NORMAL;
264     }
265 
266     if (n->nomulti) {
267         info->multicast = RX_STATE_NONE;
268     } else if (n->allmulti) {
269         info->multicast = RX_STATE_ALL;
270     } else {
271         info->multicast = RX_STATE_NORMAL;
272     }
273 
274     info->broadcast_allowed = n->nobcast;
275     info->multicast_overflow = n->mac_table.multi_overflow;
276     info->unicast_overflow = n->mac_table.uni_overflow;
277 
278     info->main_mac = qemu_mac_strdup_printf(n->mac);
279 
280     str_list = NULL;
281     for (i = 0; i < n->mac_table.first_multi; i++) {
282         entry = g_malloc0(sizeof(*entry));
283         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
284         entry->next = str_list;
285         str_list = entry;
286     }
287     info->unicast_table = str_list;
288 
289     str_list = NULL;
290     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
291         entry = g_malloc0(sizeof(*entry));
292         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
293         entry->next = str_list;
294         str_list = entry;
295     }
296     info->multicast_table = str_list;
297     info->vlan_table = get_vlan_table(n);
298 
299     if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
300         info->vlan = RX_STATE_ALL;
301     } else if (!info->vlan_table) {
302         info->vlan = RX_STATE_NONE;
303     } else {
304         info->vlan = RX_STATE_NORMAL;
305     }
306 
307     /* enable event notification after query */
308     nc->rxfilter_notify_enabled = 1;
309 
310     return info;
311 }
312 
313 static void virtio_net_reset(VirtIODevice *vdev)
314 {
315     VirtIONet *n = VIRTIO_NET(vdev);
316 
317     /* Reset back to compatibility mode */
318     n->promisc = 1;
319     n->allmulti = 0;
320     n->alluni = 0;
321     n->nomulti = 0;
322     n->nouni = 0;
323     n->nobcast = 0;
324     /* multiqueue is disabled by default */
325     n->curr_queues = 1;
326     timer_del(n->announce_timer);
327     n->announce_counter = 0;
328     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
329 
330     /* Flush any MAC and VLAN filter table state */
331     n->mac_table.in_use = 0;
332     n->mac_table.first_multi = 0;
333     n->mac_table.multi_overflow = 0;
334     n->mac_table.uni_overflow = 0;
335     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
336     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
337     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
338     memset(n->vlans, 0, MAX_VLAN >> 3);
339 }
340 
341 static void peer_test_vnet_hdr(VirtIONet *n)
342 {
343     NetClientState *nc = qemu_get_queue(n->nic);
344     if (!nc->peer) {
345         return;
346     }
347 
348     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
349 }
350 
351 static int peer_has_vnet_hdr(VirtIONet *n)
352 {
353     return n->has_vnet_hdr;
354 }
355 
356 static int peer_has_ufo(VirtIONet *n)
357 {
358     if (!peer_has_vnet_hdr(n))
359         return 0;
360 
361     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
362 
363     return n->has_ufo;
364 }
365 
366 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
367                                        int version_1)
368 {
369     int i;
370     NetClientState *nc;
371 
372     n->mergeable_rx_bufs = mergeable_rx_bufs;
373 
374     if (version_1) {
375         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
376     } else {
377         n->guest_hdr_len = n->mergeable_rx_bufs ?
378             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
379             sizeof(struct virtio_net_hdr);
380     }
381 
382     for (i = 0; i < n->max_queues; i++) {
383         nc = qemu_get_subqueue(n->nic, i);
384 
385         if (peer_has_vnet_hdr(n) &&
386             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
387             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
388             n->host_hdr_len = n->guest_hdr_len;
389         }
390     }
391 }
392 
393 static int peer_attach(VirtIONet *n, int index)
394 {
395     NetClientState *nc = qemu_get_subqueue(n->nic, index);
396 
397     if (!nc->peer) {
398         return 0;
399     }
400 
401     if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
402         return 0;
403     }
404 
405     return tap_enable(nc->peer);
406 }
407 
408 static int peer_detach(VirtIONet *n, int index)
409 {
410     NetClientState *nc = qemu_get_subqueue(n->nic, index);
411 
412     if (!nc->peer) {
413         return 0;
414     }
415 
416     if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
417         return 0;
418     }
419 
420     return tap_disable(nc->peer);
421 }
422 
423 static void virtio_net_set_queues(VirtIONet *n)
424 {
425     int i;
426     int r;
427 
428     for (i = 0; i < n->max_queues; i++) {
429         if (i < n->curr_queues) {
430             r = peer_attach(n, i);
431             assert(!r);
432         } else {
433             r = peer_detach(n, i);
434             assert(!r);
435         }
436     }
437 }
438 
439 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
440 
441 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features)
442 {
443     VirtIONet *n = VIRTIO_NET(vdev);
444     NetClientState *nc = qemu_get_queue(n->nic);
445 
446     /* Firstly sync all virtio-net possible supported features */
447     features |= n->host_features;
448 
449     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
450 
451     if (!peer_has_vnet_hdr(n)) {
452         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
453         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
454         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
455         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
456 
457         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
458         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
459         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
460         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
461     }
462 
463     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
464         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
465         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
466     }
467 
468     if (!get_vhost_net(nc->peer)) {
469         return features;
470     }
471     return vhost_net_get_features(get_vhost_net(nc->peer), features);
472 }
473 
474 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
475 {
476     uint64_t features = 0;
477 
478     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
479      * but also these: */
480     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
481     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
482     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
483     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
484     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
485 
486     return features;
487 }
488 
489 static void virtio_net_apply_guest_offloads(VirtIONet *n)
490 {
491     qemu_set_offload(qemu_get_queue(n->nic)->peer,
492             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
493             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
494             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
495             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
496             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
497 }
498 
499 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
500 {
501     static const uint64_t guest_offloads_mask =
502         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
503         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
504         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
505         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
506         (1ULL << VIRTIO_NET_F_GUEST_UFO);
507 
508     return guest_offloads_mask & features;
509 }
510 
511 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
512 {
513     VirtIODevice *vdev = VIRTIO_DEVICE(n);
514     return virtio_net_guest_offloads_by_features(vdev->guest_features);
515 }
516 
517 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
518 {
519     VirtIONet *n = VIRTIO_NET(vdev);
520     int i;
521 
522     virtio_net_set_multiqueue(n,
523                               __virtio_has_feature(features, VIRTIO_NET_F_MQ));
524 
525     virtio_net_set_mrg_rx_bufs(n,
526                                __virtio_has_feature(features,
527                                                     VIRTIO_NET_F_MRG_RXBUF),
528                                __virtio_has_feature(features,
529                                                     VIRTIO_F_VERSION_1));
530 
531     if (n->has_vnet_hdr) {
532         n->curr_guest_offloads =
533             virtio_net_guest_offloads_by_features(features);
534         virtio_net_apply_guest_offloads(n);
535     }
536 
537     for (i = 0;  i < n->max_queues; i++) {
538         NetClientState *nc = qemu_get_subqueue(n->nic, i);
539 
540         if (!get_vhost_net(nc->peer)) {
541             continue;
542         }
543         vhost_net_ack_features(get_vhost_net(nc->peer), features);
544     }
545 
546     if (__virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
547         memset(n->vlans, 0, MAX_VLAN >> 3);
548     } else {
549         memset(n->vlans, 0xff, MAX_VLAN >> 3);
550     }
551 }
552 
553 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
554                                      struct iovec *iov, unsigned int iov_cnt)
555 {
556     uint8_t on;
557     size_t s;
558     NetClientState *nc = qemu_get_queue(n->nic);
559 
560     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
561     if (s != sizeof(on)) {
562         return VIRTIO_NET_ERR;
563     }
564 
565     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
566         n->promisc = on;
567     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
568         n->allmulti = on;
569     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
570         n->alluni = on;
571     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
572         n->nomulti = on;
573     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
574         n->nouni = on;
575     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
576         n->nobcast = on;
577     } else {
578         return VIRTIO_NET_ERR;
579     }
580 
581     rxfilter_notify(nc);
582 
583     return VIRTIO_NET_OK;
584 }
585 
586 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
587                                      struct iovec *iov, unsigned int iov_cnt)
588 {
589     VirtIODevice *vdev = VIRTIO_DEVICE(n);
590     uint64_t offloads;
591     size_t s;
592 
593     if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
594         return VIRTIO_NET_ERR;
595     }
596 
597     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
598     if (s != sizeof(offloads)) {
599         return VIRTIO_NET_ERR;
600     }
601 
602     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
603         uint64_t supported_offloads;
604 
605         if (!n->has_vnet_hdr) {
606             return VIRTIO_NET_ERR;
607         }
608 
609         supported_offloads = virtio_net_supported_guest_offloads(n);
610         if (offloads & ~supported_offloads) {
611             return VIRTIO_NET_ERR;
612         }
613 
614         n->curr_guest_offloads = offloads;
615         virtio_net_apply_guest_offloads(n);
616 
617         return VIRTIO_NET_OK;
618     } else {
619         return VIRTIO_NET_ERR;
620     }
621 }
622 
623 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
624                                  struct iovec *iov, unsigned int iov_cnt)
625 {
626     VirtIODevice *vdev = VIRTIO_DEVICE(n);
627     struct virtio_net_ctrl_mac mac_data;
628     size_t s;
629     NetClientState *nc = qemu_get_queue(n->nic);
630 
631     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
632         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
633             return VIRTIO_NET_ERR;
634         }
635         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
636         assert(s == sizeof(n->mac));
637         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
638         rxfilter_notify(nc);
639 
640         return VIRTIO_NET_OK;
641     }
642 
643     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
644         return VIRTIO_NET_ERR;
645     }
646 
647     int in_use = 0;
648     int first_multi = 0;
649     uint8_t uni_overflow = 0;
650     uint8_t multi_overflow = 0;
651     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
652 
653     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
654                    sizeof(mac_data.entries));
655     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
656     if (s != sizeof(mac_data.entries)) {
657         goto error;
658     }
659     iov_discard_front(&iov, &iov_cnt, s);
660 
661     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
662         goto error;
663     }
664 
665     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
666         s = iov_to_buf(iov, iov_cnt, 0, macs,
667                        mac_data.entries * ETH_ALEN);
668         if (s != mac_data.entries * ETH_ALEN) {
669             goto error;
670         }
671         in_use += mac_data.entries;
672     } else {
673         uni_overflow = 1;
674     }
675 
676     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
677 
678     first_multi = in_use;
679 
680     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
681                    sizeof(mac_data.entries));
682     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
683     if (s != sizeof(mac_data.entries)) {
684         goto error;
685     }
686 
687     iov_discard_front(&iov, &iov_cnt, s);
688 
689     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
690         goto error;
691     }
692 
693     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
694         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
695                        mac_data.entries * ETH_ALEN);
696         if (s != mac_data.entries * ETH_ALEN) {
697             goto error;
698         }
699         in_use += mac_data.entries;
700     } else {
701         multi_overflow = 1;
702     }
703 
704     n->mac_table.in_use = in_use;
705     n->mac_table.first_multi = first_multi;
706     n->mac_table.uni_overflow = uni_overflow;
707     n->mac_table.multi_overflow = multi_overflow;
708     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
709     g_free(macs);
710     rxfilter_notify(nc);
711 
712     return VIRTIO_NET_OK;
713 
714 error:
715     g_free(macs);
716     return VIRTIO_NET_ERR;
717 }
718 
719 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
720                                         struct iovec *iov, unsigned int iov_cnt)
721 {
722     VirtIODevice *vdev = VIRTIO_DEVICE(n);
723     uint16_t vid;
724     size_t s;
725     NetClientState *nc = qemu_get_queue(n->nic);
726 
727     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
728     vid = virtio_lduw_p(vdev, &vid);
729     if (s != sizeof(vid)) {
730         return VIRTIO_NET_ERR;
731     }
732 
733     if (vid >= MAX_VLAN)
734         return VIRTIO_NET_ERR;
735 
736     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
737         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
738     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
739         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
740     else
741         return VIRTIO_NET_ERR;
742 
743     rxfilter_notify(nc);
744 
745     return VIRTIO_NET_OK;
746 }
747 
748 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
749                                       struct iovec *iov, unsigned int iov_cnt)
750 {
751     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
752         n->status & VIRTIO_NET_S_ANNOUNCE) {
753         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
754         if (n->announce_counter) {
755             timer_mod(n->announce_timer,
756                       qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
757                       self_announce_delay(n->announce_counter));
758         }
759         return VIRTIO_NET_OK;
760     } else {
761         return VIRTIO_NET_ERR;
762     }
763 }
764 
765 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
766                                 struct iovec *iov, unsigned int iov_cnt)
767 {
768     VirtIODevice *vdev = VIRTIO_DEVICE(n);
769     struct virtio_net_ctrl_mq mq;
770     size_t s;
771     uint16_t queues;
772 
773     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
774     if (s != sizeof(mq)) {
775         return VIRTIO_NET_ERR;
776     }
777 
778     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
779         return VIRTIO_NET_ERR;
780     }
781 
782     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
783 
784     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
785         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
786         queues > n->max_queues ||
787         !n->multiqueue) {
788         return VIRTIO_NET_ERR;
789     }
790 
791     n->curr_queues = queues;
792     /* stop the backend before changing the number of queues to avoid handling a
793      * disabled queue */
794     virtio_net_set_status(vdev, vdev->status);
795     virtio_net_set_queues(n);
796 
797     return VIRTIO_NET_OK;
798 }
799 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
800 {
801     VirtIONet *n = VIRTIO_NET(vdev);
802     struct virtio_net_ctrl_hdr ctrl;
803     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
804     VirtQueueElement elem;
805     size_t s;
806     struct iovec *iov, *iov2;
807     unsigned int iov_cnt;
808 
809     while (virtqueue_pop(vq, &elem)) {
810         if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
811             iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
812             error_report("virtio-net ctrl missing headers");
813             exit(1);
814         }
815 
816         iov_cnt = elem.out_num;
817         iov2 = iov = g_memdup(elem.out_sg, sizeof(struct iovec) * elem.out_num);
818         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
819         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
820         if (s != sizeof(ctrl)) {
821             status = VIRTIO_NET_ERR;
822         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
823             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
824         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
825             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
826         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
827             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
828         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
829             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
830         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
831             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
832         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
833             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
834         }
835 
836         s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
837         assert(s == sizeof(status));
838 
839         virtqueue_push(vq, &elem, sizeof(status));
840         virtio_notify(vdev, vq);
841         g_free(iov2);
842     }
843 }
844 
845 /* RX */
846 
847 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
848 {
849     VirtIONet *n = VIRTIO_NET(vdev);
850     int queue_index = vq2q(virtio_get_queue_index(vq));
851 
852     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
853 }
854 
855 static int virtio_net_can_receive(NetClientState *nc)
856 {
857     VirtIONet *n = qemu_get_nic_opaque(nc);
858     VirtIODevice *vdev = VIRTIO_DEVICE(n);
859     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
860 
861     if (!vdev->vm_running) {
862         return 0;
863     }
864 
865     if (nc->queue_index >= n->curr_queues) {
866         return 0;
867     }
868 
869     if (!virtio_queue_ready(q->rx_vq) ||
870         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
871         return 0;
872     }
873 
874     return 1;
875 }
876 
877 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
878 {
879     VirtIONet *n = q->n;
880     if (virtio_queue_empty(q->rx_vq) ||
881         (n->mergeable_rx_bufs &&
882          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
883         virtio_queue_set_notification(q->rx_vq, 1);
884 
885         /* To avoid a race condition where the guest has made some buffers
886          * available after the above check but before notification was
887          * enabled, check for available buffers again.
888          */
889         if (virtio_queue_empty(q->rx_vq) ||
890             (n->mergeable_rx_bufs &&
891              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
892             return 0;
893         }
894     }
895 
896     virtio_queue_set_notification(q->rx_vq, 0);
897     return 1;
898 }
899 
900 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
901 {
902     virtio_tswap16s(vdev, &hdr->hdr_len);
903     virtio_tswap16s(vdev, &hdr->gso_size);
904     virtio_tswap16s(vdev, &hdr->csum_start);
905     virtio_tswap16s(vdev, &hdr->csum_offset);
906 }
907 
908 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
909  * it never finds out that the packets don't have valid checksums.  This
910  * causes dhclient to get upset.  Fedora's carried a patch for ages to
911  * fix this with Xen but it hasn't appeared in an upstream release of
912  * dhclient yet.
913  *
914  * To avoid breaking existing guests, we catch udp packets and add
915  * checksums.  This is terrible but it's better than hacking the guest
916  * kernels.
917  *
918  * N.B. if we introduce a zero-copy API, this operation is no longer free so
919  * we should provide a mechanism to disable it to avoid polluting the host
920  * cache.
921  */
922 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
923                                         uint8_t *buf, size_t size)
924 {
925     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
926         (size > 27 && size < 1500) && /* normal sized MTU */
927         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
928         (buf[23] == 17) && /* ip.protocol == UDP */
929         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
930         net_checksum_calculate(buf, size);
931         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
932     }
933 }
934 
935 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
936                            const void *buf, size_t size)
937 {
938     if (n->has_vnet_hdr) {
939         /* FIXME this cast is evil */
940         void *wbuf = (void *)buf;
941         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
942                                     size - n->host_hdr_len);
943         virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
944         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
945     } else {
946         struct virtio_net_hdr hdr = {
947             .flags = 0,
948             .gso_type = VIRTIO_NET_HDR_GSO_NONE
949         };
950         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
951     }
952 }
953 
954 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
955 {
956     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
957     static const uint8_t vlan[] = {0x81, 0x00};
958     uint8_t *ptr = (uint8_t *)buf;
959     int i;
960 
961     if (n->promisc)
962         return 1;
963 
964     ptr += n->host_hdr_len;
965 
966     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
967         int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
968         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
969             return 0;
970     }
971 
972     if (ptr[0] & 1) { // multicast
973         if (!memcmp(ptr, bcast, sizeof(bcast))) {
974             return !n->nobcast;
975         } else if (n->nomulti) {
976             return 0;
977         } else if (n->allmulti || n->mac_table.multi_overflow) {
978             return 1;
979         }
980 
981         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
982             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
983                 return 1;
984             }
985         }
986     } else { // unicast
987         if (n->nouni) {
988             return 0;
989         } else if (n->alluni || n->mac_table.uni_overflow) {
990             return 1;
991         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
992             return 1;
993         }
994 
995         for (i = 0; i < n->mac_table.first_multi; i++) {
996             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
997                 return 1;
998             }
999         }
1000     }
1001 
1002     return 0;
1003 }
1004 
1005 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1006 {
1007     VirtIONet *n = qemu_get_nic_opaque(nc);
1008     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1009     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1010     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1011     struct virtio_net_hdr_mrg_rxbuf mhdr;
1012     unsigned mhdr_cnt = 0;
1013     size_t offset, i, guest_offset;
1014 
1015     if (!virtio_net_can_receive(nc)) {
1016         return -1;
1017     }
1018 
1019     /* hdr_len refers to the header we supply to the guest */
1020     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1021         return 0;
1022     }
1023 
1024     if (!receive_filter(n, buf, size))
1025         return size;
1026 
1027     offset = i = 0;
1028 
1029     while (offset < size) {
1030         VirtQueueElement elem;
1031         int len, total;
1032         const struct iovec *sg = elem.in_sg;
1033 
1034         total = 0;
1035 
1036         if (virtqueue_pop(q->rx_vq, &elem) == 0) {
1037             if (i == 0)
1038                 return -1;
1039             error_report("virtio-net unexpected empty queue: "
1040                          "i %zd mergeable %d offset %zd, size %zd, "
1041                          "guest hdr len %zd, host hdr len %zd "
1042                          "guest features 0x%" PRIx64,
1043                          i, n->mergeable_rx_bufs, offset, size,
1044                          n->guest_hdr_len, n->host_hdr_len,
1045                          vdev->guest_features);
1046             exit(1);
1047         }
1048 
1049         if (elem.in_num < 1) {
1050             error_report("virtio-net receive queue contains no in buffers");
1051             exit(1);
1052         }
1053 
1054         if (i == 0) {
1055             assert(offset == 0);
1056             if (n->mergeable_rx_bufs) {
1057                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1058                                     sg, elem.in_num,
1059                                     offsetof(typeof(mhdr), num_buffers),
1060                                     sizeof(mhdr.num_buffers));
1061             }
1062 
1063             receive_header(n, sg, elem.in_num, buf, size);
1064             offset = n->host_hdr_len;
1065             total += n->guest_hdr_len;
1066             guest_offset = n->guest_hdr_len;
1067         } else {
1068             guest_offset = 0;
1069         }
1070 
1071         /* copy in packet.  ugh */
1072         len = iov_from_buf(sg, elem.in_num, guest_offset,
1073                            buf + offset, size - offset);
1074         total += len;
1075         offset += len;
1076         /* If buffers can't be merged, at this point we
1077          * must have consumed the complete packet.
1078          * Otherwise, drop it. */
1079         if (!n->mergeable_rx_bufs && offset < size) {
1080 #if 0
1081             error_report("virtio-net truncated non-mergeable packet: "
1082                          "i %zd mergeable %d offset %zd, size %zd, "
1083                          "guest hdr len %zd, host hdr len %zd",
1084                          i, n->mergeable_rx_bufs,
1085                          offset, size, n->guest_hdr_len, n->host_hdr_len);
1086 #endif
1087             return size;
1088         }
1089 
1090         /* signal other side */
1091         virtqueue_fill(q->rx_vq, &elem, total, i++);
1092     }
1093 
1094     if (mhdr_cnt) {
1095         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1096         iov_from_buf(mhdr_sg, mhdr_cnt,
1097                      0,
1098                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1099     }
1100 
1101     virtqueue_flush(q->rx_vq, i);
1102     virtio_notify(vdev, q->rx_vq);
1103 
1104     return size;
1105 }
1106 
1107 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1108 
1109 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1110 {
1111     VirtIONet *n = qemu_get_nic_opaque(nc);
1112     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1113     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1114 
1115     virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
1116     virtio_notify(vdev, q->tx_vq);
1117 
1118     q->async_tx.elem.out_num = q->async_tx.len = 0;
1119 
1120     virtio_queue_set_notification(q->tx_vq, 1);
1121     virtio_net_flush_tx(q);
1122 }
1123 
1124 /* TX */
1125 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
1126 {
1127     VirtIONet *n = q->n;
1128     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1129     VirtQueueElement elem;
1130     int32_t num_packets = 0;
1131     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
1132     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1133         return num_packets;
1134     }
1135 
1136     if (q->async_tx.elem.out_num) {
1137         virtio_queue_set_notification(q->tx_vq, 0);
1138         return num_packets;
1139     }
1140 
1141     while (virtqueue_pop(q->tx_vq, &elem)) {
1142         ssize_t ret, len;
1143         unsigned int out_num = elem.out_num;
1144         struct iovec *out_sg = &elem.out_sg[0];
1145         struct iovec sg[VIRTQUEUE_MAX_SIZE];
1146 
1147         if (out_num < 1) {
1148             error_report("virtio-net header not in first element");
1149             exit(1);
1150         }
1151 
1152         if (n->has_vnet_hdr) {
1153             if (out_sg[0].iov_len < n->guest_hdr_len) {
1154                 error_report("virtio-net header incorrect");
1155                 exit(1);
1156             }
1157             virtio_net_hdr_swap(vdev, (void *) out_sg[0].iov_base);
1158         }
1159 
1160         /*
1161          * If host wants to see the guest header as is, we can
1162          * pass it on unchanged. Otherwise, copy just the parts
1163          * that host is interested in.
1164          */
1165         assert(n->host_hdr_len <= n->guest_hdr_len);
1166         if (n->host_hdr_len != n->guest_hdr_len) {
1167             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
1168                                        out_sg, out_num,
1169                                        0, n->host_hdr_len);
1170             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
1171                              out_sg, out_num,
1172                              n->guest_hdr_len, -1);
1173             out_num = sg_num;
1174             out_sg = sg;
1175         }
1176 
1177         len = n->guest_hdr_len;
1178 
1179         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
1180                                       out_sg, out_num, virtio_net_tx_complete);
1181         if (ret == 0) {
1182             virtio_queue_set_notification(q->tx_vq, 0);
1183             q->async_tx.elem = elem;
1184             q->async_tx.len  = len;
1185             return -EBUSY;
1186         }
1187 
1188         len += ret;
1189 
1190         virtqueue_push(q->tx_vq, &elem, 0);
1191         virtio_notify(vdev, q->tx_vq);
1192 
1193         if (++num_packets >= n->tx_burst) {
1194             break;
1195         }
1196     }
1197     return num_packets;
1198 }
1199 
1200 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
1201 {
1202     VirtIONet *n = VIRTIO_NET(vdev);
1203     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1204 
1205     /* This happens when device was stopped but VCPU wasn't. */
1206     if (!vdev->vm_running) {
1207         q->tx_waiting = 1;
1208         return;
1209     }
1210 
1211     if (q->tx_waiting) {
1212         virtio_queue_set_notification(vq, 1);
1213         timer_del(q->tx_timer);
1214         q->tx_waiting = 0;
1215         virtio_net_flush_tx(q);
1216     } else {
1217         timer_mod(q->tx_timer,
1218                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
1219         q->tx_waiting = 1;
1220         virtio_queue_set_notification(vq, 0);
1221     }
1222 }
1223 
1224 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
1225 {
1226     VirtIONet *n = VIRTIO_NET(vdev);
1227     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
1228 
1229     if (unlikely(q->tx_waiting)) {
1230         return;
1231     }
1232     q->tx_waiting = 1;
1233     /* This happens when device was stopped but VCPU wasn't. */
1234     if (!vdev->vm_running) {
1235         return;
1236     }
1237     virtio_queue_set_notification(vq, 0);
1238     qemu_bh_schedule(q->tx_bh);
1239 }
1240 
1241 static void virtio_net_tx_timer(void *opaque)
1242 {
1243     VirtIONetQueue *q = opaque;
1244     VirtIONet *n = q->n;
1245     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1246     /* This happens when device was stopped but BH wasn't. */
1247     if (!vdev->vm_running) {
1248         /* Make sure tx waiting is set, so we'll run when restarted. */
1249         assert(q->tx_waiting);
1250         return;
1251     }
1252 
1253     q->tx_waiting = 0;
1254 
1255     /* Just in case the driver is not ready on more */
1256     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1257         return;
1258     }
1259 
1260     virtio_queue_set_notification(q->tx_vq, 1);
1261     virtio_net_flush_tx(q);
1262 }
1263 
1264 static void virtio_net_tx_bh(void *opaque)
1265 {
1266     VirtIONetQueue *q = opaque;
1267     VirtIONet *n = q->n;
1268     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1269     int32_t ret;
1270 
1271     /* This happens when device was stopped but BH wasn't. */
1272     if (!vdev->vm_running) {
1273         /* Make sure tx waiting is set, so we'll run when restarted. */
1274         assert(q->tx_waiting);
1275         return;
1276     }
1277 
1278     q->tx_waiting = 0;
1279 
1280     /* Just in case the driver is not ready on more */
1281     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1282         return;
1283     }
1284 
1285     ret = virtio_net_flush_tx(q);
1286     if (ret == -EBUSY) {
1287         return; /* Notification re-enable handled by tx_complete */
1288     }
1289 
1290     /* If we flush a full burst of packets, assume there are
1291      * more coming and immediately reschedule */
1292     if (ret >= n->tx_burst) {
1293         qemu_bh_schedule(q->tx_bh);
1294         q->tx_waiting = 1;
1295         return;
1296     }
1297 
1298     /* If less than a full burst, re-enable notification and flush
1299      * anything that may have come in while we weren't looking.  If
1300      * we find something, assume the guest is still active and reschedule */
1301     virtio_queue_set_notification(q->tx_vq, 1);
1302     if (virtio_net_flush_tx(q) > 0) {
1303         virtio_queue_set_notification(q->tx_vq, 0);
1304         qemu_bh_schedule(q->tx_bh);
1305         q->tx_waiting = 1;
1306     }
1307 }
1308 
1309 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
1310 {
1311     n->multiqueue = multiqueue;
1312 
1313     virtio_net_set_queues(n);
1314 }
1315 
1316 static void virtio_net_save(QEMUFile *f, void *opaque)
1317 {
1318     VirtIONet *n = opaque;
1319     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1320 
1321     /* At this point, backend must be stopped, otherwise
1322      * it might keep writing to memory. */
1323     assert(!n->vhost_started);
1324     virtio_save(vdev, f);
1325 }
1326 
1327 static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
1328 {
1329     VirtIONet *n = VIRTIO_NET(vdev);
1330     int i;
1331 
1332     qemu_put_buffer(f, n->mac, ETH_ALEN);
1333     qemu_put_be32(f, n->vqs[0].tx_waiting);
1334     qemu_put_be32(f, n->mergeable_rx_bufs);
1335     qemu_put_be16(f, n->status);
1336     qemu_put_byte(f, n->promisc);
1337     qemu_put_byte(f, n->allmulti);
1338     qemu_put_be32(f, n->mac_table.in_use);
1339     qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
1340     qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
1341     qemu_put_be32(f, n->has_vnet_hdr);
1342     qemu_put_byte(f, n->mac_table.multi_overflow);
1343     qemu_put_byte(f, n->mac_table.uni_overflow);
1344     qemu_put_byte(f, n->alluni);
1345     qemu_put_byte(f, n->nomulti);
1346     qemu_put_byte(f, n->nouni);
1347     qemu_put_byte(f, n->nobcast);
1348     qemu_put_byte(f, n->has_ufo);
1349     if (n->max_queues > 1) {
1350         qemu_put_be16(f, n->max_queues);
1351         qemu_put_be16(f, n->curr_queues);
1352         for (i = 1; i < n->curr_queues; i++) {
1353             qemu_put_be32(f, n->vqs[i].tx_waiting);
1354         }
1355     }
1356 
1357     if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1358         qemu_put_be64(f, n->curr_guest_offloads);
1359     }
1360 }
1361 
1362 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
1363 {
1364     VirtIONet *n = opaque;
1365     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1366 
1367     if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
1368         return -EINVAL;
1369 
1370     return virtio_load(vdev, f, version_id);
1371 }
1372 
1373 static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
1374                                   int version_id)
1375 {
1376     VirtIONet *n = VIRTIO_NET(vdev);
1377     int i, link_down;
1378 
1379     qemu_get_buffer(f, n->mac, ETH_ALEN);
1380     n->vqs[0].tx_waiting = qemu_get_be32(f);
1381 
1382     virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
1383                                virtio_has_feature(vdev, VIRTIO_F_VERSION_1));
1384 
1385     if (version_id >= 3)
1386         n->status = qemu_get_be16(f);
1387 
1388     if (version_id >= 4) {
1389         if (version_id < 8) {
1390             n->promisc = qemu_get_be32(f);
1391             n->allmulti = qemu_get_be32(f);
1392         } else {
1393             n->promisc = qemu_get_byte(f);
1394             n->allmulti = qemu_get_byte(f);
1395         }
1396     }
1397 
1398     if (version_id >= 5) {
1399         n->mac_table.in_use = qemu_get_be32(f);
1400         /* MAC_TABLE_ENTRIES may be different from the saved image */
1401         if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
1402             qemu_get_buffer(f, n->mac_table.macs,
1403                             n->mac_table.in_use * ETH_ALEN);
1404         } else {
1405             int64_t i;
1406 
1407             /* Overflow detected - can happen if source has a larger MAC table.
1408              * We simply set overflow flag so there's no need to maintain the
1409              * table of addresses, discard them all.
1410              * Note: 64 bit math to avoid integer overflow.
1411              */
1412             for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
1413                 qemu_get_byte(f);
1414             }
1415             n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
1416             n->mac_table.in_use = 0;
1417         }
1418     }
1419 
1420     if (version_id >= 6)
1421         qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
1422 
1423     if (version_id >= 7) {
1424         if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
1425             error_report("virtio-net: saved image requires vnet_hdr=on");
1426             return -1;
1427         }
1428     }
1429 
1430     if (version_id >= 9) {
1431         n->mac_table.multi_overflow = qemu_get_byte(f);
1432         n->mac_table.uni_overflow = qemu_get_byte(f);
1433     }
1434 
1435     if (version_id >= 10) {
1436         n->alluni = qemu_get_byte(f);
1437         n->nomulti = qemu_get_byte(f);
1438         n->nouni = qemu_get_byte(f);
1439         n->nobcast = qemu_get_byte(f);
1440     }
1441 
1442     if (version_id >= 11) {
1443         if (qemu_get_byte(f) && !peer_has_ufo(n)) {
1444             error_report("virtio-net: saved image requires TUN_F_UFO support");
1445             return -1;
1446         }
1447     }
1448 
1449     if (n->max_queues > 1) {
1450         if (n->max_queues != qemu_get_be16(f)) {
1451             error_report("virtio-net: different max_queues ");
1452             return -1;
1453         }
1454 
1455         n->curr_queues = qemu_get_be16(f);
1456         if (n->curr_queues > n->max_queues) {
1457             error_report("virtio-net: curr_queues %x > max_queues %x",
1458                          n->curr_queues, n->max_queues);
1459             return -1;
1460         }
1461         for (i = 1; i < n->curr_queues; i++) {
1462             n->vqs[i].tx_waiting = qemu_get_be32(f);
1463         }
1464     }
1465 
1466     if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1467         n->curr_guest_offloads = qemu_get_be64(f);
1468     } else {
1469         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
1470     }
1471 
1472     if (peer_has_vnet_hdr(n)) {
1473         virtio_net_apply_guest_offloads(n);
1474     }
1475 
1476     virtio_net_set_queues(n);
1477 
1478     /* Find the first multicast entry in the saved MAC filter */
1479     for (i = 0; i < n->mac_table.in_use; i++) {
1480         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
1481             break;
1482         }
1483     }
1484     n->mac_table.first_multi = i;
1485 
1486     /* nc.link_down can't be migrated, so infer link_down according
1487      * to link status bit in n->status */
1488     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
1489     for (i = 0; i < n->max_queues; i++) {
1490         qemu_get_subqueue(n->nic, i)->link_down = link_down;
1491     }
1492 
1493     if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
1494         virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
1495         n->announce_counter = SELF_ANNOUNCE_ROUNDS;
1496         timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
1497     }
1498 
1499     return 0;
1500 }
1501 
1502 static NetClientInfo net_virtio_info = {
1503     .type = NET_CLIENT_OPTIONS_KIND_NIC,
1504     .size = sizeof(NICState),
1505     .can_receive = virtio_net_can_receive,
1506     .receive = virtio_net_receive,
1507     .link_status_changed = virtio_net_set_link_status,
1508     .query_rx_filter = virtio_net_query_rxfilter,
1509 };
1510 
1511 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
1512 {
1513     VirtIONet *n = VIRTIO_NET(vdev);
1514     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1515     assert(n->vhost_started);
1516     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
1517 }
1518 
1519 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
1520                                            bool mask)
1521 {
1522     VirtIONet *n = VIRTIO_NET(vdev);
1523     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
1524     assert(n->vhost_started);
1525     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
1526                              vdev, idx, mask);
1527 }
1528 
1529 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
1530 {
1531     int i, config_size = 0;
1532     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
1533     for (i = 0; feature_sizes[i].flags != 0; i++) {
1534         if (host_features & feature_sizes[i].flags) {
1535             config_size = MAX(feature_sizes[i].end, config_size);
1536         }
1537     }
1538     n->config_size = config_size;
1539 }
1540 
1541 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
1542                                    const char *type)
1543 {
1544     /*
1545      * The name can be NULL, the netclient name will be type.x.
1546      */
1547     assert(type != NULL);
1548 
1549     g_free(n->netclient_name);
1550     g_free(n->netclient_type);
1551     n->netclient_name = g_strdup(name);
1552     n->netclient_type = g_strdup(type);
1553 }
1554 
1555 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
1556 {
1557     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1558     VirtIONet *n = VIRTIO_NET(dev);
1559     NetClientState *nc;
1560     int i;
1561 
1562     virtio_net_set_config_size(n, n->host_features);
1563     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
1564 
1565     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
1566     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
1567         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
1568                    "must be a positive integer less than %d.",
1569                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
1570         virtio_cleanup(vdev);
1571         return;
1572     }
1573     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
1574     n->curr_queues = 1;
1575     n->tx_timeout = n->net_conf.txtimer;
1576 
1577     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
1578                        && strcmp(n->net_conf.tx, "bh")) {
1579         error_report("virtio-net: "
1580                      "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1581                      n->net_conf.tx);
1582         error_report("Defaulting to \"bh\"");
1583     }
1584 
1585     for (i = 0; i < n->max_queues; i++) {
1586         n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
1587         if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
1588             n->vqs[i].tx_vq =
1589                 virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
1590             n->vqs[i].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
1591                                               virtio_net_tx_timer,
1592                                               &n->vqs[i]);
1593         } else {
1594             n->vqs[i].tx_vq =
1595                 virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
1596             n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
1597         }
1598 
1599         n->vqs[i].tx_waiting = 0;
1600         n->vqs[i].n = n;
1601     }
1602 
1603     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1604     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
1605     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
1606     n->status = VIRTIO_NET_S_LINK_UP;
1607     n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
1608                                      virtio_net_announce_timer, n);
1609 
1610     if (n->netclient_type) {
1611         /*
1612          * Happen when virtio_net_set_netclient_name has been called.
1613          */
1614         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1615                               n->netclient_type, n->netclient_name, n);
1616     } else {
1617         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1618                               object_get_typename(OBJECT(dev)), dev->id, n);
1619     }
1620 
1621     peer_test_vnet_hdr(n);
1622     if (peer_has_vnet_hdr(n)) {
1623         for (i = 0; i < n->max_queues; i++) {
1624             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
1625         }
1626         n->host_hdr_len = sizeof(struct virtio_net_hdr);
1627     } else {
1628         n->host_hdr_len = 0;
1629     }
1630 
1631     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
1632 
1633     n->vqs[0].tx_waiting = 0;
1634     n->tx_burst = n->net_conf.txburst;
1635     virtio_net_set_mrg_rx_bufs(n, 0, 0);
1636     n->promisc = 1; /* for compatibility */
1637 
1638     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1639 
1640     n->vlans = g_malloc0(MAX_VLAN >> 3);
1641 
1642     nc = qemu_get_queue(n->nic);
1643     nc->rxfilter_notify_enabled = 1;
1644 
1645     n->qdev = dev;
1646     register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
1647                     virtio_net_save, virtio_net_load, n);
1648 }
1649 
1650 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
1651 {
1652     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1653     VirtIONet *n = VIRTIO_NET(dev);
1654     int i;
1655 
1656     /* This will stop vhost backend if appropriate. */
1657     virtio_net_set_status(vdev, 0);
1658 
1659     unregister_savevm(dev, "virtio-net", n);
1660 
1661     g_free(n->netclient_name);
1662     n->netclient_name = NULL;
1663     g_free(n->netclient_type);
1664     n->netclient_type = NULL;
1665 
1666     g_free(n->mac_table.macs);
1667     g_free(n->vlans);
1668 
1669     for (i = 0; i < n->max_queues; i++) {
1670         VirtIONetQueue *q = &n->vqs[i];
1671         NetClientState *nc = qemu_get_subqueue(n->nic, i);
1672 
1673         qemu_purge_queued_packets(nc);
1674 
1675         if (q->tx_timer) {
1676             timer_del(q->tx_timer);
1677             timer_free(q->tx_timer);
1678         } else if (q->tx_bh) {
1679             qemu_bh_delete(q->tx_bh);
1680         }
1681     }
1682 
1683     timer_del(n->announce_timer);
1684     timer_free(n->announce_timer);
1685     g_free(n->vqs);
1686     qemu_del_nic(n->nic);
1687     virtio_cleanup(vdev);
1688 }
1689 
1690 static void virtio_net_instance_init(Object *obj)
1691 {
1692     VirtIONet *n = VIRTIO_NET(obj);
1693 
1694     /*
1695      * The default config_size is sizeof(struct virtio_net_config).
1696      * Can be overriden with virtio_net_set_config_size.
1697      */
1698     n->config_size = sizeof(struct virtio_net_config);
1699     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
1700                                   "bootindex", "/ethernet-phy@0",
1701                                   DEVICE(n), NULL);
1702 }
1703 
1704 static Property virtio_net_properties[] = {
1705     DEFINE_PROP_BIT("any_layout", VirtIONet, host_features,
1706                     VIRTIO_F_ANY_LAYOUT, true),
1707     DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
1708     DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
1709                     VIRTIO_NET_F_GUEST_CSUM, true),
1710     DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
1711     DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
1712                     VIRTIO_NET_F_GUEST_TSO4, true),
1713     DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
1714                     VIRTIO_NET_F_GUEST_TSO6, true),
1715     DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
1716                     VIRTIO_NET_F_GUEST_ECN, true),
1717     DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
1718                     VIRTIO_NET_F_GUEST_UFO, true),
1719     DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
1720                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
1721     DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
1722                     VIRTIO_NET_F_HOST_TSO4, true),
1723     DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
1724                     VIRTIO_NET_F_HOST_TSO6, true),
1725     DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
1726                     VIRTIO_NET_F_HOST_ECN, true),
1727     DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
1728                     VIRTIO_NET_F_HOST_UFO, true),
1729     DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
1730                     VIRTIO_NET_F_MRG_RXBUF, true),
1731     DEFINE_PROP_BIT("status", VirtIONet, host_features,
1732                     VIRTIO_NET_F_STATUS, true),
1733     DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
1734                     VIRTIO_NET_F_CTRL_VQ, true),
1735     DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
1736                     VIRTIO_NET_F_CTRL_RX, true),
1737     DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
1738                     VIRTIO_NET_F_CTRL_VLAN, true),
1739     DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
1740                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
1741     DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
1742                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
1743     DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
1744                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
1745     DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
1746     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
1747     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
1748                        TX_TIMER_INTERVAL),
1749     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
1750     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1751     DEFINE_PROP_END_OF_LIST(),
1752 };
1753 
1754 static void virtio_net_class_init(ObjectClass *klass, void *data)
1755 {
1756     DeviceClass *dc = DEVICE_CLASS(klass);
1757     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1758 
1759     dc->props = virtio_net_properties;
1760     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1761     vdc->realize = virtio_net_device_realize;
1762     vdc->unrealize = virtio_net_device_unrealize;
1763     vdc->get_config = virtio_net_get_config;
1764     vdc->set_config = virtio_net_set_config;
1765     vdc->get_features = virtio_net_get_features;
1766     vdc->set_features = virtio_net_set_features;
1767     vdc->bad_features = virtio_net_bad_features;
1768     vdc->reset = virtio_net_reset;
1769     vdc->set_status = virtio_net_set_status;
1770     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
1771     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
1772     vdc->load = virtio_net_load_device;
1773     vdc->save = virtio_net_save_device;
1774 }
1775 
1776 static const TypeInfo virtio_net_info = {
1777     .name = TYPE_VIRTIO_NET,
1778     .parent = TYPE_VIRTIO_DEVICE,
1779     .instance_size = sizeof(VirtIONet),
1780     .instance_init = virtio_net_instance_init,
1781     .class_init = virtio_net_class_init,
1782 };
1783 
1784 static void virtio_register_types(void)
1785 {
1786     type_register_static(&virtio_net_info);
1787 }
1788 
1789 type_init(virtio_register_types)
1790