xref: /openbmc/qemu/hw/net/virtio-net.c (revision da278d58a092bfcc4e36f1e274229c1468dea731)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 
#define VIRTIO_NET_VM_VERSION    11

/* Number of ETH_ALEN-sized entries in the MAC filter table */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard headers include these definitions */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

#endif
87 
88 static inline __virtio16 *virtio_net_rsc_ext_num_packets(
89     struct virtio_net_hdr *hdr)
90 {
91     return &hdr->csum_start;
92 }
93 
94 static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
95     struct virtio_net_hdr *hdr)
96 {
97     return &hdr->csum_offset;
98 }
99 
100 static VirtIOFeature feature_sizes[] = {
101     {.flags = 1ULL << VIRTIO_NET_F_MAC,
102      .end = endof(struct virtio_net_config, mac)},
103     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
104      .end = endof(struct virtio_net_config, status)},
105     {.flags = 1ULL << VIRTIO_NET_F_MQ,
106      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
107     {.flags = 1ULL << VIRTIO_NET_F_MTU,
108      .end = endof(struct virtio_net_config, mtu)},
109     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
110      .end = endof(struct virtio_net_config, duplex)},
111     {}
112 };
113 
114 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
115 {
116     VirtIONet *n = qemu_get_nic_opaque(nc);
117 
118     return &n->vqs[nc->queue_index];
119 }
120 
/*
 * Convert a virtqueue index into a queue index by halving it (integer
 * division, truncating toward zero).
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}
125 
126 /* TODO
127  * - we could suppress RX interrupt if we were so inclined.
128  */
129 
130 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
131 {
132     VirtIONet *n = VIRTIO_NET(vdev);
133     struct virtio_net_config netcfg;
134 
135     virtio_stw_p(vdev, &netcfg.status, n->status);
136     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
137     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
138     memcpy(netcfg.mac, n->mac, ETH_ALEN);
139     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
140     netcfg.duplex = n->net_conf.duplex;
141     memcpy(config, &netcfg, n->config_size);
142 }
143 
144 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
145 {
146     VirtIONet *n = VIRTIO_NET(vdev);
147     struct virtio_net_config netcfg = {};
148 
149     memcpy(&netcfg, config, n->config_size);
150 
151     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
152         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
153         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
154         memcpy(n->mac, netcfg.mac, ETH_ALEN);
155         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
156     }
157 }
158 
159 static bool virtio_net_started(VirtIONet *n, uint8_t status)
160 {
161     VirtIODevice *vdev = VIRTIO_DEVICE(n);
162     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
163         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
164 }
165 
166 static void virtio_net_announce_notify(VirtIONet *net)
167 {
168     VirtIODevice *vdev = VIRTIO_DEVICE(net);
169     trace_virtio_net_announce_notify();
170 
171     net->status |= VIRTIO_NET_S_ANNOUNCE;
172     virtio_notify_config(vdev);
173 }
174 
175 static void virtio_net_announce_timer(void *opaque)
176 {
177     VirtIONet *n = opaque;
178     trace_virtio_net_announce_timer(n->announce_timer.round);
179 
180     n->announce_timer.round--;
181     virtio_net_announce_notify(n);
182 }
183 
184 static void virtio_net_announce(NetClientState *nc)
185 {
186     VirtIONet *n = qemu_get_nic_opaque(nc);
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188 
189     /*
190      * Make sure the virtio migration announcement timer isn't running
191      * If it is, let it trigger announcement so that we do not cause
192      * confusion.
193      */
194     if (n->announce_timer.round) {
195         return;
196     }
197 
198     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
199         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
200             virtio_net_announce_notify(n);
201     }
202 }
203 
204 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
205 {
206     VirtIODevice *vdev = VIRTIO_DEVICE(n);
207     NetClientState *nc = qemu_get_queue(n->nic);
208     int queues = n->multiqueue ? n->max_queues : 1;
209 
210     if (!get_vhost_net(nc->peer)) {
211         return;
212     }
213 
214     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
215         !!n->vhost_started) {
216         return;
217     }
218     if (!n->vhost_started) {
219         int r, i;
220 
221         if (n->needs_vnet_hdr_swap) {
222             error_report("backend does not support %s vnet headers; "
223                          "falling back on userspace virtio",
224                          virtio_is_big_endian(vdev) ? "BE" : "LE");
225             return;
226         }
227 
228         /* Any packets outstanding? Purge them to avoid touching rings
229          * when vhost is running.
230          */
231         for (i = 0;  i < queues; i++) {
232             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
233 
234             /* Purge both directions: TX and RX. */
235             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
236             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
237         }
238 
239         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
240             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
241             if (r < 0) {
242                 error_report("%uBytes MTU not supported by the backend",
243                              n->net_conf.mtu);
244 
245                 return;
246             }
247         }
248 
249         n->vhost_started = 1;
250         r = vhost_net_start(vdev, n->nic->ncs, queues);
251         if (r < 0) {
252             error_report("unable to start vhost net: %d: "
253                          "falling back on userspace virtio", -r);
254             n->vhost_started = 0;
255         }
256     } else {
257         vhost_net_stop(vdev, n->nic->ncs, queues);
258         n->vhost_started = 0;
259     }
260 }
261 
262 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
263                                           NetClientState *peer,
264                                           bool enable)
265 {
266     if (virtio_is_big_endian(vdev)) {
267         return qemu_set_vnet_be(peer, enable);
268     } else {
269         return qemu_set_vnet_le(peer, enable);
270     }
271 }
272 
273 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
274                                        int queues, bool enable)
275 {
276     int i;
277 
278     for (i = 0; i < queues; i++) {
279         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
280             enable) {
281             while (--i >= 0) {
282                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
283             }
284 
285             return true;
286         }
287     }
288 
289     return false;
290 }
291 
292 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
293 {
294     VirtIODevice *vdev = VIRTIO_DEVICE(n);
295     int queues = n->multiqueue ? n->max_queues : 1;
296 
297     if (virtio_net_started(n, status)) {
298         /* Before using the device, we tell the network backend about the
299          * endianness to use when parsing vnet headers. If the backend
300          * can't do it, we fallback onto fixing the headers in the core
301          * virtio-net code.
302          */
303         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
304                                                             queues, true);
305     } else if (virtio_net_started(n, vdev->status)) {
306         /* After using the device, we need to reset the network backend to
307          * the default (guest native endianness), otherwise the guest may
308          * lose network connectivity if it is rebooted into a different
309          * endianness.
310          */
311         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
312     }
313 }
314 
315 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
316 {
317     unsigned int dropped = virtqueue_drop_all(vq);
318     if (dropped) {
319         virtio_notify(vdev, vq);
320     }
321 }
322 
323 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
324 {
325     VirtIONet *n = VIRTIO_NET(vdev);
326     VirtIONetQueue *q;
327     int i;
328     uint8_t queue_status;
329 
330     virtio_net_vnet_endian_status(n, status);
331     virtio_net_vhost_status(n, status);
332 
333     for (i = 0; i < n->max_queues; i++) {
334         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
335         bool queue_started;
336         q = &n->vqs[i];
337 
338         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
339             queue_status = 0;
340         } else {
341             queue_status = status;
342         }
343         queue_started =
344             virtio_net_started(n, queue_status) && !n->vhost_started;
345 
346         if (queue_started) {
347             qemu_flush_queued_packets(ncs);
348         }
349 
350         if (!q->tx_waiting) {
351             continue;
352         }
353 
354         if (queue_started) {
355             if (q->tx_timer) {
356                 timer_mod(q->tx_timer,
357                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
358             } else {
359                 qemu_bh_schedule(q->tx_bh);
360             }
361         } else {
362             if (q->tx_timer) {
363                 timer_del(q->tx_timer);
364             } else {
365                 qemu_bh_cancel(q->tx_bh);
366             }
367             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
368                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
369                 vdev->vm_running) {
370                 /* if tx is waiting we are likely have some packets in tx queue
371                  * and disabled notification */
372                 q->tx_waiting = 0;
373                 virtio_queue_set_notification(q->tx_vq, 1);
374                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
375             }
376         }
377     }
378 }
379 
380 static void virtio_net_set_link_status(NetClientState *nc)
381 {
382     VirtIONet *n = qemu_get_nic_opaque(nc);
383     VirtIODevice *vdev = VIRTIO_DEVICE(n);
384     uint16_t old_status = n->status;
385 
386     if (nc->link_down)
387         n->status &= ~VIRTIO_NET_S_LINK_UP;
388     else
389         n->status |= VIRTIO_NET_S_LINK_UP;
390 
391     if (n->status != old_status)
392         virtio_notify_config(vdev);
393 
394     virtio_net_set_status(vdev, vdev->status);
395 }
396 
397 static void rxfilter_notify(NetClientState *nc)
398 {
399     VirtIONet *n = qemu_get_nic_opaque(nc);
400 
401     if (nc->rxfilter_notify_enabled) {
402         char *path = object_get_canonical_path(OBJECT(n->qdev));
403         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
404                                               n->netclient_name, path);
405         g_free(path);
406 
407         /* disable event notification to avoid events flooding */
408         nc->rxfilter_notify_enabled = 0;
409     }
410 }
411 
412 static intList *get_vlan_table(VirtIONet *n)
413 {
414     intList *list, *entry;
415     int i, j;
416 
417     list = NULL;
418     for (i = 0; i < MAX_VLAN >> 5; i++) {
419         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
420             if (n->vlans[i] & (1U << j)) {
421                 entry = g_malloc0(sizeof(*entry));
422                 entry->value = (i << 5) + j;
423                 entry->next = list;
424                 list = entry;
425             }
426         }
427     }
428 
429     return list;
430 }
431 
432 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
433 {
434     VirtIONet *n = qemu_get_nic_opaque(nc);
435     VirtIODevice *vdev = VIRTIO_DEVICE(n);
436     RxFilterInfo *info;
437     strList *str_list, *entry;
438     int i;
439 
440     info = g_malloc0(sizeof(*info));
441     info->name = g_strdup(nc->name);
442     info->promiscuous = n->promisc;
443 
444     if (n->nouni) {
445         info->unicast = RX_STATE_NONE;
446     } else if (n->alluni) {
447         info->unicast = RX_STATE_ALL;
448     } else {
449         info->unicast = RX_STATE_NORMAL;
450     }
451 
452     if (n->nomulti) {
453         info->multicast = RX_STATE_NONE;
454     } else if (n->allmulti) {
455         info->multicast = RX_STATE_ALL;
456     } else {
457         info->multicast = RX_STATE_NORMAL;
458     }
459 
460     info->broadcast_allowed = n->nobcast;
461     info->multicast_overflow = n->mac_table.multi_overflow;
462     info->unicast_overflow = n->mac_table.uni_overflow;
463 
464     info->main_mac = qemu_mac_strdup_printf(n->mac);
465 
466     str_list = NULL;
467     for (i = 0; i < n->mac_table.first_multi; i++) {
468         entry = g_malloc0(sizeof(*entry));
469         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
470         entry->next = str_list;
471         str_list = entry;
472     }
473     info->unicast_table = str_list;
474 
475     str_list = NULL;
476     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
477         entry = g_malloc0(sizeof(*entry));
478         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
479         entry->next = str_list;
480         str_list = entry;
481     }
482     info->multicast_table = str_list;
483     info->vlan_table = get_vlan_table(n);
484 
485     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
486         info->vlan = RX_STATE_ALL;
487     } else if (!info->vlan_table) {
488         info->vlan = RX_STATE_NONE;
489     } else {
490         info->vlan = RX_STATE_NORMAL;
491     }
492 
493     /* enable event notification after query */
494     nc->rxfilter_notify_enabled = 1;
495 
496     return info;
497 }
498 
499 static void virtio_net_reset(VirtIODevice *vdev)
500 {
501     VirtIONet *n = VIRTIO_NET(vdev);
502     int i;
503 
504     /* Reset back to compatibility mode */
505     n->promisc = 1;
506     n->allmulti = 0;
507     n->alluni = 0;
508     n->nomulti = 0;
509     n->nouni = 0;
510     n->nobcast = 0;
511     /* multiqueue is disabled by default */
512     n->curr_queues = 1;
513     timer_del(n->announce_timer.tm);
514     n->announce_timer.round = 0;
515     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
516 
517     /* Flush any MAC and VLAN filter table state */
518     n->mac_table.in_use = 0;
519     n->mac_table.first_multi = 0;
520     n->mac_table.multi_overflow = 0;
521     n->mac_table.uni_overflow = 0;
522     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
523     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
524     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
525     memset(n->vlans, 0, MAX_VLAN >> 3);
526 
527     /* Flush any async TX */
528     for (i = 0;  i < n->max_queues; i++) {
529         NetClientState *nc = qemu_get_subqueue(n->nic, i);
530 
531         if (nc->peer) {
532             qemu_flush_or_purge_queued_packets(nc->peer, true);
533             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
534         }
535     }
536 }
537 
538 static void peer_test_vnet_hdr(VirtIONet *n)
539 {
540     NetClientState *nc = qemu_get_queue(n->nic);
541     if (!nc->peer) {
542         return;
543     }
544 
545     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
546 }
547 
548 static int peer_has_vnet_hdr(VirtIONet *n)
549 {
550     return n->has_vnet_hdr;
551 }
552 
553 static int peer_has_ufo(VirtIONet *n)
554 {
555     if (!peer_has_vnet_hdr(n))
556         return 0;
557 
558     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
559 
560     return n->has_ufo;
561 }
562 
563 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
564                                        int version_1)
565 {
566     int i;
567     NetClientState *nc;
568 
569     n->mergeable_rx_bufs = mergeable_rx_bufs;
570 
571     if (version_1) {
572         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
573     } else {
574         n->guest_hdr_len = n->mergeable_rx_bufs ?
575             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
576             sizeof(struct virtio_net_hdr);
577     }
578 
579     for (i = 0; i < n->max_queues; i++) {
580         nc = qemu_get_subqueue(n->nic, i);
581 
582         if (peer_has_vnet_hdr(n) &&
583             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
584             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
585             n->host_hdr_len = n->guest_hdr_len;
586         }
587     }
588 }
589 
590 static int virtio_net_max_tx_queue_size(VirtIONet *n)
591 {
592     NetClientState *peer = n->nic_conf.peers.ncs[0];
593 
594     /*
595      * Backends other than vhost-user don't support max queue size.
596      */
597     if (!peer) {
598         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
599     }
600 
601     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
602         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
603     }
604 
605     return VIRTQUEUE_MAX_SIZE;
606 }
607 
608 static int peer_attach(VirtIONet *n, int index)
609 {
610     NetClientState *nc = qemu_get_subqueue(n->nic, index);
611 
612     if (!nc->peer) {
613         return 0;
614     }
615 
616     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
617         vhost_set_vring_enable(nc->peer, 1);
618     }
619 
620     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
621         return 0;
622     }
623 
624     if (n->max_queues == 1) {
625         return 0;
626     }
627 
628     return tap_enable(nc->peer);
629 }
630 
631 static int peer_detach(VirtIONet *n, int index)
632 {
633     NetClientState *nc = qemu_get_subqueue(n->nic, index);
634 
635     if (!nc->peer) {
636         return 0;
637     }
638 
639     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
640         vhost_set_vring_enable(nc->peer, 0);
641     }
642 
643     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
644         return 0;
645     }
646 
647     return tap_disable(nc->peer);
648 }
649 
650 static void virtio_net_set_queues(VirtIONet *n)
651 {
652     int i;
653     int r;
654 
655     if (n->nic->peer_deleted) {
656         return;
657     }
658 
659     for (i = 0; i < n->max_queues; i++) {
660         if (i < n->curr_queues) {
661             r = peer_attach(n, i);
662             assert(!r);
663         } else {
664             r = peer_detach(n, i);
665             assert(!r);
666         }
667     }
668 }
669 
670 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
671 
672 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
673                                         Error **errp)
674 {
675     VirtIONet *n = VIRTIO_NET(vdev);
676     NetClientState *nc = qemu_get_queue(n->nic);
677 
678     /* Firstly sync all virtio-net possible supported features */
679     features |= n->host_features;
680 
681     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
682 
683     if (!peer_has_vnet_hdr(n)) {
684         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
685         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
686         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
687         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
688 
689         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
690         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
691         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
692         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
693     }
694 
695     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
696         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
697         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
698     }
699 
700     if (!get_vhost_net(nc->peer)) {
701         return features;
702     }
703 
704     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
705     vdev->backend_features = features;
706 
707     if (n->mtu_bypass_backend &&
708             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
709         features |= (1ULL << VIRTIO_NET_F_MTU);
710     }
711 
712     return features;
713 }
714 
715 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
716 {
717     uint64_t features = 0;
718 
719     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
720      * but also these: */
721     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
722     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
723     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
724     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
725     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
726 
727     return features;
728 }
729 
730 static void virtio_net_apply_guest_offloads(VirtIONet *n)
731 {
732     qemu_set_offload(qemu_get_queue(n->nic)->peer,
733             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
734             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
735             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
736             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
737             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
738 }
739 
740 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
741 {
742     static const uint64_t guest_offloads_mask =
743         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
744         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
745         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
746         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
747         (1ULL << VIRTIO_NET_F_GUEST_UFO);
748 
749     return guest_offloads_mask & features;
750 }
751 
752 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
753 {
754     VirtIODevice *vdev = VIRTIO_DEVICE(n);
755     return virtio_net_guest_offloads_by_features(vdev->guest_features);
756 }
757 
758 static void failover_add_primary(VirtIONet *n, Error **errp)
759 {
760     Error *err = NULL;
761 
762     if (n->primary_dev) {
763         return;
764     }
765 
766     n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
767             n->primary_device_id);
768     if (n->primary_device_opts) {
769         n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
770         if (err) {
771             qemu_opts_del(n->primary_device_opts);
772         }
773         if (n->primary_dev) {
774             n->primary_bus = n->primary_dev->parent_bus;
775             if (err) {
776                 qdev_unplug(n->primary_dev, &err);
777                 qdev_set_id(n->primary_dev, "");
778 
779             }
780         }
781     } else {
782         error_setg(errp, "Primary device not found");
783         error_append_hint(errp, "Virtio-net failover will not work. Make "
784             "sure primary device has parameter"
785             " failover_pair_id=<virtio-net-id>\n");
786 }
787     if (err) {
788         error_propagate(errp, err);
789     }
790 }
791 
792 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
793 {
794     VirtIONet *n = opaque;
795     int ret = 0;
796 
797     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
798 
799     if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
800         n->primary_device_id = g_strdup(opts->id);
801         ret = 1;
802     }
803 
804     return ret;
805 }
806 
807 static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
808 {
809     DeviceState *dev = NULL;
810     Error *err = NULL;
811 
812     if (qemu_opts_foreach(qemu_find_opts("device"),
813                          is_my_primary, n, &err)) {
814         if (err) {
815             error_propagate(errp, err);
816             return NULL;
817         }
818         if (n->primary_device_id) {
819             dev = qdev_find_recursive(sysbus_get_default(),
820                     n->primary_device_id);
821         } else {
822             error_setg(errp, "Primary device id not found");
823             return NULL;
824         }
825     }
826     return dev;
827 }
828 
829 
830 
831 static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
832                                                     DeviceState *dev,
833                                                     Error **errp)
834 {
835     DeviceState *prim_dev = NULL;
836     Error *err = NULL;
837 
838     prim_dev = virtio_net_find_primary(n, &err);
839     if (prim_dev) {
840         n->primary_device_id = g_strdup(prim_dev->id);
841         n->primary_device_opts = prim_dev->opts;
842     } else {
843         if (err) {
844             error_propagate(errp, err);
845         }
846     }
847 
848     return prim_dev;
849 }
850 
851 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
852 {
853     VirtIONet *n = VIRTIO_NET(vdev);
854     Error *err = NULL;
855     int i;
856 
857     if (n->mtu_bypass_backend &&
858             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
859         features &= ~(1ULL << VIRTIO_NET_F_MTU);
860     }
861 
862     virtio_net_set_multiqueue(n,
863                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
864 
865     virtio_net_set_mrg_rx_bufs(n,
866                                virtio_has_feature(features,
867                                                   VIRTIO_NET_F_MRG_RXBUF),
868                                virtio_has_feature(features,
869                                                   VIRTIO_F_VERSION_1));
870 
871     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
872         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
873     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
874         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
875 
876     if (n->has_vnet_hdr) {
877         n->curr_guest_offloads =
878             virtio_net_guest_offloads_by_features(features);
879         virtio_net_apply_guest_offloads(n);
880     }
881 
882     for (i = 0;  i < n->max_queues; i++) {
883         NetClientState *nc = qemu_get_subqueue(n->nic, i);
884 
885         if (!get_vhost_net(nc->peer)) {
886             continue;
887         }
888         vhost_net_ack_features(get_vhost_net(nc->peer), features);
889     }
890 
891     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
892         memset(n->vlans, 0, MAX_VLAN >> 3);
893     } else {
894         memset(n->vlans, 0xff, MAX_VLAN >> 3);
895     }
896 
897     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
898         qapi_event_send_failover_negotiated(n->netclient_name);
899         atomic_set(&n->primary_should_be_hidden, false);
900         failover_add_primary(n, &err);
901         if (err) {
902             n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
903             if (err) {
904                 goto out_err;
905             }
906             failover_add_primary(n, &err);
907             if (err) {
908                 goto out_err;
909             }
910         }
911     }
912     return;
913 
914 out_err:
915     if (err) {
916         warn_report_err(err);
917     }
918 }
919 
920 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
921                                      struct iovec *iov, unsigned int iov_cnt)
922 {
923     uint8_t on;
924     size_t s;
925     NetClientState *nc = qemu_get_queue(n->nic);
926 
927     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
928     if (s != sizeof(on)) {
929         return VIRTIO_NET_ERR;
930     }
931 
932     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
933         n->promisc = on;
934     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
935         n->allmulti = on;
936     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
937         n->alluni = on;
938     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
939         n->nomulti = on;
940     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
941         n->nouni = on;
942     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
943         n->nobcast = on;
944     } else {
945         return VIRTIO_NET_ERR;
946     }
947 
948     rxfilter_notify(nc);
949 
950     return VIRTIO_NET_OK;
951 }
952 
953 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
954                                      struct iovec *iov, unsigned int iov_cnt)
955 {
956     VirtIODevice *vdev = VIRTIO_DEVICE(n);
957     uint64_t offloads;
958     size_t s;
959 
960     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
961         return VIRTIO_NET_ERR;
962     }
963 
964     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
965     if (s != sizeof(offloads)) {
966         return VIRTIO_NET_ERR;
967     }
968 
969     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
970         uint64_t supported_offloads;
971 
972         offloads = virtio_ldq_p(vdev, &offloads);
973 
974         if (!n->has_vnet_hdr) {
975             return VIRTIO_NET_ERR;
976         }
977 
978         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
979             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
980         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
981             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
982         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
983 
984         supported_offloads = virtio_net_supported_guest_offloads(n);
985         if (offloads & ~supported_offloads) {
986             return VIRTIO_NET_ERR;
987         }
988 
989         n->curr_guest_offloads = offloads;
990         virtio_net_apply_guest_offloads(n);
991 
992         return VIRTIO_NET_OK;
993     } else {
994         return VIRTIO_NET_ERR;
995     }
996 }
997 
998 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
999                                  struct iovec *iov, unsigned int iov_cnt)
1000 {
1001     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1002     struct virtio_net_ctrl_mac mac_data;
1003     size_t s;
1004     NetClientState *nc = qemu_get_queue(n->nic);
1005 
1006     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1007         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1008             return VIRTIO_NET_ERR;
1009         }
1010         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1011         assert(s == sizeof(n->mac));
1012         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1013         rxfilter_notify(nc);
1014 
1015         return VIRTIO_NET_OK;
1016     }
1017 
1018     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1019         return VIRTIO_NET_ERR;
1020     }
1021 
1022     int in_use = 0;
1023     int first_multi = 0;
1024     uint8_t uni_overflow = 0;
1025     uint8_t multi_overflow = 0;
1026     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1027 
1028     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1029                    sizeof(mac_data.entries));
1030     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1031     if (s != sizeof(mac_data.entries)) {
1032         goto error;
1033     }
1034     iov_discard_front(&iov, &iov_cnt, s);
1035 
1036     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1037         goto error;
1038     }
1039 
1040     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1041         s = iov_to_buf(iov, iov_cnt, 0, macs,
1042                        mac_data.entries * ETH_ALEN);
1043         if (s != mac_data.entries * ETH_ALEN) {
1044             goto error;
1045         }
1046         in_use += mac_data.entries;
1047     } else {
1048         uni_overflow = 1;
1049     }
1050 
1051     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1052 
1053     first_multi = in_use;
1054 
1055     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1056                    sizeof(mac_data.entries));
1057     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1058     if (s != sizeof(mac_data.entries)) {
1059         goto error;
1060     }
1061 
1062     iov_discard_front(&iov, &iov_cnt, s);
1063 
1064     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1065         goto error;
1066     }
1067 
1068     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1069         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1070                        mac_data.entries * ETH_ALEN);
1071         if (s != mac_data.entries * ETH_ALEN) {
1072             goto error;
1073         }
1074         in_use += mac_data.entries;
1075     } else {
1076         multi_overflow = 1;
1077     }
1078 
1079     n->mac_table.in_use = in_use;
1080     n->mac_table.first_multi = first_multi;
1081     n->mac_table.uni_overflow = uni_overflow;
1082     n->mac_table.multi_overflow = multi_overflow;
1083     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1084     g_free(macs);
1085     rxfilter_notify(nc);
1086 
1087     return VIRTIO_NET_OK;
1088 
1089 error:
1090     g_free(macs);
1091     return VIRTIO_NET_ERR;
1092 }
1093 
1094 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1095                                         struct iovec *iov, unsigned int iov_cnt)
1096 {
1097     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1098     uint16_t vid;
1099     size_t s;
1100     NetClientState *nc = qemu_get_queue(n->nic);
1101 
1102     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1103     vid = virtio_lduw_p(vdev, &vid);
1104     if (s != sizeof(vid)) {
1105         return VIRTIO_NET_ERR;
1106     }
1107 
1108     if (vid >= MAX_VLAN)
1109         return VIRTIO_NET_ERR;
1110 
1111     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1112         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1113     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1114         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1115     else
1116         return VIRTIO_NET_ERR;
1117 
1118     rxfilter_notify(nc);
1119 
1120     return VIRTIO_NET_OK;
1121 }
1122 
1123 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1124                                       struct iovec *iov, unsigned int iov_cnt)
1125 {
1126     trace_virtio_net_handle_announce(n->announce_timer.round);
1127     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1128         n->status & VIRTIO_NET_S_ANNOUNCE) {
1129         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1130         if (n->announce_timer.round) {
1131             qemu_announce_timer_step(&n->announce_timer);
1132         }
1133         return VIRTIO_NET_OK;
1134     } else {
1135         return VIRTIO_NET_ERR;
1136     }
1137 }
1138 
1139 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1140                                 struct iovec *iov, unsigned int iov_cnt)
1141 {
1142     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1143     struct virtio_net_ctrl_mq mq;
1144     size_t s;
1145     uint16_t queues;
1146 
1147     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1148     if (s != sizeof(mq)) {
1149         return VIRTIO_NET_ERR;
1150     }
1151 
1152     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1153         return VIRTIO_NET_ERR;
1154     }
1155 
1156     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1157 
1158     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1159         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1160         queues > n->max_queues ||
1161         !n->multiqueue) {
1162         return VIRTIO_NET_ERR;
1163     }
1164 
1165     n->curr_queues = queues;
1166     /* stop the backend before changing the number of queues to avoid handling a
1167      * disabled queue */
1168     virtio_net_set_status(vdev, vdev->status);
1169     virtio_net_set_queues(n);
1170 
1171     return VIRTIO_NET_OK;
1172 }
1173 
1174 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1175 {
1176     VirtIONet *n = VIRTIO_NET(vdev);
1177     struct virtio_net_ctrl_hdr ctrl;
1178     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1179     VirtQueueElement *elem;
1180     size_t s;
1181     struct iovec *iov, *iov2;
1182     unsigned int iov_cnt;
1183 
1184     for (;;) {
1185         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1186         if (!elem) {
1187             break;
1188         }
1189         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1190             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1191             virtio_error(vdev, "virtio-net ctrl missing headers");
1192             virtqueue_detach_element(vq, elem, 0);
1193             g_free(elem);
1194             break;
1195         }
1196 
1197         iov_cnt = elem->out_num;
1198         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1199         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1200         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1201         if (s != sizeof(ctrl)) {
1202             status = VIRTIO_NET_ERR;
1203         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1204             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1205         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1206             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1207         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1208             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1209         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1210             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1211         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1212             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1213         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1214             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1215         }
1216 
1217         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1218         assert(s == sizeof(status));
1219 
1220         virtqueue_push(vq, elem, sizeof(status));
1221         virtio_notify(vdev, vq);
1222         g_free(iov2);
1223         g_free(elem);
1224     }
1225 }
1226 
1227 /* RX */
1228 
1229 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1230 {
1231     VirtIONet *n = VIRTIO_NET(vdev);
1232     int queue_index = vq2q(virtio_get_queue_index(vq));
1233 
1234     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1235 }
1236 
1237 static bool virtio_net_can_receive(NetClientState *nc)
1238 {
1239     VirtIONet *n = qemu_get_nic_opaque(nc);
1240     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1241     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1242 
1243     if (!vdev->vm_running) {
1244         return false;
1245     }
1246 
1247     if (nc->queue_index >= n->curr_queues) {
1248         return false;
1249     }
1250 
1251     if (!virtio_queue_ready(q->rx_vq) ||
1252         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1253         return false;
1254     }
1255 
1256     return true;
1257 }
1258 
1259 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1260 {
1261     VirtIONet *n = q->n;
1262     if (virtio_queue_empty(q->rx_vq) ||
1263         (n->mergeable_rx_bufs &&
1264          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1265         virtio_queue_set_notification(q->rx_vq, 1);
1266 
1267         /* To avoid a race condition where the guest has made some buffers
1268          * available after the above check but before notification was
1269          * enabled, check for available buffers again.
1270          */
1271         if (virtio_queue_empty(q->rx_vq) ||
1272             (n->mergeable_rx_bufs &&
1273              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1274             return 0;
1275         }
1276     }
1277 
1278     virtio_queue_set_notification(q->rx_vq, 0);
1279     return 1;
1280 }
1281 
1282 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1283 {
1284     virtio_tswap16s(vdev, &hdr->hdr_len);
1285     virtio_tswap16s(vdev, &hdr->gso_size);
1286     virtio_tswap16s(vdev, &hdr->csum_start);
1287     virtio_tswap16s(vdev, &hdr->csum_offset);
1288 }
1289 
1290 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1291  * it never finds out that the packets don't have valid checksums.  This
1292  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1293  * fix this with Xen but it hasn't appeared in an upstream release of
1294  * dhclient yet.
1295  *
1296  * To avoid breaking existing guests, we catch udp packets and add
1297  * checksums.  This is terrible but it's better than hacking the guest
1298  * kernels.
1299  *
1300  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1301  * we should provide a mechanism to disable it to avoid polluting the host
1302  * cache.
1303  */
1304 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1305                                         uint8_t *buf, size_t size)
1306 {
1307     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1308         (size > 27 && size < 1500) && /* normal sized MTU */
1309         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1310         (buf[23] == 17) && /* ip.protocol == UDP */
1311         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1312         net_checksum_calculate(buf, size);
1313         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1314     }
1315 }
1316 
1317 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1318                            const void *buf, size_t size)
1319 {
1320     if (n->has_vnet_hdr) {
1321         /* FIXME this cast is evil */
1322         void *wbuf = (void *)buf;
1323         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1324                                     size - n->host_hdr_len);
1325 
1326         if (n->needs_vnet_hdr_swap) {
1327             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1328         }
1329         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1330     } else {
1331         struct virtio_net_hdr hdr = {
1332             .flags = 0,
1333             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1334         };
1335         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1336     }
1337 }
1338 
1339 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1340 {
1341     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1342     static const uint8_t vlan[] = {0x81, 0x00};
1343     uint8_t *ptr = (uint8_t *)buf;
1344     int i;
1345 
1346     if (n->promisc)
1347         return 1;
1348 
1349     ptr += n->host_hdr_len;
1350 
1351     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1352         int vid = lduw_be_p(ptr + 14) & 0xfff;
1353         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1354             return 0;
1355     }
1356 
1357     if (ptr[0] & 1) { // multicast
1358         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1359             return !n->nobcast;
1360         } else if (n->nomulti) {
1361             return 0;
1362         } else if (n->allmulti || n->mac_table.multi_overflow) {
1363             return 1;
1364         }
1365 
1366         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1367             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1368                 return 1;
1369             }
1370         }
1371     } else { // unicast
1372         if (n->nouni) {
1373             return 0;
1374         } else if (n->alluni || n->mac_table.uni_overflow) {
1375             return 1;
1376         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1377             return 1;
1378         }
1379 
1380         for (i = 0; i < n->mac_table.first_multi; i++) {
1381             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1382                 return 1;
1383             }
1384         }
1385     }
1386 
1387     return 0;
1388 }
1389 
1390 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1391                                       size_t size)
1392 {
1393     VirtIONet *n = qemu_get_nic_opaque(nc);
1394     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1395     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1396     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1397     struct virtio_net_hdr_mrg_rxbuf mhdr;
1398     unsigned mhdr_cnt = 0;
1399     size_t offset, i, guest_offset;
1400 
1401     if (!virtio_net_can_receive(nc)) {
1402         return -1;
1403     }
1404 
1405     /* hdr_len refers to the header we supply to the guest */
1406     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1407         return 0;
1408     }
1409 
1410     if (!receive_filter(n, buf, size))
1411         return size;
1412 
1413     offset = i = 0;
1414 
1415     while (offset < size) {
1416         VirtQueueElement *elem;
1417         int len, total;
1418         const struct iovec *sg;
1419 
1420         total = 0;
1421 
1422         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1423         if (!elem) {
1424             if (i) {
1425                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1426                              "i %zd mergeable %d offset %zd, size %zd, "
1427                              "guest hdr len %zd, host hdr len %zd "
1428                              "guest features 0x%" PRIx64,
1429                              i, n->mergeable_rx_bufs, offset, size,
1430                              n->guest_hdr_len, n->host_hdr_len,
1431                              vdev->guest_features);
1432             }
1433             return -1;
1434         }
1435 
1436         if (elem->in_num < 1) {
1437             virtio_error(vdev,
1438                          "virtio-net receive queue contains no in buffers");
1439             virtqueue_detach_element(q->rx_vq, elem, 0);
1440             g_free(elem);
1441             return -1;
1442         }
1443 
1444         sg = elem->in_sg;
1445         if (i == 0) {
1446             assert(offset == 0);
1447             if (n->mergeable_rx_bufs) {
1448                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1449                                     sg, elem->in_num,
1450                                     offsetof(typeof(mhdr), num_buffers),
1451                                     sizeof(mhdr.num_buffers));
1452             }
1453 
1454             receive_header(n, sg, elem->in_num, buf, size);
1455             offset = n->host_hdr_len;
1456             total += n->guest_hdr_len;
1457             guest_offset = n->guest_hdr_len;
1458         } else {
1459             guest_offset = 0;
1460         }
1461 
1462         /* copy in packet.  ugh */
1463         len = iov_from_buf(sg, elem->in_num, guest_offset,
1464                            buf + offset, size - offset);
1465         total += len;
1466         offset += len;
1467         /* If buffers can't be merged, at this point we
1468          * must have consumed the complete packet.
1469          * Otherwise, drop it. */
1470         if (!n->mergeable_rx_bufs && offset < size) {
1471             virtqueue_unpop(q->rx_vq, elem, total);
1472             g_free(elem);
1473             return size;
1474         }
1475 
1476         /* signal other side */
1477         virtqueue_fill(q->rx_vq, elem, total, i++);
1478         g_free(elem);
1479     }
1480 
1481     if (mhdr_cnt) {
1482         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1483         iov_from_buf(mhdr_sg, mhdr_cnt,
1484                      0,
1485                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1486     }
1487 
1488     virtqueue_flush(q->rx_vq, i);
1489     virtio_notify(vdev, q->rx_vq);
1490 
1491     return size;
1492 }
1493 
1494 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1495                                   size_t size)
1496 {
1497     RCU_READ_LOCK_GUARD();
1498 
1499     return virtio_net_receive_rcu(nc, buf, size);
1500 }
1501 
1502 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1503                                          const uint8_t *buf,
1504                                          VirtioNetRscUnit *unit)
1505 {
1506     uint16_t ip_hdrlen;
1507     struct ip_header *ip;
1508 
1509     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1510                               + sizeof(struct eth_header));
1511     unit->ip = (void *)ip;
1512     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1513     unit->ip_plen = &ip->ip_len;
1514     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1515     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1516     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1517 }
1518 
1519 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1520                                          const uint8_t *buf,
1521                                          VirtioNetRscUnit *unit)
1522 {
1523     struct ip6_header *ip6;
1524 
1525     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1526                                  + sizeof(struct eth_header));
1527     unit->ip = ip6;
1528     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1529     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1530                                         + sizeof(struct ip6_header));
1531     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1532 
1533     /* There is a difference between payload lenght in ipv4 and v6,
1534        ip header is excluded in ipv6 */
1535     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1536 }
1537 
1538 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1539                                        VirtioNetRscSeg *seg)
1540 {
1541     int ret;
1542     struct virtio_net_hdr *h;
1543 
1544     h = (struct virtio_net_hdr *)seg->buf;
1545     h->flags = 0;
1546     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1547 
1548     if (seg->is_coalesced) {
1549         *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1550         *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1551         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1552         if (chain->proto == ETH_P_IP) {
1553             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1554         } else {
1555             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1556         }
1557     }
1558 
1559     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1560     QTAILQ_REMOVE(&chain->buffers, seg, next);
1561     g_free(seg->buf);
1562     g_free(seg);
1563 
1564     return ret;
1565 }
1566 
1567 static void virtio_net_rsc_purge(void *opq)
1568 {
1569     VirtioNetRscSeg *seg, *rn;
1570     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1571 
1572     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1573         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1574             chain->stat.purge_failed++;
1575             continue;
1576         }
1577     }
1578 
1579     chain->stat.timer++;
1580     if (!QTAILQ_EMPTY(&chain->buffers)) {
1581         timer_mod(chain->drain_timer,
1582               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1583     }
1584 }
1585 
1586 static void virtio_net_rsc_cleanup(VirtIONet *n)
1587 {
1588     VirtioNetRscChain *chain, *rn_chain;
1589     VirtioNetRscSeg *seg, *rn_seg;
1590 
1591     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1592         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1593             QTAILQ_REMOVE(&chain->buffers, seg, next);
1594             g_free(seg->buf);
1595             g_free(seg);
1596         }
1597 
1598         timer_del(chain->drain_timer);
1599         timer_free(chain->drain_timer);
1600         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1601         g_free(chain);
1602     }
1603 }
1604 
1605 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1606                                      NetClientState *nc,
1607                                      const uint8_t *buf, size_t size)
1608 {
1609     uint16_t hdr_len;
1610     VirtioNetRscSeg *seg;
1611 
1612     hdr_len = chain->n->guest_hdr_len;
1613     seg = g_malloc(sizeof(VirtioNetRscSeg));
1614     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1615         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1616     memcpy(seg->buf, buf, size);
1617     seg->size = size;
1618     seg->packets = 1;
1619     seg->dup_ack = 0;
1620     seg->is_coalesced = 0;
1621     seg->nc = nc;
1622 
1623     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1624     chain->stat.cache++;
1625 
1626     switch (chain->proto) {
1627     case ETH_P_IP:
1628         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1629         break;
1630     case ETH_P_IPV6:
1631         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1632         break;
1633     default:
1634         g_assert_not_reached();
1635     }
1636 }
1637 
1638 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1639                                          VirtioNetRscSeg *seg,
1640                                          const uint8_t *buf,
1641                                          struct tcp_header *n_tcp,
1642                                          struct tcp_header *o_tcp)
1643 {
1644     uint32_t nack, oack;
1645     uint16_t nwin, owin;
1646 
1647     nack = htonl(n_tcp->th_ack);
1648     nwin = htons(n_tcp->th_win);
1649     oack = htonl(o_tcp->th_ack);
1650     owin = htons(o_tcp->th_win);
1651 
1652     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1653         chain->stat.ack_out_of_win++;
1654         return RSC_FINAL;
1655     } else if (nack == oack) {
1656         /* duplicated ack or window probe */
1657         if (nwin == owin) {
1658             /* duplicated ack, add dup ack count due to whql test up to 1 */
1659             chain->stat.dup_ack++;
1660             return RSC_FINAL;
1661         } else {
1662             /* Coalesce window update */
1663             o_tcp->th_win = n_tcp->th_win;
1664             chain->stat.win_update++;
1665             return RSC_COALESCE;
1666         }
1667     } else {
1668         /* pure ack, go to 'C', finalize*/
1669         chain->stat.pure_ack++;
1670         return RSC_FINAL;
1671     }
1672 }
1673 
1674 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1675                                             VirtioNetRscSeg *seg,
1676                                             const uint8_t *buf,
1677                                             VirtioNetRscUnit *n_unit)
1678 {
1679     void *data;
1680     uint16_t o_ip_len;
1681     uint32_t nseq, oseq;
1682     VirtioNetRscUnit *o_unit;
1683 
1684     o_unit = &seg->unit;
1685     o_ip_len = htons(*o_unit->ip_plen);
1686     nseq = htonl(n_unit->tcp->th_seq);
1687     oseq = htonl(o_unit->tcp->th_seq);
1688 
1689     /* out of order or retransmitted. */
1690     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1691         chain->stat.data_out_of_win++;
1692         return RSC_FINAL;
1693     }
1694 
1695     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1696     if (nseq == oseq) {
1697         if ((o_unit->payload == 0) && n_unit->payload) {
1698             /* From no payload to payload, normal case, not a dup ack or etc */
1699             chain->stat.data_after_pure_ack++;
1700             goto coalesce;
1701         } else {
1702             return virtio_net_rsc_handle_ack(chain, seg, buf,
1703                                              n_unit->tcp, o_unit->tcp);
1704         }
1705     } else if ((nseq - oseq) != o_unit->payload) {
1706         /* Not a consistent packet, out of order */
1707         chain->stat.data_out_of_order++;
1708         return RSC_FINAL;
1709     } else {
1710 coalesce:
1711         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1712             chain->stat.over_size++;
1713             return RSC_FINAL;
1714         }
1715 
1716         /* Here comes the right data, the payload length in v4/v6 is different,
1717            so use the field value to update and record the new data len */
1718         o_unit->payload += n_unit->payload; /* update new data len */
1719 
1720         /* update field in ip header */
1721         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1722 
1723         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
1724            for windows guest, while this may change the behavior for linux
1725            guest (only if it uses RSC feature). */
1726         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1727 
1728         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1729         o_unit->tcp->th_win = n_unit->tcp->th_win;
1730 
1731         memmove(seg->buf + seg->size, data, n_unit->payload);
1732         seg->size += n_unit->payload;
1733         seg->packets++;
1734         chain->stat.coalesced++;
1735         return RSC_COALESCE;
1736     }
1737 }
1738 
1739 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1740                                         VirtioNetRscSeg *seg,
1741                                         const uint8_t *buf, size_t size,
1742                                         VirtioNetRscUnit *unit)
1743 {
1744     struct ip_header *ip1, *ip2;
1745 
1746     ip1 = (struct ip_header *)(unit->ip);
1747     ip2 = (struct ip_header *)(seg->unit.ip);
1748     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1749         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1750         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1751         chain->stat.no_match++;
1752         return RSC_NO_MATCH;
1753     }
1754 
1755     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1756 }
1757 
1758 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1759                                         VirtioNetRscSeg *seg,
1760                                         const uint8_t *buf, size_t size,
1761                                         VirtioNetRscUnit *unit)
1762 {
1763     struct ip6_header *ip1, *ip2;
1764 
1765     ip1 = (struct ip6_header *)(unit->ip);
1766     ip2 = (struct ip6_header *)(seg->unit.ip);
1767     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1768         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1769         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1770         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1771             chain->stat.no_match++;
1772             return RSC_NO_MATCH;
1773     }
1774 
1775     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1776 }
1777 
1778 /* Packets with 'SYN' should bypass, other flag should be sent after drain
1779  * to prevent out of order */
1780 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1781                                          struct tcp_header *tcp)
1782 {
1783     uint16_t tcp_hdr;
1784     uint16_t tcp_flag;
1785 
1786     tcp_flag = htons(tcp->th_offset_flags);
1787     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1788     tcp_flag &= VIRTIO_NET_TCP_FLAG;
1789     tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
1790     if (tcp_flag & TH_SYN) {
1791         chain->stat.tcp_syn++;
1792         return RSC_BYPASS;
1793     }
1794 
1795     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1796         chain->stat.tcp_ctrl_drain++;
1797         return RSC_FINAL;
1798     }
1799 
1800     if (tcp_hdr > sizeof(struct tcp_header)) {
1801         chain->stat.tcp_all_opt++;
1802         return RSC_FINAL;
1803     }
1804 
1805     return RSC_CANDIDATE;
1806 }
1807 
1808 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1809                                          NetClientState *nc,
1810                                          const uint8_t *buf, size_t size,
1811                                          VirtioNetRscUnit *unit)
1812 {
1813     int ret;
1814     VirtioNetRscSeg *seg, *nseg;
1815 
1816     if (QTAILQ_EMPTY(&chain->buffers)) {
1817         chain->stat.empty_cache++;
1818         virtio_net_rsc_cache_buf(chain, nc, buf, size);
1819         timer_mod(chain->drain_timer,
1820               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1821         return size;
1822     }
1823 
1824     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1825         if (chain->proto == ETH_P_IP) {
1826             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1827         } else {
1828             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1829         }
1830 
1831         if (ret == RSC_FINAL) {
1832             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1833                 /* Send failed */
1834                 chain->stat.final_failed++;
1835                 return 0;
1836             }
1837 
1838             /* Send current packet */
1839             return virtio_net_do_receive(nc, buf, size);
1840         } else if (ret == RSC_NO_MATCH) {
1841             continue;
1842         } else {
1843             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
1844             seg->is_coalesced = 1;
1845             return size;
1846         }
1847     }
1848 
1849     chain->stat.no_match_cache++;
1850     virtio_net_rsc_cache_buf(chain, nc, buf, size);
1851     return size;
1852 }
1853 
1854 /* Drain a connection data, this is to avoid out of order segments */
1855 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1856                                         NetClientState *nc,
1857                                         const uint8_t *buf, size_t size,
1858                                         uint16_t ip_start, uint16_t ip_size,
1859                                         uint16_t tcp_port)
1860 {
1861     VirtioNetRscSeg *seg, *nseg;
1862     uint32_t ppair1, ppair2;
1863 
1864     ppair1 = *(uint32_t *)(buf + tcp_port);
1865     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1866         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1867         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1868             || (ppair1 != ppair2)) {
1869             continue;
1870         }
1871         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1872             chain->stat.drain_failed++;
1873         }
1874 
1875         break;
1876     }
1877 
1878     return virtio_net_do_receive(nc, buf, size);
1879 }
1880 
1881 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1882                                             struct ip_header *ip,
1883                                             const uint8_t *buf, size_t size)
1884 {
1885     uint16_t ip_len;
1886 
1887     /* Not an ipv4 packet */
1888     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1889         chain->stat.ip_option++;
1890         return RSC_BYPASS;
1891     }
1892 
1893     /* Don't handle packets with ip option */
1894     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1895         chain->stat.ip_option++;
1896         return RSC_BYPASS;
1897     }
1898 
1899     if (ip->ip_p != IPPROTO_TCP) {
1900         chain->stat.bypass_not_tcp++;
1901         return RSC_BYPASS;
1902     }
1903 
1904     /* Don't handle packets with ip fragment */
1905     if (!(htons(ip->ip_off) & IP_DF)) {
1906         chain->stat.ip_frag++;
1907         return RSC_BYPASS;
1908     }
1909 
1910     /* Don't handle packets with ecn flag */
1911     if (IPTOS_ECN(ip->ip_tos)) {
1912         chain->stat.ip_ecn++;
1913         return RSC_BYPASS;
1914     }
1915 
1916     ip_len = htons(ip->ip_len);
1917     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1918         || ip_len > (size - chain->n->guest_hdr_len -
1919                      sizeof(struct eth_header))) {
1920         chain->stat.ip_hacked++;
1921         return RSC_BYPASS;
1922     }
1923 
1924     return RSC_CANDIDATE;
1925 }
1926 
1927 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1928                                       NetClientState *nc,
1929                                       const uint8_t *buf, size_t size)
1930 {
1931     int32_t ret;
1932     uint16_t hdr_len;
1933     VirtioNetRscUnit unit;
1934 
1935     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1936 
1937     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1938         + sizeof(struct tcp_header))) {
1939         chain->stat.bypass_not_tcp++;
1940         return virtio_net_do_receive(nc, buf, size);
1941     }
1942 
1943     virtio_net_rsc_extract_unit4(chain, buf, &unit);
1944     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1945         != RSC_CANDIDATE) {
1946         return virtio_net_do_receive(nc, buf, size);
1947     }
1948 
1949     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1950     if (ret == RSC_BYPASS) {
1951         return virtio_net_do_receive(nc, buf, size);
1952     } else if (ret == RSC_FINAL) {
1953         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1954                 ((hdr_len + sizeof(struct eth_header)) + 12),
1955                 VIRTIO_NET_IP4_ADDR_SIZE,
1956                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1957     }
1958 
1959     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1960 }
1961 
1962 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1963                                             struct ip6_header *ip6,
1964                                             const uint8_t *buf, size_t size)
1965 {
1966     uint16_t ip_len;
1967 
1968     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1969         != IP_HEADER_VERSION_6) {
1970         return RSC_BYPASS;
1971     }
1972 
1973     /* Both option and protocol is checked in this */
1974     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1975         chain->stat.bypass_not_tcp++;
1976         return RSC_BYPASS;
1977     }
1978 
1979     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1980     if (ip_len < sizeof(struct tcp_header) ||
1981         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1982                   - sizeof(struct ip6_header))) {
1983         chain->stat.ip_hacked++;
1984         return RSC_BYPASS;
1985     }
1986 
1987     /* Don't handle packets with ecn flag */
1988     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1989         chain->stat.ip_ecn++;
1990         return RSC_BYPASS;
1991     }
1992 
1993     return RSC_CANDIDATE;
1994 }
1995 
1996 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1997                                       const uint8_t *buf, size_t size)
1998 {
1999     int32_t ret;
2000     uint16_t hdr_len;
2001     VirtioNetRscChain *chain;
2002     VirtioNetRscUnit unit;
2003 
2004     chain = (VirtioNetRscChain *)opq;
2005     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2006 
2007     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2008         + sizeof(tcp_header))) {
2009         return virtio_net_do_receive(nc, buf, size);
2010     }
2011 
2012     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2013     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2014                                                  unit.ip, buf, size)) {
2015         return virtio_net_do_receive(nc, buf, size);
2016     }
2017 
2018     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2019     if (ret == RSC_BYPASS) {
2020         return virtio_net_do_receive(nc, buf, size);
2021     } else if (ret == RSC_FINAL) {
2022         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2023                 ((hdr_len + sizeof(struct eth_header)) + 8),
2024                 VIRTIO_NET_IP6_ADDR_SIZE,
2025                 hdr_len + sizeof(struct eth_header)
2026                 + sizeof(struct ip6_header));
2027     }
2028 
2029     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2030 }
2031 
2032 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2033                                                       NetClientState *nc,
2034                                                       uint16_t proto)
2035 {
2036     VirtioNetRscChain *chain;
2037 
2038     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2039         return NULL;
2040     }
2041 
2042     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2043         if (chain->proto == proto) {
2044             return chain;
2045         }
2046     }
2047 
2048     chain = g_malloc(sizeof(*chain));
2049     chain->n = n;
2050     chain->proto = proto;
2051     if (proto == (uint16_t)ETH_P_IP) {
2052         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2053         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2054     } else {
2055         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2056         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2057     }
2058     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2059                                       virtio_net_rsc_purge, chain);
2060     memset(&chain->stat, 0, sizeof(chain->stat));
2061 
2062     QTAILQ_INIT(&chain->buffers);
2063     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2064 
2065     return chain;
2066 }
2067 
2068 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2069                                       const uint8_t *buf,
2070                                       size_t size)
2071 {
2072     uint16_t proto;
2073     VirtioNetRscChain *chain;
2074     struct eth_header *eth;
2075     VirtIONet *n;
2076 
2077     n = qemu_get_nic_opaque(nc);
2078     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2079         return virtio_net_do_receive(nc, buf, size);
2080     }
2081 
2082     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2083     proto = htons(eth->h_proto);
2084 
2085     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2086     if (chain) {
2087         chain->stat.received++;
2088         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2089             return virtio_net_rsc_receive4(chain, nc, buf, size);
2090         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2091             return virtio_net_rsc_receive6(chain, nc, buf, size);
2092         }
2093     }
2094     return virtio_net_do_receive(nc, buf, size);
2095 }
2096 
2097 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2098                                   size_t size)
2099 {
2100     VirtIONet *n = qemu_get_nic_opaque(nc);
2101     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2102         return virtio_net_rsc_receive(nc, buf, size);
2103     } else {
2104         return virtio_net_do_receive(nc, buf, size);
2105     }
2106 }
2107 
2108 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2109 
2110 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2111 {
2112     VirtIONet *n = qemu_get_nic_opaque(nc);
2113     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2114     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2115 
2116     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2117     virtio_notify(vdev, q->tx_vq);
2118 
2119     g_free(q->async_tx.elem);
2120     q->async_tx.elem = NULL;
2121 
2122     virtio_queue_set_notification(q->tx_vq, 1);
2123     virtio_net_flush_tx(q);
2124 }
2125 
2126 /* TX */
2127 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2128 {
2129     VirtIONet *n = q->n;
2130     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2131     VirtQueueElement *elem;
2132     int32_t num_packets = 0;
2133     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2134     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2135         return num_packets;
2136     }
2137 
2138     if (q->async_tx.elem) {
2139         virtio_queue_set_notification(q->tx_vq, 0);
2140         return num_packets;
2141     }
2142 
2143     for (;;) {
2144         ssize_t ret;
2145         unsigned int out_num;
2146         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2147         struct virtio_net_hdr_mrg_rxbuf mhdr;
2148 
2149         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2150         if (!elem) {
2151             break;
2152         }
2153 
2154         out_num = elem->out_num;
2155         out_sg = elem->out_sg;
2156         if (out_num < 1) {
2157             virtio_error(vdev, "virtio-net header not in first element");
2158             virtqueue_detach_element(q->tx_vq, elem, 0);
2159             g_free(elem);
2160             return -EINVAL;
2161         }
2162 
2163         if (n->has_vnet_hdr) {
2164             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2165                 n->guest_hdr_len) {
2166                 virtio_error(vdev, "virtio-net header incorrect");
2167                 virtqueue_detach_element(q->tx_vq, elem, 0);
2168                 g_free(elem);
2169                 return -EINVAL;
2170             }
2171             if (n->needs_vnet_hdr_swap) {
2172                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2173                 sg2[0].iov_base = &mhdr;
2174                 sg2[0].iov_len = n->guest_hdr_len;
2175                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2176                                    out_sg, out_num,
2177                                    n->guest_hdr_len, -1);
2178                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2179                     goto drop;
2180                 }
2181                 out_num += 1;
2182                 out_sg = sg2;
2183             }
2184         }
2185         /*
2186          * If host wants to see the guest header as is, we can
2187          * pass it on unchanged. Otherwise, copy just the parts
2188          * that host is interested in.
2189          */
2190         assert(n->host_hdr_len <= n->guest_hdr_len);
2191         if (n->host_hdr_len != n->guest_hdr_len) {
2192             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2193                                        out_sg, out_num,
2194                                        0, n->host_hdr_len);
2195             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2196                              out_sg, out_num,
2197                              n->guest_hdr_len, -1);
2198             out_num = sg_num;
2199             out_sg = sg;
2200         }
2201 
2202         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2203                                       out_sg, out_num, virtio_net_tx_complete);
2204         if (ret == 0) {
2205             virtio_queue_set_notification(q->tx_vq, 0);
2206             q->async_tx.elem = elem;
2207             return -EBUSY;
2208         }
2209 
2210 drop:
2211         virtqueue_push(q->tx_vq, elem, 0);
2212         virtio_notify(vdev, q->tx_vq);
2213         g_free(elem);
2214 
2215         if (++num_packets >= n->tx_burst) {
2216             break;
2217         }
2218     }
2219     return num_packets;
2220 }
2221 
2222 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2223 {
2224     VirtIONet *n = VIRTIO_NET(vdev);
2225     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2226 
2227     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2228         virtio_net_drop_tx_queue_data(vdev, vq);
2229         return;
2230     }
2231 
2232     /* This happens when device was stopped but VCPU wasn't. */
2233     if (!vdev->vm_running) {
2234         q->tx_waiting = 1;
2235         return;
2236     }
2237 
2238     if (q->tx_waiting) {
2239         virtio_queue_set_notification(vq, 1);
2240         timer_del(q->tx_timer);
2241         q->tx_waiting = 0;
2242         if (virtio_net_flush_tx(q) == -EINVAL) {
2243             return;
2244         }
2245     } else {
2246         timer_mod(q->tx_timer,
2247                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2248         q->tx_waiting = 1;
2249         virtio_queue_set_notification(vq, 0);
2250     }
2251 }
2252 
2253 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2254 {
2255     VirtIONet *n = VIRTIO_NET(vdev);
2256     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2257 
2258     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2259         virtio_net_drop_tx_queue_data(vdev, vq);
2260         return;
2261     }
2262 
2263     if (unlikely(q->tx_waiting)) {
2264         return;
2265     }
2266     q->tx_waiting = 1;
2267     /* This happens when device was stopped but VCPU wasn't. */
2268     if (!vdev->vm_running) {
2269         return;
2270     }
2271     virtio_queue_set_notification(vq, 0);
2272     qemu_bh_schedule(q->tx_bh);
2273 }
2274 
2275 static void virtio_net_tx_timer(void *opaque)
2276 {
2277     VirtIONetQueue *q = opaque;
2278     VirtIONet *n = q->n;
2279     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2280     /* This happens when device was stopped but BH wasn't. */
2281     if (!vdev->vm_running) {
2282         /* Make sure tx waiting is set, so we'll run when restarted. */
2283         assert(q->tx_waiting);
2284         return;
2285     }
2286 
2287     q->tx_waiting = 0;
2288 
2289     /* Just in case the driver is not ready on more */
2290     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2291         return;
2292     }
2293 
2294     virtio_queue_set_notification(q->tx_vq, 1);
2295     virtio_net_flush_tx(q);
2296 }
2297 
2298 static void virtio_net_tx_bh(void *opaque)
2299 {
2300     VirtIONetQueue *q = opaque;
2301     VirtIONet *n = q->n;
2302     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2303     int32_t ret;
2304 
2305     /* This happens when device was stopped but BH wasn't. */
2306     if (!vdev->vm_running) {
2307         /* Make sure tx waiting is set, so we'll run when restarted. */
2308         assert(q->tx_waiting);
2309         return;
2310     }
2311 
2312     q->tx_waiting = 0;
2313 
2314     /* Just in case the driver is not ready on more */
2315     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2316         return;
2317     }
2318 
2319     ret = virtio_net_flush_tx(q);
2320     if (ret == -EBUSY || ret == -EINVAL) {
2321         return; /* Notification re-enable handled by tx_complete or device
2322                  * broken */
2323     }
2324 
2325     /* If we flush a full burst of packets, assume there are
2326      * more coming and immediately reschedule */
2327     if (ret >= n->tx_burst) {
2328         qemu_bh_schedule(q->tx_bh);
2329         q->tx_waiting = 1;
2330         return;
2331     }
2332 
2333     /* If less than a full burst, re-enable notification and flush
2334      * anything that may have come in while we weren't looking.  If
2335      * we find something, assume the guest is still active and reschedule */
2336     virtio_queue_set_notification(q->tx_vq, 1);
2337     ret = virtio_net_flush_tx(q);
2338     if (ret == -EINVAL) {
2339         return;
2340     } else if (ret > 0) {
2341         virtio_queue_set_notification(q->tx_vq, 0);
2342         qemu_bh_schedule(q->tx_bh);
2343         q->tx_waiting = 1;
2344     }
2345 }
2346 
2347 static void virtio_net_add_queue(VirtIONet *n, int index)
2348 {
2349     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2350 
2351     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2352                                            virtio_net_handle_rx);
2353 
2354     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2355         n->vqs[index].tx_vq =
2356             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2357                              virtio_net_handle_tx_timer);
2358         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2359                                               virtio_net_tx_timer,
2360                                               &n->vqs[index]);
2361     } else {
2362         n->vqs[index].tx_vq =
2363             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2364                              virtio_net_handle_tx_bh);
2365         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2366     }
2367 
2368     n->vqs[index].tx_waiting = 0;
2369     n->vqs[index].n = n;
2370 }
2371 
2372 static void virtio_net_del_queue(VirtIONet *n, int index)
2373 {
2374     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2375     VirtIONetQueue *q = &n->vqs[index];
2376     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2377 
2378     qemu_purge_queued_packets(nc);
2379 
2380     virtio_del_queue(vdev, index * 2);
2381     if (q->tx_timer) {
2382         timer_del(q->tx_timer);
2383         timer_free(q->tx_timer);
2384         q->tx_timer = NULL;
2385     } else {
2386         qemu_bh_delete(q->tx_bh);
2387         q->tx_bh = NULL;
2388     }
2389     q->tx_waiting = 0;
2390     virtio_del_queue(vdev, index * 2 + 1);
2391 }
2392 
2393 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2394 {
2395     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2396     int old_num_queues = virtio_get_num_queues(vdev);
2397     int new_num_queues = new_max_queues * 2 + 1;
2398     int i;
2399 
2400     assert(old_num_queues >= 3);
2401     assert(old_num_queues % 2 == 1);
2402 
2403     if (old_num_queues == new_num_queues) {
2404         return;
2405     }
2406 
2407     /*
2408      * We always need to remove and add ctrl vq if
2409      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2410      * and then we only enter one of the following two loops.
2411      */
2412     virtio_del_queue(vdev, old_num_queues - 1);
2413 
2414     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2415         /* new_num_queues < old_num_queues */
2416         virtio_net_del_queue(n, i / 2);
2417     }
2418 
2419     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2420         /* new_num_queues > old_num_queues */
2421         virtio_net_add_queue(n, i / 2);
2422     }
2423 
2424     /* add ctrl_vq last */
2425     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2426 }
2427 
2428 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2429 {
2430     int max = multiqueue ? n->max_queues : 1;
2431 
2432     n->multiqueue = multiqueue;
2433     virtio_net_change_num_queues(n, max);
2434 
2435     virtio_net_set_queues(n);
2436 }
2437 
2438 static int virtio_net_post_load_device(void *opaque, int version_id)
2439 {
2440     VirtIONet *n = opaque;
2441     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2442     int i, link_down;
2443 
2444     trace_virtio_net_post_load_device();
2445     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2446                                virtio_vdev_has_feature(vdev,
2447                                                        VIRTIO_F_VERSION_1));
2448 
2449     /* MAC_TABLE_ENTRIES may be different from the saved image */
2450     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2451         n->mac_table.in_use = 0;
2452     }
2453 
2454     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2455         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2456     }
2457 
2458     /*
2459      * curr_guest_offloads will be later overwritten by the
2460      * virtio_set_features_nocheck call done from the virtio_load.
2461      * Here we make sure it is preserved and restored accordingly
2462      * in the virtio_net_post_load_virtio callback.
2463      */
2464     n->saved_guest_offloads = n->curr_guest_offloads;
2465 
2466     virtio_net_set_queues(n);
2467 
2468     /* Find the first multicast entry in the saved MAC filter */
2469     for (i = 0; i < n->mac_table.in_use; i++) {
2470         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2471             break;
2472         }
2473     }
2474     n->mac_table.first_multi = i;
2475 
2476     /* nc.link_down can't be migrated, so infer link_down according
2477      * to link status bit in n->status */
2478     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2479     for (i = 0; i < n->max_queues; i++) {
2480         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2481     }
2482 
2483     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2484         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2485         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2486                                   QEMU_CLOCK_VIRTUAL,
2487                                   virtio_net_announce_timer, n);
2488         if (n->announce_timer.round) {
2489             timer_mod(n->announce_timer.tm,
2490                       qemu_clock_get_ms(n->announce_timer.type));
2491         } else {
2492             qemu_announce_timer_del(&n->announce_timer, false);
2493         }
2494     }
2495 
2496     return 0;
2497 }
2498 
2499 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2500 {
2501     VirtIONet *n = VIRTIO_NET(vdev);
2502     /*
2503      * The actual needed state is now in saved_guest_offloads,
2504      * see virtio_net_post_load_device for detail.
2505      * Restore it back and apply the desired offloads.
2506      */
2507     n->curr_guest_offloads = n->saved_guest_offloads;
2508     if (peer_has_vnet_hdr(n)) {
2509         virtio_net_apply_guest_offloads(n);
2510     }
2511 
2512     return 0;
2513 }
2514 
/* tx_waiting field of a VirtIONetQueue */
/*
 * Migration description with a single 32-bit field, tx_waiting.  It is
 * referenced for the first queue by vmstate_virtio_net_device and for the
 * remaining queues by vmstate_virtio_net_tx_waiting.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2523 
2524 static bool max_queues_gt_1(void *opaque, int version_id)
2525 {
2526     return VIRTIO_NET(opaque)->max_queues > 1;
2527 }
2528 
2529 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2530 {
2531     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2532                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2533 }
2534 
2535 static bool mac_table_fits(void *opaque, int version_id)
2536 {
2537     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2538 }
2539 
/* Field test: logical inverse of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }

    return true;
}
2544 
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* Device the temporary belongs to; the hooks read it as tmp->parent */
    VirtIONet      *parent;
    /* parent->vqs + 1, set by virtio_net_tx_waiting_pre_save() */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queues - 1, or 0 when curr_queues is 0 */
    uint16_t        curr_queues_1;
    /* Copy of parent->has_ufo, filled in by virtio_net_ufo_pre_save() */
    uint8_t         has_ufo;
    /* Copy of parent->has_vnet_hdr, from virtio_net_vnet_pre_save() */
    uint32_t        has_vnet_hdr;
};
2555 
2556 /* The 2nd and subsequent tx_waiting flags are loaded later than
2557  * the 1st entry in the queues and only if there's more than one
2558  * entry.  We use the tmp mechanism to calculate a temporary
2559  * pointer and count and also validate the count.
2560  */
2561 
2562 static int virtio_net_tx_waiting_pre_save(void *opaque)
2563 {
2564     struct VirtIONetMigTmp *tmp = opaque;
2565 
2566     tmp->vqs_1 = tmp->parent->vqs + 1;
2567     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2568     if (tmp->parent->curr_queues == 0) {
2569         tmp->curr_queues_1 = 0;
2570     }
2571 
2572     return 0;
2573 }
2574 
2575 static int virtio_net_tx_waiting_pre_load(void *opaque)
2576 {
2577     struct VirtIONetMigTmp *tmp = opaque;
2578 
2579     /* Reuse the pointer setup from save */
2580     virtio_net_tx_waiting_pre_save(opaque);
2581 
2582     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2583         error_report("virtio-net: curr_queues %x > max_queues %x",
2584             tmp->parent->curr_queues, tmp->parent->max_queues);
2585 
2586         return -EINVAL;
2587     }
2588 
2589     return 0; /* all good */
2590 }
2591 
/*
 * Migration description for the tx_waiting flags of every queue after
 * the first.  It operates on a struct VirtIONetMigTmp whose vqs_1 pointer
 * and curr_queues_1 count are prepared by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        /* curr_queues_1 entries starting at vqs_1, one tx_waiting each */
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2604 
2605 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2606  * flag set we need to check that we have it
2607  */
2608 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2609 {
2610     struct VirtIONetMigTmp *tmp = opaque;
2611 
2612     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2613         error_report("virtio-net: saved image requires TUN_F_UFO support");
2614         return -EINVAL;
2615     }
2616 
2617     return 0;
2618 }
2619 
2620 static int virtio_net_ufo_pre_save(void *opaque)
2621 {
2622     struct VirtIONetMigTmp *tmp = opaque;
2623 
2624     tmp->has_ufo = tmp->parent->has_ufo;
2625 
2626     return 0;
2627 }
2628 
/*
 * Migration description carrying the has_ufo flag as one byte through a
 * struct VirtIONetMigTmp; the flag is validated in the post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2638 
2639 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2640  * flag set we need to check that we have it
2641  */
2642 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2643 {
2644     struct VirtIONetMigTmp *tmp = opaque;
2645 
2646     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2647         error_report("virtio-net: saved image requires vnet_hdr=on");
2648         return -EINVAL;
2649     }
2650 
2651     return 0;
2652 }
2653 
2654 static int virtio_net_vnet_pre_save(void *opaque)
2655 {
2656     struct VirtIONetMigTmp *tmp = opaque;
2657 
2658     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2659 
2660     return 0;
2661 }
2662 
/*
 * Migration description carrying the has_vnet_hdr flag as a 32-bit value
 * through a struct VirtIONetMigTmp; validated in the post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2672 
/*
 * Migration description of the virtio-net device state.  The entries in
 * .fields are listed in stream order; virtio_net_post_load_device() runs
 * once they have all been loaded.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue only; the rest follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         *  resets mac_table.in_use to 0 in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr is carried via the temporary and checked on load */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo is carried via the temporary and checked on load */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The queue counts are only present when max_queues_gt_1() holds */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        /* tx_waiting of the second and subsequent queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
};
2724 
2725 static NetClientInfo net_virtio_info = {
2726     .type = NET_CLIENT_DRIVER_NIC,
2727     .size = sizeof(NICState),
2728     .can_receive = virtio_net_can_receive,
2729     .receive = virtio_net_receive,
2730     .link_status_changed = virtio_net_set_link_status,
2731     .query_rx_filter = virtio_net_query_rxfilter,
2732     .announce = virtio_net_announce,
2733 };
2734 
2735 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2736 {
2737     VirtIONet *n = VIRTIO_NET(vdev);
2738     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2739     assert(n->vhost_started);
2740     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2741 }
2742 
2743 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2744                                            bool mask)
2745 {
2746     VirtIONet *n = VIRTIO_NET(vdev);
2747     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2748     assert(n->vhost_started);
2749     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2750                              vdev, idx, mask);
2751 }
2752 
2753 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2754 {
2755     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2756 
2757     n->config_size = virtio_feature_get_config_size(feature_sizes,
2758                                                     host_features);
2759 }
2760 
2761 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2762                                    const char *type)
2763 {
2764     /*
2765      * The name can be NULL, the netclient name will be type.x.
2766      */
2767     assert(type != NULL);
2768 
2769     g_free(n->netclient_name);
2770     g_free(n->netclient_type);
2771     n->netclient_name = g_strdup(name);
2772     n->netclient_type = g_strdup(type);
2773 }
2774 
2775 static bool failover_unplug_primary(VirtIONet *n)
2776 {
2777     HotplugHandler *hotplug_ctrl;
2778     PCIDevice *pci_dev;
2779     Error *err = NULL;
2780 
2781     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
2782     if (hotplug_ctrl) {
2783         pci_dev = PCI_DEVICE(n->primary_dev);
2784         pci_dev->partially_hotplugged = true;
2785         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
2786         if (err) {
2787             error_report_err(err);
2788             return false;
2789         }
2790     } else {
2791         return false;
2792     }
2793     return true;
2794 }
2795 
2796 static bool failover_replug_primary(VirtIONet *n, Error **errp)
2797 {
2798     Error *err = NULL;
2799     HotplugHandler *hotplug_ctrl;
2800     PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
2801 
2802     if (!pdev->partially_hotplugged) {
2803         return true;
2804     }
2805     if (!n->primary_device_opts) {
2806         n->primary_device_opts = qemu_opts_from_qdict(
2807                 qemu_find_opts("device"),
2808                 n->primary_device_dict, errp);
2809         if (!n->primary_device_opts) {
2810             return false;
2811         }
2812     }
2813     n->primary_bus = n->primary_dev->parent_bus;
2814     if (!n->primary_bus) {
2815         error_setg(errp, "virtio_net: couldn't find primary bus");
2816         return false;
2817     }
2818     qdev_set_parent_bus(n->primary_dev, n->primary_bus);
2819     n->primary_should_be_hidden = false;
2820     qemu_opt_set_bool(n->primary_device_opts,
2821                       "partially_hotplugged", true, &err);
2822     if (err) {
2823         goto out;
2824     }
2825     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
2826     if (hotplug_ctrl) {
2827         hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
2828         if (err) {
2829             goto out;
2830         }
2831         hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
2832     }
2833 
2834 out:
2835     error_propagate(errp, err);
2836     return !err;
2837 }
2838 
2839 static void virtio_net_handle_migration_primary(VirtIONet *n,
2840                                                 MigrationState *s)
2841 {
2842     bool should_be_hidden;
2843     Error *err = NULL;
2844 
2845     should_be_hidden = atomic_read(&n->primary_should_be_hidden);
2846 
2847     if (!n->primary_dev) {
2848         n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
2849         if (!n->primary_dev) {
2850             return;
2851         }
2852     }
2853 
2854     if (migration_in_setup(s) && !should_be_hidden) {
2855         if (failover_unplug_primary(n)) {
2856             vmstate_unregister(VMSTATE_IF(n->primary_dev),
2857                     qdev_get_vmsd(n->primary_dev),
2858                     n->primary_dev);
2859             qapi_event_send_unplug_primary(n->primary_device_id);
2860             atomic_set(&n->primary_should_be_hidden, true);
2861         } else {
2862             warn_report("couldn't unplug primary device");
2863         }
2864     } else if (migration_has_failed(s)) {
2865         /* We already unplugged the device let's plug it back */
2866         if (!failover_replug_primary(n, &err)) {
2867             if (err) {
2868                 error_report_err(err);
2869             }
2870         }
2871     }
2872 }
2873 
2874 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
2875 {
2876     MigrationState *s = data;
2877     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
2878     virtio_net_handle_migration_primary(n, s);
2879 }
2880 
2881 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
2882             QemuOpts *device_opts)
2883 {
2884     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
2885     bool match_found = false;
2886     bool hide = false;
2887 
2888     if (!device_opts) {
2889         return -1;
2890     }
2891     n->primary_device_dict = qemu_opts_to_qdict(device_opts,
2892             n->primary_device_dict);
2893     if (n->primary_device_dict) {
2894         g_free(n->standby_id);
2895         n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
2896                     "failover_pair_id"));
2897     }
2898     if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
2899         match_found = true;
2900     } else {
2901         match_found = false;
2902         hide = false;
2903         g_free(n->standby_id);
2904         n->primary_device_dict = NULL;
2905         goto out;
2906     }
2907 
2908     n->primary_device_opts = device_opts;
2909 
2910     /* primary_should_be_hidden is set during feature negotiation */
2911     hide = atomic_read(&n->primary_should_be_hidden);
2912 
2913     if (n->primary_device_dict) {
2914         g_free(n->primary_device_id);
2915         n->primary_device_id = g_strdup(qdict_get_try_str(
2916                     n->primary_device_dict, "id"));
2917         if (!n->primary_device_id) {
2918             warn_report("primary_device_id not set");
2919         }
2920     }
2921 
2922 out:
2923     if (match_found && hide) {
2924         return 1;
2925     } else if (match_found && !hide) {
2926         return 0;
2927     } else {
2928         return -1;
2929     }
2930 }
2931 
2932 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2933 {
2934     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2935     VirtIONet *n = VIRTIO_NET(dev);
2936     NetClientState *nc;
2937     int i;
2938 
2939     if (n->net_conf.mtu) {
2940         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2941     }
2942 
2943     if (n->net_conf.duplex_str) {
2944         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2945             n->net_conf.duplex = DUPLEX_HALF;
2946         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2947             n->net_conf.duplex = DUPLEX_FULL;
2948         } else {
2949             error_setg(errp, "'duplex' must be 'half' or 'full'");
2950             return;
2951         }
2952         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2953     } else {
2954         n->net_conf.duplex = DUPLEX_UNKNOWN;
2955     }
2956 
2957     if (n->net_conf.speed < SPEED_UNKNOWN) {
2958         error_setg(errp, "'speed' must be between 0 and INT_MAX");
2959         return;
2960     }
2961     if (n->net_conf.speed >= 0) {
2962         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2963     }
2964 
2965     if (n->failover) {
2966         n->primary_listener.should_be_hidden =
2967             virtio_net_primary_should_be_hidden;
2968         atomic_set(&n->primary_should_be_hidden, true);
2969         device_listener_register(&n->primary_listener);
2970         n->migration_state.notify = virtio_net_migration_state_notifier;
2971         add_migration_state_change_notifier(&n->migration_state);
2972         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
2973     }
2974 
2975     virtio_net_set_config_size(n, n->host_features);
2976     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2977 
2978     /*
2979      * We set a lower limit on RX queue size to what it always was.
2980      * Guests that want a smaller ring can always resize it without
2981      * help from us (using virtio 1 and up).
2982      */
2983     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2984         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2985         !is_power_of_2(n->net_conf.rx_queue_size)) {
2986         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2987                    "must be a power of 2 between %d and %d.",
2988                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2989                    VIRTQUEUE_MAX_SIZE);
2990         virtio_cleanup(vdev);
2991         return;
2992     }
2993 
2994     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2995         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2996         !is_power_of_2(n->net_conf.tx_queue_size)) {
2997         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2998                    "must be a power of 2 between %d and %d",
2999                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3000                    VIRTQUEUE_MAX_SIZE);
3001         virtio_cleanup(vdev);
3002         return;
3003     }
3004 
3005     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3006     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3007         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3008                    "must be a positive integer less than %d.",
3009                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3010         virtio_cleanup(vdev);
3011         return;
3012     }
3013     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3014     n->curr_queues = 1;
3015     n->tx_timeout = n->net_conf.txtimer;
3016 
3017     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3018                        && strcmp(n->net_conf.tx, "bh")) {
3019         warn_report("virtio-net: "
3020                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3021                     n->net_conf.tx);
3022         error_printf("Defaulting to \"bh\"");
3023     }
3024 
3025     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3026                                     n->net_conf.tx_queue_size);
3027 
3028     for (i = 0; i < n->max_queues; i++) {
3029         virtio_net_add_queue(n, i);
3030     }
3031 
3032     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3033     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3034     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3035     n->status = VIRTIO_NET_S_LINK_UP;
3036     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3037                               QEMU_CLOCK_VIRTUAL,
3038                               virtio_net_announce_timer, n);
3039     n->announce_timer.round = 0;
3040 
3041     if (n->netclient_type) {
3042         /*
3043          * Happen when virtio_net_set_netclient_name has been called.
3044          */
3045         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3046                               n->netclient_type, n->netclient_name, n);
3047     } else {
3048         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3049                               object_get_typename(OBJECT(dev)), dev->id, n);
3050     }
3051 
3052     peer_test_vnet_hdr(n);
3053     if (peer_has_vnet_hdr(n)) {
3054         for (i = 0; i < n->max_queues; i++) {
3055             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3056         }
3057         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3058     } else {
3059         n->host_hdr_len = 0;
3060     }
3061 
3062     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3063 
3064     n->vqs[0].tx_waiting = 0;
3065     n->tx_burst = n->net_conf.txburst;
3066     virtio_net_set_mrg_rx_bufs(n, 0, 0);
3067     n->promisc = 1; /* for compatibility */
3068 
3069     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3070 
3071     n->vlans = g_malloc0(MAX_VLAN >> 3);
3072 
3073     nc = qemu_get_queue(n->nic);
3074     nc->rxfilter_notify_enabled = 1;
3075 
3076     QTAILQ_INIT(&n->rsc_chains);
3077     n->qdev = dev;
3078 }
3079 
3080 static void virtio_net_device_unrealize(DeviceState *dev)
3081 {
3082     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3083     VirtIONet *n = VIRTIO_NET(dev);
3084     int i, max_queues;
3085 
3086     /* This will stop vhost backend if appropriate. */
3087     virtio_net_set_status(vdev, 0);
3088 
3089     g_free(n->netclient_name);
3090     n->netclient_name = NULL;
3091     g_free(n->netclient_type);
3092     n->netclient_type = NULL;
3093 
3094     g_free(n->mac_table.macs);
3095     g_free(n->vlans);
3096 
3097     if (n->failover) {
3098         g_free(n->primary_device_id);
3099         g_free(n->standby_id);
3100         qobject_unref(n->primary_device_dict);
3101         n->primary_device_dict = NULL;
3102     }
3103 
3104     max_queues = n->multiqueue ? n->max_queues : 1;
3105     for (i = 0; i < max_queues; i++) {
3106         virtio_net_del_queue(n, i);
3107     }
3108     /* delete also control vq */
3109     virtio_del_queue(vdev, max_queues * 2);
3110     qemu_announce_timer_del(&n->announce_timer, false);
3111     g_free(n->vqs);
3112     qemu_del_nic(n->nic);
3113     virtio_net_rsc_cleanup(n);
3114     virtio_cleanup(vdev);
3115 }
3116 
3117 static void virtio_net_instance_init(Object *obj)
3118 {
3119     VirtIONet *n = VIRTIO_NET(obj);
3120 
3121     /*
3122      * The default config_size is sizeof(struct virtio_net_config).
3123      * Can be overriden with virtio_net_set_config_size.
3124      */
3125     n->config_size = sizeof(struct virtio_net_config);
3126     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3127                                   "bootindex", "/ethernet-phy@0",
3128                                   DEVICE(n));
3129 }
3130 
3131 static int virtio_net_pre_save(void *opaque)
3132 {
3133     VirtIONet *n = opaque;
3134 
3135     /* At this point, backend must be stopped, otherwise
3136      * it might keep writing to memory. */
3137     assert(!n->vhost_started);
3138 
3139     return 0;
3140 }
3141 
3142 static bool primary_unplug_pending(void *opaque)
3143 {
3144     DeviceState *dev = opaque;
3145     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3146     VirtIONet *n = VIRTIO_NET(vdev);
3147 
3148     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3149         return false;
3150     }
3151     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3152 }
3153 
3154 static bool dev_unplug_pending(void *opaque)
3155 {
3156     DeviceState *dev = opaque;
3157     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3158 
3159     return vdc->primary_unplug_pending(dev);
3160 }
3161 
3162 static const VMStateDescription vmstate_virtio_net = {
3163     .name = "virtio-net",
3164     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3165     .version_id = VIRTIO_NET_VM_VERSION,
3166     .fields = (VMStateField[]) {
3167         VMSTATE_VIRTIO_DEVICE,
3168         VMSTATE_END_OF_LIST()
3169     },
3170     .pre_save = virtio_net_pre_save,
3171     .dev_unplug_pending = dev_unplug_pending,
3172 };
3173 
3174 static Property virtio_net_properties[] = {
3175     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3176                     VIRTIO_NET_F_CSUM, true),
3177     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3178                     VIRTIO_NET_F_GUEST_CSUM, true),
3179     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3180     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3181                     VIRTIO_NET_F_GUEST_TSO4, true),
3182     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3183                     VIRTIO_NET_F_GUEST_TSO6, true),
3184     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3185                     VIRTIO_NET_F_GUEST_ECN, true),
3186     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3187                     VIRTIO_NET_F_GUEST_UFO, true),
3188     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3189                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3190     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3191                     VIRTIO_NET_F_HOST_TSO4, true),
3192     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3193                     VIRTIO_NET_F_HOST_TSO6, true),
3194     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3195                     VIRTIO_NET_F_HOST_ECN, true),
3196     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3197                     VIRTIO_NET_F_HOST_UFO, true),
3198     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3199                     VIRTIO_NET_F_MRG_RXBUF, true),
3200     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3201                     VIRTIO_NET_F_STATUS, true),
3202     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3203                     VIRTIO_NET_F_CTRL_VQ, true),
3204     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3205                     VIRTIO_NET_F_CTRL_RX, true),
3206     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3207                     VIRTIO_NET_F_CTRL_VLAN, true),
3208     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3209                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3210     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3211                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3212     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3213                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3214     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3215     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3216                     VIRTIO_NET_F_RSC_EXT, false),
3217     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3218                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3219     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3220     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3221                        TX_TIMER_INTERVAL),
3222     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3223     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3224     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3225                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3226     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3227                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3228     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3229     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3230                      true),
3231     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3232     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3233     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3234     DEFINE_PROP_END_OF_LIST(),
3235 };
3236 
3237 static void virtio_net_class_init(ObjectClass *klass, void *data)
3238 {
3239     DeviceClass *dc = DEVICE_CLASS(klass);
3240     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3241 
3242     device_class_set_props(dc, virtio_net_properties);
3243     dc->vmsd = &vmstate_virtio_net;
3244     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3245     vdc->realize = virtio_net_device_realize;
3246     vdc->unrealize = virtio_net_device_unrealize;
3247     vdc->get_config = virtio_net_get_config;
3248     vdc->set_config = virtio_net_set_config;
3249     vdc->get_features = virtio_net_get_features;
3250     vdc->set_features = virtio_net_set_features;
3251     vdc->bad_features = virtio_net_bad_features;
3252     vdc->reset = virtio_net_reset;
3253     vdc->set_status = virtio_net_set_status;
3254     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3255     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3256     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3257     vdc->post_load = virtio_net_post_load_virtio;
3258     vdc->vmsd = &vmstate_virtio_net_device;
3259     vdc->primary_unplug_pending = primary_unplug_pending;
3260 }
3261 
3262 static const TypeInfo virtio_net_info = {
3263     .name = TYPE_VIRTIO_NET,
3264     .parent = TYPE_VIRTIO_DEVICE,
3265     .instance_size = sizeof(VirtIONet),
3266     .instance_init = virtio_net_instance_init,
3267     .class_init = virtio_net_class_init,
3268 };
3269 
3270 static void virtio_register_types(void)
3271 {
3272     type_register_static(&virtio_net_info);
3273 }
3274 
3275 type_init(virtio_register_types)
3276