xref: /openbmc/qemu/hw/net/virtio-net.c (revision 0221d73c)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 
46 #define VIRTIO_NET_VM_VERSION    11
47 
48 #define MAC_TABLE_ENTRIES    64
49 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
50 
51 /* previously fixed value */
52 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
53 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
54 
55 /* for now, only allow larger queues; with virtio-1, guest can downsize */
56 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
57 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
58 
59 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
60 
61 #define VIRTIO_NET_TCP_FLAG         0x3F
62 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
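/*
 * For illustration: both masks select fields of the 16-bit th_offset_flags
 * word in struct tcp_header. 0x3F covers the six classic TCP flags
 * (FIN/SYN/RST/PSH/ACK/URG) in the low bits; 0xF000 covers the 4-bit data
 * offset in the high bits, counted in 32-bit words, which is why the
 * extract_unit4/6 helpers below shift the masked value right by 10
 * (>> 12 to get words, << 2 to get bytes).
 */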
63 
64 /* IPv4 max payload, 16 bits in the header */
65 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
66 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
67 
68 /* header length value in the ip header, without options */
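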
69 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
70 
71 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
72 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
73 
74 /* Purge coalesced packets timer interval. This value affects performance
75    significantly and should be tuned carefully: '300000' (300us) is the
76    recommended value for passing the WHQL test, while '50000' can double
77    netperf throughput with tso/gso/gro 'off'. */
78 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
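/*
 * The interval is in nanoseconds (the drain timer below runs on
 * QEMU_CLOCK_HOST). In virtio-net it is exposed as the "rsc_interval"
 * device property; a usage sketch, assuming an RSC-capable build:
 *
 *     -device virtio-net-pci,guest_rsc_ext=on,rsc_interval=300000
 */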
79 
80 /* temporary until the standard headers include it */
81 #if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
82 
83 #define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
84 #define VIRTIO_NET_F_RSC_EXT       61
85 
86 static inline __virtio16 *virtio_net_rsc_ext_num_packets(
87     struct virtio_net_hdr *hdr)
88 {
89     return &hdr->csum_start;
90 }
91 
92 static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
93     struct virtio_net_hdr *hdr)
94 {
95     return &hdr->csum_offset;
96 }
97 
98 #endif
99 
100 static VirtIOFeature feature_sizes[] = {
101     {.flags = 1ULL << VIRTIO_NET_F_MAC,
102      .end = endof(struct virtio_net_config, mac)},
103     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
104      .end = endof(struct virtio_net_config, status)},
105     {.flags = 1ULL << VIRTIO_NET_F_MQ,
106      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
107     {.flags = 1ULL << VIRTIO_NET_F_MTU,
108      .end = endof(struct virtio_net_config, mtu)},
109     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
110      .end = endof(struct virtio_net_config, duplex)},
111     {}
112 };
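/*
 * A sketch of how this table is consumed: the device's config space is
 * sized to cover every field whose feature bit is offered, roughly
 *
 *     n->config_size = virtio_feature_get_config_size(feature_sizes,
 *                                                     host_features);
 *
 * (the actual call site lives later in this file, in the config-size
 * setup done at realize time).
 */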
113 
114 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
115 {
116     VirtIONet *n = qemu_get_nic_opaque(nc);
117 
118     return &n->vqs[nc->queue_index];
119 }
120 
121 static int vq2q(int queue_index)
122 {
123     return queue_index / 2;
124 }
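/*
 * Virtqueues are laid out in rx/tx pairs (rx = even index, tx = odd),
 * so e.g. vq2q(0) == vq2q(1) == 0 and vq2q(2) == vq2q(3) == 1; the
 * control virtqueue is handled separately.
 */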
125 
126 /* TODO
127  * - we could suppress RX interrupt if we were so inclined.
128  */
129 
130 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
131 {
132     VirtIONet *n = VIRTIO_NET(vdev);
133     struct virtio_net_config netcfg;
134 
135     virtio_stw_p(vdev, &netcfg.status, n->status);
136     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
137     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
138     memcpy(netcfg.mac, n->mac, ETH_ALEN);
139     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
140     netcfg.duplex = n->net_conf.duplex;
141     memcpy(config, &netcfg, n->config_size);
142 }
143 
144 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
145 {
146     VirtIONet *n = VIRTIO_NET(vdev);
147     struct virtio_net_config netcfg = {};
148 
149     memcpy(&netcfg, config, n->config_size);
150 
151     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
152         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
153         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
154         memcpy(n->mac, netcfg.mac, ETH_ALEN);
155         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
156     }
157 }
158 
159 static bool virtio_net_started(VirtIONet *n, uint8_t status)
160 {
161     VirtIODevice *vdev = VIRTIO_DEVICE(n);
162     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
163         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
164 }
165 
166 static void virtio_net_announce_notify(VirtIONet *net)
167 {
168     VirtIODevice *vdev = VIRTIO_DEVICE(net);
169     trace_virtio_net_announce_notify();
170 
171     net->status |= VIRTIO_NET_S_ANNOUNCE;
172     virtio_notify_config(vdev);
173 }
174 
175 static void virtio_net_announce_timer(void *opaque)
176 {
177     VirtIONet *n = opaque;
178     trace_virtio_net_announce_timer(n->announce_timer.round);
179 
180     n->announce_timer.round--;
181     virtio_net_announce_notify(n);
182 }
183 
184 static void virtio_net_announce(NetClientState *nc)
185 {
186     VirtIONet *n = qemu_get_nic_opaque(nc);
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188 
189     /*
190      * Make sure the virtio migration announcement timer isn't running.
191      * If it is, let it trigger the announcement instead, so that we do
192      * not cause confusion.
193      */
194     if (n->announce_timer.round) {
195         return;
196     }
197 
198     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
199         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
200             virtio_net_announce_notify(n);
201     }
202 }
203 
204 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
205 {
206     VirtIODevice *vdev = VIRTIO_DEVICE(n);
207     NetClientState *nc = qemu_get_queue(n->nic);
208     int queues = n->multiqueue ? n->max_queues : 1;
209 
210     if (!get_vhost_net(nc->peer)) {
211         return;
212     }
213 
214     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
215         !!n->vhost_started) {
216         return;
217     }
218     if (!n->vhost_started) {
219         int r, i;
220 
221         if (n->needs_vnet_hdr_swap) {
222             error_report("backend does not support %s vnet headers; "
223                          "falling back on userspace virtio",
224                          virtio_is_big_endian(vdev) ? "BE" : "LE");
225             return;
226         }
227 
228         /* Any packets outstanding? Purge them to avoid touching rings
229          * when vhost is running.
230          */
231         for (i = 0;  i < queues; i++) {
232             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
233 
234             /* Purge both directions: TX and RX. */
235             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
236             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
237         }
238 
239         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
240             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
241             if (r < 0) {
242                 error_report("%u byte MTU not supported by the backend",
243                              n->net_conf.mtu);
244 
245                 return;
246             }
247         }
248 
249         n->vhost_started = 1;
250         r = vhost_net_start(vdev, n->nic->ncs, queues);
251         if (r < 0) {
252             error_report("unable to start vhost net: %d: "
253                          "falling back on userspace virtio", -r);
254             n->vhost_started = 0;
255         }
256     } else {
257         vhost_net_stop(vdev, n->nic->ncs, queues);
258         n->vhost_started = 0;
259     }
260 }
261 
262 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
263                                           NetClientState *peer,
264                                           bool enable)
265 {
266     if (virtio_is_big_endian(vdev)) {
267         return qemu_set_vnet_be(peer, enable);
268     } else {
269         return qemu_set_vnet_le(peer, enable);
270     }
271 }
272 
273 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
274                                        int queues, bool enable)
275 {
276     int i;
277 
278     for (i = 0; i < queues; i++) {
279         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
280             enable) {
281             while (--i >= 0) {
282                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
283             }
284 
285             return true;
286         }
287     }
288 
289     return false;
290 }
291 
292 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
293 {
294     VirtIODevice *vdev = VIRTIO_DEVICE(n);
295     int queues = n->multiqueue ? n->max_queues : 1;
296 
297     if (virtio_net_started(n, status)) {
298         /* Before using the device, we tell the network backend about the
299          * endianness to use when parsing vnet headers. If the backend
300          * can't do it, we fall back on fixing the headers in the core
301          * virtio-net code.
302          */
303         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
304                                                             queues, true);
305     } else if (virtio_net_started(n, vdev->status)) {
306         /* After using the device, we need to reset the network backend to
307          * the default (guest native endianness), otherwise the guest may
308          * lose network connectivity if it is rebooted into a different
309          * endianness.
310          */
311         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
312     }
313 }
314 
315 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
316 {
317     unsigned int dropped = virtqueue_drop_all(vq);
318     if (dropped) {
319         virtio_notify(vdev, vq);
320     }
321 }
322 
323 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
324 {
325     VirtIONet *n = VIRTIO_NET(vdev);
326     VirtIONetQueue *q;
327     int i;
328     uint8_t queue_status;
329 
330     virtio_net_vnet_endian_status(n, status);
331     virtio_net_vhost_status(n, status);
332 
333     for (i = 0; i < n->max_queues; i++) {
334         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
335         bool queue_started;
336         q = &n->vqs[i];
337 
338         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
339             queue_status = 0;
340         } else {
341             queue_status = status;
342         }
343         queue_started =
344             virtio_net_started(n, queue_status) && !n->vhost_started;
345 
346         if (queue_started) {
347             qemu_flush_queued_packets(ncs);
348         }
349 
350         if (!q->tx_waiting) {
351             continue;
352         }
353 
354         if (queue_started) {
355             if (q->tx_timer) {
356                 timer_mod(q->tx_timer,
357                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
358             } else {
359                 qemu_bh_schedule(q->tx_bh);
360             }
361         } else {
362             if (q->tx_timer) {
363                 timer_del(q->tx_timer);
364             } else {
365                 qemu_bh_cancel(q->tx_bh);
366             }
367             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
368                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
369                 vdev->vm_running) {
370                 /* If tx is waiting, we likely have some packets in the tx
371                  * queue and notification is disabled */
372                 q->tx_waiting = 0;
373                 virtio_queue_set_notification(q->tx_vq, 1);
374                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
375             }
376         }
377     }
378 }
379 
380 static void virtio_net_set_link_status(NetClientState *nc)
381 {
382     VirtIONet *n = qemu_get_nic_opaque(nc);
383     VirtIODevice *vdev = VIRTIO_DEVICE(n);
384     uint16_t old_status = n->status;
385 
386     if (nc->link_down)
387         n->status &= ~VIRTIO_NET_S_LINK_UP;
388     else
389         n->status |= VIRTIO_NET_S_LINK_UP;
390 
391     if (n->status != old_status)
392         virtio_notify_config(vdev);
393 
394     virtio_net_set_status(vdev, vdev->status);
395 }
396 
397 static void rxfilter_notify(NetClientState *nc)
398 {
399     VirtIONet *n = qemu_get_nic_opaque(nc);
400 
401     if (nc->rxfilter_notify_enabled) {
402         gchar *path = object_get_canonical_path(OBJECT(n->qdev));
403         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
404                                               n->netclient_name, path);
405         g_free(path);
406 
407         /* disable event notification to avoid event flooding */
408         nc->rxfilter_notify_enabled = 0;
409     }
410 }
411 
412 static intList *get_vlan_table(VirtIONet *n)
413 {
414     intList *list, *entry;
415     int i, j;
416 
417     list = NULL;
418     for (i = 0; i < MAX_VLAN >> 5; i++) {
419         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
420             if (n->vlans[i] & (1U << j)) {
421                 entry = g_malloc0(sizeof(*entry));
422                 entry->value = (i << 5) + j;
423                 entry->next = list;
424                 list = entry;
425             }
426         }
427     }
428 
429     return list;
430 }
431 
432 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
433 {
434     VirtIONet *n = qemu_get_nic_opaque(nc);
435     VirtIODevice *vdev = VIRTIO_DEVICE(n);
436     RxFilterInfo *info;
437     strList *str_list, *entry;
438     int i;
439 
440     info = g_malloc0(sizeof(*info));
441     info->name = g_strdup(nc->name);
442     info->promiscuous = n->promisc;
443 
444     if (n->nouni) {
445         info->unicast = RX_STATE_NONE;
446     } else if (n->alluni) {
447         info->unicast = RX_STATE_ALL;
448     } else {
449         info->unicast = RX_STATE_NORMAL;
450     }
451 
452     if (n->nomulti) {
453         info->multicast = RX_STATE_NONE;
454     } else if (n->allmulti) {
455         info->multicast = RX_STATE_ALL;
456     } else {
457         info->multicast = RX_STATE_NORMAL;
458     }
459 
460     info->broadcast_allowed = !n->nobcast;
461     info->multicast_overflow = n->mac_table.multi_overflow;
462     info->unicast_overflow = n->mac_table.uni_overflow;
463 
464     info->main_mac = qemu_mac_strdup_printf(n->mac);
465 
466     str_list = NULL;
467     for (i = 0; i < n->mac_table.first_multi; i++) {
468         entry = g_malloc0(sizeof(*entry));
469         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
470         entry->next = str_list;
471         str_list = entry;
472     }
473     info->unicast_table = str_list;
474 
475     str_list = NULL;
476     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
477         entry = g_malloc0(sizeof(*entry));
478         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
479         entry->next = str_list;
480         str_list = entry;
481     }
482     info->multicast_table = str_list;
483     info->vlan_table = get_vlan_table(n);
484 
485     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
486         info->vlan = RX_STATE_ALL;
487     } else if (!info->vlan_table) {
488         info->vlan = RX_STATE_NONE;
489     } else {
490         info->vlan = RX_STATE_NORMAL;
491     }
492 
493     /* enable event notification after query */
494     nc->rxfilter_notify_enabled = 1;
495 
496     return info;
497 }
498 
499 static void virtio_net_reset(VirtIODevice *vdev)
500 {
501     VirtIONet *n = VIRTIO_NET(vdev);
502     int i;
503 
504     /* Reset back to compatibility mode */
505     n->promisc = 1;
506     n->allmulti = 0;
507     n->alluni = 0;
508     n->nomulti = 0;
509     n->nouni = 0;
510     n->nobcast = 0;
511     /* multiqueue is disabled by default */
512     n->curr_queues = 1;
513     timer_del(n->announce_timer.tm);
514     n->announce_timer.round = 0;
515     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
516 
517     /* Flush any MAC and VLAN filter table state */
518     n->mac_table.in_use = 0;
519     n->mac_table.first_multi = 0;
520     n->mac_table.multi_overflow = 0;
521     n->mac_table.uni_overflow = 0;
522     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
523     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
524     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
525     memset(n->vlans, 0, MAX_VLAN >> 3);
526 
527     /* Flush any async TX */
528     for (i = 0;  i < n->max_queues; i++) {
529         NetClientState *nc = qemu_get_subqueue(n->nic, i);
530 
531         if (nc->peer) {
532             qemu_flush_or_purge_queued_packets(nc->peer, true);
533             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
534         }
535     }
536 }
537 
538 static void peer_test_vnet_hdr(VirtIONet *n)
539 {
540     NetClientState *nc = qemu_get_queue(n->nic);
541     if (!nc->peer) {
542         return;
543     }
544 
545     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
546 }
547 
548 static int peer_has_vnet_hdr(VirtIONet *n)
549 {
550     return n->has_vnet_hdr;
551 }
552 
553 static int peer_has_ufo(VirtIONet *n)
554 {
555     if (!peer_has_vnet_hdr(n))
556         return 0;
557 
558     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
559 
560     return n->has_ufo;
561 }
562 
563 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
564                                        int version_1)
565 {
566     int i;
567     NetClientState *nc;
568 
569     n->mergeable_rx_bufs = mergeable_rx_bufs;
570 
571     if (version_1) {
572         n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
573     } else {
574         n->guest_hdr_len = n->mergeable_rx_bufs ?
575             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
576             sizeof(struct virtio_net_hdr);
577     }
578 
579     for (i = 0; i < n->max_queues; i++) {
580         nc = qemu_get_subqueue(n->nic, i);
581 
582         if (peer_has_vnet_hdr(n) &&
583             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
584             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
585             n->host_hdr_len = n->guest_hdr_len;
586         }
587     }
588 }
589 
590 static int virtio_net_max_tx_queue_size(VirtIONet *n)
591 {
592     NetClientState *peer = n->nic_conf.peers.ncs[0];
593 
594     /*
595      * Backends other than vhost-user don't support a non-default tx queue size.
596      */
597     if (!peer) {
598         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
599     }
600 
601     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
602         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
603     }
604 
605     return VIRTQUEUE_MAX_SIZE;
606 }
607 
608 static int peer_attach(VirtIONet *n, int index)
609 {
610     NetClientState *nc = qemu_get_subqueue(n->nic, index);
611 
612     if (!nc->peer) {
613         return 0;
614     }
615 
616     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
617         vhost_set_vring_enable(nc->peer, 1);
618     }
619 
620     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
621         return 0;
622     }
623 
624     if (n->max_queues == 1) {
625         return 0;
626     }
627 
628     return tap_enable(nc->peer);
629 }
630 
631 static int peer_detach(VirtIONet *n, int index)
632 {
633     NetClientState *nc = qemu_get_subqueue(n->nic, index);
634 
635     if (!nc->peer) {
636         return 0;
637     }
638 
639     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
640         vhost_set_vring_enable(nc->peer, 0);
641     }
642 
643     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
644         return 0;
645     }
646 
647     return tap_disable(nc->peer);
648 }
649 
650 static void virtio_net_set_queues(VirtIONet *n)
651 {
652     int i;
653     int r;
654 
655     if (n->nic->peer_deleted) {
656         return;
657     }
658 
659     for (i = 0; i < n->max_queues; i++) {
660         if (i < n->curr_queues) {
661             r = peer_attach(n, i);
662             assert(!r);
663         } else {
664             r = peer_detach(n, i);
665             assert(!r);
666         }
667     }
668 }
669 
670 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
671 
672 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
673                                         Error **errp)
674 {
675     VirtIONet *n = VIRTIO_NET(vdev);
676     NetClientState *nc = qemu_get_queue(n->nic);
677 
678     /* First, sync all features that virtio-net could possibly support */
679     features |= n->host_features;
680 
681     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
682 
683     if (!peer_has_vnet_hdr(n)) {
684         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
685         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
686         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
687         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
688 
689         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
690         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
691         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
692         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
693     }
694 
695     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
696         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
697         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
698     }
699 
700     if (!get_vhost_net(nc->peer)) {
701         return features;
702     }
703 
704     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
705     vdev->backend_features = features;
706 
707     if (n->mtu_bypass_backend &&
708             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
709         features |= (1ULL << VIRTIO_NET_F_MTU);
710     }
711 
712     return features;
713 }
714 
715 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
716 {
717     uint64_t features = 0;
718 
719     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
720      * but also these: */
721     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
722     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
723     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
724     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
725     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
726 
727     return features;
728 }
729 
730 static void virtio_net_apply_guest_offloads(VirtIONet *n)
731 {
732     qemu_set_offload(qemu_get_queue(n->nic)->peer,
733             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
734             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
735             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
736             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
737             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
738 }
739 
740 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
741 {
742     static const uint64_t guest_offloads_mask =
743         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
744         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
745         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
746         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
747         (1ULL << VIRTIO_NET_F_GUEST_UFO);
748 
749     return guest_offloads_mask & features;
750 }
751 
752 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
753 {
754     VirtIODevice *vdev = VIRTIO_DEVICE(n);
755     return virtio_net_guest_offloads_by_features(vdev->guest_features);
756 }
757 
758 static void failover_add_primary(VirtIONet *n, Error **errp)
759 {
760     Error *err = NULL;
761 
762     n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
763             n->primary_device_id);
764     if (n->primary_device_opts) {
765         n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
766         if (err) {
767             qemu_opts_del(n->primary_device_opts);
768         }
769         if (n->primary_dev) {
770             n->primary_bus = n->primary_dev->parent_bus;
771             if (err) {
772                 qdev_unplug(n->primary_dev, &err);
773                 qdev_set_id(n->primary_dev, "");
774 
775             }
776         }
777     } else {
778         error_setg(errp, "Primary device not found");
779         error_append_hint(errp, "Virtio-net failover will not work. Make "
780             "sure the primary device has the parameter"
781             " failover_pair_id=<virtio-net-id>\n");
782     }
783     if (err) {
784         error_propagate(errp, err);
785     }
786 }
787 
788 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
789 {
790     VirtIONet *n = opaque;
791     int ret = 0;
792 
793     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
794 
795     if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
796         n->primary_device_id = g_strdup(opts->id);
797         ret = 1;
798     }
799 
800     return ret;
801 }
802 
803 static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
804 {
805     DeviceState *dev = NULL;
806     Error *err = NULL;
807 
808     if (qemu_opts_foreach(qemu_find_opts("device"),
809                          is_my_primary, n, &err)) {
810         if (err) {
811             error_propagate(errp, err);
812             return NULL;
813         }
814         if (n->primary_device_id) {
815             dev = qdev_find_recursive(sysbus_get_default(),
816                     n->primary_device_id);
817         } else {
818             error_setg(errp, "Primary device id not found");
819             return NULL;
820         }
821     }
822     return dev;
823 }
824 
825 
826 
827 static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
828                                                     DeviceState *dev,
829                                                     Error **errp)
830 {
831     DeviceState *prim_dev = NULL;
832     Error *err = NULL;
833 
834     prim_dev = virtio_net_find_primary(n, &err);
835     if (prim_dev) {
836         n->primary_device_id = g_strdup(prim_dev->id);
837         n->primary_device_opts = prim_dev->opts;
838     } else {
839         if (err) {
840             error_propagate(errp, err);
841         }
842     }
843 
844     return prim_dev;
845 }
846 
847 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
848 {
849     VirtIONet *n = VIRTIO_NET(vdev);
850     Error *err = NULL;
851     int i;
852 
853     if (n->mtu_bypass_backend &&
854             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
855         features &= ~(1ULL << VIRTIO_NET_F_MTU);
856     }
857 
858     virtio_net_set_multiqueue(n,
859                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
860 
861     virtio_net_set_mrg_rx_bufs(n,
862                                virtio_has_feature(features,
863                                                   VIRTIO_NET_F_MRG_RXBUF),
864                                virtio_has_feature(features,
865                                                   VIRTIO_F_VERSION_1));
866 
867     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
868         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
869     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
870         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
871 
872     if (n->has_vnet_hdr) {
873         n->curr_guest_offloads =
874             virtio_net_guest_offloads_by_features(features);
875         virtio_net_apply_guest_offloads(n);
876     }
877 
878     for (i = 0;  i < n->max_queues; i++) {
879         NetClientState *nc = qemu_get_subqueue(n->nic, i);
880 
881         if (!get_vhost_net(nc->peer)) {
882             continue;
883         }
884         vhost_net_ack_features(get_vhost_net(nc->peer), features);
885     }
886 
887     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
888         memset(n->vlans, 0, MAX_VLAN >> 3);
889     } else {
890         memset(n->vlans, 0xff, MAX_VLAN >> 3);
891     }
892 
893     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
894         qapi_event_send_failover_negotiated(n->netclient_name);
895         atomic_set(&n->primary_should_be_hidden, false);
896         failover_add_primary(n, &err);
897         if (err) {
898             n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
899             if (err) {
900                 goto out_err;
901             }
902             failover_add_primary(n, &err);
903             if (err) {
904                 goto out_err;
905             }
906         }
907     }
908     return;
909 
910 out_err:
911     if (err) {
912         warn_report_err(err);
913     }
914 }
915 
916 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
917                                      struct iovec *iov, unsigned int iov_cnt)
918 {
919     uint8_t on;
920     size_t s;
921     NetClientState *nc = qemu_get_queue(n->nic);
922 
923     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
924     if (s != sizeof(on)) {
925         return VIRTIO_NET_ERR;
926     }
927 
928     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
929         n->promisc = on;
930     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
931         n->allmulti = on;
932     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
933         n->alluni = on;
934     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
935         n->nomulti = on;
936     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
937         n->nouni = on;
938     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
939         n->nobcast = on;
940     } else {
941         return VIRTIO_NET_ERR;
942     }
943 
944     rxfilter_notify(nc);
945 
946     return VIRTIO_NET_OK;
947 }
948 
949 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
950                                      struct iovec *iov, unsigned int iov_cnt)
951 {
952     VirtIODevice *vdev = VIRTIO_DEVICE(n);
953     uint64_t offloads;
954     size_t s;
955 
956     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
957         return VIRTIO_NET_ERR;
958     }
959 
960     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
961     if (s != sizeof(offloads)) {
962         return VIRTIO_NET_ERR;
963     }
964 
965     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
966         uint64_t supported_offloads;
967 
968         offloads = virtio_ldq_p(vdev, &offloads);
969 
970         if (!n->has_vnet_hdr) {
971             return VIRTIO_NET_ERR;
972         }
973 
974         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
975             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
976         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
977             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
978         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
979 
980         supported_offloads = virtio_net_supported_guest_offloads(n);
981         if (offloads & ~supported_offloads) {
982             return VIRTIO_NET_ERR;
983         }
984 
985         n->curr_guest_offloads = offloads;
986         virtio_net_apply_guest_offloads(n);
987 
988         return VIRTIO_NET_OK;
989     } else {
990         return VIRTIO_NET_ERR;
991     }
992 }
993 
994 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
995                                  struct iovec *iov, unsigned int iov_cnt)
996 {
997     VirtIODevice *vdev = VIRTIO_DEVICE(n);
998     struct virtio_net_ctrl_mac mac_data;
999     size_t s;
1000     NetClientState *nc = qemu_get_queue(n->nic);
1001 
1002     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1003         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1004             return VIRTIO_NET_ERR;
1005         }
1006         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1007         assert(s == sizeof(n->mac));
1008         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1009         rxfilter_notify(nc);
1010 
1011         return VIRTIO_NET_OK;
1012     }
1013 
1014     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1015         return VIRTIO_NET_ERR;
1016     }
1017 
1018     int in_use = 0;
1019     int first_multi = 0;
1020     uint8_t uni_overflow = 0;
1021     uint8_t multi_overflow = 0;
1022     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1023 
1024     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1025                    sizeof(mac_data.entries));
1026     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1027     if (s != sizeof(mac_data.entries)) {
1028         goto error;
1029     }
1030     iov_discard_front(&iov, &iov_cnt, s);
1031 
1032     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1033         goto error;
1034     }
1035 
1036     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1037         s = iov_to_buf(iov, iov_cnt, 0, macs,
1038                        mac_data.entries * ETH_ALEN);
1039         if (s != mac_data.entries * ETH_ALEN) {
1040             goto error;
1041         }
1042         in_use += mac_data.entries;
1043     } else {
1044         uni_overflow = 1;
1045     }
1046 
1047     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1048 
1049     first_multi = in_use;
1050 
1051     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1052                    sizeof(mac_data.entries));
1053     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1054     if (s != sizeof(mac_data.entries)) {
1055         goto error;
1056     }
1057 
1058     iov_discard_front(&iov, &iov_cnt, s);
1059 
1060     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1061         goto error;
1062     }
1063 
1064     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1065         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1066                        mac_data.entries * ETH_ALEN);
1067         if (s != mac_data.entries * ETH_ALEN) {
1068             goto error;
1069         }
1070         in_use += mac_data.entries;
1071     } else {
1072         multi_overflow = 1;
1073     }
1074 
1075     n->mac_table.in_use = in_use;
1076     n->mac_table.first_multi = first_multi;
1077     n->mac_table.uni_overflow = uni_overflow;
1078     n->mac_table.multi_overflow = multi_overflow;
1079     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1080     g_free(macs);
1081     rxfilter_notify(nc);
1082 
1083     return VIRTIO_NET_OK;
1084 
1085 error:
1086     g_free(macs);
1087     return VIRTIO_NET_ERR;
1088 }
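/*
 * The layout parsed above is two back-to-back virtio_net_ctrl_mac blocks,
 * a unicast list followed by a multicast list, each being a 32-bit entry
 * count followed by count * ETH_ALEN addresses. Both lists land in the
 * single n->mac_table.macs array, with first_multi marking the boundary.
 */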
1089 
1090 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1091                                         struct iovec *iov, unsigned int iov_cnt)
1092 {
1093     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1094     uint16_t vid;
1095     size_t s;
1096     NetClientState *nc = qemu_get_queue(n->nic);
1097 
1098     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1099     vid = virtio_lduw_p(vdev, &vid);
1100     if (s != sizeof(vid)) {
1101         return VIRTIO_NET_ERR;
1102     }
1103 
1104     if (vid >= MAX_VLAN)
1105         return VIRTIO_NET_ERR;
1106 
1107     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1108         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1109     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1110         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1111     else
1112         return VIRTIO_NET_ERR;
1113 
1114     rxfilter_notify(nc);
1115 
1116     return VIRTIO_NET_OK;
1117 }
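/*
 * n->vlans is a bitmap of MAX_VLAN bits stored as 32-bit words: vid >> 5
 * selects the word and vid & 0x1f the bit within it. For example, vid 100
 * lives in word 3, bit 4.
 */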
1118 
1119 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1120                                       struct iovec *iov, unsigned int iov_cnt)
1121 {
1122     trace_virtio_net_handle_announce(n->announce_timer.round);
1123     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1124         n->status & VIRTIO_NET_S_ANNOUNCE) {
1125         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1126         if (n->announce_timer.round) {
1127             qemu_announce_timer_step(&n->announce_timer);
1128         }
1129         return VIRTIO_NET_OK;
1130     } else {
1131         return VIRTIO_NET_ERR;
1132     }
1133 }
1134 
1135 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1136                                 struct iovec *iov, unsigned int iov_cnt)
1137 {
1138     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1139     struct virtio_net_ctrl_mq mq;
1140     size_t s;
1141     uint16_t queues;
1142 
1143     s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1144     if (s != sizeof(mq)) {
1145         return VIRTIO_NET_ERR;
1146     }
1147 
1148     if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1149         return VIRTIO_NET_ERR;
1150     }
1151 
1152     queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1153 
1154     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1155         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1156         queues > n->max_queues ||
1157         !n->multiqueue) {
1158         return VIRTIO_NET_ERR;
1159     }
1160 
1161     n->curr_queues = queues;
1162     /* stop the backend before changing the number of queues to avoid handling a
1163      * disabled queue */
1164     virtio_net_set_status(vdev, vdev->status);
1165     virtio_net_set_queues(n);
1166 
1167     return VIRTIO_NET_OK;
1168 }
1169 
1170 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1171 {
1172     VirtIONet *n = VIRTIO_NET(vdev);
1173     struct virtio_net_ctrl_hdr ctrl;
1174     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1175     VirtQueueElement *elem;
1176     size_t s;
1177     struct iovec *iov, *iov2;
1178     unsigned int iov_cnt;
1179 
1180     for (;;) {
1181         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1182         if (!elem) {
1183             break;
1184         }
1185         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1186             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1187             virtio_error(vdev, "virtio-net ctrl missing headers");
1188             virtqueue_detach_element(vq, elem, 0);
1189             g_free(elem);
1190             break;
1191         }
1192 
1193         iov_cnt = elem->out_num;
1194         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1195         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1196         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1197         if (s != sizeof(ctrl)) {
1198             status = VIRTIO_NET_ERR;
1199         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1200             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1201         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1202             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1203         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1204             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1205         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1206             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1207         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1208             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1209         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1210             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1211         }
1212 
1213         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1214         assert(s == sizeof(status));
1215 
1216         virtqueue_push(vq, elem, sizeof(status));
1217         virtio_notify(vdev, vq);
1218         g_free(iov2);
1219         g_free(elem);
1220     }
1221 }
1222 
1223 /* RX */
1224 
1225 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1226 {
1227     VirtIONet *n = VIRTIO_NET(vdev);
1228     int queue_index = vq2q(virtio_get_queue_index(vq));
1229 
1230     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1231 }
1232 
1233 static int virtio_net_can_receive(NetClientState *nc)
1234 {
1235     VirtIONet *n = qemu_get_nic_opaque(nc);
1236     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1237     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1238 
1239     if (!vdev->vm_running) {
1240         return 0;
1241     }
1242 
1243     if (nc->queue_index >= n->curr_queues) {
1244         return 0;
1245     }
1246 
1247     if (!virtio_queue_ready(q->rx_vq) ||
1248         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1249         return 0;
1250     }
1251 
1252     return 1;
1253 }
1254 
1255 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1256 {
1257     VirtIONet *n = q->n;
1258     if (virtio_queue_empty(q->rx_vq) ||
1259         (n->mergeable_rx_bufs &&
1260          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1261         virtio_queue_set_notification(q->rx_vq, 1);
1262 
1263         /* To avoid a race condition where the guest has made some buffers
1264          * available after the above check but before notification was
1265          * enabled, check for available buffers again.
1266          */
1267         if (virtio_queue_empty(q->rx_vq) ||
1268             (n->mergeable_rx_bufs &&
1269              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1270             return 0;
1271         }
1272     }
1273 
1274     virtio_queue_set_notification(q->rx_vq, 0);
1275     return 1;
1276 }
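/*
 * This is the usual virtio "enable notification, then re-check" pattern:
 * without the second check, buffers made available between the first
 * check and virtio_queue_set_notification(..., 1) could go unnoticed
 * until the next guest kick.
 */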
1277 
1278 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1279 {
1280     virtio_tswap16s(vdev, &hdr->hdr_len);
1281     virtio_tswap16s(vdev, &hdr->gso_size);
1282     virtio_tswap16s(vdev, &hdr->csum_start);
1283     virtio_tswap16s(vdev, &hdr->csum_offset);
1284 }
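/*
 * Only the multi-byte fields of struct virtio_net_hdr need swapping;
 * flags and gso_type are single bytes. This path is taken when the
 * backend could not be switched to the guest's endianness, i.e. when
 * n->needs_vnet_hdr_swap was set in virtio_net_vnet_endian_status().
 */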
1285 
1286 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1287  * it never finds out that the packets don't have valid checksums.  This
1288  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1289  * fix this with Xen but it hasn't appeared in an upstream release of
1290  * dhclient yet.
1291  *
1292  * To avoid breaking existing guests, we catch udp packets and add
1293  * checksums.  This is terrible but it's better than hacking the guest
1294  * kernels.
1295  *
1296  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1297  * we should provide a mechanism to disable it to avoid polluting the host
1298  * cache.
1299  */
1300 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1301                                         uint8_t *buf, size_t size)
1302 {
1303     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1304         (size > 27 && size < 1500) && /* normal sized MTU */
1305         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1306         (buf[23] == 17) && /* ip.protocol == UDP */
1307         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1308         net_checksum_calculate(buf, size);
1309         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1310     }
1311 }
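/*
 * The magic offsets above assume an untagged Ethernet frame with no IP
 * options: bytes 12-13 are the ethertype, byte 23 the IP protocol field
 * (14-byte Ethernet header + 9), and bytes 34-35 the UDP source port
 * (14 + 20-byte IP header), which is 67 for a BOOTP/DHCP server.
 */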
1312 
1313 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1314                            const void *buf, size_t size)
1315 {
1316     if (n->has_vnet_hdr) {
1317         /* FIXME this cast is evil */
1318         void *wbuf = (void *)buf;
1319         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1320                                     size - n->host_hdr_len);
1321 
1322         if (n->needs_vnet_hdr_swap) {
1323             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1324         }
1325         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1326     } else {
1327         struct virtio_net_hdr hdr = {
1328             .flags = 0,
1329             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1330         };
1331         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1332     }
1333 }
1334 
1335 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1336 {
1337     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1338     static const uint8_t vlan[] = {0x81, 0x00};
1339     uint8_t *ptr = (uint8_t *)buf;
1340     int i;
1341 
1342     if (n->promisc)
1343         return 1;
1344 
1345     ptr += n->host_hdr_len;
1346 
1347     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1348         int vid = lduw_be_p(ptr + 14) & 0xfff;
1349         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1350             return 0;
1351     }
1352 
1353     if (ptr[0] & 1) { // multicast
1354         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1355             return !n->nobcast;
1356         } else if (n->nomulti) {
1357             return 0;
1358         } else if (n->allmulti || n->mac_table.multi_overflow) {
1359             return 1;
1360         }
1361 
1362         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1363             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1364                 return 1;
1365             }
1366         }
1367     } else { // unicast
1368         if (n->nouni) {
1369             return 0;
1370         } else if (n->alluni || n->mac_table.uni_overflow) {
1371             return 1;
1372         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1373             return 1;
1374         }
1375 
1376         for (i = 0; i < n->mac_table.first_multi; i++) {
1377             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1378                 return 1;
1379             }
1380         }
1381     }
1382 
1383     return 0;
1384 }
1385 
1386 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1387                                       size_t size)
1388 {
1389     VirtIONet *n = qemu_get_nic_opaque(nc);
1390     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1391     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1392     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1393     struct virtio_net_hdr_mrg_rxbuf mhdr;
1394     unsigned mhdr_cnt = 0;
1395     size_t offset, i, guest_offset;
1396 
1397     if (!virtio_net_can_receive(nc)) {
1398         return -1;
1399     }
1400 
1401     /* hdr_len refers to the header we supply to the guest */
1402     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1403         return 0;
1404     }
1405 
1406     if (!receive_filter(n, buf, size))
1407         return size;
1408 
1409     offset = i = 0;
1410 
1411     while (offset < size) {
1412         VirtQueueElement *elem;
1413         int len, total;
1414         const struct iovec *sg;
1415 
1416         total = 0;
1417 
1418         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1419         if (!elem) {
1420             if (i) {
1421                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1422                              "i %zd mergeable %d offset %zd, size %zd, "
1423                              "guest hdr len %zd, host hdr len %zd "
1424                              "guest features 0x%" PRIx64,
1425                              i, n->mergeable_rx_bufs, offset, size,
1426                              n->guest_hdr_len, n->host_hdr_len,
1427                              vdev->guest_features);
1428             }
1429             return -1;
1430         }
1431 
1432         if (elem->in_num < 1) {
1433             virtio_error(vdev,
1434                          "virtio-net receive queue contains no in buffers");
1435             virtqueue_detach_element(q->rx_vq, elem, 0);
1436             g_free(elem);
1437             return -1;
1438         }
1439 
1440         sg = elem->in_sg;
1441         if (i == 0) {
1442             assert(offset == 0);
1443             if (n->mergeable_rx_bufs) {
1444                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1445                                     sg, elem->in_num,
1446                                     offsetof(typeof(mhdr), num_buffers),
1447                                     sizeof(mhdr.num_buffers));
1448             }
1449 
1450             receive_header(n, sg, elem->in_num, buf, size);
1451             offset = n->host_hdr_len;
1452             total += n->guest_hdr_len;
1453             guest_offset = n->guest_hdr_len;
1454         } else {
1455             guest_offset = 0;
1456         }
1457 
1458         /* copy in packet.  ugh */
1459         len = iov_from_buf(sg, elem->in_num, guest_offset,
1460                            buf + offset, size - offset);
1461         total += len;
1462         offset += len;
1463         /* If buffers can't be merged, at this point we
1464          * must have consumed the complete packet.
1465          * Otherwise, drop it. */
1466         if (!n->mergeable_rx_bufs && offset < size) {
1467             virtqueue_unpop(q->rx_vq, elem, total);
1468             g_free(elem);
1469             return size;
1470         }
1471 
1472         /* signal other side */
1473         virtqueue_fill(q->rx_vq, elem, total, i++);
1474         g_free(elem);
1475     }
1476 
1477     if (mhdr_cnt) {
1478         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1479         iov_from_buf(mhdr_sg, mhdr_cnt,
1480                      0,
1481                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1482     }
1483 
1484     virtqueue_flush(q->rx_vq, i);
1485     virtio_notify(vdev, q->rx_vq);
1486 
1487     return size;
1488 }
1489 
1490 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1491                                   size_t size)
1492 {
1493     RCU_READ_LOCK_GUARD();
1494 
1495     return virtio_net_receive_rcu(nc, buf, size);
1496 }
1497 
1498 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1499                                          const uint8_t *buf,
1500                                          VirtioNetRscUnit *unit)
1501 {
1502     uint16_t ip_hdrlen;
1503     struct ip_header *ip;
1504 
1505     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1506                               + sizeof(struct eth_header));
1507     unit->ip = (void *)ip;
1508     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1509     unit->ip_plen = &ip->ip_len;
1510     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1511     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1512     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1513 }
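/*
 * Worked example (illustrative): for a standard 20-byte IPv4 header,
 * ip_ver_len is 0x45, so (0x45 & 0xF) << 2 == 20. For a TCP header with
 * no options, the top nibble of th_offset_flags is 0x5, so
 * (0x5000 & 0xF000) >> 10 == 20 as well.
 */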
1514 
1515 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1516                                          const uint8_t *buf,
1517                                          VirtioNetRscUnit *unit)
1518 {
1519     struct ip6_header *ip6;
1520 
1521     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1522                                  + sizeof(struct eth_header));
1523     unit->ip = ip6;
1524     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1525     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1526                                       + sizeof(struct ip6_header));
1527     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1528 
1529     /* There is a difference between the payload length in ipv4 and v6:
1530        the ip header is excluded in ipv6 */
1531     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1532 }
1533 
1534 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1535                                        VirtioNetRscSeg *seg)
1536 {
1537     int ret;
1538     struct virtio_net_hdr *h;
1539 
1540     h = (struct virtio_net_hdr *)seg->buf;
1541     h->flags = 0;
1542     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1543 
1544     if (seg->is_coalesced) {
1545         *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1546         *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1547         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1548         if (chain->proto == ETH_P_IP) {
1549             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1550         } else {
1551             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1552         }
1553     }
1554 
1555     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1556     QTAILQ_REMOVE(&chain->buffers, seg, next);
1557     g_free(seg->buf);
1558     g_free(seg);
1559 
1560     return ret;
1561 }
1562 
1563 static void virtio_net_rsc_purge(void *opq)
1564 {
1565     VirtioNetRscSeg *seg, *rn;
1566     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1567 
1568     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1569         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1570             chain->stat.purge_failed++;
1571             continue;
1572         }
1573     }
1574 
1575     chain->stat.timer++;
1576     if (!QTAILQ_EMPTY(&chain->buffers)) {
1577         timer_mod(chain->drain_timer,
1578               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1579     }
1580 }
1581 
1582 static void virtio_net_rsc_cleanup(VirtIONet *n)
1583 {
1584     VirtioNetRscChain *chain, *rn_chain;
1585     VirtioNetRscSeg *seg, *rn_seg;
1586 
1587     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1588         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1589             QTAILQ_REMOVE(&chain->buffers, seg, next);
1590             g_free(seg->buf);
1591             g_free(seg);
1592         }
1593 
1594         timer_del(chain->drain_timer);
1595         timer_free(chain->drain_timer);
1596         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1597         g_free(chain);
1598     }
1599 }
1600 
1601 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1602                                      NetClientState *nc,
1603                                      const uint8_t *buf, size_t size)
1604 {
1605     uint16_t hdr_len;
1606     VirtioNetRscSeg *seg;
1607 
1608     hdr_len = chain->n->guest_hdr_len;
1609     seg = g_malloc(sizeof(VirtioNetRscSeg));
1610     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1611         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1612     memcpy(seg->buf, buf, size);
1613     seg->size = size;
1614     seg->packets = 1;
1615     seg->dup_ack = 0;
1616     seg->is_coalesced = 0;
1617     seg->nc = nc;
1618 
1619     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1620     chain->stat.cache++;
1621 
1622     switch (chain->proto) {
1623     case ETH_P_IP:
1624         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1625         break;
1626     case ETH_P_IPV6:
1627         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1628         break;
1629     default:
1630         g_assert_not_reached();
1631     }
1632 }
1633 
1634 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1635                                          VirtioNetRscSeg *seg,
1636                                          const uint8_t *buf,
1637                                          struct tcp_header *n_tcp,
1638                                          struct tcp_header *o_tcp)
1639 {
1640     uint32_t nack, oack;
1641     uint16_t nwin, owin;
1642 
1643     nack = htonl(n_tcp->th_ack);
1644     nwin = htons(n_tcp->th_win);
1645     oack = htonl(o_tcp->th_ack);
1646     owin = htons(o_tcp->th_win);
1647 
1648     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1649         chain->stat.ack_out_of_win++;
1650         return RSC_FINAL;
1651     } else if (nack == oack) {
1652         /* duplicated ack or window probe */
1653         if (nwin == owin) {
1654             /* duplicated ack; count it (the WHQL test expects up to 1) */
1655             chain->stat.dup_ack++;
1656             return RSC_FINAL;
1657         } else {
1658             /* Coalesce window update */
1659             o_tcp->th_win = n_tcp->th_win;
1660             chain->stat.win_update++;
1661             return RSC_COALESCE;
1662         }
1663     } else {
1664         /* pure ack, go to state 'C' and finalize */
1665         chain->stat.pure_ack++;
1666         return RSC_FINAL;
1667     }
1668 }
1669 
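/*
 * Try to merge an incoming data packet into the buffered segment.  The
 * unsigned difference nseq - oseq decides the case: too far apart is
 * out-of-window, a mismatch with the buffered payload length is
 * out-of-order, and an exact continuation is appended to the segment,
 * updating the IP length field and carrying the TCP flags, ack and
 * window over from the newest packet.  Equal sequence numbers are
 * handled as acks, unless payload arrives after a pure ack.
 */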
1670 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1671                                             VirtioNetRscSeg *seg,
1672                                             const uint8_t *buf,
1673                                             VirtioNetRscUnit *n_unit)
1674 {
1675     void *data;
1676     uint16_t o_ip_len;
1677     uint32_t nseq, oseq;
1678     VirtioNetRscUnit *o_unit;
1679 
1680     o_unit = &seg->unit;
1681     o_ip_len = htons(*o_unit->ip_plen);
1682     nseq = htonl(n_unit->tcp->th_seq);
1683     oseq = htonl(o_unit->tcp->th_seq);
1684 
1685     /* out of order or retransmitted. */
1686     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1687         chain->stat.data_out_of_win++;
1688         return RSC_FINAL;
1689     }
1690 
1691     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1692     if (nseq == oseq) {
1693         if ((o_unit->payload == 0) && n_unit->payload) {
1694             /* From no payload to payload: the normal case, not a dup ack */
1695             chain->stat.data_after_pure_ack++;
1696             goto coalesce;
1697         } else {
1698             return virtio_net_rsc_handle_ack(chain, seg, buf,
1699                                              n_unit->tcp, o_unit->tcp);
1700         }
1701     } else if ((nseq - oseq) != o_unit->payload) {
1702         /* Not a consistent packet, out of order */
1703         chain->stat.data_out_of_order++;
1704         return RSC_FINAL;
1705     } else {
1706 coalesce:
1707         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1708             chain->stat.over_size++;
1709             return RSC_FINAL;
1710         }
1711 
1712         /* The data is in order; the payload length field differs between
1713            v4 and v6, so update it via the unit pointer and record the new len */
1714         o_unit->payload += n_unit->payload; /* update new data len */
1715 
1716         /* update field in ip header */
1717         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1718 
1719         /* Take the latest 'PUSH' flag: the WHQL test guide says 'PUSH' can be
1720            coalesced for a Windows guest, though this may change the behavior
1721            for a Linux guest (only if it uses the RSC feature). */
1722         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1723 
1724         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1725         o_unit->tcp->th_win = n_unit->tcp->th_win;
1726 
1727         memmove(seg->buf + seg->size, data, n_unit->payload);
1728         seg->size += n_unit->payload;
1729         seg->packets++;
1730         chain->stat.coalesced++;
1731         return RSC_COALESCE;
1732     }
1733 }
1734 
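/*
 * IPv4 flow match: each XOR below is non-zero as soon as an address or
 * TCP port differs, so the OR chain rejects any packet that belongs to
 * a different connection.
 */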
1735 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1736                                         VirtioNetRscSeg *seg,
1737                                         const uint8_t *buf, size_t size,
1738                                         VirtioNetRscUnit *unit)
1739 {
1740     struct ip_header *ip1, *ip2;
1741 
1742     ip1 = (struct ip_header *)(unit->ip);
1743     ip2 = (struct ip_header *)(seg->unit.ip);
1744     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1745         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1746         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1747         chain->stat.no_match++;
1748         return RSC_NO_MATCH;
1749     }
1750 
1751     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1752 }
1753 
1754 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1755                                         VirtioNetRscSeg *seg,
1756                                         const uint8_t *buf, size_t size,
1757                                         VirtioNetRscUnit *unit)
1758 {
1759     struct ip6_header *ip1, *ip2;
1760 
1761     ip1 = (struct ip6_header *)(unit->ip);
1762     ip2 = (struct ip6_header *)(seg->unit.ip);
1763     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1764         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1765         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1766         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1767             chain->stat.no_match++;
1768             return RSC_NO_MATCH;
1769     }
1770 
1771     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1772 }
1773 
1774 /* Packets with 'SYN' should bypass; packets with other control flags should
1775  * be sent only after the chain is drained, to prevent out-of-order delivery */
1776 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1777                                          struct tcp_header *tcp)
1778 {
1779     uint16_t tcp_hdr;
1780     uint16_t tcp_flag;
1781 
1782     tcp_flag = htons(tcp->th_offset_flags);
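    /*
     * The TCP data offset is the high 4 bits, in units of 32-bit words,
     * so (x >> 12) * 4 bytes == x >> 10.
     */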
1783     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1784     tcp_flag &= VIRTIO_NET_TCP_FLAG;
1786     if (tcp_flag & TH_SYN) {
1787         chain->stat.tcp_syn++;
1788         return RSC_BYPASS;
1789     }
1790 
1791     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1792         chain->stat.tcp_ctrl_drain++;
1793         return RSC_FINAL;
1794     }
1795 
1796     if (tcp_hdr > sizeof(struct tcp_header)) {
1797         chain->stat.tcp_all_opt++;
1798         return RSC_FINAL;
1799     }
1800 
1801     return RSC_CANDIDATE;
1802 }
1803 
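/*
 * Top-level coalescing step.  An empty chain caches the packet and arms
 * the drain timer.  Otherwise each buffered segment is tried in turn:
 * RSC_FINAL drains that segment and forwards the new packet as is,
 * RSC_NO_MATCH moves on to the next segment, and any other result means
 * the packet was absorbed into an existing segment.
 */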
1804 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1805                                          NetClientState *nc,
1806                                          const uint8_t *buf, size_t size,
1807                                          VirtioNetRscUnit *unit)
1808 {
1809     int ret;
1810     VirtioNetRscSeg *seg, *nseg;
1811 
1812     if (QTAILQ_EMPTY(&chain->buffers)) {
1813         chain->stat.empty_cache++;
1814         virtio_net_rsc_cache_buf(chain, nc, buf, size);
1815         timer_mod(chain->drain_timer,
1816               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1817         return size;
1818     }
1819 
1820     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1821         if (chain->proto == ETH_P_IP) {
1822             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1823         } else {
1824             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1825         }
1826 
1827         if (ret == RSC_FINAL) {
1828             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1829                 /* Send failed */
1830                 chain->stat.final_failed++;
1831                 return 0;
1832             }
1833 
1834             /* Send current packet */
1835             return virtio_net_do_receive(nc, buf, size);
1836         } else if (ret == RSC_NO_MATCH) {
1837             continue;
1838         } else {
1839             /* Coalesced: mark the flag so the IPv4 checksum is recalculated */
1840             seg->is_coalesced = 1;
1841             return size;
1842         }
1843     }
1844 
1845     chain->stat.no_match_cache++;
1846     virtio_net_rsc_cache_buf(chain, nc, buf, size);
1847     return size;
1848 }
1849 
1850 /* Drain a connection's buffered data; this avoids out-of-order segments */
1851 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1852                                         NetClientState *nc,
1853                                         const uint8_t *buf, size_t size,
1854                                         uint16_t ip_start, uint16_t ip_size,
1855                                         uint16_t tcp_port)
1856 {
1857     VirtioNetRscSeg *seg, *nseg;
1858     uint32_t ppair1, ppair2;
1859 
1860     ppair1 = *(uint32_t *)(buf + tcp_port);
1861     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1862         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1863         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1864             || (ppair1 != ppair2)) {
1865             continue;
1866         }
1867         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1868             chain->stat.drain_failed++;
1869         }
1870 
1871         break;
1872     }
1873 
1874     return virtio_net_do_receive(nc, buf, size);
1875 }
1876 
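/*
 * A packet is an IPv4 RSC candidate only if it is plain TCP over IPv4
 * with no IP options, cannot be fragmented (DF set), carries no ECN
 * marking, and its length field is consistent with the buffer that
 * actually arrived.
 */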
1877 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1878                                             struct ip_header *ip,
1879                                             const uint8_t *buf, size_t size)
1880 {
1881     uint16_t ip_len;
1882 
1883     /* Not an IPv4 packet (shares the ip_option bypass counter) */
1884     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1885         chain->stat.ip_option++;
1886         return RSC_BYPASS;
1887     }
1888 
1889     /* Don't handle packets with IP options */
1890     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1891         chain->stat.ip_option++;
1892         return RSC_BYPASS;
1893     }
1894 
1895     if (ip->ip_p != IPPROTO_TCP) {
1896         chain->stat.bypass_not_tcp++;
1897         return RSC_BYPASS;
1898     }
1899 
1900     /* Don't handle packets that may be fragmented (DF not set) */
1901     if (!(htons(ip->ip_off) & IP_DF)) {
1902         chain->stat.ip_frag++;
1903         return RSC_BYPASS;
1904     }
1905 
1906     /* Don't handle packets with the ECN flag set */
1907     if (IPTOS_ECN(ip->ip_tos)) {
1908         chain->stat.ip_ecn++;
1909         return RSC_BYPASS;
1910     }
1911 
1912     ip_len = htons(ip->ip_len);
1913     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1914         || ip_len > (size - chain->n->guest_hdr_len -
1915                      sizeof(struct eth_header))) {
1916         chain->stat.ip_hacked++;
1917         return RSC_BYPASS;
1918     }
1919 
1920     return RSC_CANDIDATE;
1921 }
1922 
1923 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1924                                       NetClientState *nc,
1925                                       const uint8_t *buf, size_t size)
1926 {
1927     int32_t ret;
1928     uint16_t hdr_len;
1929     VirtioNetRscUnit unit;
1930 
1931     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1932 
1933     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1934         + sizeof(struct tcp_header))) {
1935         chain->stat.bypass_not_tcp++;
1936         return virtio_net_do_receive(nc, buf, size);
1937     }
1938 
1939     virtio_net_rsc_extract_unit4(chain, buf, &unit);
1940     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1941         != RSC_CANDIDATE) {
1942         return virtio_net_do_receive(nc, buf, size);
1943     }
1944 
1945     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1946     if (ret == RSC_BYPASS) {
1947         return virtio_net_do_receive(nc, buf, size);
1948     } else if (ret == RSC_FINAL) {
1949         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1950                 ((hdr_len + sizeof(struct eth_header)) + 12),
1951                 VIRTIO_NET_IP4_ADDR_SIZE,
1952                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1953     }
1954 
1955     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1956 }
1957 
1958 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1959                                             struct ip6_header *ip6,
1960                                             const uint8_t *buf, size_t size)
1961 {
1962     uint16_t ip_len;
1963 
1964     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1965         != IP_HEADER_VERSION_6) {
1966         return RSC_BYPASS;
1967     }
1968 
1969     /* One check for both: any extension header or non-TCP protocol bypasses */
1970     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1971         chain->stat.bypass_not_tcp++;
1972         return RSC_BYPASS;
1973     }
1974 
1975     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1976     if (ip_len < sizeof(struct tcp_header) ||
1977         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1978                   - sizeof(struct ip6_header))) {
1979         chain->stat.ip_hacked++;
1980         return RSC_BYPASS;
1981     }
1982 
1983     /* Don't handle packets with the ECN flag set */
1984     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1985         chain->stat.ip_ecn++;
1986         return RSC_BYPASS;
1987     }
1988 
1989     return RSC_CANDIDATE;
1990 }
1991 
1992 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1993                                       const uint8_t *buf, size_t size)
1994 {
1995     int32_t ret;
1996     uint16_t hdr_len;
1997     VirtioNetRscChain *chain;
1998     VirtioNetRscUnit unit;
1999 
2000     chain = (VirtioNetRscChain *)opq;
2001     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2002 
2003     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2004         + sizeof(struct tcp_header))) {
2005         return virtio_net_do_receive(nc, buf, size);
2006     }
2007 
2008     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2009     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2010                                                  unit.ip, buf, size)) {
2011         return virtio_net_do_receive(nc, buf, size);
2012     }
2013 
2014     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2015     if (ret == RSC_BYPASS) {
2016         return virtio_net_do_receive(nc, buf, size);
2017     } else if (ret == RSC_FINAL) {
2018         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2019                 ((hdr_len + sizeof(struct eth_header)) + 8),
2020                 VIRTIO_NET_IP6_ADDR_SIZE,
2021                 hdr_len + sizeof(struct eth_header)
2022                 + sizeof(struct ip6_header));
2023     }
2024 
2025     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2026 }
2027 
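/*
 * Find the RSC chain for an EtherType, creating it on first use: one
 * chain exists per protocol (IPv4 or IPv6), each with its own GSO type,
 * payload limit, statistics and drain timer.
 */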
2028 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2029                                                       NetClientState *nc,
2030                                                       uint16_t proto)
2031 {
2032     VirtioNetRscChain *chain;
2033 
2034     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2035         return NULL;
2036     }
2037 
2038     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2039         if (chain->proto == proto) {
2040             return chain;
2041         }
2042     }
2043 
2044     chain = g_malloc(sizeof(*chain));
2045     chain->n = n;
2046     chain->proto = proto;
2047     if (proto == (uint16_t)ETH_P_IP) {
2048         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2049         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2050     } else {
2051         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2052         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2053     }
2054     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2055                                       virtio_net_rsc_purge, chain);
2056     memset(&chain->stat, 0, sizeof(chain->stat));
2057 
2058     QTAILQ_INIT(&chain->buffers);
2059     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2060 
2061     return chain;
2062 }
2063 
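/*
 * RSC entry point on the receive path: peek at the EtherType behind the
 * virtio-net header and hand the packet to the IPv4 or IPv6 coalescing
 * path when the matching rsc*_enabled flag is set; everything else is
 * delivered unmodified.
 */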
2064 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2065                                       const uint8_t *buf,
2066                                       size_t size)
2067 {
2068     uint16_t proto;
2069     VirtioNetRscChain *chain;
2070     struct eth_header *eth;
2071     VirtIONet *n;
2072 
2073     n = qemu_get_nic_opaque(nc);
2074     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2075         return virtio_net_do_receive(nc, buf, size);
2076     }
2077 
2078     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2079     proto = htons(eth->h_proto);
2080 
2081     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2082     if (chain) {
2083         chain->stat.received++;
2084         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2085             return virtio_net_rsc_receive4(chain, nc, buf, size);
2086         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2087             return virtio_net_rsc_receive6(chain, nc, buf, size);
2088         }
2089     }
2090     return virtio_net_do_receive(nc, buf, size);
2091 }
2092 
2093 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2094                                   size_t size)
2095 {
2096     VirtIONet *n = qemu_get_nic_opaque(nc);
2097     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2098         return virtio_net_rsc_receive(nc, buf, size);
2099     } else {
2100         return virtio_net_do_receive(nc, buf, size);
2101     }
2102 }
2103 
2104 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2105 
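/*
 * Completion callback for an asynchronous transmit: push the in-flight
 * element back to the guest, re-enable queue notifications and flush
 * any packets that were queued while the send was pending.
 */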
2106 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2107 {
2108     VirtIONet *n = qemu_get_nic_opaque(nc);
2109     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2110     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2111 
2112     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2113     virtio_notify(vdev, q->tx_vq);
2114 
2115     g_free(q->async_tx.elem);
2116     q->async_tx.elem = NULL;
2117 
2118     virtio_queue_set_notification(q->tx_vq, 1);
2119     virtio_net_flush_tx(q);
2120 }
2121 
2122 /* TX */
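/*
 * Flush up to tx_burst packets from the TX virtqueue to the backend.
 * Returns the number of packets flushed, -EINVAL if the device was
 * marked broken, or -EBUSY if the backend could not take a packet and
 * the send is left pending (finished later in virtio_net_tx_complete).
 */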
2123 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2124 {
2125     VirtIONet *n = q->n;
2126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2127     VirtQueueElement *elem;
2128     int32_t num_packets = 0;
2129     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2130     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2131         return num_packets;
2132     }
2133 
2134     if (q->async_tx.elem) {
2135         virtio_queue_set_notification(q->tx_vq, 0);
2136         return num_packets;
2137     }
2138 
2139     for (;;) {
2140         ssize_t ret;
2141         unsigned int out_num;
2142         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2143         struct virtio_net_hdr_mrg_rxbuf mhdr;
2144 
2145         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2146         if (!elem) {
2147             break;
2148         }
2149 
2150         out_num = elem->out_num;
2151         out_sg = elem->out_sg;
2152         if (out_num < 1) {
2153             virtio_error(vdev, "virtio-net header not in first element");
2154             virtqueue_detach_element(q->tx_vq, elem, 0);
2155             g_free(elem);
2156             return -EINVAL;
2157         }
2158 
2159         if (n->has_vnet_hdr) {
2160             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2161                 n->guest_hdr_len) {
2162                 virtio_error(vdev, "virtio-net header incorrect");
2163                 virtqueue_detach_element(q->tx_vq, elem, 0);
2164                 g_free(elem);
2165                 return -EINVAL;
2166             }
2167             if (n->needs_vnet_hdr_swap) {
2168                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2169                 sg2[0].iov_base = &mhdr;
2170                 sg2[0].iov_len = n->guest_hdr_len;
2171                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2172                                    out_sg, out_num,
2173                                    n->guest_hdr_len, -1);
2174                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2175                     goto drop;
2176                 }
2177                 out_num += 1;
2178                 out_sg = sg2;
2179             }
2180         }
2181         /*
2182          * If host wants to see the guest header as is, we can
2183          * pass it on unchanged. Otherwise, copy just the parts
2184          * that host is interested in.
2185          */
2186         assert(n->host_hdr_len <= n->guest_hdr_len);
2187         if (n->host_hdr_len != n->guest_hdr_len) {
2188             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2189                                        out_sg, out_num,
2190                                        0, n->host_hdr_len);
2191             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2192                              out_sg, out_num,
2193                              n->guest_hdr_len, -1);
2194             out_num = sg_num;
2195             out_sg = sg;
2196         }
2197 
2198         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2199                                       out_sg, out_num, virtio_net_tx_complete);
2200         if (ret == 0) {
2201             virtio_queue_set_notification(q->tx_vq, 0);
2202             q->async_tx.elem = elem;
2203             return -EBUSY;
2204         }
2205 
2206 drop:
2207         virtqueue_push(q->tx_vq, elem, 0);
2208         virtio_notify(vdev, q->tx_vq);
2209         g_free(elem);
2210 
2211         if (++num_packets >= n->tx_burst) {
2212             break;
2213         }
2214     }
2215     return num_packets;
2216 }
2217 
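/*
 * Timer-based TX mitigation: the first guest kick only arms tx_timer
 * and disables further notifications; the queue is flushed when the
 * timer fires.  A second kick while the timer is pending flushes
 * immediately instead of waiting.
 */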
2218 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2219 {
2220     VirtIONet *n = VIRTIO_NET(vdev);
2221     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2222 
2223     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2224         virtio_net_drop_tx_queue_data(vdev, vq);
2225         return;
2226     }
2227 
2228     /* This happens when device was stopped but VCPU wasn't. */
2229     if (!vdev->vm_running) {
2230         q->tx_waiting = 1;
2231         return;
2232     }
2233 
2234     if (q->tx_waiting) {
2235         virtio_queue_set_notification(vq, 1);
2236         timer_del(q->tx_timer);
2237         q->tx_waiting = 0;
2238         if (virtio_net_flush_tx(q) == -EINVAL) {
2239             return;
2240         }
2241     } else {
2242         timer_mod(q->tx_timer,
2243                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2244         q->tx_waiting = 1;
2245         virtio_queue_set_notification(vq, 0);
2246     }
2247 }
2248 
2249 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2250 {
2251     VirtIONet *n = VIRTIO_NET(vdev);
2252     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2253 
2254     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2255         virtio_net_drop_tx_queue_data(vdev, vq);
2256         return;
2257     }
2258 
2259     if (unlikely(q->tx_waiting)) {
2260         return;
2261     }
2262     q->tx_waiting = 1;
2263     /* This happens when device was stopped but VCPU wasn't. */
2264     if (!vdev->vm_running) {
2265         return;
2266     }
2267     virtio_queue_set_notification(vq, 0);
2268     qemu_bh_schedule(q->tx_bh);
2269 }
2270 
2271 static void virtio_net_tx_timer(void *opaque)
2272 {
2273     VirtIONetQueue *q = opaque;
2274     VirtIONet *n = q->n;
2275     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2276     /* This happens when device was stopped but BH wasn't. */
2277     if (!vdev->vm_running) {
2278         /* Make sure tx waiting is set, so we'll run when restarted. */
2279         assert(q->tx_waiting);
2280         return;
2281     }
2282 
2283     q->tx_waiting = 0;
2284 
2285     /* Just in case the driver is not ready any more */
2286     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2287         return;
2288     }
2289 
2290     virtio_queue_set_notification(q->tx_vq, 1);
2291     virtio_net_flush_tx(q);
2292 }
2293 
2294 static void virtio_net_tx_bh(void *opaque)
2295 {
2296     VirtIONetQueue *q = opaque;
2297     VirtIONet *n = q->n;
2298     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2299     int32_t ret;
2300 
2301     /* This happens when device was stopped but BH wasn't. */
2302     if (!vdev->vm_running) {
2303         /* Make sure tx waiting is set, so we'll run when restarted. */
2304         assert(q->tx_waiting);
2305         return;
2306     }
2307 
2308     q->tx_waiting = 0;
2309 
2310     /* Just in case the driver is not ready any more */
2311     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2312         return;
2313     }
2314 
2315     ret = virtio_net_flush_tx(q);
2316     if (ret == -EBUSY || ret == -EINVAL) {
2317         return; /* Notification re-enable handled by tx_complete or device
2318                  * broken */
2319     }
2320 
2321     /* If we flush a full burst of packets, assume there are
2322      * more coming and immediately reschedule */
2323     if (ret >= n->tx_burst) {
2324         qemu_bh_schedule(q->tx_bh);
2325         q->tx_waiting = 1;
2326         return;
2327     }
2328 
2329     /* If less than a full burst, re-enable notification and flush
2330      * anything that may have come in while we weren't looking.  If
2331      * we find something, assume the guest is still active and reschedule */
2332     virtio_queue_set_notification(q->tx_vq, 1);
2333     ret = virtio_net_flush_tx(q);
2334     if (ret == -EINVAL) {
2335         return;
2336     } else if (ret > 0) {
2337         virtio_queue_set_notification(q->tx_vq, 0);
2338         qemu_bh_schedule(q->tx_bh);
2339         q->tx_waiting = 1;
2340     }
2341 }
2342 
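/*
 * Create the RX/TX virtqueue pair for one queue index.  The TX side is
 * driven either by a mitigation timer or by a bottom half, depending on
 * the "tx" property (bottom half by default).
 */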
2343 static void virtio_net_add_queue(VirtIONet *n, int index)
2344 {
2345     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2346 
2347     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2348                                            virtio_net_handle_rx);
2349 
2350     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2351         n->vqs[index].tx_vq =
2352             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2353                              virtio_net_handle_tx_timer);
2354         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2355                                               virtio_net_tx_timer,
2356                                               &n->vqs[index]);
2357     } else {
2358         n->vqs[index].tx_vq =
2359             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2360                              virtio_net_handle_tx_bh);
2361         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2362     }
2363 
2364     n->vqs[index].tx_waiting = 0;
2365     n->vqs[index].n = n;
2366 }
2367 
2368 static void virtio_net_del_queue(VirtIONet *n, int index)
2369 {
2370     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2371     VirtIONetQueue *q = &n->vqs[index];
2372     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2373 
2374     qemu_purge_queued_packets(nc);
2375 
2376     virtio_del_queue(vdev, index * 2);
2377     if (q->tx_timer) {
2378         timer_del(q->tx_timer);
2379         timer_free(q->tx_timer);
2380         q->tx_timer = NULL;
2381     } else {
2382         qemu_bh_delete(q->tx_bh);
2383         q->tx_bh = NULL;
2384     }
2385     q->tx_waiting = 0;
2386     virtio_del_queue(vdev, index * 2 + 1);
2387 }
2388 
2389 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2390 {
2391     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2392     int old_num_queues = virtio_get_num_queues(vdev);
2393     int new_num_queues = new_max_queues * 2 + 1;
2394     int i;
2395 
2396     assert(old_num_queues >= 3);
2397     assert(old_num_queues % 2 == 1);
2398 
2399     if (old_num_queues == new_num_queues) {
2400         return;
2401     }
2402 
2403     /*
2404      * We always need to remove and add ctrl vq if
2405      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2406      * and then we only enter one of the following two loops.
2407      */
2408     virtio_del_queue(vdev, old_num_queues - 1);
2409 
2410     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2411         /* new_num_queues < old_num_queues */
2412         virtio_net_del_queue(n, i / 2);
2413     }
2414 
2415     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2416         /* new_num_queues > old_num_queues */
2417         virtio_net_add_queue(n, i / 2);
2418     }
2419 
2420     /* add ctrl_vq last */
2421     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2422 }
2423 
2424 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2425 {
2426     int max = multiqueue ? n->max_queues : 1;
2427 
2428     n->multiqueue = multiqueue;
2429     virtio_net_change_num_queues(n, max);
2430 
2431     virtio_net_set_queues(n);
2432 }
2433 
2434 static int virtio_net_post_load_device(void *opaque, int version_id)
2435 {
2436     VirtIONet *n = opaque;
2437     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2438     int i, link_down;
2439 
2440     trace_virtio_net_post_load_device();
2441     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2442                                virtio_vdev_has_feature(vdev,
2443                                                        VIRTIO_F_VERSION_1));
2444 
2445     /* MAC_TABLE_ENTRIES may be different from the saved image */
2446     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2447         n->mac_table.in_use = 0;
2448     }
2449 
2450     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2451         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2452     }
2453 
2454     /*
2455      * curr_guest_offloads will be later overwritten by the
2456      * virtio_set_features_nocheck call done from the virtio_load.
2457      * Here we make sure it is preserved and restored accordingly
2458      * in the virtio_net_post_load_virtio callback.
2459      */
2460     n->saved_guest_offloads = n->curr_guest_offloads;
2461 
2462     virtio_net_set_queues(n);
2463 
2464     /* Find the first multicast entry in the saved MAC filter */
2465     for (i = 0; i < n->mac_table.in_use; i++) {
2466         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2467             break;
2468         }
2469     }
2470     n->mac_table.first_multi = i;
2471 
2472     /* nc.link_down can't be migrated, so infer it from the link status
2473      * bit in n->status */
2474     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2475     for (i = 0; i < n->max_queues; i++) {
2476         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2477     }
2478 
2479     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2480         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2481         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2482                                   QEMU_CLOCK_VIRTUAL,
2483                                   virtio_net_announce_timer, n);
2484         if (n->announce_timer.round) {
2485             timer_mod(n->announce_timer.tm,
2486                       qemu_clock_get_ms(n->announce_timer.type));
2487         } else {
2488             qemu_announce_timer_del(&n->announce_timer, false);
2489         }
2490     }
2491 
2492     return 0;
2493 }
2494 
2495 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2496 {
2497     VirtIONet *n = VIRTIO_NET(vdev);
2498     /*
2499      * The actual needed state is now in saved_guest_offloads,
2500      * see virtio_net_post_load_device for detail.
2501      * Restore it back and apply the desired offloads.
2502      */
2503     n->curr_guest_offloads = n->saved_guest_offloads;
2504     if (peer_has_vnet_hdr(n)) {
2505         virtio_net_apply_guest_offloads(n);
2506     }
2507 
2508     return 0;
2509 }
2510 
2511 /* tx_waiting field of a VirtIONetQueue */
2512 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2513     .name = "virtio-net-queue-tx_waiting",
2514     .fields = (VMStateField[]) {
2515         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2516         VMSTATE_END_OF_LIST()
2517    },
2518 };
2519 
2520 static bool max_queues_gt_1(void *opaque, int version_id)
2521 {
2522     return VIRTIO_NET(opaque)->max_queues > 1;
2523 }
2524 
2525 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2526 {
2527     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2528                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2529 }
2530 
2531 static bool mac_table_fits(void *opaque, int version_id)
2532 {
2533     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2534 }
2535 
2536 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2537 {
2538     return !mac_table_fits(opaque, version_id);
2539 }
2540 
2541 /* This temporary type is shared by all the WITH_TMP methods
2542  * although only some fields are used by each.
2543  */
2544 struct VirtIONetMigTmp {
2545     VirtIONet      *parent;
2546     VirtIONetQueue *vqs_1;
2547     uint16_t        curr_queues_1;
2548     uint8_t         has_ufo;
2549     uint32_t        has_vnet_hdr;
2550 };
2551 
2552 /* The 2nd and subsequent tx_waiting flags are loaded later than
2553  * the 1st entry in the queues and only if there's more than one
2554  * entry.  We use the tmp mechanism to calculate a temporary
2555  * pointer and count and also validate the count.
2556  */
2557 
2558 static int virtio_net_tx_waiting_pre_save(void *opaque)
2559 {
2560     struct VirtIONetMigTmp *tmp = opaque;
2561 
2562     tmp->vqs_1 = tmp->parent->vqs + 1;
2563     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2564     if (tmp->parent->curr_queues == 0) {
2565         tmp->curr_queues_1 = 0;
2566     }
2567 
2568     return 0;
2569 }
2570 
2571 static int virtio_net_tx_waiting_pre_load(void *opaque)
2572 {
2573     struct VirtIONetMigTmp *tmp = opaque;
2574 
2575     /* Reuse the pointer setup from save */
2576     virtio_net_tx_waiting_pre_save(opaque);
2577 
2578     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2579         error_report("virtio-net: curr_queues %x > max_queues %x",
2580             tmp->parent->curr_queues, tmp->parent->max_queues);
2581 
2582         return -EINVAL;
2583     }
2584 
2585     return 0; /* all good */
2586 }
2587 
2588 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2589     .name      = "virtio-net-tx_waiting",
2590     .pre_load  = virtio_net_tx_waiting_pre_load,
2591     .pre_save  = virtio_net_tx_waiting_pre_save,
2592     .fields    = (VMStateField[]) {
2593         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2594                                      curr_queues_1,
2595                                      vmstate_virtio_net_queue_tx_waiting,
2596                                      struct VirtIONetQueue),
2597         VMSTATE_END_OF_LIST()
2598     },
2599 };
2600 
2601 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2602  * flag set we need to check that we have it
2603  */
2604 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2605 {
2606     struct VirtIONetMigTmp *tmp = opaque;
2607 
2608     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2609         error_report("virtio-net: saved image requires TUN_F_UFO support");
2610         return -EINVAL;
2611     }
2612 
2613     return 0;
2614 }
2615 
2616 static int virtio_net_ufo_pre_save(void *opaque)
2617 {
2618     struct VirtIONetMigTmp *tmp = opaque;
2619 
2620     tmp->has_ufo = tmp->parent->has_ufo;
2621 
2622     return 0;
2623 }
2624 
2625 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2626     .name      = "virtio-net-ufo",
2627     .post_load = virtio_net_ufo_post_load,
2628     .pre_save  = virtio_net_ufo_pre_save,
2629     .fields    = (VMStateField[]) {
2630         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2631         VMSTATE_END_OF_LIST()
2632     },
2633 };
2634 
2635 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2636  * flag set we need to check that we have it
2637  */
2638 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2639 {
2640     struct VirtIONetMigTmp *tmp = opaque;
2641 
2642     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2643         error_report("virtio-net: saved image requires vnet_hdr=on");
2644         return -EINVAL;
2645     }
2646 
2647     return 0;
2648 }
2649 
2650 static int virtio_net_vnet_pre_save(void *opaque)
2651 {
2652     struct VirtIONetMigTmp *tmp = opaque;
2653 
2654     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2655 
2656     return 0;
2657 }
2658 
2659 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2660     .name      = "virtio-net-vnet",
2661     .post_load = virtio_net_vnet_post_load,
2662     .pre_save  = virtio_net_vnet_pre_save,
2663     .fields    = (VMStateField[]) {
2664         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2665         VMSTATE_END_OF_LIST()
2666     },
2667 };
2668 
2669 static const VMStateDescription vmstate_virtio_net_device = {
2670     .name = "virtio-net-device",
2671     .version_id = VIRTIO_NET_VM_VERSION,
2672     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2673     .post_load = virtio_net_post_load_device,
2674     .fields = (VMStateField[]) {
2675         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2676         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2677                                vmstate_virtio_net_queue_tx_waiting,
2678                                VirtIONetQueue),
2679         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2680         VMSTATE_UINT16(status, VirtIONet),
2681         VMSTATE_UINT8(promisc, VirtIONet),
2682         VMSTATE_UINT8(allmulti, VirtIONet),
2683         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2684 
2685         /* Guarded pair: if it fits we load it, else we throw it away
2686          * - can happen if the source has a larger MAC table; post-load
2687          * sets flags in this case.
2688          */
2689         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2690                                 0, mac_table_fits, mac_table.in_use,
2691                                  ETH_ALEN),
2692         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2693                                      mac_table.in_use, ETH_ALEN),
2694 
2695         /* Note: This is an array of uint32's that's always been saved as a
2696          * buffer; hold onto your endiannesses; it's actually used as a bitmap
2697          * built out of those uint32s.
2698          */
2699         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
2700         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2701                          vmstate_virtio_net_has_vnet),
2702         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2703         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2704         VMSTATE_UINT8(alluni, VirtIONet),
2705         VMSTATE_UINT8(nomulti, VirtIONet),
2706         VMSTATE_UINT8(nouni, VirtIONet),
2707         VMSTATE_UINT8(nobcast, VirtIONet),
2708         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2709                          vmstate_virtio_net_has_ufo),
2710         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2711                             vmstate_info_uint16_equal, uint16_t),
2712         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2713         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2714                          vmstate_virtio_net_tx_waiting),
2715         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2716                             has_ctrl_guest_offloads),
2717         VMSTATE_END_OF_LIST()
2718    },
2719 };
2720 
2721 static NetClientInfo net_virtio_info = {
2722     .type = NET_CLIENT_DRIVER_NIC,
2723     .size = sizeof(NICState),
2724     .can_receive = virtio_net_can_receive,
2725     .receive = virtio_net_receive,
2726     .link_status_changed = virtio_net_set_link_status,
2727     .query_rx_filter = virtio_net_query_rxfilter,
2728     .announce = virtio_net_announce,
2729 };
2730 
2731 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2732 {
2733     VirtIONet *n = VIRTIO_NET(vdev);
2734     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2735     assert(n->vhost_started);
2736     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2737 }
2738 
2739 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2740                                            bool mask)
2741 {
2742     VirtIONet *n = VIRTIO_NET(vdev);
2743     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2744     assert(n->vhost_started);
2745     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2746                              vdev, idx, mask);
2747 }
2748 
2749 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2750 {
2751     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2752 
2753     n->config_size = virtio_feature_get_config_size(feature_sizes,
2754                                                     host_features);
2755 }
2756 
2757 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2758                                    const char *type)
2759 {
2760     /*
2761      * If the name is NULL, the netclient name will be type.x.
2762      */
2763     assert(type != NULL);
2764 
2765     g_free(n->netclient_name);
2766     g_free(n->netclient_type);
2767     n->netclient_name = g_strdup(name);
2768     n->netclient_type = g_strdup(type);
2769 }
2770 
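/*
 * Failover support: ask the hotplug controller of the primary
 * passthrough device for a partial hot-unplug.  Returns true only if
 * the unplug request was issued successfully.
 */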
2771 static bool failover_unplug_primary(VirtIONet *n)
2772 {
2773     HotplugHandler *hotplug_ctrl;
2774     PCIDevice *pci_dev;
2775     Error *err = NULL;
2776 
2777     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
2778     if (hotplug_ctrl) {
2779         pci_dev = PCI_DEVICE(n->primary_dev);
2780         pci_dev->partially_hotplugged = true;
2781         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
2782         if (err) {
2783             error_report_err(err);
2784             return false;
2785         }
2786     } else {
2787         return false;
2788     }
2789     return true;
2790 }
2791 
2792 static bool failover_replug_primary(VirtIONet *n, Error **errp)
2793 {
2794     HotplugHandler *hotplug_ctrl;
2795     PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
2796 
2797     if (!pdev->partially_hotplugged) {
2798         return true;
2799     }
2800     if (!n->primary_device_opts) {
2801         n->primary_device_opts = qemu_opts_from_qdict(
2802                 qemu_find_opts("device"),
2803                 n->primary_device_dict, errp);
2804     }
2805     if (n->primary_device_opts) {
2806         if (n->primary_dev) {
2807             n->primary_bus = n->primary_dev->parent_bus;
2808         }
2809         qdev_set_parent_bus(n->primary_dev, n->primary_bus);
2810         n->primary_should_be_hidden = false;
2811         qemu_opt_set_bool(n->primary_device_opts,
2812                 "partially_hotplugged", true, errp);
2813         hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
2814         if (hotplug_ctrl) {
2815             hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, errp);
2816             hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
2817         }
2818         if (!n->primary_dev) {
2819             error_setg(errp, "virtio_net: couldn't find primary device");
2820         }
2821     }
2822     return *errp != NULL;
2823 }
2824 
2825 static void virtio_net_handle_migration_primary(VirtIONet *n,
2826                                                 MigrationState *s)
2827 {
2828     bool should_be_hidden;
2829     Error *err = NULL;
2830 
2831     should_be_hidden = atomic_read(&n->primary_should_be_hidden);
2832 
2833     if (!n->primary_dev) {
2834         n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
2835         if (!n->primary_dev) {
2836             return;
2837         }
2838     }
2839 
2840     if (migration_in_setup(s) && !should_be_hidden &&
2841         n->primary_dev) {
2842         if (failover_unplug_primary(n)) {
2843             vmstate_unregister(n->primary_dev, qdev_get_vmsd(n->primary_dev),
2844                     n->primary_dev);
2845             qapi_event_send_unplug_primary(n->primary_device_id);
2846             atomic_set(&n->primary_should_be_hidden, true);
2847         } else {
2848             warn_report("couldn't unplug primary device");
2849         }
2850     } else if (migration_has_failed(s)) {
2851         /* We already unplugged the device; let's plug it back in */
2852         if (!failover_replug_primary(n, &err)) {
2853             if (err) {
2854                 error_report_err(err);
2855             }
2856         }
2857     }
2858 }
2859 
2860 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
2861 {
2862     MigrationState *s = data;
2863     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
2864     virtio_net_handle_migration_primary(n, s);
2865 }
2866 
2867 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
2868             QemuOpts *device_opts)
2869 {
2870     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
2871     bool match_found;
2872     bool hide;
2873 
2874     n->primary_device_dict = qemu_opts_to_qdict(device_opts,
2875             n->primary_device_dict);
2876     if (n->primary_device_dict) {
2877         g_free(n->standby_id);
2878         n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
2879                     "failover_pair_id"));
2880     }
2881     if (device_opts && g_strcmp0(n->standby_id, n->netclient_name) == 0) {
2882         match_found = true;
2883     } else {
2884         match_found = false;
2885         hide = false;
2886         g_free(n->standby_id);
2887         n->primary_device_dict = NULL;
2888         goto out;
2889     }
2890 
2891     n->primary_device_opts = device_opts;
2892 
2893     /* primary_should_be_hidden is set during feature negotiation */
2894     hide = atomic_read(&n->primary_should_be_hidden);
2895 
2896     if (n->primary_device_dict) {
2897         g_free(n->primary_device_id);
2898         n->primary_device_id = g_strdup(qdict_get_try_str(
2899                     n->primary_device_dict, "id"));
2900         if (!n->primary_device_id) {
2901             warn_report("primary_device_id not set");
2902         }
2903     }
2904 
2905 out:
2906     if (match_found && hide) {
2907         return 1;
2908     } else if (match_found && !hide) {
2909         return 0;
2910     } else {
2911         return -1;
2912     }
2913 }
2914 
2915 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2916 {
2917     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2918     VirtIONet *n = VIRTIO_NET(dev);
2919     NetClientState *nc;
2920     int i;
2921 
2922     if (n->net_conf.mtu) {
2923         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2924     }
2925 
2926     if (n->net_conf.duplex_str) {
2927         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2928             n->net_conf.duplex = DUPLEX_HALF;
2929         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2930             n->net_conf.duplex = DUPLEX_FULL;
2931         } else {
2932             error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
2933         }
2934         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2935     } else {
2936         n->net_conf.duplex = DUPLEX_UNKNOWN;
2937     }
2938 
2939     if (n->net_conf.speed < SPEED_UNKNOWN) {
2940         error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
2941     } else if (n->net_conf.speed >= 0) {
2942         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2943     }
2944 
2945     if (n->failover) {
2946         n->primary_listener.should_be_hidden =
2947             virtio_net_primary_should_be_hidden;
2948         atomic_set(&n->primary_should_be_hidden, true);
2949         device_listener_register(&n->primary_listener);
2950         n->migration_state.notify = virtio_net_migration_state_notifier;
2951         add_migration_state_change_notifier(&n->migration_state);
2952         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
2953     }
2954 
2955     virtio_net_set_config_size(n, n->host_features);
2956     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2957 
2958     /*
2959      * We set a lower limit on RX queue size to what it always was.
2960      * Guests that want a smaller ring can always resize it without
2961      * help from us (using virtio 1 and up).
2962      */
2963     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2964         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2965         !is_power_of_2(n->net_conf.rx_queue_size)) {
2966         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2967                    "must be a power of 2 between %d and %d.",
2968                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2969                    VIRTQUEUE_MAX_SIZE);
2970         virtio_cleanup(vdev);
2971         return;
2972     }
2973 
2974     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2975         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2976         !is_power_of_2(n->net_conf.tx_queue_size)) {
2977         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2978                    "must be a power of 2 between %d and %d",
2979                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2980                    VIRTQUEUE_MAX_SIZE);
2981         virtio_cleanup(vdev);
2982         return;
2983     }
2984 
2985     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
2986     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
2987         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
2988                    "must be a positive integer less than %d.",
2989                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
2990         virtio_cleanup(vdev);
2991         return;
2992     }
2993     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
2994     n->curr_queues = 1;
2995     n->tx_timeout = n->net_conf.txtimer;
2996 
2997     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2998                        && strcmp(n->net_conf.tx, "bh")) {
2999         warn_report("virtio-net: "
3000                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3001                     n->net_conf.tx);
3002         error_printf("Defaulting to \"bh\"\n");
3003     }
3004 
3005     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3006                                     n->net_conf.tx_queue_size);
3007 
3008     for (i = 0; i < n->max_queues; i++) {
3009         virtio_net_add_queue(n, i);
3010     }
3011 
3012     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3013     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3014     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3015     n->status = VIRTIO_NET_S_LINK_UP;
3016     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3017                               QEMU_CLOCK_VIRTUAL,
3018                               virtio_net_announce_timer, n);
3019     n->announce_timer.round = 0;
3020 
3021     if (n->netclient_type) {
3022         /*
3023          * Happens when virtio_net_set_netclient_name has been called.
3024          */
3025         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3026                               n->netclient_type, n->netclient_name, n);
3027     } else {
3028         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3029                               object_get_typename(OBJECT(dev)), dev->id, n);
3030     }
3031 
3032     peer_test_vnet_hdr(n);
3033     if (peer_has_vnet_hdr(n)) {
3034         for (i = 0; i < n->max_queues; i++) {
3035             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3036         }
3037         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3038     } else {
3039         n->host_hdr_len = 0;
3040     }
3041 
3042     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3043 
3044     n->vqs[0].tx_waiting = 0;
3045     n->tx_burst = n->net_conf.txburst;
3046     virtio_net_set_mrg_rx_bufs(n, 0, 0);
3047     n->promisc = 1; /* for compatibility */
3048 
3049     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3050 
3051     n->vlans = g_malloc0(MAX_VLAN >> 3);
3052 
3053     nc = qemu_get_queue(n->nic);
3054     nc->rxfilter_notify_enabled = 1;
3055 
3056     QTAILQ_INIT(&n->rsc_chains);
3057     n->qdev = dev;
3058 }
3059 
3060 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
3061 {
3062     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3063     VirtIONet *n = VIRTIO_NET(dev);
3064     int i, max_queues;
3065 
3066     /* This will stop vhost backend if appropriate. */
3067     virtio_net_set_status(vdev, 0);
3068 
3069     g_free(n->netclient_name);
3070     n->netclient_name = NULL;
3071     g_free(n->netclient_type);
3072     n->netclient_type = NULL;
3073 
3074     g_free(n->mac_table.macs);
3075     g_free(n->vlans);
3076 
3077     if (n->failover) {
3078         g_free(n->primary_device_id);
3079         g_free(n->standby_id);
3080         qobject_unref(n->primary_device_dict);
3081         n->primary_device_dict = NULL;
3082     }
3083 
3084     max_queues = n->multiqueue ? n->max_queues : 1;
3085     for (i = 0; i < max_queues; i++) {
3086         virtio_net_del_queue(n, i);
3087     }
3088 
3089     qemu_announce_timer_del(&n->announce_timer, false);
3090     g_free(n->vqs);
3091     qemu_del_nic(n->nic);
3092     virtio_net_rsc_cleanup(n);
3093     virtio_cleanup(vdev);
3094 }
3095 
3096 static void virtio_net_instance_init(Object *obj)
3097 {
3098     VirtIONet *n = VIRTIO_NET(obj);
3099 
3100     /*
3101      * The default config_size is sizeof(struct virtio_net_config).
3102      * Can be overridden with virtio_net_set_config_size.
3103      */
3104     n->config_size = sizeof(struct virtio_net_config);
3105     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3106                                   "bootindex", "/ethernet-phy@0",
3107                                   DEVICE(n), NULL);
3108 }
3109 
3110 static int virtio_net_pre_save(void *opaque)
3111 {
3112     VirtIONet *n = opaque;
3113 
3114     /* At this point, the backend must be stopped, otherwise
3115      * it might keep writing to memory. */
3116     assert(!n->vhost_started);
3117 
3118     return 0;
3119 }
3120 
3121 static bool primary_unplug_pending(void *opaque)
3122 {
3123     DeviceState *dev = opaque;
3124     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3125     VirtIONet *n = VIRTIO_NET(vdev);
3126 
3127     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3128 }
3129 
3130 static bool dev_unplug_pending(void *opaque)
3131 {
3132     DeviceState *dev = opaque;
3133     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3134 
3135     return vdc->primary_unplug_pending(dev);
3136 }
3137 
3138 static const VMStateDescription vmstate_virtio_net = {
3139     .name = "virtio-net",
3140     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3141     .version_id = VIRTIO_NET_VM_VERSION,
3142     .fields = (VMStateField[]) {
3143         VMSTATE_VIRTIO_DEVICE,
3144         VMSTATE_END_OF_LIST()
3145     },
3146     .pre_save = virtio_net_pre_save,
3147     .dev_unplug_pending = dev_unplug_pending,
3148 };
3149 
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};

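/*
 * Usage sketch (illustrative values; "net0" and the sizes are placeholders):
 * every property above maps to a -device option, e.g.
 *
 *     -netdev tap,id=net0,queues=4
 *     -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=512,failover=on
 *
 * Queue sizes must respect the MIN/DEFAULT bounds defined near the top of
 * this file, and failover=on expects a primary device whose
 * failover_pair_id refers back to this NIC.
 */
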
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    /* VIRTIO_NET_F_GSO is only offered to legacy (pre-virtio-1) guests */
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    /* Device fields migrate via vdc->vmsd; dc->vmsd above is the wrapper */
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
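
/*
 * Instantiation sketch: a virtio transport embeds this device on its
 * virtio bus. virtio-net-pci, for instance, does roughly
 *
 *     virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
 *                                 TYPE_VIRTIO_NET);
 *
 * where "obj" and "dev" name the transport's own objects; see
 * hw/virtio/virtio-net-pci.c for the real call site.
 */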