xref: /openbmc/qemu/hw/net/virtio-net.c (revision f7160f32)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
/* NOTE(review): presumably the migration stream version of this device;
 * the consumer is outside this part of the file -- confirm. */
48 #define VIRTIO_NET_VM_VERSION    11
49 
/* Number of entries in the MAC filter table (n->mac_table.macs holds
 * MAC_TABLE_ENTRIES * ETH_ALEN bytes) and number of VLAN ids tracked in the
 * n->vlans bitmap. */
50 #define MAC_TABLE_ENTRIES    64
51 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
52 
53 /* previously fixed value */
54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
56 
57 /* for now, only allow larger queues; with virtio-1, guest can downsize */
58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
60 
61 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
62 
/* NOTE(review): presumably masks applied to the TCP header's flags/data-offset
 * word by the RSC code outside this view -- confirm. */
63 #define VIRTIO_NET_TCP_FLAG         0x3F
64 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
65 
66 /* IPv4 max payload, 16 bits in the header */
67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
69 
70 /* header length value in ip header without option */
71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
72 
73 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
75 
76 /* Interval of the timer that purges coalesced packets.  This value affects
77    performance a lot and should be tuned carefully: '300000' (300us) is the
78    recommended value to pass the WHQL test, while '50000' can gain 2x netperf
79    throughput with tso/gso/gro 'off'. */
80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
81 
/* Hash types advertised in the supported_hash_types config field. */
82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
83                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
/*
 * Pairs each feature bit (or group of bits) with the end offset of the last
 * struct virtio_net_config field that the feature exposes.  The table is
 * terminated by an empty entry.
 * NOTE(review): presumably used to derive n->config_size from the offered
 * features; the consumer is outside this part of the file -- confirm.
 */
92 static VirtIOFeature feature_sizes[] = {
93     {.flags = 1ULL << VIRTIO_NET_F_MAC,
94      .end = endof(struct virtio_net_config, mac)},
95     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
96      .end = endof(struct virtio_net_config, status)},
97     {.flags = 1ULL << VIRTIO_NET_F_MQ,
98      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
99     {.flags = 1ULL << VIRTIO_NET_F_MTU,
100      .end = endof(struct virtio_net_config, mtu)},
101     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
102      .end = endof(struct virtio_net_config, duplex)},
103     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
104      .end = endof(struct virtio_net_config, supported_hash_types)},
105     {}
106 };
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
/*
 * Convert a virtqueue index into the index of the queue pair it belongs to
 * (two virtqueues per pair).
 */
static int vq2q(int queue_index)
{
    const int vqs_per_pair = 2;

    return queue_index / vqs_per_pair;
}
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126     VirtIONet *n = VIRTIO_NET(vdev);
127     struct virtio_net_config netcfg;
128     NetClientState *nc = qemu_get_queue(n->nic);
129 
130     int ret = 0;
131     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
132     virtio_stw_p(vdev, &netcfg.status, n->status);
133     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
134     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
135     memcpy(netcfg.mac, n->mac, ETH_ALEN);
136     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
137     netcfg.duplex = n->net_conf.duplex;
138     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
139     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
140                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
141                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
142     virtio_stl_p(vdev, &netcfg.supported_hash_types,
143                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
144     memcpy(config, &netcfg, n->config_size);
145 
146     /*
147      * Is this VDPA? No peer means not VDPA: there's no way to
148      * disconnect/reconnect a VDPA peer.
149      */
150     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
151         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
152                                    n->config_size);
153         if (ret != -1) {
154             memcpy(config, &netcfg, n->config_size);
155         }
156     }
157 }
158 
159 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
160 {
161     VirtIONet *n = VIRTIO_NET(vdev);
162     struct virtio_net_config netcfg = {};
163     NetClientState *nc = qemu_get_queue(n->nic);
164 
165     memcpy(&netcfg, config, n->config_size);
166 
167     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
168         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
169         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
170         memcpy(n->mac, netcfg.mac, ETH_ALEN);
171         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
172     }
173 
174     /*
175      * Is this VDPA? No peer means not VDPA: there's no way to
176      * disconnect/reconnect a VDPA peer.
177      */
178     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
179         vhost_net_set_config(get_vhost_net(nc->peer),
180                              (uint8_t *)&netcfg, 0, n->config_size,
181                              VHOST_SET_CONFIG_TYPE_MASTER);
182       }
183 }
184 
185 static bool virtio_net_started(VirtIONet *n, uint8_t status)
186 {
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
189         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
190 }
191 
192 static void virtio_net_announce_notify(VirtIONet *net)
193 {
194     VirtIODevice *vdev = VIRTIO_DEVICE(net);
195     trace_virtio_net_announce_notify();
196 
197     net->status |= VIRTIO_NET_S_ANNOUNCE;
198     virtio_notify_config(vdev);
199 }
200 
201 static void virtio_net_announce_timer(void *opaque)
202 {
203     VirtIONet *n = opaque;
204     trace_virtio_net_announce_timer(n->announce_timer.round);
205 
206     n->announce_timer.round--;
207     virtio_net_announce_notify(n);
208 }
209 
210 static void virtio_net_announce(NetClientState *nc)
211 {
212     VirtIONet *n = qemu_get_nic_opaque(nc);
213     VirtIODevice *vdev = VIRTIO_DEVICE(n);
214 
215     /*
216      * Make sure the virtio migration announcement timer isn't running
217      * If it is, let it trigger announcement so that we do not cause
218      * confusion.
219      */
220     if (n->announce_timer.round) {
221         return;
222     }
223 
224     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
225         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
226             virtio_net_announce_notify(n);
227     }
228 }
229 
230 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
231 {
232     VirtIODevice *vdev = VIRTIO_DEVICE(n);
233     NetClientState *nc = qemu_get_queue(n->nic);
234     int queues = n->multiqueue ? n->max_queues : 1;
235 
236     if (!get_vhost_net(nc->peer)) {
237         return;
238     }
239 
240     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
241         !!n->vhost_started) {
242         return;
243     }
244     if (!n->vhost_started) {
245         int r, i;
246 
247         if (n->needs_vnet_hdr_swap) {
248             error_report("backend does not support %s vnet headers; "
249                          "falling back on userspace virtio",
250                          virtio_is_big_endian(vdev) ? "BE" : "LE");
251             return;
252         }
253 
254         /* Any packets outstanding? Purge them to avoid touching rings
255          * when vhost is running.
256          */
257         for (i = 0;  i < queues; i++) {
258             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
259 
260             /* Purge both directions: TX and RX. */
261             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
262             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
263         }
264 
265         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
266             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
267             if (r < 0) {
268                 error_report("%uBytes MTU not supported by the backend",
269                              n->net_conf.mtu);
270 
271                 return;
272             }
273         }
274 
275         n->vhost_started = 1;
276         r = vhost_net_start(vdev, n->nic->ncs, queues);
277         if (r < 0) {
278             error_report("unable to start vhost net: %d: "
279                          "falling back on userspace virtio", -r);
280             n->vhost_started = 0;
281         }
282     } else {
283         vhost_net_stop(vdev, n->nic->ncs, queues);
284         n->vhost_started = 0;
285     }
286 }
287 
288 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
289                                           NetClientState *peer,
290                                           bool enable)
291 {
292     if (virtio_is_big_endian(vdev)) {
293         return qemu_set_vnet_be(peer, enable);
294     } else {
295         return qemu_set_vnet_le(peer, enable);
296     }
297 }
298 
299 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
300                                        int queues, bool enable)
301 {
302     int i;
303 
304     for (i = 0; i < queues; i++) {
305         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
306             enable) {
307             while (--i >= 0) {
308                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
309             }
310 
311             return true;
312         }
313     }
314 
315     return false;
316 }
317 
318 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
319 {
320     VirtIODevice *vdev = VIRTIO_DEVICE(n);
321     int queues = n->multiqueue ? n->max_queues : 1;
322 
323     if (virtio_net_started(n, status)) {
324         /* Before using the device, we tell the network backend about the
325          * endianness to use when parsing vnet headers. If the backend
326          * can't do it, we fallback onto fixing the headers in the core
327          * virtio-net code.
328          */
329         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
330                                                             queues, true);
331     } else if (virtio_net_started(n, vdev->status)) {
332         /* After using the device, we need to reset the network backend to
333          * the default (guest native endianness), otherwise the guest may
334          * lose network connectivity if it is rebooted into a different
335          * endianness.
336          */
337         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
338     }
339 }
340 
341 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
342 {
343     unsigned int dropped = virtqueue_drop_all(vq);
344     if (dropped) {
345         virtio_notify(vdev, vq);
346     }
347 }
348 
349 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
350 {
351     VirtIONet *n = VIRTIO_NET(vdev);
352     VirtIONetQueue *q;
353     int i;
354     uint8_t queue_status;
355 
356     virtio_net_vnet_endian_status(n, status);
357     virtio_net_vhost_status(n, status);
358 
359     for (i = 0; i < n->max_queues; i++) {
360         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
361         bool queue_started;
362         q = &n->vqs[i];
363 
364         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
365             queue_status = 0;
366         } else {
367             queue_status = status;
368         }
369         queue_started =
370             virtio_net_started(n, queue_status) && !n->vhost_started;
371 
372         if (queue_started) {
373             qemu_flush_queued_packets(ncs);
374         }
375 
376         if (!q->tx_waiting) {
377             continue;
378         }
379 
380         if (queue_started) {
381             if (q->tx_timer) {
382                 timer_mod(q->tx_timer,
383                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
384             } else {
385                 qemu_bh_schedule(q->tx_bh);
386             }
387         } else {
388             if (q->tx_timer) {
389                 timer_del(q->tx_timer);
390             } else {
391                 qemu_bh_cancel(q->tx_bh);
392             }
393             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
394                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
395                 vdev->vm_running) {
396                 /* if tx is waiting we are likely have some packets in tx queue
397                  * and disabled notification */
398                 q->tx_waiting = 0;
399                 virtio_queue_set_notification(q->tx_vq, 1);
400                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
401             }
402         }
403     }
404 }
405 
406 static void virtio_net_set_link_status(NetClientState *nc)
407 {
408     VirtIONet *n = qemu_get_nic_opaque(nc);
409     VirtIODevice *vdev = VIRTIO_DEVICE(n);
410     uint16_t old_status = n->status;
411 
412     if (nc->link_down)
413         n->status &= ~VIRTIO_NET_S_LINK_UP;
414     else
415         n->status |= VIRTIO_NET_S_LINK_UP;
416 
417     if (n->status != old_status)
418         virtio_notify_config(vdev);
419 
420     virtio_net_set_status(vdev, vdev->status);
421 }
422 
423 static void rxfilter_notify(NetClientState *nc)
424 {
425     VirtIONet *n = qemu_get_nic_opaque(nc);
426 
427     if (nc->rxfilter_notify_enabled) {
428         char *path = object_get_canonical_path(OBJECT(n->qdev));
429         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
430                                               n->netclient_name, path);
431         g_free(path);
432 
433         /* disable event notification to avoid events flooding */
434         nc->rxfilter_notify_enabled = 0;
435     }
436 }
437 
438 static intList *get_vlan_table(VirtIONet *n)
439 {
440     intList *list, *entry;
441     int i, j;
442 
443     list = NULL;
444     for (i = 0; i < MAX_VLAN >> 5; i++) {
445         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
446             if (n->vlans[i] & (1U << j)) {
447                 entry = g_malloc0(sizeof(*entry));
448                 entry->value = (i << 5) + j;
449                 entry->next = list;
450                 list = entry;
451             }
452         }
453     }
454 
455     return list;
456 }
457 
458 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
459 {
460     VirtIONet *n = qemu_get_nic_opaque(nc);
461     VirtIODevice *vdev = VIRTIO_DEVICE(n);
462     RxFilterInfo *info;
463     strList *str_list, *entry;
464     int i;
465 
466     info = g_malloc0(sizeof(*info));
467     info->name = g_strdup(nc->name);
468     info->promiscuous = n->promisc;
469 
470     if (n->nouni) {
471         info->unicast = RX_STATE_NONE;
472     } else if (n->alluni) {
473         info->unicast = RX_STATE_ALL;
474     } else {
475         info->unicast = RX_STATE_NORMAL;
476     }
477 
478     if (n->nomulti) {
479         info->multicast = RX_STATE_NONE;
480     } else if (n->allmulti) {
481         info->multicast = RX_STATE_ALL;
482     } else {
483         info->multicast = RX_STATE_NORMAL;
484     }
485 
486     info->broadcast_allowed = n->nobcast;
487     info->multicast_overflow = n->mac_table.multi_overflow;
488     info->unicast_overflow = n->mac_table.uni_overflow;
489 
490     info->main_mac = qemu_mac_strdup_printf(n->mac);
491 
492     str_list = NULL;
493     for (i = 0; i < n->mac_table.first_multi; i++) {
494         entry = g_malloc0(sizeof(*entry));
495         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
496         entry->next = str_list;
497         str_list = entry;
498     }
499     info->unicast_table = str_list;
500 
501     str_list = NULL;
502     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
503         entry = g_malloc0(sizeof(*entry));
504         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
505         entry->next = str_list;
506         str_list = entry;
507     }
508     info->multicast_table = str_list;
509     info->vlan_table = get_vlan_table(n);
510 
511     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
512         info->vlan = RX_STATE_ALL;
513     } else if (!info->vlan_table) {
514         info->vlan = RX_STATE_NONE;
515     } else {
516         info->vlan = RX_STATE_NORMAL;
517     }
518 
519     /* enable event notification after query */
520     nc->rxfilter_notify_enabled = 1;
521 
522     return info;
523 }
524 
525 static void virtio_net_reset(VirtIODevice *vdev)
526 {
527     VirtIONet *n = VIRTIO_NET(vdev);
528     int i;
529 
530     /* Reset back to compatibility mode */
531     n->promisc = 1;
532     n->allmulti = 0;
533     n->alluni = 0;
534     n->nomulti = 0;
535     n->nouni = 0;
536     n->nobcast = 0;
537     /* multiqueue is disabled by default */
538     n->curr_queues = 1;
539     timer_del(n->announce_timer.tm);
540     n->announce_timer.round = 0;
541     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
542 
543     /* Flush any MAC and VLAN filter table state */
544     n->mac_table.in_use = 0;
545     n->mac_table.first_multi = 0;
546     n->mac_table.multi_overflow = 0;
547     n->mac_table.uni_overflow = 0;
548     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
549     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
550     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
551     memset(n->vlans, 0, MAX_VLAN >> 3);
552 
553     /* Flush any async TX */
554     for (i = 0;  i < n->max_queues; i++) {
555         NetClientState *nc = qemu_get_subqueue(n->nic, i);
556 
557         if (nc->peer) {
558             qemu_flush_or_purge_queued_packets(nc->peer, true);
559             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
560         }
561     }
562 }
563 
564 static void peer_test_vnet_hdr(VirtIONet *n)
565 {
566     NetClientState *nc = qemu_get_queue(n->nic);
567     if (!nc->peer) {
568         return;
569     }
570 
571     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
572 }
573 
574 static int peer_has_vnet_hdr(VirtIONet *n)
575 {
576     return n->has_vnet_hdr;
577 }
578 
579 static int peer_has_ufo(VirtIONet *n)
580 {
581     if (!peer_has_vnet_hdr(n))
582         return 0;
583 
584     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
585 
586     return n->has_ufo;
587 }
588 
589 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
590                                        int version_1, int hash_report)
591 {
592     int i;
593     NetClientState *nc;
594 
595     n->mergeable_rx_bufs = mergeable_rx_bufs;
596 
597     if (version_1) {
598         n->guest_hdr_len = hash_report ?
599             sizeof(struct virtio_net_hdr_v1_hash) :
600             sizeof(struct virtio_net_hdr_mrg_rxbuf);
601         n->rss_data.populate_hash = !!hash_report;
602     } else {
603         n->guest_hdr_len = n->mergeable_rx_bufs ?
604             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
605             sizeof(struct virtio_net_hdr);
606     }
607 
608     for (i = 0; i < n->max_queues; i++) {
609         nc = qemu_get_subqueue(n->nic, i);
610 
611         if (peer_has_vnet_hdr(n) &&
612             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
613             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
614             n->host_hdr_len = n->guest_hdr_len;
615         }
616     }
617 }
618 
619 static int virtio_net_max_tx_queue_size(VirtIONet *n)
620 {
621     NetClientState *peer = n->nic_conf.peers.ncs[0];
622 
623     /*
624      * Backends other than vhost-user don't support max queue size.
625      */
626     if (!peer) {
627         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
628     }
629 
630     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
631         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
632     }
633 
634     return VIRTQUEUE_MAX_SIZE;
635 }
636 
637 static int peer_attach(VirtIONet *n, int index)
638 {
639     NetClientState *nc = qemu_get_subqueue(n->nic, index);
640 
641     if (!nc->peer) {
642         return 0;
643     }
644 
645     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
646         vhost_set_vring_enable(nc->peer, 1);
647     }
648 
649     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
650         return 0;
651     }
652 
653     if (n->max_queues == 1) {
654         return 0;
655     }
656 
657     return tap_enable(nc->peer);
658 }
659 
660 static int peer_detach(VirtIONet *n, int index)
661 {
662     NetClientState *nc = qemu_get_subqueue(n->nic, index);
663 
664     if (!nc->peer) {
665         return 0;
666     }
667 
668     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
669         vhost_set_vring_enable(nc->peer, 0);
670     }
671 
672     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
673         return 0;
674     }
675 
676     return tap_disable(nc->peer);
677 }
678 
679 static void virtio_net_set_queues(VirtIONet *n)
680 {
681     int i;
682     int r;
683 
684     if (n->nic->peer_deleted) {
685         return;
686     }
687 
688     for (i = 0; i < n->max_queues; i++) {
689         if (i < n->curr_queues) {
690             r = peer_attach(n, i);
691             assert(!r);
692         } else {
693             r = peer_detach(n, i);
694             assert(!r);
695         }
696     }
697 }
698 
/*
 * Forward declaration: virtio_net_set_features() calls this function, and
 * its definition is not located before that caller in this part of the file.
 */
699 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
700 
701 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
702                                         Error **errp)
703 {
704     VirtIONet *n = VIRTIO_NET(vdev);
705     NetClientState *nc = qemu_get_queue(n->nic);
706 
707     /* Firstly sync all virtio-net possible supported features */
708     features |= n->host_features;
709 
710     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
711 
712     if (!peer_has_vnet_hdr(n)) {
713         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
714         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
715         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
716         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
717 
718         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
719         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
720         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
721         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
722 
723         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
724     }
725 
726     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
727         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
728         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
729     }
730 
731     if (!get_vhost_net(nc->peer)) {
732         return features;
733     }
734 
735     virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
736     virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
737     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
738     vdev->backend_features = features;
739 
740     if (n->mtu_bypass_backend &&
741             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
742         features |= (1ULL << VIRTIO_NET_F_MTU);
743     }
744 
745     return features;
746 }
747 
748 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
749 {
750     uint64_t features = 0;
751 
752     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
753      * but also these: */
754     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
755     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
756     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
757     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
758     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
759 
760     return features;
761 }
762 
763 static void virtio_net_apply_guest_offloads(VirtIONet *n)
764 {
765     qemu_set_offload(qemu_get_queue(n->nic)->peer,
766             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
767             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
768             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
769             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
770             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
771 }
772 
773 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
774 {
775     static const uint64_t guest_offloads_mask =
776         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
777         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
778         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
779         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
780         (1ULL << VIRTIO_NET_F_GUEST_UFO);
781 
782     return guest_offloads_mask & features;
783 }
784 
785 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
786 {
787     VirtIODevice *vdev = VIRTIO_DEVICE(n);
788     return virtio_net_guest_offloads_by_features(vdev->guest_features);
789 }
790 
791 static void failover_add_primary(VirtIONet *n, Error **errp)
792 {
793     Error *err = NULL;
794 
795     if (n->primary_dev) {
796         return;
797     }
798 
799     n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
800             n->primary_device_id);
801     if (n->primary_device_opts) {
802         n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
803         if (err) {
804             qemu_opts_del(n->primary_device_opts);
805         }
806         if (n->primary_dev) {
807             n->primary_bus = n->primary_dev->parent_bus;
808             if (err) {
809                 qdev_unplug(n->primary_dev, &err);
810                 qdev_set_id(n->primary_dev, "");
811 
812             }
813         }
814     } else {
815         error_setg(errp, "Primary device not found");
816         error_append_hint(errp, "Virtio-net failover will not work. Make "
817             "sure primary device has parameter"
818             " failover_pair_id=<virtio-net-id>\n");
819 }
820     if (err) {
821         error_propagate(errp, err);
822     }
823 }
824 
825 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
826 {
827     VirtIONet *n = opaque;
828     int ret = 0;
829 
830     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
831 
832     if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
833         n->primary_device_id = g_strdup(opts->id);
834         ret = 1;
835     }
836 
837     return ret;
838 }
839 
840 static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
841 {
842     DeviceState *dev = NULL;
843     Error *err = NULL;
844 
845     if (qemu_opts_foreach(qemu_find_opts("device"),
846                          is_my_primary, n, &err)) {
847         if (err) {
848             error_propagate(errp, err);
849             return NULL;
850         }
851         if (n->primary_device_id) {
852             dev = qdev_find_recursive(sysbus_get_default(),
853                     n->primary_device_id);
854         } else {
855             error_setg(errp, "Primary device id not found");
856             return NULL;
857         }
858     }
859     return dev;
860 }
861 
862 
863 
864 static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
865                                                     DeviceState *dev,
866                                                     Error **errp)
867 {
868     DeviceState *prim_dev = NULL;
869     Error *err = NULL;
870 
871     prim_dev = virtio_net_find_primary(n, &err);
872     if (prim_dev) {
873         n->primary_device_id = g_strdup(prim_dev->id);
874         n->primary_device_opts = prim_dev->opts;
875     } else {
876         if (err) {
877             error_propagate(errp, err);
878         }
879     }
880 
881     return prim_dev;
882 }
883 
884 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
885 {
886     VirtIONet *n = VIRTIO_NET(vdev);
887     Error *err = NULL;
888     int i;
889 
890     if (n->mtu_bypass_backend &&
891             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
892         features &= ~(1ULL << VIRTIO_NET_F_MTU);
893     }
894 
895     virtio_net_set_multiqueue(n,
896                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
897                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
898 
899     virtio_net_set_mrg_rx_bufs(n,
900                                virtio_has_feature(features,
901                                                   VIRTIO_NET_F_MRG_RXBUF),
902                                virtio_has_feature(features,
903                                                   VIRTIO_F_VERSION_1),
904                                virtio_has_feature(features,
905                                                   VIRTIO_NET_F_HASH_REPORT));
906 
907     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
908         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
909     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
910         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
911     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
912 
913     if (n->has_vnet_hdr) {
914         n->curr_guest_offloads =
915             virtio_net_guest_offloads_by_features(features);
916         virtio_net_apply_guest_offloads(n);
917     }
918 
919     for (i = 0;  i < n->max_queues; i++) {
920         NetClientState *nc = qemu_get_subqueue(n->nic, i);
921 
922         if (!get_vhost_net(nc->peer)) {
923             continue;
924         }
925         vhost_net_ack_features(get_vhost_net(nc->peer), features);
926     }
927 
928     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
929         memset(n->vlans, 0, MAX_VLAN >> 3);
930     } else {
931         memset(n->vlans, 0xff, MAX_VLAN >> 3);
932     }
933 
934     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
935         qapi_event_send_failover_negotiated(n->netclient_name);
936         atomic_set(&n->primary_should_be_hidden, false);
937         failover_add_primary(n, &err);
938         if (err) {
939             n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
940             if (err) {
941                 goto out_err;
942             }
943             failover_add_primary(n, &err);
944             if (err) {
945                 goto out_err;
946             }
947         }
948     }
949     return;
950 
951 out_err:
952     if (err) {
953         warn_report_err(err);
954     }
955 }
956 
957 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
958                                      struct iovec *iov, unsigned int iov_cnt)
959 {
960     uint8_t on;
961     size_t s;
962     NetClientState *nc = qemu_get_queue(n->nic);
963 
964     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
965     if (s != sizeof(on)) {
966         return VIRTIO_NET_ERR;
967     }
968 
969     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
970         n->promisc = on;
971     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
972         n->allmulti = on;
973     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
974         n->alluni = on;
975     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
976         n->nomulti = on;
977     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
978         n->nouni = on;
979     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
980         n->nobcast = on;
981     } else {
982         return VIRTIO_NET_ERR;
983     }
984 
985     rxfilter_notify(nc);
986 
987     return VIRTIO_NET_OK;
988 }
989 
990 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
991                                      struct iovec *iov, unsigned int iov_cnt)
992 {
993     VirtIODevice *vdev = VIRTIO_DEVICE(n);
994     uint64_t offloads;
995     size_t s;
996 
997     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
998         return VIRTIO_NET_ERR;
999     }
1000 
1001     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1002     if (s != sizeof(offloads)) {
1003         return VIRTIO_NET_ERR;
1004     }
1005 
1006     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1007         uint64_t supported_offloads;
1008 
1009         offloads = virtio_ldq_p(vdev, &offloads);
1010 
1011         if (!n->has_vnet_hdr) {
1012             return VIRTIO_NET_ERR;
1013         }
1014 
1015         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1016             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1017         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1018             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1019         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1020 
1021         supported_offloads = virtio_net_supported_guest_offloads(n);
1022         if (offloads & ~supported_offloads) {
1023             return VIRTIO_NET_ERR;
1024         }
1025 
1026         n->curr_guest_offloads = offloads;
1027         virtio_net_apply_guest_offloads(n);
1028 
1029         return VIRTIO_NET_OK;
1030     } else {
1031         return VIRTIO_NET_ERR;
1032     }
1033 }
1034 
1035 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1036                                  struct iovec *iov, unsigned int iov_cnt)
1037 {
1038     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1039     struct virtio_net_ctrl_mac mac_data;
1040     size_t s;
1041     NetClientState *nc = qemu_get_queue(n->nic);
1042 
1043     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1044         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1045             return VIRTIO_NET_ERR;
1046         }
1047         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1048         assert(s == sizeof(n->mac));
1049         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1050         rxfilter_notify(nc);
1051 
1052         return VIRTIO_NET_OK;
1053     }
1054 
1055     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1056         return VIRTIO_NET_ERR;
1057     }
1058 
1059     int in_use = 0;
1060     int first_multi = 0;
1061     uint8_t uni_overflow = 0;
1062     uint8_t multi_overflow = 0;
1063     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1064 
1065     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1066                    sizeof(mac_data.entries));
1067     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1068     if (s != sizeof(mac_data.entries)) {
1069         goto error;
1070     }
1071     iov_discard_front(&iov, &iov_cnt, s);
1072 
1073     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1074         goto error;
1075     }
1076 
1077     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1078         s = iov_to_buf(iov, iov_cnt, 0, macs,
1079                        mac_data.entries * ETH_ALEN);
1080         if (s != mac_data.entries * ETH_ALEN) {
1081             goto error;
1082         }
1083         in_use += mac_data.entries;
1084     } else {
1085         uni_overflow = 1;
1086     }
1087 
1088     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1089 
1090     first_multi = in_use;
1091 
1092     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1093                    sizeof(mac_data.entries));
1094     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1095     if (s != sizeof(mac_data.entries)) {
1096         goto error;
1097     }
1098 
1099     iov_discard_front(&iov, &iov_cnt, s);
1100 
1101     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1102         goto error;
1103     }
1104 
1105     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1106         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1107                        mac_data.entries * ETH_ALEN);
1108         if (s != mac_data.entries * ETH_ALEN) {
1109             goto error;
1110         }
1111         in_use += mac_data.entries;
1112     } else {
1113         multi_overflow = 1;
1114     }
1115 
1116     n->mac_table.in_use = in_use;
1117     n->mac_table.first_multi = first_multi;
1118     n->mac_table.uni_overflow = uni_overflow;
1119     n->mac_table.multi_overflow = multi_overflow;
1120     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1121     g_free(macs);
1122     rxfilter_notify(nc);
1123 
1124     return VIRTIO_NET_OK;
1125 
1126 error:
1127     g_free(macs);
1128     return VIRTIO_NET_ERR;
1129 }
1130 
1131 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1132                                         struct iovec *iov, unsigned int iov_cnt)
1133 {
1134     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1135     uint16_t vid;
1136     size_t s;
1137     NetClientState *nc = qemu_get_queue(n->nic);
1138 
1139     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1140     vid = virtio_lduw_p(vdev, &vid);
1141     if (s != sizeof(vid)) {
1142         return VIRTIO_NET_ERR;
1143     }
1144 
1145     if (vid >= MAX_VLAN)
1146         return VIRTIO_NET_ERR;
1147 
1148     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1149         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1150     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1151         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1152     else
1153         return VIRTIO_NET_ERR;
1154 
1155     rxfilter_notify(nc);
1156 
1157     return VIRTIO_NET_OK;
1158 }
1159 
1160 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1161                                       struct iovec *iov, unsigned int iov_cnt)
1162 {
1163     trace_virtio_net_handle_announce(n->announce_timer.round);
1164     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1165         n->status & VIRTIO_NET_S_ANNOUNCE) {
1166         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1167         if (n->announce_timer.round) {
1168             qemu_announce_timer_step(&n->announce_timer);
1169         }
1170         return VIRTIO_NET_OK;
1171     } else {
1172         return VIRTIO_NET_ERR;
1173     }
1174 }
1175 
1176 static void virtio_net_disable_rss(VirtIONet *n)
1177 {
1178     if (n->rss_data.enabled) {
1179         trace_virtio_net_rss_disable();
1180     }
1181     n->rss_data.enabled = false;
1182 }
1183 
1184 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1185                                       struct iovec *iov,
1186                                       unsigned int iov_cnt,
1187                                       bool do_rss)
1188 {
1189     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1190     struct virtio_net_rss_config cfg;
1191     size_t s, offset = 0, size_get;
1192     uint16_t queues, i;
1193     struct {
1194         uint16_t us;
1195         uint8_t b;
1196     } QEMU_PACKED temp;
1197     const char *err_msg = "";
1198     uint32_t err_value = 0;
1199 
1200     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1201         err_msg = "RSS is not negotiated";
1202         goto error;
1203     }
1204     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1205         err_msg = "Hash report is not negotiated";
1206         goto error;
1207     }
1208     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1209     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1210     if (s != size_get) {
1211         err_msg = "Short command buffer";
1212         err_value = (uint32_t)s;
1213         goto error;
1214     }
1215     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1216     n->rss_data.indirections_len =
1217         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1218     n->rss_data.indirections_len++;
1219     if (!do_rss) {
1220         n->rss_data.indirections_len = 1;
1221     }
1222     if (!is_power_of_2(n->rss_data.indirections_len)) {
1223         err_msg = "Invalid size of indirection table";
1224         err_value = n->rss_data.indirections_len;
1225         goto error;
1226     }
1227     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1228         err_msg = "Too large indirection table";
1229         err_value = n->rss_data.indirections_len;
1230         goto error;
1231     }
1232     n->rss_data.default_queue = do_rss ?
1233         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1234     if (n->rss_data.default_queue >= n->max_queues) {
1235         err_msg = "Invalid default queue";
1236         err_value = n->rss_data.default_queue;
1237         goto error;
1238     }
1239     offset += size_get;
1240     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1241     g_free(n->rss_data.indirections_table);
1242     n->rss_data.indirections_table = g_malloc(size_get);
1243     if (!n->rss_data.indirections_table) {
1244         err_msg = "Can't allocate indirections table";
1245         err_value = n->rss_data.indirections_len;
1246         goto error;
1247     }
1248     s = iov_to_buf(iov, iov_cnt, offset,
1249                    n->rss_data.indirections_table, size_get);
1250     if (s != size_get) {
1251         err_msg = "Short indirection table buffer";
1252         err_value = (uint32_t)s;
1253         goto error;
1254     }
1255     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1256         uint16_t val = n->rss_data.indirections_table[i];
1257         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1258     }
1259     offset += size_get;
1260     size_get = sizeof(temp);
1261     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1262     if (s != size_get) {
1263         err_msg = "Can't get queues";
1264         err_value = (uint32_t)s;
1265         goto error;
1266     }
1267     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1268     if (queues == 0 || queues > n->max_queues) {
1269         err_msg = "Invalid number of queues";
1270         err_value = queues;
1271         goto error;
1272     }
1273     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1274         err_msg = "Invalid key size";
1275         err_value = temp.b;
1276         goto error;
1277     }
1278     if (!temp.b && n->rss_data.hash_types) {
1279         err_msg = "No key provided";
1280         err_value = 0;
1281         goto error;
1282     }
1283     if (!temp.b && !n->rss_data.hash_types) {
1284         virtio_net_disable_rss(n);
1285         return queues;
1286     }
1287     offset += size_get;
1288     size_get = temp.b;
1289     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1290     if (s != size_get) {
1291         err_msg = "Can get key buffer";
1292         err_value = (uint32_t)s;
1293         goto error;
1294     }
1295     n->rss_data.enabled = true;
1296     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1297                                 n->rss_data.indirections_len,
1298                                 temp.b);
1299     return queues;
1300 error:
1301     trace_virtio_net_rss_error(err_msg, err_value);
1302     virtio_net_disable_rss(n);
1303     return 0;
1304 }
1305 
1306 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1307                                 struct iovec *iov, unsigned int iov_cnt)
1308 {
1309     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1310     uint16_t queues;
1311 
1312     virtio_net_disable_rss(n);
1313     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1314         queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1315         return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1316     }
1317     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1318         queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1319     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1320         struct virtio_net_ctrl_mq mq;
1321         size_t s;
1322         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1323             return VIRTIO_NET_ERR;
1324         }
1325         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1326         if (s != sizeof(mq)) {
1327             return VIRTIO_NET_ERR;
1328         }
1329         queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1330 
1331     } else {
1332         return VIRTIO_NET_ERR;
1333     }
1334 
1335     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1336         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1337         queues > n->max_queues ||
1338         !n->multiqueue) {
1339         return VIRTIO_NET_ERR;
1340     }
1341 
1342     n->curr_queues = queues;
1343     /* stop the backend before changing the number of queues to avoid handling a
1344      * disabled queue */
1345     virtio_net_set_status(vdev, vdev->status);
1346     virtio_net_set_queues(n);
1347 
1348     return VIRTIO_NET_OK;
1349 }
1350 
1351 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1352 {
1353     VirtIONet *n = VIRTIO_NET(vdev);
1354     struct virtio_net_ctrl_hdr ctrl;
1355     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1356     VirtQueueElement *elem;
1357     size_t s;
1358     struct iovec *iov, *iov2;
1359     unsigned int iov_cnt;
1360 
1361     for (;;) {
1362         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1363         if (!elem) {
1364             break;
1365         }
1366         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1367             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1368             virtio_error(vdev, "virtio-net ctrl missing headers");
1369             virtqueue_detach_element(vq, elem, 0);
1370             g_free(elem);
1371             break;
1372         }
1373 
1374         iov_cnt = elem->out_num;
1375         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1376         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1377         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1378         if (s != sizeof(ctrl)) {
1379             status = VIRTIO_NET_ERR;
1380         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1381             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1382         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1383             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1384         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1385             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1386         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1387             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1388         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1389             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1390         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1391             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1392         }
1393 
1394         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1395         assert(s == sizeof(status));
1396 
1397         virtqueue_push(vq, elem, sizeof(status));
1398         virtio_notify(vdev, vq);
1399         g_free(iov2);
1400         g_free(elem);
1401     }
1402 }
1403 
1404 /* RX */
1405 
1406 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1407 {
1408     VirtIONet *n = VIRTIO_NET(vdev);
1409     int queue_index = vq2q(virtio_get_queue_index(vq));
1410 
1411     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1412 }
1413 
1414 static bool virtio_net_can_receive(NetClientState *nc)
1415 {
1416     VirtIONet *n = qemu_get_nic_opaque(nc);
1417     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1418     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1419 
1420     if (!vdev->vm_running) {
1421         return false;
1422     }
1423 
1424     if (nc->queue_index >= n->curr_queues) {
1425         return false;
1426     }
1427 
1428     if (!virtio_queue_ready(q->rx_vq) ||
1429         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1430         return false;
1431     }
1432 
1433     return true;
1434 }
1435 
1436 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1437 {
1438     VirtIONet *n = q->n;
1439     if (virtio_queue_empty(q->rx_vq) ||
1440         (n->mergeable_rx_bufs &&
1441          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1442         virtio_queue_set_notification(q->rx_vq, 1);
1443 
1444         /* To avoid a race condition where the guest has made some buffers
1445          * available after the above check but before notification was
1446          * enabled, check for available buffers again.
1447          */
1448         if (virtio_queue_empty(q->rx_vq) ||
1449             (n->mergeable_rx_bufs &&
1450              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1451             return 0;
1452         }
1453     }
1454 
1455     virtio_queue_set_notification(q->rx_vq, 0);
1456     return 1;
1457 }
1458 
1459 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1460 {
1461     virtio_tswap16s(vdev, &hdr->hdr_len);
1462     virtio_tswap16s(vdev, &hdr->gso_size);
1463     virtio_tswap16s(vdev, &hdr->csum_start);
1464     virtio_tswap16s(vdev, &hdr->csum_offset);
1465 }
1466 
1467 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1468  * it never finds out that the packets don't have valid checksums.  This
1469  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1470  * fix this with Xen but it hasn't appeared in an upstream release of
1471  * dhclient yet.
1472  *
1473  * To avoid breaking existing guests, we catch udp packets and add
1474  * checksums.  This is terrible but it's better than hacking the guest
1475  * kernels.
1476  *
1477  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1478  * we should provide a mechanism to disable it to avoid polluting the host
1479  * cache.
1480  */
1481 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1482                                         uint8_t *buf, size_t size)
1483 {
1484     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1485         (size > 27 && size < 1500) && /* normal sized MTU */
1486         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1487         (buf[23] == 17) && /* ip.protocol == UDP */
1488         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1489         net_checksum_calculate(buf, size);
1490         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1491     }
1492 }
1493 
1494 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1495                            const void *buf, size_t size)
1496 {
1497     if (n->has_vnet_hdr) {
1498         /* FIXME this cast is evil */
1499         void *wbuf = (void *)buf;
1500         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1501                                     size - n->host_hdr_len);
1502 
1503         if (n->needs_vnet_hdr_swap) {
1504             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1505         }
1506         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1507     } else {
1508         struct virtio_net_hdr hdr = {
1509             .flags = 0,
1510             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1511         };
1512         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1513     }
1514 }
1515 
1516 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1517 {
1518     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1519     static const uint8_t vlan[] = {0x81, 0x00};
1520     uint8_t *ptr = (uint8_t *)buf;
1521     int i;
1522 
1523     if (n->promisc)
1524         return 1;
1525 
1526     ptr += n->host_hdr_len;
1527 
1528     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1529         int vid = lduw_be_p(ptr + 14) & 0xfff;
1530         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1531             return 0;
1532     }
1533 
1534     if (ptr[0] & 1) { // multicast
1535         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1536             return !n->nobcast;
1537         } else if (n->nomulti) {
1538             return 0;
1539         } else if (n->allmulti || n->mac_table.multi_overflow) {
1540             return 1;
1541         }
1542 
1543         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1544             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1545                 return 1;
1546             }
1547         }
1548     } else { // unicast
1549         if (n->nouni) {
1550             return 0;
1551         } else if (n->alluni || n->mac_table.uni_overflow) {
1552             return 1;
1553         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1554             return 1;
1555         }
1556 
1557         for (i = 0; i < n->mac_table.first_multi; i++) {
1558             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1559                 return 1;
1560             }
1561         }
1562     }
1563 
1564     return 0;
1565 }
1566 
1567 static uint8_t virtio_net_get_hash_type(bool isip4,
1568                                         bool isip6,
1569                                         bool isudp,
1570                                         bool istcp,
1571                                         uint32_t types)
1572 {
1573     if (isip4) {
1574         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1575             return NetPktRssIpV4Tcp;
1576         }
1577         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1578             return NetPktRssIpV4Udp;
1579         }
1580         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1581             return NetPktRssIpV4;
1582         }
1583     } else if (isip6) {
1584         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1585                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1586 
1587         if (istcp && (types & mask)) {
1588             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1589                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1590         }
1591         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1592         if (isudp && (types & mask)) {
1593             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1594                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1595         }
1596         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1597         if (types & mask) {
1598             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1599                 NetPktRssIpV6Ex : NetPktRssIpV6;
1600         }
1601     }
1602     return 0xff;
1603 }
1604 
1605 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1606                                    uint32_t hash)
1607 {
1608     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1609     hdr->hash_value = hash;
1610     hdr->hash_report = report;
1611 }
1612 
1613 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1614                                   size_t size)
1615 {
1616     VirtIONet *n = qemu_get_nic_opaque(nc);
1617     unsigned int index = nc->queue_index, new_index = index;
1618     struct NetRxPkt *pkt = n->rx_pkt;
1619     uint8_t net_hash_type;
1620     uint32_t hash;
1621     bool isip4, isip6, isudp, istcp;
1622     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1623         VIRTIO_NET_HASH_REPORT_IPv4,
1624         VIRTIO_NET_HASH_REPORT_TCPv4,
1625         VIRTIO_NET_HASH_REPORT_TCPv6,
1626         VIRTIO_NET_HASH_REPORT_IPv6,
1627         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1628         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1629         VIRTIO_NET_HASH_REPORT_UDPv4,
1630         VIRTIO_NET_HASH_REPORT_UDPv6,
1631         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1632     };
1633 
1634     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1635                              size - n->host_hdr_len);
1636     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1637     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1638         istcp = isudp = false;
1639     }
1640     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1641         istcp = isudp = false;
1642     }
1643     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1644                                              n->rss_data.hash_types);
1645     if (net_hash_type > NetPktRssIpV6UdpEx) {
1646         if (n->rss_data.populate_hash) {
1647             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1648         }
1649         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1650     }
1651 
1652     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1653 
1654     if (n->rss_data.populate_hash) {
1655         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1656     }
1657 
1658     if (n->rss_data.redirect) {
1659         new_index = hash & (n->rss_data.indirections_len - 1);
1660         new_index = n->rss_data.indirections_table[new_index];
1661     }
1662 
1663     return (index == new_index) ? -1 : new_index;
1664 }
1665 
1666 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1667                                       size_t size, bool no_rss)
1668 {
1669     VirtIONet *n = qemu_get_nic_opaque(nc);
1670     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1671     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1672     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1673     struct virtio_net_hdr_mrg_rxbuf mhdr;
1674     unsigned mhdr_cnt = 0;
1675     size_t offset, i, guest_offset;
1676 
1677     if (!virtio_net_can_receive(nc)) {
1678         return -1;
1679     }
1680 
1681     if (!no_rss && n->rss_data.enabled) {
1682         int index = virtio_net_process_rss(nc, buf, size);
1683         if (index >= 0) {
1684             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1685             return virtio_net_receive_rcu(nc2, buf, size, true);
1686         }
1687     }
1688 
1689     /* hdr_len refers to the header we supply to the guest */
1690     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1691         return 0;
1692     }
1693 
1694     if (!receive_filter(n, buf, size))
1695         return size;
1696 
1697     offset = i = 0;
1698 
1699     while (offset < size) {
1700         VirtQueueElement *elem;
1701         int len, total;
1702         const struct iovec *sg;
1703 
1704         total = 0;
1705 
1706         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1707         if (!elem) {
1708             if (i) {
1709                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1710                              "i %zd mergeable %d offset %zd, size %zd, "
1711                              "guest hdr len %zd, host hdr len %zd "
1712                              "guest features 0x%" PRIx64,
1713                              i, n->mergeable_rx_bufs, offset, size,
1714                              n->guest_hdr_len, n->host_hdr_len,
1715                              vdev->guest_features);
1716             }
1717             return -1;
1718         }
1719 
1720         if (elem->in_num < 1) {
1721             virtio_error(vdev,
1722                          "virtio-net receive queue contains no in buffers");
1723             virtqueue_detach_element(q->rx_vq, elem, 0);
1724             g_free(elem);
1725             return -1;
1726         }
1727 
1728         sg = elem->in_sg;
1729         if (i == 0) {
1730             assert(offset == 0);
1731             if (n->mergeable_rx_bufs) {
1732                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1733                                     sg, elem->in_num,
1734                                     offsetof(typeof(mhdr), num_buffers),
1735                                     sizeof(mhdr.num_buffers));
1736             }
1737 
1738             receive_header(n, sg, elem->in_num, buf, size);
1739             if (n->rss_data.populate_hash) {
1740                 offset = sizeof(mhdr);
1741                 iov_from_buf(sg, elem->in_num, offset,
1742                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1743             }
1744             offset = n->host_hdr_len;
1745             total += n->guest_hdr_len;
1746             guest_offset = n->guest_hdr_len;
1747         } else {
1748             guest_offset = 0;
1749         }
1750 
1751         /* copy in packet.  ugh */
1752         len = iov_from_buf(sg, elem->in_num, guest_offset,
1753                            buf + offset, size - offset);
1754         total += len;
1755         offset += len;
1756         /* If buffers can't be merged, at this point we
1757          * must have consumed the complete packet.
1758          * Otherwise, drop it. */
1759         if (!n->mergeable_rx_bufs && offset < size) {
1760             virtqueue_unpop(q->rx_vq, elem, total);
1761             g_free(elem);
1762             return size;
1763         }
1764 
1765         /* signal other side */
1766         virtqueue_fill(q->rx_vq, elem, total, i++);
1767         g_free(elem);
1768     }
1769 
1770     if (mhdr_cnt) {
1771         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1772         iov_from_buf(mhdr_sg, mhdr_cnt,
1773                      0,
1774                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1775     }
1776 
1777     virtqueue_flush(q->rx_vq, i);
1778     virtio_notify(vdev, q->rx_vq);
1779 
1780     return size;
1781 }
1782 
1783 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1784                                   size_t size)
1785 {
1786     RCU_READ_LOCK_GUARD();
1787 
1788     return virtio_net_receive_rcu(nc, buf, size, false);
1789 }
1790 
1791 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1792                                          const uint8_t *buf,
1793                                          VirtioNetRscUnit *unit)
1794 {
1795     uint16_t ip_hdrlen;
1796     struct ip_header *ip;
1797 
1798     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1799                               + sizeof(struct eth_header));
1800     unit->ip = (void *)ip;
1801     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1802     unit->ip_plen = &ip->ip_len;
1803     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1804     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1805     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1806 }
1807 
1808 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1809                                          const uint8_t *buf,
1810                                          VirtioNetRscUnit *unit)
1811 {
1812     struct ip6_header *ip6;
1813 
1814     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1815                                  + sizeof(struct eth_header));
1816     unit->ip = ip6;
1817     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1818     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1819                                         + sizeof(struct ip6_header));
1820     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1821 
1822     /* There is a difference between payload lenght in ipv4 and v6,
1823        ip header is excluded in ipv6 */
1824     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1825 }
1826 
1827 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1828                                        VirtioNetRscSeg *seg)
1829 {
1830     int ret;
1831     struct virtio_net_hdr_v1 *h;
1832 
1833     h = (struct virtio_net_hdr_v1 *)seg->buf;
1834     h->flags = 0;
1835     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1836 
1837     if (seg->is_coalesced) {
1838         h->rsc.segments = seg->packets;
1839         h->rsc.dup_acks = seg->dup_ack;
1840         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1841         if (chain->proto == ETH_P_IP) {
1842             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1843         } else {
1844             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1845         }
1846     }
1847 
1848     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1849     QTAILQ_REMOVE(&chain->buffers, seg, next);
1850     g_free(seg->buf);
1851     g_free(seg);
1852 
1853     return ret;
1854 }
1855 
1856 static void virtio_net_rsc_purge(void *opq)
1857 {
1858     VirtioNetRscSeg *seg, *rn;
1859     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1860 
1861     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1862         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1863             chain->stat.purge_failed++;
1864             continue;
1865         }
1866     }
1867 
1868     chain->stat.timer++;
1869     if (!QTAILQ_EMPTY(&chain->buffers)) {
1870         timer_mod(chain->drain_timer,
1871               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1872     }
1873 }
1874 
1875 static void virtio_net_rsc_cleanup(VirtIONet *n)
1876 {
1877     VirtioNetRscChain *chain, *rn_chain;
1878     VirtioNetRscSeg *seg, *rn_seg;
1879 
1880     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1881         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1882             QTAILQ_REMOVE(&chain->buffers, seg, next);
1883             g_free(seg->buf);
1884             g_free(seg);
1885         }
1886 
1887         timer_del(chain->drain_timer);
1888         timer_free(chain->drain_timer);
1889         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1890         g_free(chain);
1891     }
1892 }
1893 
1894 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1895                                      NetClientState *nc,
1896                                      const uint8_t *buf, size_t size)
1897 {
1898     uint16_t hdr_len;
1899     VirtioNetRscSeg *seg;
1900 
1901     hdr_len = chain->n->guest_hdr_len;
1902     seg = g_malloc(sizeof(VirtioNetRscSeg));
1903     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1904         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1905     memcpy(seg->buf, buf, size);
1906     seg->size = size;
1907     seg->packets = 1;
1908     seg->dup_ack = 0;
1909     seg->is_coalesced = 0;
1910     seg->nc = nc;
1911 
1912     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1913     chain->stat.cache++;
1914 
1915     switch (chain->proto) {
1916     case ETH_P_IP:
1917         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1918         break;
1919     case ETH_P_IPV6:
1920         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1921         break;
1922     default:
1923         g_assert_not_reached();
1924     }
1925 }
1926 
1927 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1928                                          VirtioNetRscSeg *seg,
1929                                          const uint8_t *buf,
1930                                          struct tcp_header *n_tcp,
1931                                          struct tcp_header *o_tcp)
1932 {
1933     uint32_t nack, oack;
1934     uint16_t nwin, owin;
1935 
1936     nack = htonl(n_tcp->th_ack);
1937     nwin = htons(n_tcp->th_win);
1938     oack = htonl(o_tcp->th_ack);
1939     owin = htons(o_tcp->th_win);
1940 
1941     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1942         chain->stat.ack_out_of_win++;
1943         return RSC_FINAL;
1944     } else if (nack == oack) {
1945         /* duplicated ack or window probe */
1946         if (nwin == owin) {
1947             /* duplicated ack, add dup ack count due to whql test up to 1 */
1948             chain->stat.dup_ack++;
1949             return RSC_FINAL;
1950         } else {
1951             /* Coalesce window update */
1952             o_tcp->th_win = n_tcp->th_win;
1953             chain->stat.win_update++;
1954             return RSC_COALESCE;
1955         }
1956     } else {
1957         /* pure ack, go to 'C', finalize*/
1958         chain->stat.pure_ack++;
1959         return RSC_FINAL;
1960     }
1961 }
1962 
1963 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1964                                             VirtioNetRscSeg *seg,
1965                                             const uint8_t *buf,
1966                                             VirtioNetRscUnit *n_unit)
1967 {
1968     void *data;
1969     uint16_t o_ip_len;
1970     uint32_t nseq, oseq;
1971     VirtioNetRscUnit *o_unit;
1972 
1973     o_unit = &seg->unit;
1974     o_ip_len = htons(*o_unit->ip_plen);
1975     nseq = htonl(n_unit->tcp->th_seq);
1976     oseq = htonl(o_unit->tcp->th_seq);
1977 
1978     /* out of order or retransmitted. */
1979     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1980         chain->stat.data_out_of_win++;
1981         return RSC_FINAL;
1982     }
1983 
1984     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1985     if (nseq == oseq) {
1986         if ((o_unit->payload == 0) && n_unit->payload) {
1987             /* From no payload to payload, normal case, not a dup ack or etc */
1988             chain->stat.data_after_pure_ack++;
1989             goto coalesce;
1990         } else {
1991             return virtio_net_rsc_handle_ack(chain, seg, buf,
1992                                              n_unit->tcp, o_unit->tcp);
1993         }
1994     } else if ((nseq - oseq) != o_unit->payload) {
1995         /* Not a consistent packet, out of order */
1996         chain->stat.data_out_of_order++;
1997         return RSC_FINAL;
1998     } else {
1999 coalesce:
2000         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2001             chain->stat.over_size++;
2002             return RSC_FINAL;
2003         }
2004 
2005         /* Here comes the right data, the payload length in v4/v6 is different,
2006            so use the field value to update and record the new data len */
2007         o_unit->payload += n_unit->payload; /* update new data len */
2008 
2009         /* update field in ip header */
2010         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2011 
2012         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2013            for windows guest, while this may change the behavior for linux
2014            guest (only if it uses RSC feature). */
2015         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2016 
2017         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2018         o_unit->tcp->th_win = n_unit->tcp->th_win;
2019 
2020         memmove(seg->buf + seg->size, data, n_unit->payload);
2021         seg->size += n_unit->payload;
2022         seg->packets++;
2023         chain->stat.coalesced++;
2024         return RSC_COALESCE;
2025     }
2026 }
2027 
2028 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2029                                         VirtioNetRscSeg *seg,
2030                                         const uint8_t *buf, size_t size,
2031                                         VirtioNetRscUnit *unit)
2032 {
2033     struct ip_header *ip1, *ip2;
2034 
2035     ip1 = (struct ip_header *)(unit->ip);
2036     ip2 = (struct ip_header *)(seg->unit.ip);
2037     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2038         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2039         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2040         chain->stat.no_match++;
2041         return RSC_NO_MATCH;
2042     }
2043 
2044     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2045 }
2046 
2047 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2048                                         VirtioNetRscSeg *seg,
2049                                         const uint8_t *buf, size_t size,
2050                                         VirtioNetRscUnit *unit)
2051 {
2052     struct ip6_header *ip1, *ip2;
2053 
2054     ip1 = (struct ip6_header *)(unit->ip);
2055     ip2 = (struct ip6_header *)(seg->unit.ip);
2056     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2057         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2058         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2059         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2060             chain->stat.no_match++;
2061             return RSC_NO_MATCH;
2062     }
2063 
2064     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2065 }
2066 
2067 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2068  * to prevent out of order */
2069 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2070                                          struct tcp_header *tcp)
2071 {
2072     uint16_t tcp_hdr;
2073     uint16_t tcp_flag;
2074 
2075     tcp_flag = htons(tcp->th_offset_flags);
2076     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2077     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2078     tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
2079     if (tcp_flag & TH_SYN) {
2080         chain->stat.tcp_syn++;
2081         return RSC_BYPASS;
2082     }
2083 
2084     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2085         chain->stat.tcp_ctrl_drain++;
2086         return RSC_FINAL;
2087     }
2088 
2089     if (tcp_hdr > sizeof(struct tcp_header)) {
2090         chain->stat.tcp_all_opt++;
2091         return RSC_FINAL;
2092     }
2093 
2094     return RSC_CANDIDATE;
2095 }
2096 
2097 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2098                                          NetClientState *nc,
2099                                          const uint8_t *buf, size_t size,
2100                                          VirtioNetRscUnit *unit)
2101 {
2102     int ret;
2103     VirtioNetRscSeg *seg, *nseg;
2104 
2105     if (QTAILQ_EMPTY(&chain->buffers)) {
2106         chain->stat.empty_cache++;
2107         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2108         timer_mod(chain->drain_timer,
2109               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2110         return size;
2111     }
2112 
2113     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2114         if (chain->proto == ETH_P_IP) {
2115             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2116         } else {
2117             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2118         }
2119 
2120         if (ret == RSC_FINAL) {
2121             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2122                 /* Send failed */
2123                 chain->stat.final_failed++;
2124                 return 0;
2125             }
2126 
2127             /* Send current packet */
2128             return virtio_net_do_receive(nc, buf, size);
2129         } else if (ret == RSC_NO_MATCH) {
2130             continue;
2131         } else {
2132             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2133             seg->is_coalesced = 1;
2134             return size;
2135         }
2136     }
2137 
2138     chain->stat.no_match_cache++;
2139     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2140     return size;
2141 }
2142 
2143 /* Drain a connection data, this is to avoid out of order segments */
2144 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2145                                         NetClientState *nc,
2146                                         const uint8_t *buf, size_t size,
2147                                         uint16_t ip_start, uint16_t ip_size,
2148                                         uint16_t tcp_port)
2149 {
2150     VirtioNetRscSeg *seg, *nseg;
2151     uint32_t ppair1, ppair2;
2152 
2153     ppair1 = *(uint32_t *)(buf + tcp_port);
2154     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2155         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2156         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2157             || (ppair1 != ppair2)) {
2158             continue;
2159         }
2160         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2161             chain->stat.drain_failed++;
2162         }
2163 
2164         break;
2165     }
2166 
2167     return virtio_net_do_receive(nc, buf, size);
2168 }
2169 
2170 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2171                                             struct ip_header *ip,
2172                                             const uint8_t *buf, size_t size)
2173 {
2174     uint16_t ip_len;
2175 
2176     /* Not an ipv4 packet */
2177     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2178         chain->stat.ip_option++;
2179         return RSC_BYPASS;
2180     }
2181 
2182     /* Don't handle packets with ip option */
2183     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2184         chain->stat.ip_option++;
2185         return RSC_BYPASS;
2186     }
2187 
2188     if (ip->ip_p != IPPROTO_TCP) {
2189         chain->stat.bypass_not_tcp++;
2190         return RSC_BYPASS;
2191     }
2192 
2193     /* Don't handle packets with ip fragment */
2194     if (!(htons(ip->ip_off) & IP_DF)) {
2195         chain->stat.ip_frag++;
2196         return RSC_BYPASS;
2197     }
2198 
2199     /* Don't handle packets with ecn flag */
2200     if (IPTOS_ECN(ip->ip_tos)) {
2201         chain->stat.ip_ecn++;
2202         return RSC_BYPASS;
2203     }
2204 
2205     ip_len = htons(ip->ip_len);
2206     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2207         || ip_len > (size - chain->n->guest_hdr_len -
2208                      sizeof(struct eth_header))) {
2209         chain->stat.ip_hacked++;
2210         return RSC_BYPASS;
2211     }
2212 
2213     return RSC_CANDIDATE;
2214 }
2215 
2216 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2217                                       NetClientState *nc,
2218                                       const uint8_t *buf, size_t size)
2219 {
2220     int32_t ret;
2221     uint16_t hdr_len;
2222     VirtioNetRscUnit unit;
2223 
2224     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2225 
2226     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2227         + sizeof(struct tcp_header))) {
2228         chain->stat.bypass_not_tcp++;
2229         return virtio_net_do_receive(nc, buf, size);
2230     }
2231 
2232     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2233     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2234         != RSC_CANDIDATE) {
2235         return virtio_net_do_receive(nc, buf, size);
2236     }
2237 
2238     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2239     if (ret == RSC_BYPASS) {
2240         return virtio_net_do_receive(nc, buf, size);
2241     } else if (ret == RSC_FINAL) {
2242         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2243                 ((hdr_len + sizeof(struct eth_header)) + 12),
2244                 VIRTIO_NET_IP4_ADDR_SIZE,
2245                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2246     }
2247 
2248     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2249 }
2250 
2251 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2252                                             struct ip6_header *ip6,
2253                                             const uint8_t *buf, size_t size)
2254 {
2255     uint16_t ip_len;
2256 
2257     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2258         != IP_HEADER_VERSION_6) {
2259         return RSC_BYPASS;
2260     }
2261 
2262     /* Both option and protocol is checked in this */
2263     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2264         chain->stat.bypass_not_tcp++;
2265         return RSC_BYPASS;
2266     }
2267 
2268     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2269     if (ip_len < sizeof(struct tcp_header) ||
2270         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2271                   - sizeof(struct ip6_header))) {
2272         chain->stat.ip_hacked++;
2273         return RSC_BYPASS;
2274     }
2275 
2276     /* Don't handle packets with ecn flag */
2277     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2278         chain->stat.ip_ecn++;
2279         return RSC_BYPASS;
2280     }
2281 
2282     return RSC_CANDIDATE;
2283 }
2284 
2285 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2286                                       const uint8_t *buf, size_t size)
2287 {
2288     int32_t ret;
2289     uint16_t hdr_len;
2290     VirtioNetRscChain *chain;
2291     VirtioNetRscUnit unit;
2292 
2293     chain = (VirtioNetRscChain *)opq;
2294     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2295 
2296     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2297         + sizeof(tcp_header))) {
2298         return virtio_net_do_receive(nc, buf, size);
2299     }
2300 
2301     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2302     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2303                                                  unit.ip, buf, size)) {
2304         return virtio_net_do_receive(nc, buf, size);
2305     }
2306 
2307     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2308     if (ret == RSC_BYPASS) {
2309         return virtio_net_do_receive(nc, buf, size);
2310     } else if (ret == RSC_FINAL) {
2311         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2312                 ((hdr_len + sizeof(struct eth_header)) + 8),
2313                 VIRTIO_NET_IP6_ADDR_SIZE,
2314                 hdr_len + sizeof(struct eth_header)
2315                 + sizeof(struct ip6_header));
2316     }
2317 
2318     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2319 }
2320 
2321 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2322                                                       NetClientState *nc,
2323                                                       uint16_t proto)
2324 {
2325     VirtioNetRscChain *chain;
2326 
2327     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2328         return NULL;
2329     }
2330 
2331     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2332         if (chain->proto == proto) {
2333             return chain;
2334         }
2335     }
2336 
2337     chain = g_malloc(sizeof(*chain));
2338     chain->n = n;
2339     chain->proto = proto;
2340     if (proto == (uint16_t)ETH_P_IP) {
2341         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2342         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2343     } else {
2344         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2345         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2346     }
2347     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2348                                       virtio_net_rsc_purge, chain);
2349     memset(&chain->stat, 0, sizeof(chain->stat));
2350 
2351     QTAILQ_INIT(&chain->buffers);
2352     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2353 
2354     return chain;
2355 }
2356 
2357 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2358                                       const uint8_t *buf,
2359                                       size_t size)
2360 {
2361     uint16_t proto;
2362     VirtioNetRscChain *chain;
2363     struct eth_header *eth;
2364     VirtIONet *n;
2365 
2366     n = qemu_get_nic_opaque(nc);
2367     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2368         return virtio_net_do_receive(nc, buf, size);
2369     }
2370 
2371     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2372     proto = htons(eth->h_proto);
2373 
2374     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2375     if (chain) {
2376         chain->stat.received++;
2377         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2378             return virtio_net_rsc_receive4(chain, nc, buf, size);
2379         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2380             return virtio_net_rsc_receive6(chain, nc, buf, size);
2381         }
2382     }
2383     return virtio_net_do_receive(nc, buf, size);
2384 }
2385 
2386 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2387                                   size_t size)
2388 {
2389     VirtIONet *n = qemu_get_nic_opaque(nc);
2390     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2391         return virtio_net_rsc_receive(nc, buf, size);
2392     } else {
2393         return virtio_net_do_receive(nc, buf, size);
2394     }
2395 }
2396 
2397 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2398 
2399 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2400 {
2401     VirtIONet *n = qemu_get_nic_opaque(nc);
2402     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2403     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2404 
2405     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2406     virtio_notify(vdev, q->tx_vq);
2407 
2408     g_free(q->async_tx.elem);
2409     q->async_tx.elem = NULL;
2410 
2411     virtio_queue_set_notification(q->tx_vq, 1);
2412     virtio_net_flush_tx(q);
2413 }
2414 
2415 /* TX */
2416 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2417 {
2418     VirtIONet *n = q->n;
2419     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2420     VirtQueueElement *elem;
2421     int32_t num_packets = 0;
2422     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2423     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2424         return num_packets;
2425     }
2426 
2427     if (q->async_tx.elem) {
2428         virtio_queue_set_notification(q->tx_vq, 0);
2429         return num_packets;
2430     }
2431 
2432     for (;;) {
2433         ssize_t ret;
2434         unsigned int out_num;
2435         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2436         struct virtio_net_hdr_mrg_rxbuf mhdr;
2437 
2438         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2439         if (!elem) {
2440             break;
2441         }
2442 
2443         out_num = elem->out_num;
2444         out_sg = elem->out_sg;
2445         if (out_num < 1) {
2446             virtio_error(vdev, "virtio-net header not in first element");
2447             virtqueue_detach_element(q->tx_vq, elem, 0);
2448             g_free(elem);
2449             return -EINVAL;
2450         }
2451 
2452         if (n->has_vnet_hdr) {
2453             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2454                 n->guest_hdr_len) {
2455                 virtio_error(vdev, "virtio-net header incorrect");
2456                 virtqueue_detach_element(q->tx_vq, elem, 0);
2457                 g_free(elem);
2458                 return -EINVAL;
2459             }
2460             if (n->needs_vnet_hdr_swap) {
2461                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2462                 sg2[0].iov_base = &mhdr;
2463                 sg2[0].iov_len = n->guest_hdr_len;
2464                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2465                                    out_sg, out_num,
2466                                    n->guest_hdr_len, -1);
2467                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2468                     goto drop;
2469                 }
2470                 out_num += 1;
2471                 out_sg = sg2;
2472             }
2473         }
2474         /*
2475          * If host wants to see the guest header as is, we can
2476          * pass it on unchanged. Otherwise, copy just the parts
2477          * that host is interested in.
2478          */
2479         assert(n->host_hdr_len <= n->guest_hdr_len);
2480         if (n->host_hdr_len != n->guest_hdr_len) {
2481             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2482                                        out_sg, out_num,
2483                                        0, n->host_hdr_len);
2484             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2485                              out_sg, out_num,
2486                              n->guest_hdr_len, -1);
2487             out_num = sg_num;
2488             out_sg = sg;
2489         }
2490 
2491         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2492                                       out_sg, out_num, virtio_net_tx_complete);
2493         if (ret == 0) {
2494             virtio_queue_set_notification(q->tx_vq, 0);
2495             q->async_tx.elem = elem;
2496             return -EBUSY;
2497         }
2498 
2499 drop:
2500         virtqueue_push(q->tx_vq, elem, 0);
2501         virtio_notify(vdev, q->tx_vq);
2502         g_free(elem);
2503 
2504         if (++num_packets >= n->tx_burst) {
2505             break;
2506         }
2507     }
2508     return num_packets;
2509 }
2510 
2511 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2512 {
2513     VirtIONet *n = VIRTIO_NET(vdev);
2514     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2515 
2516     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2517         virtio_net_drop_tx_queue_data(vdev, vq);
2518         return;
2519     }
2520 
2521     /* This happens when device was stopped but VCPU wasn't. */
2522     if (!vdev->vm_running) {
2523         q->tx_waiting = 1;
2524         return;
2525     }
2526 
2527     if (q->tx_waiting) {
2528         virtio_queue_set_notification(vq, 1);
2529         timer_del(q->tx_timer);
2530         q->tx_waiting = 0;
2531         if (virtio_net_flush_tx(q) == -EINVAL) {
2532             return;
2533         }
2534     } else {
2535         timer_mod(q->tx_timer,
2536                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2537         q->tx_waiting = 1;
2538         virtio_queue_set_notification(vq, 0);
2539     }
2540 }
2541 
2542 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2543 {
2544     VirtIONet *n = VIRTIO_NET(vdev);
2545     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2546 
2547     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2548         virtio_net_drop_tx_queue_data(vdev, vq);
2549         return;
2550     }
2551 
2552     if (unlikely(q->tx_waiting)) {
2553         return;
2554     }
2555     q->tx_waiting = 1;
2556     /* This happens when device was stopped but VCPU wasn't. */
2557     if (!vdev->vm_running) {
2558         return;
2559     }
2560     virtio_queue_set_notification(vq, 0);
2561     qemu_bh_schedule(q->tx_bh);
2562 }
2563 
2564 static void virtio_net_tx_timer(void *opaque)
2565 {
2566     VirtIONetQueue *q = opaque;
2567     VirtIONet *n = q->n;
2568     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2569     /* This happens when device was stopped but BH wasn't. */
2570     if (!vdev->vm_running) {
2571         /* Make sure tx waiting is set, so we'll run when restarted. */
2572         assert(q->tx_waiting);
2573         return;
2574     }
2575 
2576     q->tx_waiting = 0;
2577 
2578     /* Just in case the driver is not ready on more */
2579     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2580         return;
2581     }
2582 
2583     virtio_queue_set_notification(q->tx_vq, 1);
2584     virtio_net_flush_tx(q);
2585 }
2586 
2587 static void virtio_net_tx_bh(void *opaque)
2588 {
2589     VirtIONetQueue *q = opaque;
2590     VirtIONet *n = q->n;
2591     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2592     int32_t ret;
2593 
2594     /* This happens when device was stopped but BH wasn't. */
2595     if (!vdev->vm_running) {
2596         /* Make sure tx waiting is set, so we'll run when restarted. */
2597         assert(q->tx_waiting);
2598         return;
2599     }
2600 
2601     q->tx_waiting = 0;
2602 
2603     /* Just in case the driver is not ready on more */
2604     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2605         return;
2606     }
2607 
2608     ret = virtio_net_flush_tx(q);
2609     if (ret == -EBUSY || ret == -EINVAL) {
2610         return; /* Notification re-enable handled by tx_complete or device
2611                  * broken */
2612     }
2613 
2614     /* If we flush a full burst of packets, assume there are
2615      * more coming and immediately reschedule */
2616     if (ret >= n->tx_burst) {
2617         qemu_bh_schedule(q->tx_bh);
2618         q->tx_waiting = 1;
2619         return;
2620     }
2621 
2622     /* If less than a full burst, re-enable notification and flush
2623      * anything that may have come in while we weren't looking.  If
2624      * we find something, assume the guest is still active and reschedule */
2625     virtio_queue_set_notification(q->tx_vq, 1);
2626     ret = virtio_net_flush_tx(q);
2627     if (ret == -EINVAL) {
2628         return;
2629     } else if (ret > 0) {
2630         virtio_queue_set_notification(q->tx_vq, 0);
2631         qemu_bh_schedule(q->tx_bh);
2632         q->tx_waiting = 1;
2633     }
2634 }
2635 
2636 static void virtio_net_add_queue(VirtIONet *n, int index)
2637 {
2638     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2639 
2640     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2641                                            virtio_net_handle_rx);
2642 
2643     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2644         n->vqs[index].tx_vq =
2645             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2646                              virtio_net_handle_tx_timer);
2647         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2648                                               virtio_net_tx_timer,
2649                                               &n->vqs[index]);
2650     } else {
2651         n->vqs[index].tx_vq =
2652             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2653                              virtio_net_handle_tx_bh);
2654         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2655     }
2656 
2657     n->vqs[index].tx_waiting = 0;
2658     n->vqs[index].n = n;
2659 }
2660 
2661 static void virtio_net_del_queue(VirtIONet *n, int index)
2662 {
2663     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2664     VirtIONetQueue *q = &n->vqs[index];
2665     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2666 
2667     qemu_purge_queued_packets(nc);
2668 
2669     virtio_del_queue(vdev, index * 2);
2670     if (q->tx_timer) {
2671         timer_del(q->tx_timer);
2672         timer_free(q->tx_timer);
2673         q->tx_timer = NULL;
2674     } else {
2675         qemu_bh_delete(q->tx_bh);
2676         q->tx_bh = NULL;
2677     }
2678     q->tx_waiting = 0;
2679     virtio_del_queue(vdev, index * 2 + 1);
2680 }
2681 
2682 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2683 {
2684     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2685     int old_num_queues = virtio_get_num_queues(vdev);
2686     int new_num_queues = new_max_queues * 2 + 1;
2687     int i;
2688 
2689     assert(old_num_queues >= 3);
2690     assert(old_num_queues % 2 == 1);
2691 
2692     if (old_num_queues == new_num_queues) {
2693         return;
2694     }
2695 
2696     /*
2697      * We always need to remove and add ctrl vq if
2698      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2699      * and then we only enter one of the following two loops.
2700      */
2701     virtio_del_queue(vdev, old_num_queues - 1);
2702 
2703     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2704         /* new_num_queues < old_num_queues */
2705         virtio_net_del_queue(n, i / 2);
2706     }
2707 
2708     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2709         /* new_num_queues > old_num_queues */
2710         virtio_net_add_queue(n, i / 2);
2711     }
2712 
2713     /* add ctrl_vq last */
2714     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2715 }
2716 
2717 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2718 {
2719     int max = multiqueue ? n->max_queues : 1;
2720 
2721     n->multiqueue = multiqueue;
2722     virtio_net_change_num_queues(n, max);
2723 
2724     virtio_net_set_queues(n);
2725 }
2726 
2727 static int virtio_net_post_load_device(void *opaque, int version_id)
2728 {
2729     VirtIONet *n = opaque;
2730     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2731     int i, link_down;
2732 
2733     trace_virtio_net_post_load_device();
2734     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2735                                virtio_vdev_has_feature(vdev,
2736                                                        VIRTIO_F_VERSION_1),
2737                                virtio_vdev_has_feature(vdev,
2738                                                        VIRTIO_NET_F_HASH_REPORT));
2739 
2740     /* MAC_TABLE_ENTRIES may be different from the saved image */
2741     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2742         n->mac_table.in_use = 0;
2743     }
2744 
2745     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2746         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2747     }
2748 
2749     /*
2750      * curr_guest_offloads will be later overwritten by the
2751      * virtio_set_features_nocheck call done from the virtio_load.
2752      * Here we make sure it is preserved and restored accordingly
2753      * in the virtio_net_post_load_virtio callback.
2754      */
2755     n->saved_guest_offloads = n->curr_guest_offloads;
2756 
2757     virtio_net_set_queues(n);
2758 
2759     /* Find the first multicast entry in the saved MAC filter */
2760     for (i = 0; i < n->mac_table.in_use; i++) {
2761         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2762             break;
2763         }
2764     }
2765     n->mac_table.first_multi = i;
2766 
2767     /* nc.link_down can't be migrated, so infer link_down according
2768      * to link status bit in n->status */
2769     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2770     for (i = 0; i < n->max_queues; i++) {
2771         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2772     }
2773 
2774     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2775         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2776         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2777                                   QEMU_CLOCK_VIRTUAL,
2778                                   virtio_net_announce_timer, n);
2779         if (n->announce_timer.round) {
2780             timer_mod(n->announce_timer.tm,
2781                       qemu_clock_get_ms(n->announce_timer.type));
2782         } else {
2783             qemu_announce_timer_del(&n->announce_timer, false);
2784         }
2785     }
2786 
2787     if (n->rss_data.enabled) {
2788         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2789                                     n->rss_data.indirections_len,
2790                                     sizeof(n->rss_data.key));
2791     } else {
2792         trace_virtio_net_rss_disable();
2793     }
2794     return 0;
2795 }
2796 
2797 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2798 {
2799     VirtIONet *n = VIRTIO_NET(vdev);
2800     /*
2801      * The actual needed state is now in saved_guest_offloads,
2802      * see virtio_net_post_load_device for detail.
2803      * Restore it back and apply the desired offloads.
2804      */
2805     n->curr_guest_offloads = n->saved_guest_offloads;
2806     if (peer_has_vnet_hdr(n)) {
2807         virtio_net_apply_guest_offloads(n);
2808     }
2809 
2810     return 0;
2811 }
2812 
2813 /* tx_waiting field of a VirtIONetQueue */
2814 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2815     .name = "virtio-net-queue-tx_waiting",
2816     .fields = (VMStateField[]) {
2817         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2818         VMSTATE_END_OF_LIST()
2819    },
2820 };
2821 
2822 static bool max_queues_gt_1(void *opaque, int version_id)
2823 {
2824     return VIRTIO_NET(opaque)->max_queues > 1;
2825 }
2826 
2827 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2828 {
2829     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2830                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2831 }
2832 
2833 static bool mac_table_fits(void *opaque, int version_id)
2834 {
2835     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2836 }
2837 
2838 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2839 {
2840     return !mac_table_fits(opaque, version_id);
2841 }
2842 
2843 /* This temporary type is shared by all the WITH_TMP methods
2844  * although only some fields are used by each.
2845  */
2846 struct VirtIONetMigTmp {
2847     VirtIONet      *parent;
2848     VirtIONetQueue *vqs_1;
2849     uint16_t        curr_queues_1;
2850     uint8_t         has_ufo;
2851     uint32_t        has_vnet_hdr;
2852 };
2853 
2854 /* The 2nd and subsequent tx_waiting flags are loaded later than
2855  * the 1st entry in the queues and only if there's more than one
2856  * entry.  We use the tmp mechanism to calculate a temporary
2857  * pointer and count and also validate the count.
2858  */
2859 
2860 static int virtio_net_tx_waiting_pre_save(void *opaque)
2861 {
2862     struct VirtIONetMigTmp *tmp = opaque;
2863 
2864     tmp->vqs_1 = tmp->parent->vqs + 1;
2865     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2866     if (tmp->parent->curr_queues == 0) {
2867         tmp->curr_queues_1 = 0;
2868     }
2869 
2870     return 0;
2871 }
2872 
2873 static int virtio_net_tx_waiting_pre_load(void *opaque)
2874 {
2875     struct VirtIONetMigTmp *tmp = opaque;
2876 
2877     /* Reuse the pointer setup from save */
2878     virtio_net_tx_waiting_pre_save(opaque);
2879 
2880     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2881         error_report("virtio-net: curr_queues %x > max_queues %x",
2882             tmp->parent->curr_queues, tmp->parent->max_queues);
2883 
2884         return -EINVAL;
2885     }
2886 
2887     return 0; /* all good */
2888 }
2889 
2890 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2891     .name      = "virtio-net-tx_waiting",
2892     .pre_load  = virtio_net_tx_waiting_pre_load,
2893     .pre_save  = virtio_net_tx_waiting_pre_save,
2894     .fields    = (VMStateField[]) {
2895         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2896                                      curr_queues_1,
2897                                      vmstate_virtio_net_queue_tx_waiting,
2898                                      struct VirtIONetQueue),
2899         VMSTATE_END_OF_LIST()
2900     },
2901 };
2902 
2903 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2904  * flag set we need to check that we have it
2905  */
2906 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2907 {
2908     struct VirtIONetMigTmp *tmp = opaque;
2909 
2910     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2911         error_report("virtio-net: saved image requires TUN_F_UFO support");
2912         return -EINVAL;
2913     }
2914 
2915     return 0;
2916 }
2917 
2918 static int virtio_net_ufo_pre_save(void *opaque)
2919 {
2920     struct VirtIONetMigTmp *tmp = opaque;
2921 
2922     tmp->has_ufo = tmp->parent->has_ufo;
2923 
2924     return 0;
2925 }
2926 
2927 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2928     .name      = "virtio-net-ufo",
2929     .post_load = virtio_net_ufo_post_load,
2930     .pre_save  = virtio_net_ufo_pre_save,
2931     .fields    = (VMStateField[]) {
2932         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2933         VMSTATE_END_OF_LIST()
2934     },
2935 };
2936 
2937 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2938  * flag set we need to check that we have it
2939  */
2940 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2941 {
2942     struct VirtIONetMigTmp *tmp = opaque;
2943 
2944     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2945         error_report("virtio-net: saved image requires vnet_hdr=on");
2946         return -EINVAL;
2947     }
2948 
2949     return 0;
2950 }
2951 
2952 static int virtio_net_vnet_pre_save(void *opaque)
2953 {
2954     struct VirtIONetMigTmp *tmp = opaque;
2955 
2956     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2957 
2958     return 0;
2959 }
2960 
2961 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2962     .name      = "virtio-net-vnet",
2963     .post_load = virtio_net_vnet_post_load,
2964     .pre_save  = virtio_net_vnet_pre_save,
2965     .fields    = (VMStateField[]) {
2966         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2967         VMSTATE_END_OF_LIST()
2968     },
2969 };
2970 
2971 static bool virtio_net_rss_needed(void *opaque)
2972 {
2973     return VIRTIO_NET(opaque)->rss_data.enabled;
2974 }
2975 
2976 static const VMStateDescription vmstate_virtio_net_rss = {
2977     .name      = "virtio-net-device/rss",
2978     .version_id = 1,
2979     .minimum_version_id = 1,
2980     .needed = virtio_net_rss_needed,
2981     .fields = (VMStateField[]) {
2982         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
2983         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
2984         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
2985         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
2986         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
2987         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
2988         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
2989                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
2990         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
2991                                     rss_data.indirections_len, 0,
2992                                     vmstate_info_uint16, uint16_t),
2993         VMSTATE_END_OF_LIST()
2994     },
2995 };
2996 
2997 static const VMStateDescription vmstate_virtio_net_device = {
2998     .name = "virtio-net-device",
2999     .version_id = VIRTIO_NET_VM_VERSION,
3000     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3001     .post_load = virtio_net_post_load_device,
3002     .fields = (VMStateField[]) {
3003         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3004         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3005                                vmstate_virtio_net_queue_tx_waiting,
3006                                VirtIONetQueue),
3007         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3008         VMSTATE_UINT16(status, VirtIONet),
3009         VMSTATE_UINT8(promisc, VirtIONet),
3010         VMSTATE_UINT8(allmulti, VirtIONet),
3011         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3012 
3013         /* Guarded pair: If it fits we load it, else we throw it away
3014          * - can happen if source has a larger MAC table.; post-load
3015          *  sets flags in this case.
3016          */
3017         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3018                                 0, mac_table_fits, mac_table.in_use,
3019                                  ETH_ALEN),
3020         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3021                                      mac_table.in_use, ETH_ALEN),
3022 
3023         /* Note: This is an array of uint32's that's always been saved as a
3024          * buffer; hold onto your endiannesses; it's actually used as a bitmap
3025          * but based on the uint.
3026          */
3027         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3028         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3029                          vmstate_virtio_net_has_vnet),
3030         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3031         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3032         VMSTATE_UINT8(alluni, VirtIONet),
3033         VMSTATE_UINT8(nomulti, VirtIONet),
3034         VMSTATE_UINT8(nouni, VirtIONet),
3035         VMSTATE_UINT8(nobcast, VirtIONet),
3036         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3037                          vmstate_virtio_net_has_ufo),
3038         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3039                             vmstate_info_uint16_equal, uint16_t),
3040         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3041         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3042                          vmstate_virtio_net_tx_waiting),
3043         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3044                             has_ctrl_guest_offloads),
3045         VMSTATE_END_OF_LIST()
3046    },
3047     .subsections = (const VMStateDescription * []) {
3048         &vmstate_virtio_net_rss,
3049         NULL
3050     }
3051 };
3052 
3053 static NetClientInfo net_virtio_info = {
3054     .type = NET_CLIENT_DRIVER_NIC,
3055     .size = sizeof(NICState),
3056     .can_receive = virtio_net_can_receive,
3057     .receive = virtio_net_receive,
3058     .link_status_changed = virtio_net_set_link_status,
3059     .query_rx_filter = virtio_net_query_rxfilter,
3060     .announce = virtio_net_announce,
3061 };
3062 
3063 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3064 {
3065     VirtIONet *n = VIRTIO_NET(vdev);
3066     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3067     assert(n->vhost_started);
3068     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3069 }
3070 
3071 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3072                                            bool mask)
3073 {
3074     VirtIONet *n = VIRTIO_NET(vdev);
3075     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3076     assert(n->vhost_started);
3077     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3078                              vdev, idx, mask);
3079 }
3080 
3081 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3082 {
3083     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3084 
3085     n->config_size = virtio_feature_get_config_size(feature_sizes,
3086                                                     host_features);
3087 }
3088 
3089 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3090                                    const char *type)
3091 {
3092     /*
3093      * The name can be NULL, the netclient name will be type.x.
3094      */
3095     assert(type != NULL);
3096 
3097     g_free(n->netclient_name);
3098     g_free(n->netclient_type);
3099     n->netclient_name = g_strdup(name);
3100     n->netclient_type = g_strdup(type);
3101 }
3102 
3103 static bool failover_unplug_primary(VirtIONet *n)
3104 {
3105     HotplugHandler *hotplug_ctrl;
3106     PCIDevice *pci_dev;
3107     Error *err = NULL;
3108 
3109     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3110     if (hotplug_ctrl) {
3111         pci_dev = PCI_DEVICE(n->primary_dev);
3112         pci_dev->partially_hotplugged = true;
3113         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3114         if (err) {
3115             error_report_err(err);
3116             return false;
3117         }
3118     } else {
3119         return false;
3120     }
3121     return true;
3122 }
3123 
3124 static bool failover_replug_primary(VirtIONet *n, Error **errp)
3125 {
3126     Error *err = NULL;
3127     HotplugHandler *hotplug_ctrl;
3128     PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3129 
3130     if (!pdev->partially_hotplugged) {
3131         return true;
3132     }
3133     if (!n->primary_device_opts) {
3134         n->primary_device_opts = qemu_opts_from_qdict(
3135                 qemu_find_opts("device"),
3136                 n->primary_device_dict, errp);
3137         if (!n->primary_device_opts) {
3138             return false;
3139         }
3140     }
3141     n->primary_bus = n->primary_dev->parent_bus;
3142     if (!n->primary_bus) {
3143         error_setg(errp, "virtio_net: couldn't find primary bus");
3144         return false;
3145     }
3146     qdev_set_parent_bus(n->primary_dev, n->primary_bus);
3147     n->primary_should_be_hidden = false;
3148     if (!qemu_opt_set_bool(n->primary_device_opts,
3149                            "partially_hotplugged", true, errp)) {
3150         return false;
3151     }
3152     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3153     if (hotplug_ctrl) {
3154         hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3155         if (err) {
3156             goto out;
3157         }
3158         hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3159     }
3160 
3161 out:
3162     error_propagate(errp, err);
3163     return !err;
3164 }
3165 
3166 static void virtio_net_handle_migration_primary(VirtIONet *n,
3167                                                 MigrationState *s)
3168 {
3169     bool should_be_hidden;
3170     Error *err = NULL;
3171 
3172     should_be_hidden = atomic_read(&n->primary_should_be_hidden);
3173 
3174     if (!n->primary_dev) {
3175         n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
3176         if (!n->primary_dev) {
3177             return;
3178         }
3179     }
3180 
3181     if (migration_in_setup(s) && !should_be_hidden) {
3182         if (failover_unplug_primary(n)) {
3183             vmstate_unregister(VMSTATE_IF(n->primary_dev),
3184                     qdev_get_vmsd(n->primary_dev),
3185                     n->primary_dev);
3186             qapi_event_send_unplug_primary(n->primary_device_id);
3187             atomic_set(&n->primary_should_be_hidden, true);
3188         } else {
3189             warn_report("couldn't unplug primary device");
3190         }
3191     } else if (migration_has_failed(s)) {
3192         /* We already unplugged the device let's plug it back */
3193         if (!failover_replug_primary(n, &err)) {
3194             if (err) {
3195                 error_report_err(err);
3196             }
3197         }
3198     }
3199 }
3200 
3201 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3202 {
3203     MigrationState *s = data;
3204     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3205     virtio_net_handle_migration_primary(n, s);
3206 }
3207 
3208 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3209             QemuOpts *device_opts)
3210 {
3211     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3212     bool match_found = false;
3213     bool hide = false;
3214 
3215     if (!device_opts) {
3216         return -1;
3217     }
3218     n->primary_device_dict = qemu_opts_to_qdict(device_opts,
3219             n->primary_device_dict);
3220     if (n->primary_device_dict) {
3221         g_free(n->standby_id);
3222         n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
3223                     "failover_pair_id"));
3224     }
3225     if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
3226         match_found = true;
3227     } else {
3228         match_found = false;
3229         hide = false;
3230         g_free(n->standby_id);
3231         n->primary_device_dict = NULL;
3232         goto out;
3233     }
3234 
3235     n->primary_device_opts = device_opts;
3236 
3237     /* primary_should_be_hidden is set during feature negotiation */
3238     hide = atomic_read(&n->primary_should_be_hidden);
3239 
3240     if (n->primary_device_dict) {
3241         g_free(n->primary_device_id);
3242         n->primary_device_id = g_strdup(qdict_get_try_str(
3243                     n->primary_device_dict, "id"));
3244         if (!n->primary_device_id) {
3245             warn_report("primary_device_id not set");
3246         }
3247     }
3248 
3249 out:
3250     if (match_found && hide) {
3251         return 1;
3252     } else if (match_found && !hide) {
3253         return 0;
3254     } else {
3255         return -1;
3256     }
3257 }
3258 
3259 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3260 {
3261     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3262     VirtIONet *n = VIRTIO_NET(dev);
3263     NetClientState *nc;
3264     int i;
3265 
3266     if (n->net_conf.mtu) {
3267         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3268     }
3269 
3270     if (n->net_conf.duplex_str) {
3271         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3272             n->net_conf.duplex = DUPLEX_HALF;
3273         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3274             n->net_conf.duplex = DUPLEX_FULL;
3275         } else {
3276             error_setg(errp, "'duplex' must be 'half' or 'full'");
3277             return;
3278         }
3279         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3280     } else {
3281         n->net_conf.duplex = DUPLEX_UNKNOWN;
3282     }
3283 
3284     if (n->net_conf.speed < SPEED_UNKNOWN) {
3285         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3286         return;
3287     }
3288     if (n->net_conf.speed >= 0) {
3289         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3290     }
3291 
3292     if (n->failover) {
3293         n->primary_listener.should_be_hidden =
3294             virtio_net_primary_should_be_hidden;
3295         atomic_set(&n->primary_should_be_hidden, true);
3296         device_listener_register(&n->primary_listener);
3297         n->migration_state.notify = virtio_net_migration_state_notifier;
3298         add_migration_state_change_notifier(&n->migration_state);
3299         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3300     }
3301 
3302     virtio_net_set_config_size(n, n->host_features);
3303     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3304 
3305     /*
3306      * We set a lower limit on RX queue size to what it always was.
3307      * Guests that want a smaller ring can always resize it without
3308      * help from us (using virtio 1 and up).
3309      */
3310     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3311         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3312         !is_power_of_2(n->net_conf.rx_queue_size)) {
3313         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3314                    "must be a power of 2 between %d and %d.",
3315                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3316                    VIRTQUEUE_MAX_SIZE);
3317         virtio_cleanup(vdev);
3318         return;
3319     }
3320 
3321     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3322         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3323         !is_power_of_2(n->net_conf.tx_queue_size)) {
3324         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3325                    "must be a power of 2 between %d and %d",
3326                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3327                    VIRTQUEUE_MAX_SIZE);
3328         virtio_cleanup(vdev);
3329         return;
3330     }
3331 
3332     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3333     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3334         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3335                    "must be a positive integer less than %d.",
3336                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3337         virtio_cleanup(vdev);
3338         return;
3339     }
3340     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3341     n->curr_queues = 1;
3342     n->tx_timeout = n->net_conf.txtimer;
3343 
3344     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3345                        && strcmp(n->net_conf.tx, "bh")) {
3346         warn_report("virtio-net: "
3347                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3348                     n->net_conf.tx);
3349         error_printf("Defaulting to \"bh\"");
3350     }
3351 
3352     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3353                                     n->net_conf.tx_queue_size);
3354 
3355     for (i = 0; i < n->max_queues; i++) {
3356         virtio_net_add_queue(n, i);
3357     }
3358 
3359     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3360     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3361     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3362     n->status = VIRTIO_NET_S_LINK_UP;
3363     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3364                               QEMU_CLOCK_VIRTUAL,
3365                               virtio_net_announce_timer, n);
3366     n->announce_timer.round = 0;
3367 
3368     if (n->netclient_type) {
3369         /*
3370          * Happen when virtio_net_set_netclient_name has been called.
3371          */
3372         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3373                               n->netclient_type, n->netclient_name, n);
3374     } else {
3375         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3376                               object_get_typename(OBJECT(dev)), dev->id, n);
3377     }
3378 
3379     peer_test_vnet_hdr(n);
3380     if (peer_has_vnet_hdr(n)) {
3381         for (i = 0; i < n->max_queues; i++) {
3382             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3383         }
3384         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3385     } else {
3386         n->host_hdr_len = 0;
3387     }
3388 
3389     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3390 
3391     n->vqs[0].tx_waiting = 0;
3392     n->tx_burst = n->net_conf.txburst;
3393     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3394     n->promisc = 1; /* for compatibility */
3395 
3396     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3397 
3398     n->vlans = g_malloc0(MAX_VLAN >> 3);
3399 
3400     nc = qemu_get_queue(n->nic);
3401     nc->rxfilter_notify_enabled = 1;
3402 
3403     QTAILQ_INIT(&n->rsc_chains);
3404     n->qdev = dev;
3405 
3406     net_rx_pkt_init(&n->rx_pkt, false);
3407 }
3408 
3409 static void virtio_net_device_unrealize(DeviceState *dev)
3410 {
3411     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3412     VirtIONet *n = VIRTIO_NET(dev);
3413     int i, max_queues;
3414 
3415     /* This will stop vhost backend if appropriate. */
3416     virtio_net_set_status(vdev, 0);
3417 
3418     g_free(n->netclient_name);
3419     n->netclient_name = NULL;
3420     g_free(n->netclient_type);
3421     n->netclient_type = NULL;
3422 
3423     g_free(n->mac_table.macs);
3424     g_free(n->vlans);
3425 
3426     if (n->failover) {
3427         device_listener_unregister(&n->primary_listener);
3428         g_free(n->primary_device_id);
3429         g_free(n->standby_id);
3430         qobject_unref(n->primary_device_dict);
3431         n->primary_device_dict = NULL;
3432     }
3433 
3434     max_queues = n->multiqueue ? n->max_queues : 1;
3435     for (i = 0; i < max_queues; i++) {
3436         virtio_net_del_queue(n, i);
3437     }
3438     /* delete also control vq */
3439     virtio_del_queue(vdev, max_queues * 2);
3440     qemu_announce_timer_del(&n->announce_timer, false);
3441     g_free(n->vqs);
3442     qemu_del_nic(n->nic);
3443     virtio_net_rsc_cleanup(n);
3444     g_free(n->rss_data.indirections_table);
3445     net_rx_pkt_uninit(n->rx_pkt);
3446     virtio_cleanup(vdev);
3447 }
3448 
3449 static void virtio_net_instance_init(Object *obj)
3450 {
3451     VirtIONet *n = VIRTIO_NET(obj);
3452 
3453     /*
3454      * The default config_size is sizeof(struct virtio_net_config).
3455      * Can be overriden with virtio_net_set_config_size.
3456      */
3457     n->config_size = sizeof(struct virtio_net_config);
3458     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3459                                   "bootindex", "/ethernet-phy@0",
3460                                   DEVICE(n));
3461 }
3462 
3463 static int virtio_net_pre_save(void *opaque)
3464 {
3465     VirtIONet *n = opaque;
3466 
3467     /* At this point, backend must be stopped, otherwise
3468      * it might keep writing to memory. */
3469     assert(!n->vhost_started);
3470 
3471     return 0;
3472 }
3473 
3474 static bool primary_unplug_pending(void *opaque)
3475 {
3476     DeviceState *dev = opaque;
3477     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3478     VirtIONet *n = VIRTIO_NET(vdev);
3479 
3480     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3481         return false;
3482     }
3483     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3484 }
3485 
3486 static bool dev_unplug_pending(void *opaque)
3487 {
3488     DeviceState *dev = opaque;
3489     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3490 
3491     return vdc->primary_unplug_pending(dev);
3492 }
3493 
3494 static const VMStateDescription vmstate_virtio_net = {
3495     .name = "virtio-net",
3496     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3497     .version_id = VIRTIO_NET_VM_VERSION,
3498     .fields = (VMStateField[]) {
3499         VMSTATE_VIRTIO_DEVICE,
3500         VMSTATE_END_OF_LIST()
3501     },
3502     .pre_save = virtio_net_pre_save,
3503     .dev_unplug_pending = dev_unplug_pending,
3504 };
3505 
3506 static Property virtio_net_properties[] = {
3507     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3508                     VIRTIO_NET_F_CSUM, true),
3509     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3510                     VIRTIO_NET_F_GUEST_CSUM, true),
3511     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3512     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3513                     VIRTIO_NET_F_GUEST_TSO4, true),
3514     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3515                     VIRTIO_NET_F_GUEST_TSO6, true),
3516     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3517                     VIRTIO_NET_F_GUEST_ECN, true),
3518     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3519                     VIRTIO_NET_F_GUEST_UFO, true),
3520     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3521                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3522     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3523                     VIRTIO_NET_F_HOST_TSO4, true),
3524     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3525                     VIRTIO_NET_F_HOST_TSO6, true),
3526     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3527                     VIRTIO_NET_F_HOST_ECN, true),
3528     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3529                     VIRTIO_NET_F_HOST_UFO, true),
3530     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3531                     VIRTIO_NET_F_MRG_RXBUF, true),
3532     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3533                     VIRTIO_NET_F_STATUS, true),
3534     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3535                     VIRTIO_NET_F_CTRL_VQ, true),
3536     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3537                     VIRTIO_NET_F_CTRL_RX, true),
3538     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3539                     VIRTIO_NET_F_CTRL_VLAN, true),
3540     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3541                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3542     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3543                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3544     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3545                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3546     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3547     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3548                     VIRTIO_NET_F_RSS, false),
3549     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3550                     VIRTIO_NET_F_HASH_REPORT, false),
3551     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3552                     VIRTIO_NET_F_RSC_EXT, false),
3553     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3554                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3555     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3556     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3557                        TX_TIMER_INTERVAL),
3558     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3559     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3560     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3561                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3562     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3563                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3564     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3565     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3566                      true),
3567     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3568     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3569     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3570     DEFINE_PROP_END_OF_LIST(),
3571 };
3572 
3573 static void virtio_net_class_init(ObjectClass *klass, void *data)
3574 {
3575     DeviceClass *dc = DEVICE_CLASS(klass);
3576     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3577 
3578     device_class_set_props(dc, virtio_net_properties);
3579     dc->vmsd = &vmstate_virtio_net;
3580     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3581     vdc->realize = virtio_net_device_realize;
3582     vdc->unrealize = virtio_net_device_unrealize;
3583     vdc->get_config = virtio_net_get_config;
3584     vdc->set_config = virtio_net_set_config;
3585     vdc->get_features = virtio_net_get_features;
3586     vdc->set_features = virtio_net_set_features;
3587     vdc->bad_features = virtio_net_bad_features;
3588     vdc->reset = virtio_net_reset;
3589     vdc->set_status = virtio_net_set_status;
3590     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3591     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3592     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3593     vdc->post_load = virtio_net_post_load_virtio;
3594     vdc->vmsd = &vmstate_virtio_net_device;
3595     vdc->primary_unplug_pending = primary_unplug_pending;
3596 }
3597 
3598 static const TypeInfo virtio_net_info = {
3599     .name = TYPE_VIRTIO_NET,
3600     .parent = TYPE_VIRTIO_DEVICE,
3601     .instance_size = sizeof(VirtIONet),
3602     .instance_init = virtio_net_instance_init,
3603     .class_init = virtio_net_class_init,
3604 };
3605 
3606 static void virtio_register_types(void)
3607 {
3608     type_register_static(&virtio_net_info);
3609 }
3610 
3611 type_init(virtio_register_types)
3612