xref: /openbmc/qemu/hw/net/virtio-net.c (revision 3abad4a221e050d43fa8540677b285057642baaf)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47 
48 #define VIRTIO_NET_VM_VERSION    11
49 
50 #define MAC_TABLE_ENTRIES    64
51 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
52 
53 /* previously fixed value */
54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
56 
57 /* for now, only allow larger queues; with virtio-1, guest can downsize */
58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
60 
61 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
62 
63 #define VIRTIO_NET_TCP_FLAG         0x3F
64 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
65 
66 /* IPv4 max payload, 16 bits in the header */
67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
69 
70 /* header length value in ip header without option */
71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
72 
73 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
75 
/*
 * Timer interval for purging coalesced packets. This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
81 
82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
83                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 
92 static VirtIOFeature feature_sizes[] = {
93     {.flags = 1ULL << VIRTIO_NET_F_MAC,
94      .end = endof(struct virtio_net_config, mac)},
95     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
96      .end = endof(struct virtio_net_config, status)},
97     {.flags = 1ULL << VIRTIO_NET_F_MQ,
98      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
99     {.flags = 1ULL << VIRTIO_NET_F_MTU,
100      .end = endof(struct virtio_net_config, mtu)},
101     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
102      .end = endof(struct virtio_net_config, duplex)},
103     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
104      .end = endof(struct virtio_net_config, supported_hash_types)},
105     {}
106 };
107 
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110     VirtIONet *n = qemu_get_nic_opaque(nc);
111 
112     return &n->vqs[nc->queue_index];
113 }
114 
/*
 * Convert a virtqueue index into a queue index by halving it (integer
 * division, truncating toward zero).
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}
119 
120 /* TODO
121  * - we could suppress RX interrupt if we were so inclined.
122  */
123 
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126     VirtIONet *n = VIRTIO_NET(vdev);
127     struct virtio_net_config netcfg;
128     NetClientState *nc = qemu_get_queue(n->nic);
129 
130     int ret = 0;
131     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
132     virtio_stw_p(vdev, &netcfg.status, n->status);
133     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
134     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
135     memcpy(netcfg.mac, n->mac, ETH_ALEN);
136     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
137     netcfg.duplex = n->net_conf.duplex;
138     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
139     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
140                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
141                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
142     virtio_stl_p(vdev, &netcfg.supported_hash_types,
143                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
144     memcpy(config, &netcfg, n->config_size);
145 
146     /*
147      * Is this VDPA? No peer means not VDPA: there's no way to
148      * disconnect/reconnect a VDPA peer.
149      */
150     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
151         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
152                                    n->config_size);
153         if (ret != -1) {
154             memcpy(config, &netcfg, n->config_size);
155         }
156     }
157 }
158 
159 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
160 {
161     VirtIONet *n = VIRTIO_NET(vdev);
162     struct virtio_net_config netcfg = {};
163     NetClientState *nc = qemu_get_queue(n->nic);
164 
165     memcpy(&netcfg, config, n->config_size);
166 
167     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
168         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
169         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
170         memcpy(n->mac, netcfg.mac, ETH_ALEN);
171         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
172     }
173 
174     /*
175      * Is this VDPA? No peer means not VDPA: there's no way to
176      * disconnect/reconnect a VDPA peer.
177      */
178     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
179         vhost_net_set_config(get_vhost_net(nc->peer),
180                              (uint8_t *)&netcfg, 0, n->config_size,
181                              VHOST_SET_CONFIG_TYPE_MASTER);
182       }
183 }
184 
185 static bool virtio_net_started(VirtIONet *n, uint8_t status)
186 {
187     VirtIODevice *vdev = VIRTIO_DEVICE(n);
188     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
189         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
190 }
191 
192 static void virtio_net_announce_notify(VirtIONet *net)
193 {
194     VirtIODevice *vdev = VIRTIO_DEVICE(net);
195     trace_virtio_net_announce_notify();
196 
197     net->status |= VIRTIO_NET_S_ANNOUNCE;
198     virtio_notify_config(vdev);
199 }
200 
201 static void virtio_net_announce_timer(void *opaque)
202 {
203     VirtIONet *n = opaque;
204     trace_virtio_net_announce_timer(n->announce_timer.round);
205 
206     n->announce_timer.round--;
207     virtio_net_announce_notify(n);
208 }
209 
210 static void virtio_net_announce(NetClientState *nc)
211 {
212     VirtIONet *n = qemu_get_nic_opaque(nc);
213     VirtIODevice *vdev = VIRTIO_DEVICE(n);
214 
215     /*
216      * Make sure the virtio migration announcement timer isn't running
217      * If it is, let it trigger announcement so that we do not cause
218      * confusion.
219      */
220     if (n->announce_timer.round) {
221         return;
222     }
223 
224     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
225         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
226             virtio_net_announce_notify(n);
227     }
228 }
229 
230 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
231 {
232     VirtIODevice *vdev = VIRTIO_DEVICE(n);
233     NetClientState *nc = qemu_get_queue(n->nic);
234     int queues = n->multiqueue ? n->max_queues : 1;
235 
236     if (!get_vhost_net(nc->peer)) {
237         return;
238     }
239 
240     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
241         !!n->vhost_started) {
242         return;
243     }
244     if (!n->vhost_started) {
245         int r, i;
246 
247         if (n->needs_vnet_hdr_swap) {
248             error_report("backend does not support %s vnet headers; "
249                          "falling back on userspace virtio",
250                          virtio_is_big_endian(vdev) ? "BE" : "LE");
251             return;
252         }
253 
254         /* Any packets outstanding? Purge them to avoid touching rings
255          * when vhost is running.
256          */
257         for (i = 0;  i < queues; i++) {
258             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
259 
260             /* Purge both directions: TX and RX. */
261             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
262             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
263         }
264 
265         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
266             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
267             if (r < 0) {
268                 error_report("%uBytes MTU not supported by the backend",
269                              n->net_conf.mtu);
270 
271                 return;
272             }
273         }
274 
275         n->vhost_started = 1;
276         r = vhost_net_start(vdev, n->nic->ncs, queues);
277         if (r < 0) {
278             error_report("unable to start vhost net: %d: "
279                          "falling back on userspace virtio", -r);
280             n->vhost_started = 0;
281         }
282     } else {
283         vhost_net_stop(vdev, n->nic->ncs, queues);
284         n->vhost_started = 0;
285     }
286 }
287 
288 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
289                                           NetClientState *peer,
290                                           bool enable)
291 {
292     if (virtio_is_big_endian(vdev)) {
293         return qemu_set_vnet_be(peer, enable);
294     } else {
295         return qemu_set_vnet_le(peer, enable);
296     }
297 }
298 
299 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
300                                        int queues, bool enable)
301 {
302     int i;
303 
304     for (i = 0; i < queues; i++) {
305         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
306             enable) {
307             while (--i >= 0) {
308                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
309             }
310 
311             return true;
312         }
313     }
314 
315     return false;
316 }
317 
318 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
319 {
320     VirtIODevice *vdev = VIRTIO_DEVICE(n);
321     int queues = n->multiqueue ? n->max_queues : 1;
322 
323     if (virtio_net_started(n, status)) {
324         /* Before using the device, we tell the network backend about the
325          * endianness to use when parsing vnet headers. If the backend
326          * can't do it, we fallback onto fixing the headers in the core
327          * virtio-net code.
328          */
329         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
330                                                             queues, true);
331     } else if (virtio_net_started(n, vdev->status)) {
332         /* After using the device, we need to reset the network backend to
333          * the default (guest native endianness), otherwise the guest may
334          * lose network connectivity if it is rebooted into a different
335          * endianness.
336          */
337         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
338     }
339 }
340 
341 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
342 {
343     unsigned int dropped = virtqueue_drop_all(vq);
344     if (dropped) {
345         virtio_notify(vdev, vq);
346     }
347 }
348 
349 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
350 {
351     VirtIONet *n = VIRTIO_NET(vdev);
352     VirtIONetQueue *q;
353     int i;
354     uint8_t queue_status;
355 
356     virtio_net_vnet_endian_status(n, status);
357     virtio_net_vhost_status(n, status);
358 
359     for (i = 0; i < n->max_queues; i++) {
360         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
361         bool queue_started;
362         q = &n->vqs[i];
363 
364         if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
365             queue_status = 0;
366         } else {
367             queue_status = status;
368         }
369         queue_started =
370             virtio_net_started(n, queue_status) && !n->vhost_started;
371 
372         if (queue_started) {
373             qemu_flush_queued_packets(ncs);
374         }
375 
376         if (!q->tx_waiting) {
377             continue;
378         }
379 
380         if (queue_started) {
381             if (q->tx_timer) {
382                 timer_mod(q->tx_timer,
383                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
384             } else {
385                 qemu_bh_schedule(q->tx_bh);
386             }
387         } else {
388             if (q->tx_timer) {
389                 timer_del(q->tx_timer);
390             } else {
391                 qemu_bh_cancel(q->tx_bh);
392             }
393             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
394                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
395                 vdev->vm_running) {
396                 /* if tx is waiting we are likely have some packets in tx queue
397                  * and disabled notification */
398                 q->tx_waiting = 0;
399                 virtio_queue_set_notification(q->tx_vq, 1);
400                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
401             }
402         }
403     }
404 }
405 
406 static void virtio_net_set_link_status(NetClientState *nc)
407 {
408     VirtIONet *n = qemu_get_nic_opaque(nc);
409     VirtIODevice *vdev = VIRTIO_DEVICE(n);
410     uint16_t old_status = n->status;
411 
412     if (nc->link_down)
413         n->status &= ~VIRTIO_NET_S_LINK_UP;
414     else
415         n->status |= VIRTIO_NET_S_LINK_UP;
416 
417     if (n->status != old_status)
418         virtio_notify_config(vdev);
419 
420     virtio_net_set_status(vdev, vdev->status);
421 }
422 
423 static void rxfilter_notify(NetClientState *nc)
424 {
425     VirtIONet *n = qemu_get_nic_opaque(nc);
426 
427     if (nc->rxfilter_notify_enabled) {
428         char *path = object_get_canonical_path(OBJECT(n->qdev));
429         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
430                                               n->netclient_name, path);
431         g_free(path);
432 
433         /* disable event notification to avoid events flooding */
434         nc->rxfilter_notify_enabled = 0;
435     }
436 }
437 
438 static intList *get_vlan_table(VirtIONet *n)
439 {
440     intList *list, *entry;
441     int i, j;
442 
443     list = NULL;
444     for (i = 0; i < MAX_VLAN >> 5; i++) {
445         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
446             if (n->vlans[i] & (1U << j)) {
447                 entry = g_malloc0(sizeof(*entry));
448                 entry->value = (i << 5) + j;
449                 entry->next = list;
450                 list = entry;
451             }
452         }
453     }
454 
455     return list;
456 }
457 
458 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
459 {
460     VirtIONet *n = qemu_get_nic_opaque(nc);
461     VirtIODevice *vdev = VIRTIO_DEVICE(n);
462     RxFilterInfo *info;
463     strList *str_list, *entry;
464     int i;
465 
466     info = g_malloc0(sizeof(*info));
467     info->name = g_strdup(nc->name);
468     info->promiscuous = n->promisc;
469 
470     if (n->nouni) {
471         info->unicast = RX_STATE_NONE;
472     } else if (n->alluni) {
473         info->unicast = RX_STATE_ALL;
474     } else {
475         info->unicast = RX_STATE_NORMAL;
476     }
477 
478     if (n->nomulti) {
479         info->multicast = RX_STATE_NONE;
480     } else if (n->allmulti) {
481         info->multicast = RX_STATE_ALL;
482     } else {
483         info->multicast = RX_STATE_NORMAL;
484     }
485 
486     info->broadcast_allowed = n->nobcast;
487     info->multicast_overflow = n->mac_table.multi_overflow;
488     info->unicast_overflow = n->mac_table.uni_overflow;
489 
490     info->main_mac = qemu_mac_strdup_printf(n->mac);
491 
492     str_list = NULL;
493     for (i = 0; i < n->mac_table.first_multi; i++) {
494         entry = g_malloc0(sizeof(*entry));
495         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
496         entry->next = str_list;
497         str_list = entry;
498     }
499     info->unicast_table = str_list;
500 
501     str_list = NULL;
502     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
503         entry = g_malloc0(sizeof(*entry));
504         entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
505         entry->next = str_list;
506         str_list = entry;
507     }
508     info->multicast_table = str_list;
509     info->vlan_table = get_vlan_table(n);
510 
511     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
512         info->vlan = RX_STATE_ALL;
513     } else if (!info->vlan_table) {
514         info->vlan = RX_STATE_NONE;
515     } else {
516         info->vlan = RX_STATE_NORMAL;
517     }
518 
519     /* enable event notification after query */
520     nc->rxfilter_notify_enabled = 1;
521 
522     return info;
523 }
524 
525 static void virtio_net_reset(VirtIODevice *vdev)
526 {
527     VirtIONet *n = VIRTIO_NET(vdev);
528     int i;
529 
530     /* Reset back to compatibility mode */
531     n->promisc = 1;
532     n->allmulti = 0;
533     n->alluni = 0;
534     n->nomulti = 0;
535     n->nouni = 0;
536     n->nobcast = 0;
537     /* multiqueue is disabled by default */
538     n->curr_queues = 1;
539     timer_del(n->announce_timer.tm);
540     n->announce_timer.round = 0;
541     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
542 
543     /* Flush any MAC and VLAN filter table state */
544     n->mac_table.in_use = 0;
545     n->mac_table.first_multi = 0;
546     n->mac_table.multi_overflow = 0;
547     n->mac_table.uni_overflow = 0;
548     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
549     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
550     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
551     memset(n->vlans, 0, MAX_VLAN >> 3);
552 
553     /* Flush any async TX */
554     for (i = 0;  i < n->max_queues; i++) {
555         NetClientState *nc = qemu_get_subqueue(n->nic, i);
556 
557         if (nc->peer) {
558             qemu_flush_or_purge_queued_packets(nc->peer, true);
559             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
560         }
561     }
562 }
563 
564 static void peer_test_vnet_hdr(VirtIONet *n)
565 {
566     NetClientState *nc = qemu_get_queue(n->nic);
567     if (!nc->peer) {
568         return;
569     }
570 
571     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
572 }
573 
574 static int peer_has_vnet_hdr(VirtIONet *n)
575 {
576     return n->has_vnet_hdr;
577 }
578 
579 static int peer_has_ufo(VirtIONet *n)
580 {
581     if (!peer_has_vnet_hdr(n))
582         return 0;
583 
584     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
585 
586     return n->has_ufo;
587 }
588 
589 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
590                                        int version_1, int hash_report)
591 {
592     int i;
593     NetClientState *nc;
594 
595     n->mergeable_rx_bufs = mergeable_rx_bufs;
596 
597     if (version_1) {
598         n->guest_hdr_len = hash_report ?
599             sizeof(struct virtio_net_hdr_v1_hash) :
600             sizeof(struct virtio_net_hdr_mrg_rxbuf);
601         n->rss_data.populate_hash = !!hash_report;
602     } else {
603         n->guest_hdr_len = n->mergeable_rx_bufs ?
604             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
605             sizeof(struct virtio_net_hdr);
606     }
607 
608     for (i = 0; i < n->max_queues; i++) {
609         nc = qemu_get_subqueue(n->nic, i);
610 
611         if (peer_has_vnet_hdr(n) &&
612             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
613             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
614             n->host_hdr_len = n->guest_hdr_len;
615         }
616     }
617 }
618 
619 static int virtio_net_max_tx_queue_size(VirtIONet *n)
620 {
621     NetClientState *peer = n->nic_conf.peers.ncs[0];
622 
623     /*
624      * Backends other than vhost-user don't support max queue size.
625      */
626     if (!peer) {
627         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
628     }
629 
630     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
631         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
632     }
633 
634     return VIRTQUEUE_MAX_SIZE;
635 }
636 
637 static int peer_attach(VirtIONet *n, int index)
638 {
639     NetClientState *nc = qemu_get_subqueue(n->nic, index);
640 
641     if (!nc->peer) {
642         return 0;
643     }
644 
645     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
646         vhost_set_vring_enable(nc->peer, 1);
647     }
648 
649     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
650         return 0;
651     }
652 
653     if (n->max_queues == 1) {
654         return 0;
655     }
656 
657     return tap_enable(nc->peer);
658 }
659 
660 static int peer_detach(VirtIONet *n, int index)
661 {
662     NetClientState *nc = qemu_get_subqueue(n->nic, index);
663 
664     if (!nc->peer) {
665         return 0;
666     }
667 
668     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
669         vhost_set_vring_enable(nc->peer, 0);
670     }
671 
672     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
673         return 0;
674     }
675 
676     return tap_disable(nc->peer);
677 }
678 
679 static void virtio_net_set_queues(VirtIONet *n)
680 {
681     int i;
682     int r;
683 
684     if (n->nic->peer_deleted) {
685         return;
686     }
687 
688     for (i = 0; i < n->max_queues; i++) {
689         if (i < n->curr_queues) {
690             r = peer_attach(n, i);
691             assert(!r);
692         } else {
693             r = peer_detach(n, i);
694             assert(!r);
695         }
696     }
697 }
698 
699 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
700 
701 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
702                                         Error **errp)
703 {
704     VirtIONet *n = VIRTIO_NET(vdev);
705     NetClientState *nc = qemu_get_queue(n->nic);
706 
707     /* Firstly sync all virtio-net possible supported features */
708     features |= n->host_features;
709 
710     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
711 
712     if (!peer_has_vnet_hdr(n)) {
713         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
714         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
715         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
716         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
717 
718         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
719         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
720         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
721         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
722 
723         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
724     }
725 
726     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
727         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
728         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
729     }
730 
731     if (!get_vhost_net(nc->peer)) {
732         return features;
733     }
734 
735     virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
736     virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
737     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
738     vdev->backend_features = features;
739 
740     if (n->mtu_bypass_backend &&
741             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
742         features |= (1ULL << VIRTIO_NET_F_MTU);
743     }
744 
745     return features;
746 }
747 
748 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
749 {
750     uint64_t features = 0;
751 
752     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
753      * but also these: */
754     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
755     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
756     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
757     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
758     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
759 
760     return features;
761 }
762 
763 static void virtio_net_apply_guest_offloads(VirtIONet *n)
764 {
765     qemu_set_offload(qemu_get_queue(n->nic)->peer,
766             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
767             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
768             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
769             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
770             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
771 }
772 
773 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
774 {
775     static const uint64_t guest_offloads_mask =
776         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
777         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
778         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
779         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
780         (1ULL << VIRTIO_NET_F_GUEST_UFO);
781 
782     return guest_offloads_mask & features;
783 }
784 
785 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
786 {
787     VirtIODevice *vdev = VIRTIO_DEVICE(n);
788     return virtio_net_guest_offloads_by_features(vdev->guest_features);
789 }
790 
791 typedef struct {
792     VirtIONet *n;
793     char *id;
794 } FailoverId;
795 
796 /**
797  * Set the id of the failover primary device
798  *
799  * @opaque: FailoverId to setup
800  * @opts: opts for device we are handling
801  * @errp: returns an error if this function fails
802  */
803 static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
804 {
805     FailoverId *fid = opaque;
806     const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
807 
808     if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
809         fid->id = g_strdup(opts->id);
810         return 1;
811     }
812 
813     return 0;
814 }
815 
816 /**
817  * Find the primary device id for this failover virtio-net
818  *
819  * @n: VirtIONet device
820  * @errp: returns an error if this function fails
821  */
822 static char *failover_find_primary_device_id(VirtIONet *n)
823 {
824     Error *err = NULL;
825     FailoverId fid;
826 
827     fid.n = n;
828     if (!qemu_opts_foreach(qemu_find_opts("device"),
829                            failover_set_primary, &fid, &err)) {
830         return NULL;
831     }
832     return fid.id;
833 }
834 
835 static void failover_add_primary(VirtIONet *n, Error **errp)
836 {
837     Error *err = NULL;
838     QemuOpts *opts;
839     char *id;
840 
841     if (n->primary_dev) {
842         return;
843     }
844 
845     id = failover_find_primary_device_id(n);
846     if (!id) {
847         return;
848     }
849     opts = qemu_opts_find(qemu_find_opts("device"), id);
850     if (opts) {
851         n->primary_dev = qdev_device_add(opts, &err);
852         if (err) {
853             qemu_opts_del(opts);
854         }
855     } else {
856         error_setg(errp, "Primary device not found");
857         error_append_hint(errp, "Virtio-net failover will not work. Make "
858                           "sure primary device has parameter"
859                           " failover_pair_id=<virtio-net-id>\n");
860     }
861     error_propagate(errp, err);
862 }
863 
864 /**
865  * Find the primary device for this failover virtio-net
866  *
867  * @n: VirtIONet device
868  * @errp: returns an error if this function fails
869  */
870 static DeviceState *failover_find_primary_device(VirtIONet *n)
871 {
872     char *id = failover_find_primary_device_id(n);
873 
874     if (!id) {
875         return NULL;
876     }
877 
878     return qdev_find_recursive(sysbus_get_default(), id);
879 }
880 
881 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
882 {
883     VirtIONet *n = VIRTIO_NET(vdev);
884     Error *err = NULL;
885     int i;
886 
887     if (n->mtu_bypass_backend &&
888             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
889         features &= ~(1ULL << VIRTIO_NET_F_MTU);
890     }
891 
892     virtio_net_set_multiqueue(n,
893                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
894                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
895 
896     virtio_net_set_mrg_rx_bufs(n,
897                                virtio_has_feature(features,
898                                                   VIRTIO_NET_F_MRG_RXBUF),
899                                virtio_has_feature(features,
900                                                   VIRTIO_F_VERSION_1),
901                                virtio_has_feature(features,
902                                                   VIRTIO_NET_F_HASH_REPORT));
903 
904     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
905         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
906     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
907         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
908     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
909 
910     if (n->has_vnet_hdr) {
911         n->curr_guest_offloads =
912             virtio_net_guest_offloads_by_features(features);
913         virtio_net_apply_guest_offloads(n);
914     }
915 
916     for (i = 0;  i < n->max_queues; i++) {
917         NetClientState *nc = qemu_get_subqueue(n->nic, i);
918 
919         if (!get_vhost_net(nc->peer)) {
920             continue;
921         }
922         vhost_net_ack_features(get_vhost_net(nc->peer), features);
923     }
924 
925     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
926         memset(n->vlans, 0, MAX_VLAN >> 3);
927     } else {
928         memset(n->vlans, 0xff, MAX_VLAN >> 3);
929     }
930 
931     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
932         qapi_event_send_failover_negotiated(n->netclient_name);
933         qatomic_set(&n->failover_primary_hidden, false);
934         failover_add_primary(n, &err);
935         if (err) {
936             n->primary_dev = failover_find_primary_device(n);
937             failover_add_primary(n, &err);
938             if (err) {
939                 goto out_err;
940             }
941         }
942     }
943     return;
944 
945 out_err:
946     if (err) {
947         warn_report_err(err);
948     }
949 }
950 
951 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
952                                      struct iovec *iov, unsigned int iov_cnt)
953 {
954     uint8_t on;
955     size_t s;
956     NetClientState *nc = qemu_get_queue(n->nic);
957 
958     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
959     if (s != sizeof(on)) {
960         return VIRTIO_NET_ERR;
961     }
962 
963     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
964         n->promisc = on;
965     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
966         n->allmulti = on;
967     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
968         n->alluni = on;
969     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
970         n->nomulti = on;
971     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
972         n->nouni = on;
973     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
974         n->nobcast = on;
975     } else {
976         return VIRTIO_NET_ERR;
977     }
978 
979     rxfilter_notify(nc);
980 
981     return VIRTIO_NET_OK;
982 }
983 
984 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
985                                      struct iovec *iov, unsigned int iov_cnt)
986 {
987     VirtIODevice *vdev = VIRTIO_DEVICE(n);
988     uint64_t offloads;
989     size_t s;
990 
991     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
992         return VIRTIO_NET_ERR;
993     }
994 
995     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
996     if (s != sizeof(offloads)) {
997         return VIRTIO_NET_ERR;
998     }
999 
1000     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1001         uint64_t supported_offloads;
1002 
1003         offloads = virtio_ldq_p(vdev, &offloads);
1004 
1005         if (!n->has_vnet_hdr) {
1006             return VIRTIO_NET_ERR;
1007         }
1008 
1009         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1010             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1011         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1012             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1013         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1014 
1015         supported_offloads = virtio_net_supported_guest_offloads(n);
1016         if (offloads & ~supported_offloads) {
1017             return VIRTIO_NET_ERR;
1018         }
1019 
1020         n->curr_guest_offloads = offloads;
1021         virtio_net_apply_guest_offloads(n);
1022 
1023         return VIRTIO_NET_OK;
1024     } else {
1025         return VIRTIO_NET_ERR;
1026     }
1027 }
1028 
1029 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1030                                  struct iovec *iov, unsigned int iov_cnt)
1031 {
1032     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1033     struct virtio_net_ctrl_mac mac_data;
1034     size_t s;
1035     NetClientState *nc = qemu_get_queue(n->nic);
1036 
1037     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1038         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1039             return VIRTIO_NET_ERR;
1040         }
1041         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1042         assert(s == sizeof(n->mac));
1043         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1044         rxfilter_notify(nc);
1045 
1046         return VIRTIO_NET_OK;
1047     }
1048 
1049     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1050         return VIRTIO_NET_ERR;
1051     }
1052 
1053     int in_use = 0;
1054     int first_multi = 0;
1055     uint8_t uni_overflow = 0;
1056     uint8_t multi_overflow = 0;
1057     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1058 
1059     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1060                    sizeof(mac_data.entries));
1061     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1062     if (s != sizeof(mac_data.entries)) {
1063         goto error;
1064     }
1065     iov_discard_front(&iov, &iov_cnt, s);
1066 
1067     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1068         goto error;
1069     }
1070 
1071     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1072         s = iov_to_buf(iov, iov_cnt, 0, macs,
1073                        mac_data.entries * ETH_ALEN);
1074         if (s != mac_data.entries * ETH_ALEN) {
1075             goto error;
1076         }
1077         in_use += mac_data.entries;
1078     } else {
1079         uni_overflow = 1;
1080     }
1081 
1082     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1083 
1084     first_multi = in_use;
1085 
1086     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1087                    sizeof(mac_data.entries));
1088     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1089     if (s != sizeof(mac_data.entries)) {
1090         goto error;
1091     }
1092 
1093     iov_discard_front(&iov, &iov_cnt, s);
1094 
1095     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1096         goto error;
1097     }
1098 
1099     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1100         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1101                        mac_data.entries * ETH_ALEN);
1102         if (s != mac_data.entries * ETH_ALEN) {
1103             goto error;
1104         }
1105         in_use += mac_data.entries;
1106     } else {
1107         multi_overflow = 1;
1108     }
1109 
1110     n->mac_table.in_use = in_use;
1111     n->mac_table.first_multi = first_multi;
1112     n->mac_table.uni_overflow = uni_overflow;
1113     n->mac_table.multi_overflow = multi_overflow;
1114     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1115     g_free(macs);
1116     rxfilter_notify(nc);
1117 
1118     return VIRTIO_NET_OK;
1119 
1120 error:
1121     g_free(macs);
1122     return VIRTIO_NET_ERR;
1123 }
1124 
1125 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1126                                         struct iovec *iov, unsigned int iov_cnt)
1127 {
1128     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1129     uint16_t vid;
1130     size_t s;
1131     NetClientState *nc = qemu_get_queue(n->nic);
1132 
1133     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1134     vid = virtio_lduw_p(vdev, &vid);
1135     if (s != sizeof(vid)) {
1136         return VIRTIO_NET_ERR;
1137     }
1138 
1139     if (vid >= MAX_VLAN)
1140         return VIRTIO_NET_ERR;
1141 
1142     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1143         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1144     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1145         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1146     else
1147         return VIRTIO_NET_ERR;
1148 
1149     rxfilter_notify(nc);
1150 
1151     return VIRTIO_NET_OK;
1152 }
1153 
1154 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1155                                       struct iovec *iov, unsigned int iov_cnt)
1156 {
1157     trace_virtio_net_handle_announce(n->announce_timer.round);
1158     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1159         n->status & VIRTIO_NET_S_ANNOUNCE) {
1160         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1161         if (n->announce_timer.round) {
1162             qemu_announce_timer_step(&n->announce_timer);
1163         }
1164         return VIRTIO_NET_OK;
1165     } else {
1166         return VIRTIO_NET_ERR;
1167     }
1168 }
1169 
1170 static void virtio_net_disable_rss(VirtIONet *n)
1171 {
1172     if (n->rss_data.enabled) {
1173         trace_virtio_net_rss_disable();
1174     }
1175     n->rss_data.enabled = false;
1176 }
1177 
1178 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1179                                       struct iovec *iov,
1180                                       unsigned int iov_cnt,
1181                                       bool do_rss)
1182 {
1183     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1184     struct virtio_net_rss_config cfg;
1185     size_t s, offset = 0, size_get;
1186     uint16_t queues, i;
1187     struct {
1188         uint16_t us;
1189         uint8_t b;
1190     } QEMU_PACKED temp;
1191     const char *err_msg = "";
1192     uint32_t err_value = 0;
1193 
1194     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1195         err_msg = "RSS is not negotiated";
1196         goto error;
1197     }
1198     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1199         err_msg = "Hash report is not negotiated";
1200         goto error;
1201     }
1202     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1203     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1204     if (s != size_get) {
1205         err_msg = "Short command buffer";
1206         err_value = (uint32_t)s;
1207         goto error;
1208     }
1209     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1210     n->rss_data.indirections_len =
1211         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1212     n->rss_data.indirections_len++;
1213     if (!do_rss) {
1214         n->rss_data.indirections_len = 1;
1215     }
1216     if (!is_power_of_2(n->rss_data.indirections_len)) {
1217         err_msg = "Invalid size of indirection table";
1218         err_value = n->rss_data.indirections_len;
1219         goto error;
1220     }
1221     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1222         err_msg = "Too large indirection table";
1223         err_value = n->rss_data.indirections_len;
1224         goto error;
1225     }
1226     n->rss_data.default_queue = do_rss ?
1227         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1228     if (n->rss_data.default_queue >= n->max_queues) {
1229         err_msg = "Invalid default queue";
1230         err_value = n->rss_data.default_queue;
1231         goto error;
1232     }
1233     offset += size_get;
1234     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1235     g_free(n->rss_data.indirections_table);
1236     n->rss_data.indirections_table = g_malloc(size_get);
1237     if (!n->rss_data.indirections_table) {
1238         err_msg = "Can't allocate indirections table";
1239         err_value = n->rss_data.indirections_len;
1240         goto error;
1241     }
1242     s = iov_to_buf(iov, iov_cnt, offset,
1243                    n->rss_data.indirections_table, size_get);
1244     if (s != size_get) {
1245         err_msg = "Short indirection table buffer";
1246         err_value = (uint32_t)s;
1247         goto error;
1248     }
1249     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1250         uint16_t val = n->rss_data.indirections_table[i];
1251         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1252     }
1253     offset += size_get;
1254     size_get = sizeof(temp);
1255     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1256     if (s != size_get) {
1257         err_msg = "Can't get queues";
1258         err_value = (uint32_t)s;
1259         goto error;
1260     }
1261     queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1262     if (queues == 0 || queues > n->max_queues) {
1263         err_msg = "Invalid number of queues";
1264         err_value = queues;
1265         goto error;
1266     }
1267     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1268         err_msg = "Invalid key size";
1269         err_value = temp.b;
1270         goto error;
1271     }
1272     if (!temp.b && n->rss_data.hash_types) {
1273         err_msg = "No key provided";
1274         err_value = 0;
1275         goto error;
1276     }
1277     if (!temp.b && !n->rss_data.hash_types) {
1278         virtio_net_disable_rss(n);
1279         return queues;
1280     }
1281     offset += size_get;
1282     size_get = temp.b;
1283     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1284     if (s != size_get) {
1285         err_msg = "Can get key buffer";
1286         err_value = (uint32_t)s;
1287         goto error;
1288     }
1289     n->rss_data.enabled = true;
1290     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1291                                 n->rss_data.indirections_len,
1292                                 temp.b);
1293     return queues;
1294 error:
1295     trace_virtio_net_rss_error(err_msg, err_value);
1296     virtio_net_disable_rss(n);
1297     return 0;
1298 }
1299 
1300 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1301                                 struct iovec *iov, unsigned int iov_cnt)
1302 {
1303     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1304     uint16_t queues;
1305 
1306     virtio_net_disable_rss(n);
1307     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1308         queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1309         return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1310     }
1311     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1312         queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1313     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1314         struct virtio_net_ctrl_mq mq;
1315         size_t s;
1316         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1317             return VIRTIO_NET_ERR;
1318         }
1319         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1320         if (s != sizeof(mq)) {
1321             return VIRTIO_NET_ERR;
1322         }
1323         queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1324 
1325     } else {
1326         return VIRTIO_NET_ERR;
1327     }
1328 
1329     if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1330         queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1331         queues > n->max_queues ||
1332         !n->multiqueue) {
1333         return VIRTIO_NET_ERR;
1334     }
1335 
1336     n->curr_queues = queues;
1337     /* stop the backend before changing the number of queues to avoid handling a
1338      * disabled queue */
1339     virtio_net_set_status(vdev, vdev->status);
1340     virtio_net_set_queues(n);
1341 
1342     return VIRTIO_NET_OK;
1343 }
1344 
1345 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1346 {
1347     VirtIONet *n = VIRTIO_NET(vdev);
1348     struct virtio_net_ctrl_hdr ctrl;
1349     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1350     VirtQueueElement *elem;
1351     size_t s;
1352     struct iovec *iov, *iov2;
1353     unsigned int iov_cnt;
1354 
1355     for (;;) {
1356         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1357         if (!elem) {
1358             break;
1359         }
1360         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1361             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1362             virtio_error(vdev, "virtio-net ctrl missing headers");
1363             virtqueue_detach_element(vq, elem, 0);
1364             g_free(elem);
1365             break;
1366         }
1367 
1368         iov_cnt = elem->out_num;
1369         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1370         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1371         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1372         if (s != sizeof(ctrl)) {
1373             status = VIRTIO_NET_ERR;
1374         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1375             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1376         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1377             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1378         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1379             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1380         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1381             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1382         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1383             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1384         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1385             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1386         }
1387 
1388         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1389         assert(s == sizeof(status));
1390 
1391         virtqueue_push(vq, elem, sizeof(status));
1392         virtio_notify(vdev, vq);
1393         g_free(iov2);
1394         g_free(elem);
1395     }
1396 }
1397 
1398 /* RX */
1399 
1400 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1401 {
1402     VirtIONet *n = VIRTIO_NET(vdev);
1403     int queue_index = vq2q(virtio_get_queue_index(vq));
1404 
1405     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1406 }
1407 
1408 static bool virtio_net_can_receive(NetClientState *nc)
1409 {
1410     VirtIONet *n = qemu_get_nic_opaque(nc);
1411     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1412     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1413 
1414     if (!vdev->vm_running) {
1415         return false;
1416     }
1417 
1418     if (nc->queue_index >= n->curr_queues) {
1419         return false;
1420     }
1421 
1422     if (!virtio_queue_ready(q->rx_vq) ||
1423         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1424         return false;
1425     }
1426 
1427     return true;
1428 }
1429 
1430 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1431 {
1432     VirtIONet *n = q->n;
1433     if (virtio_queue_empty(q->rx_vq) ||
1434         (n->mergeable_rx_bufs &&
1435          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1436         virtio_queue_set_notification(q->rx_vq, 1);
1437 
1438         /* To avoid a race condition where the guest has made some buffers
1439          * available after the above check but before notification was
1440          * enabled, check for available buffers again.
1441          */
1442         if (virtio_queue_empty(q->rx_vq) ||
1443             (n->mergeable_rx_bufs &&
1444              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1445             return 0;
1446         }
1447     }
1448 
1449     virtio_queue_set_notification(q->rx_vq, 0);
1450     return 1;
1451 }
1452 
1453 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1454 {
1455     virtio_tswap16s(vdev, &hdr->hdr_len);
1456     virtio_tswap16s(vdev, &hdr->gso_size);
1457     virtio_tswap16s(vdev, &hdr->csum_start);
1458     virtio_tswap16s(vdev, &hdr->csum_offset);
1459 }
1460 
1461 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1462  * it never finds out that the packets don't have valid checksums.  This
1463  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1464  * fix this with Xen but it hasn't appeared in an upstream release of
1465  * dhclient yet.
1466  *
1467  * To avoid breaking existing guests, we catch udp packets and add
1468  * checksums.  This is terrible but it's better than hacking the guest
1469  * kernels.
1470  *
1471  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1472  * we should provide a mechanism to disable it to avoid polluting the host
1473  * cache.
1474  */
1475 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1476                                         uint8_t *buf, size_t size)
1477 {
1478     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1479         (size > 27 && size < 1500) && /* normal sized MTU */
1480         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1481         (buf[23] == 17) && /* ip.protocol == UDP */
1482         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1483         net_checksum_calculate(buf, size);
1484         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1485     }
1486 }
1487 
1488 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1489                            const void *buf, size_t size)
1490 {
1491     if (n->has_vnet_hdr) {
1492         /* FIXME this cast is evil */
1493         void *wbuf = (void *)buf;
1494         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1495                                     size - n->host_hdr_len);
1496 
1497         if (n->needs_vnet_hdr_swap) {
1498             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1499         }
1500         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1501     } else {
1502         struct virtio_net_hdr hdr = {
1503             .flags = 0,
1504             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1505         };
1506         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1507     }
1508 }
1509 
1510 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1511 {
1512     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1513     static const uint8_t vlan[] = {0x81, 0x00};
1514     uint8_t *ptr = (uint8_t *)buf;
1515     int i;
1516 
1517     if (n->promisc)
1518         return 1;
1519 
1520     ptr += n->host_hdr_len;
1521 
1522     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1523         int vid = lduw_be_p(ptr + 14) & 0xfff;
1524         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1525             return 0;
1526     }
1527 
1528     if (ptr[0] & 1) { // multicast
1529         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1530             return !n->nobcast;
1531         } else if (n->nomulti) {
1532             return 0;
1533         } else if (n->allmulti || n->mac_table.multi_overflow) {
1534             return 1;
1535         }
1536 
1537         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1538             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1539                 return 1;
1540             }
1541         }
1542     } else { // unicast
1543         if (n->nouni) {
1544             return 0;
1545         } else if (n->alluni || n->mac_table.uni_overflow) {
1546             return 1;
1547         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1548             return 1;
1549         }
1550 
1551         for (i = 0; i < n->mac_table.first_multi; i++) {
1552             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1553                 return 1;
1554             }
1555         }
1556     }
1557 
1558     return 0;
1559 }
1560 
1561 static uint8_t virtio_net_get_hash_type(bool isip4,
1562                                         bool isip6,
1563                                         bool isudp,
1564                                         bool istcp,
1565                                         uint32_t types)
1566 {
1567     if (isip4) {
1568         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1569             return NetPktRssIpV4Tcp;
1570         }
1571         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1572             return NetPktRssIpV4Udp;
1573         }
1574         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1575             return NetPktRssIpV4;
1576         }
1577     } else if (isip6) {
1578         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1579                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1580 
1581         if (istcp && (types & mask)) {
1582             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1583                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1584         }
1585         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1586         if (isudp && (types & mask)) {
1587             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1588                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1589         }
1590         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1591         if (types & mask) {
1592             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1593                 NetPktRssIpV6Ex : NetPktRssIpV6;
1594         }
1595     }
1596     return 0xff;
1597 }
1598 
1599 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1600                                    uint32_t hash)
1601 {
1602     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1603     hdr->hash_value = hash;
1604     hdr->hash_report = report;
1605 }
1606 
1607 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1608                                   size_t size)
1609 {
1610     VirtIONet *n = qemu_get_nic_opaque(nc);
1611     unsigned int index = nc->queue_index, new_index = index;
1612     struct NetRxPkt *pkt = n->rx_pkt;
1613     uint8_t net_hash_type;
1614     uint32_t hash;
1615     bool isip4, isip6, isudp, istcp;
1616     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1617         VIRTIO_NET_HASH_REPORT_IPv4,
1618         VIRTIO_NET_HASH_REPORT_TCPv4,
1619         VIRTIO_NET_HASH_REPORT_TCPv6,
1620         VIRTIO_NET_HASH_REPORT_IPv6,
1621         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1622         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1623         VIRTIO_NET_HASH_REPORT_UDPv4,
1624         VIRTIO_NET_HASH_REPORT_UDPv6,
1625         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1626     };
1627 
1628     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1629                              size - n->host_hdr_len);
1630     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1631     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1632         istcp = isudp = false;
1633     }
1634     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1635         istcp = isudp = false;
1636     }
1637     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1638                                              n->rss_data.hash_types);
1639     if (net_hash_type > NetPktRssIpV6UdpEx) {
1640         if (n->rss_data.populate_hash) {
1641             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1642         }
1643         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1644     }
1645 
1646     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1647 
1648     if (n->rss_data.populate_hash) {
1649         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1650     }
1651 
1652     if (n->rss_data.redirect) {
1653         new_index = hash & (n->rss_data.indirections_len - 1);
1654         new_index = n->rss_data.indirections_table[new_index];
1655     }
1656 
1657     return (index == new_index) ? -1 : new_index;
1658 }
1659 
1660 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1661                                       size_t size, bool no_rss)
1662 {
1663     VirtIONet *n = qemu_get_nic_opaque(nc);
1664     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1665     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1666     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1667     struct virtio_net_hdr_mrg_rxbuf mhdr;
1668     unsigned mhdr_cnt = 0;
1669     size_t offset, i, guest_offset;
1670 
1671     if (!virtio_net_can_receive(nc)) {
1672         return -1;
1673     }
1674 
1675     if (!no_rss && n->rss_data.enabled) {
1676         int index = virtio_net_process_rss(nc, buf, size);
1677         if (index >= 0) {
1678             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1679             return virtio_net_receive_rcu(nc2, buf, size, true);
1680         }
1681     }
1682 
1683     /* hdr_len refers to the header we supply to the guest */
1684     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1685         return 0;
1686     }
1687 
1688     if (!receive_filter(n, buf, size))
1689         return size;
1690 
1691     offset = i = 0;
1692 
1693     while (offset < size) {
1694         VirtQueueElement *elem;
1695         int len, total;
1696         const struct iovec *sg;
1697 
1698         total = 0;
1699 
1700         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1701         if (!elem) {
1702             if (i) {
1703                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1704                              "i %zd mergeable %d offset %zd, size %zd, "
1705                              "guest hdr len %zd, host hdr len %zd "
1706                              "guest features 0x%" PRIx64,
1707                              i, n->mergeable_rx_bufs, offset, size,
1708                              n->guest_hdr_len, n->host_hdr_len,
1709                              vdev->guest_features);
1710             }
1711             return -1;
1712         }
1713 
1714         if (elem->in_num < 1) {
1715             virtio_error(vdev,
1716                          "virtio-net receive queue contains no in buffers");
1717             virtqueue_detach_element(q->rx_vq, elem, 0);
1718             g_free(elem);
1719             return -1;
1720         }
1721 
1722         sg = elem->in_sg;
1723         if (i == 0) {
1724             assert(offset == 0);
1725             if (n->mergeable_rx_bufs) {
1726                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1727                                     sg, elem->in_num,
1728                                     offsetof(typeof(mhdr), num_buffers),
1729                                     sizeof(mhdr.num_buffers));
1730             }
1731 
1732             receive_header(n, sg, elem->in_num, buf, size);
1733             if (n->rss_data.populate_hash) {
1734                 offset = sizeof(mhdr);
1735                 iov_from_buf(sg, elem->in_num, offset,
1736                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1737             }
1738             offset = n->host_hdr_len;
1739             total += n->guest_hdr_len;
1740             guest_offset = n->guest_hdr_len;
1741         } else {
1742             guest_offset = 0;
1743         }
1744 
1745         /* copy in packet.  ugh */
1746         len = iov_from_buf(sg, elem->in_num, guest_offset,
1747                            buf + offset, size - offset);
1748         total += len;
1749         offset += len;
1750         /* If buffers can't be merged, at this point we
1751          * must have consumed the complete packet.
1752          * Otherwise, drop it. */
1753         if (!n->mergeable_rx_bufs && offset < size) {
1754             virtqueue_unpop(q->rx_vq, elem, total);
1755             g_free(elem);
1756             return size;
1757         }
1758 
1759         /* signal other side */
1760         virtqueue_fill(q->rx_vq, elem, total, i++);
1761         g_free(elem);
1762     }
1763 
1764     if (mhdr_cnt) {
1765         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1766         iov_from_buf(mhdr_sg, mhdr_cnt,
1767                      0,
1768                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1769     }
1770 
1771     virtqueue_flush(q->rx_vq, i);
1772     virtio_notify(vdev, q->rx_vq);
1773 
1774     return size;
1775 }
1776 
1777 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1778                                   size_t size)
1779 {
1780     RCU_READ_LOCK_GUARD();
1781 
1782     return virtio_net_receive_rcu(nc, buf, size, false);
1783 }
1784 
1785 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1786                                          const uint8_t *buf,
1787                                          VirtioNetRscUnit *unit)
1788 {
1789     uint16_t ip_hdrlen;
1790     struct ip_header *ip;
1791 
1792     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1793                               + sizeof(struct eth_header));
1794     unit->ip = (void *)ip;
1795     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1796     unit->ip_plen = &ip->ip_len;
1797     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1798     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1799     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1800 }
1801 
1802 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1803                                          const uint8_t *buf,
1804                                          VirtioNetRscUnit *unit)
1805 {
1806     struct ip6_header *ip6;
1807 
1808     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1809                                  + sizeof(struct eth_header));
1810     unit->ip = ip6;
1811     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1812     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1813                                         + sizeof(struct ip6_header));
1814     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1815 
1816     /* There is a difference between payload lenght in ipv4 and v6,
1817        ip header is excluded in ipv6 */
1818     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1819 }
1820 
1821 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1822                                        VirtioNetRscSeg *seg)
1823 {
1824     int ret;
1825     struct virtio_net_hdr_v1 *h;
1826 
1827     h = (struct virtio_net_hdr_v1 *)seg->buf;
1828     h->flags = 0;
1829     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1830 
1831     if (seg->is_coalesced) {
1832         h->rsc.segments = seg->packets;
1833         h->rsc.dup_acks = seg->dup_ack;
1834         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1835         if (chain->proto == ETH_P_IP) {
1836             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1837         } else {
1838             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1839         }
1840     }
1841 
1842     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1843     QTAILQ_REMOVE(&chain->buffers, seg, next);
1844     g_free(seg->buf);
1845     g_free(seg);
1846 
1847     return ret;
1848 }
1849 
1850 static void virtio_net_rsc_purge(void *opq)
1851 {
1852     VirtioNetRscSeg *seg, *rn;
1853     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1854 
1855     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1856         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1857             chain->stat.purge_failed++;
1858             continue;
1859         }
1860     }
1861 
1862     chain->stat.timer++;
1863     if (!QTAILQ_EMPTY(&chain->buffers)) {
1864         timer_mod(chain->drain_timer,
1865               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1866     }
1867 }
1868 
1869 static void virtio_net_rsc_cleanup(VirtIONet *n)
1870 {
1871     VirtioNetRscChain *chain, *rn_chain;
1872     VirtioNetRscSeg *seg, *rn_seg;
1873 
1874     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1875         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1876             QTAILQ_REMOVE(&chain->buffers, seg, next);
1877             g_free(seg->buf);
1878             g_free(seg);
1879         }
1880 
1881         timer_del(chain->drain_timer);
1882         timer_free(chain->drain_timer);
1883         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1884         g_free(chain);
1885     }
1886 }
1887 
1888 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1889                                      NetClientState *nc,
1890                                      const uint8_t *buf, size_t size)
1891 {
1892     uint16_t hdr_len;
1893     VirtioNetRscSeg *seg;
1894 
1895     hdr_len = chain->n->guest_hdr_len;
1896     seg = g_malloc(sizeof(VirtioNetRscSeg));
1897     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1898         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1899     memcpy(seg->buf, buf, size);
1900     seg->size = size;
1901     seg->packets = 1;
1902     seg->dup_ack = 0;
1903     seg->is_coalesced = 0;
1904     seg->nc = nc;
1905 
1906     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1907     chain->stat.cache++;
1908 
1909     switch (chain->proto) {
1910     case ETH_P_IP:
1911         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1912         break;
1913     case ETH_P_IPV6:
1914         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1915         break;
1916     default:
1917         g_assert_not_reached();
1918     }
1919 }
1920 
1921 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1922                                          VirtioNetRscSeg *seg,
1923                                          const uint8_t *buf,
1924                                          struct tcp_header *n_tcp,
1925                                          struct tcp_header *o_tcp)
1926 {
1927     uint32_t nack, oack;
1928     uint16_t nwin, owin;
1929 
1930     nack = htonl(n_tcp->th_ack);
1931     nwin = htons(n_tcp->th_win);
1932     oack = htonl(o_tcp->th_ack);
1933     owin = htons(o_tcp->th_win);
1934 
1935     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1936         chain->stat.ack_out_of_win++;
1937         return RSC_FINAL;
1938     } else if (nack == oack) {
1939         /* duplicated ack or window probe */
1940         if (nwin == owin) {
1941             /* duplicated ack, add dup ack count due to whql test up to 1 */
1942             chain->stat.dup_ack++;
1943             return RSC_FINAL;
1944         } else {
1945             /* Coalesce window update */
1946             o_tcp->th_win = n_tcp->th_win;
1947             chain->stat.win_update++;
1948             return RSC_COALESCE;
1949         }
1950     } else {
1951         /* pure ack, go to 'C', finalize*/
1952         chain->stat.pure_ack++;
1953         return RSC_FINAL;
1954     }
1955 }
1956 
1957 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1958                                             VirtioNetRscSeg *seg,
1959                                             const uint8_t *buf,
1960                                             VirtioNetRscUnit *n_unit)
1961 {
1962     void *data;
1963     uint16_t o_ip_len;
1964     uint32_t nseq, oseq;
1965     VirtioNetRscUnit *o_unit;
1966 
1967     o_unit = &seg->unit;
1968     o_ip_len = htons(*o_unit->ip_plen);
1969     nseq = htonl(n_unit->tcp->th_seq);
1970     oseq = htonl(o_unit->tcp->th_seq);
1971 
1972     /* out of order or retransmitted. */
1973     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1974         chain->stat.data_out_of_win++;
1975         return RSC_FINAL;
1976     }
1977 
1978     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1979     if (nseq == oseq) {
1980         if ((o_unit->payload == 0) && n_unit->payload) {
1981             /* From no payload to payload, normal case, not a dup ack or etc */
1982             chain->stat.data_after_pure_ack++;
1983             goto coalesce;
1984         } else {
1985             return virtio_net_rsc_handle_ack(chain, seg, buf,
1986                                              n_unit->tcp, o_unit->tcp);
1987         }
1988     } else if ((nseq - oseq) != o_unit->payload) {
1989         /* Not a consistent packet, out of order */
1990         chain->stat.data_out_of_order++;
1991         return RSC_FINAL;
1992     } else {
1993 coalesce:
1994         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1995             chain->stat.over_size++;
1996             return RSC_FINAL;
1997         }
1998 
1999         /* Here comes the right data, the payload length in v4/v6 is different,
2000            so use the field value to update and record the new data len */
2001         o_unit->payload += n_unit->payload; /* update new data len */
2002 
2003         /* update field in ip header */
2004         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2005 
2006         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2007            for windows guest, while this may change the behavior for linux
2008            guest (only if it uses RSC feature). */
2009         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2010 
2011         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2012         o_unit->tcp->th_win = n_unit->tcp->th_win;
2013 
2014         memmove(seg->buf + seg->size, data, n_unit->payload);
2015         seg->size += n_unit->payload;
2016         seg->packets++;
2017         chain->stat.coalesced++;
2018         return RSC_COALESCE;
2019     }
2020 }
2021 
2022 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2023                                         VirtioNetRscSeg *seg,
2024                                         const uint8_t *buf, size_t size,
2025                                         VirtioNetRscUnit *unit)
2026 {
2027     struct ip_header *ip1, *ip2;
2028 
2029     ip1 = (struct ip_header *)(unit->ip);
2030     ip2 = (struct ip_header *)(seg->unit.ip);
2031     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2032         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2033         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2034         chain->stat.no_match++;
2035         return RSC_NO_MATCH;
2036     }
2037 
2038     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2039 }
2040 
2041 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2042                                         VirtioNetRscSeg *seg,
2043                                         const uint8_t *buf, size_t size,
2044                                         VirtioNetRscUnit *unit)
2045 {
2046     struct ip6_header *ip1, *ip2;
2047 
2048     ip1 = (struct ip6_header *)(unit->ip);
2049     ip2 = (struct ip6_header *)(seg->unit.ip);
2050     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2051         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2052         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2053         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2054             chain->stat.no_match++;
2055             return RSC_NO_MATCH;
2056     }
2057 
2058     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2059 }
2060 
2061 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2062  * to prevent out of order */
2063 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2064                                          struct tcp_header *tcp)
2065 {
2066     uint16_t tcp_hdr;
2067     uint16_t tcp_flag;
2068 
2069     tcp_flag = htons(tcp->th_offset_flags);
2070     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2071     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2072     if (tcp_flag & TH_SYN) {
2073         chain->stat.tcp_syn++;
2074         return RSC_BYPASS;
2075     }
2076 
2077     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2078         chain->stat.tcp_ctrl_drain++;
2079         return RSC_FINAL;
2080     }
2081 
2082     if (tcp_hdr > sizeof(struct tcp_header)) {
2083         chain->stat.tcp_all_opt++;
2084         return RSC_FINAL;
2085     }
2086 
2087     return RSC_CANDIDATE;
2088 }
2089 
2090 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2091                                          NetClientState *nc,
2092                                          const uint8_t *buf, size_t size,
2093                                          VirtioNetRscUnit *unit)
2094 {
2095     int ret;
2096     VirtioNetRscSeg *seg, *nseg;
2097 
2098     if (QTAILQ_EMPTY(&chain->buffers)) {
2099         chain->stat.empty_cache++;
2100         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2101         timer_mod(chain->drain_timer,
2102               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2103         return size;
2104     }
2105 
2106     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2107         if (chain->proto == ETH_P_IP) {
2108             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2109         } else {
2110             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2111         }
2112 
2113         if (ret == RSC_FINAL) {
2114             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2115                 /* Send failed */
2116                 chain->stat.final_failed++;
2117                 return 0;
2118             }
2119 
2120             /* Send current packet */
2121             return virtio_net_do_receive(nc, buf, size);
2122         } else if (ret == RSC_NO_MATCH) {
2123             continue;
2124         } else {
2125             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2126             seg->is_coalesced = 1;
2127             return size;
2128         }
2129     }
2130 
2131     chain->stat.no_match_cache++;
2132     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2133     return size;
2134 }
2135 
2136 /* Drain a connection data, this is to avoid out of order segments */
2137 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2138                                         NetClientState *nc,
2139                                         const uint8_t *buf, size_t size,
2140                                         uint16_t ip_start, uint16_t ip_size,
2141                                         uint16_t tcp_port)
2142 {
2143     VirtioNetRscSeg *seg, *nseg;
2144     uint32_t ppair1, ppair2;
2145 
2146     ppair1 = *(uint32_t *)(buf + tcp_port);
2147     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2148         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2149         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2150             || (ppair1 != ppair2)) {
2151             continue;
2152         }
2153         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2154             chain->stat.drain_failed++;
2155         }
2156 
2157         break;
2158     }
2159 
2160     return virtio_net_do_receive(nc, buf, size);
2161 }
2162 
2163 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2164                                             struct ip_header *ip,
2165                                             const uint8_t *buf, size_t size)
2166 {
2167     uint16_t ip_len;
2168 
2169     /* Not an ipv4 packet */
2170     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2171         chain->stat.ip_option++;
2172         return RSC_BYPASS;
2173     }
2174 
2175     /* Don't handle packets with ip option */
2176     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2177         chain->stat.ip_option++;
2178         return RSC_BYPASS;
2179     }
2180 
2181     if (ip->ip_p != IPPROTO_TCP) {
2182         chain->stat.bypass_not_tcp++;
2183         return RSC_BYPASS;
2184     }
2185 
2186     /* Don't handle packets with ip fragment */
2187     if (!(htons(ip->ip_off) & IP_DF)) {
2188         chain->stat.ip_frag++;
2189         return RSC_BYPASS;
2190     }
2191 
2192     /* Don't handle packets with ecn flag */
2193     if (IPTOS_ECN(ip->ip_tos)) {
2194         chain->stat.ip_ecn++;
2195         return RSC_BYPASS;
2196     }
2197 
2198     ip_len = htons(ip->ip_len);
2199     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2200         || ip_len > (size - chain->n->guest_hdr_len -
2201                      sizeof(struct eth_header))) {
2202         chain->stat.ip_hacked++;
2203         return RSC_BYPASS;
2204     }
2205 
2206     return RSC_CANDIDATE;
2207 }
2208 
2209 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2210                                       NetClientState *nc,
2211                                       const uint8_t *buf, size_t size)
2212 {
2213     int32_t ret;
2214     uint16_t hdr_len;
2215     VirtioNetRscUnit unit;
2216 
2217     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2218 
2219     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2220         + sizeof(struct tcp_header))) {
2221         chain->stat.bypass_not_tcp++;
2222         return virtio_net_do_receive(nc, buf, size);
2223     }
2224 
2225     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2226     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2227         != RSC_CANDIDATE) {
2228         return virtio_net_do_receive(nc, buf, size);
2229     }
2230 
2231     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2232     if (ret == RSC_BYPASS) {
2233         return virtio_net_do_receive(nc, buf, size);
2234     } else if (ret == RSC_FINAL) {
2235         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2236                 ((hdr_len + sizeof(struct eth_header)) + 12),
2237                 VIRTIO_NET_IP4_ADDR_SIZE,
2238                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2239     }
2240 
2241     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2242 }
2243 
2244 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2245                                             struct ip6_header *ip6,
2246                                             const uint8_t *buf, size_t size)
2247 {
2248     uint16_t ip_len;
2249 
2250     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2251         != IP_HEADER_VERSION_6) {
2252         return RSC_BYPASS;
2253     }
2254 
2255     /* Both option and protocol is checked in this */
2256     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2257         chain->stat.bypass_not_tcp++;
2258         return RSC_BYPASS;
2259     }
2260 
2261     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2262     if (ip_len < sizeof(struct tcp_header) ||
2263         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2264                   - sizeof(struct ip6_header))) {
2265         chain->stat.ip_hacked++;
2266         return RSC_BYPASS;
2267     }
2268 
2269     /* Don't handle packets with ecn flag */
2270     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2271         chain->stat.ip_ecn++;
2272         return RSC_BYPASS;
2273     }
2274 
2275     return RSC_CANDIDATE;
2276 }
2277 
2278 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2279                                       const uint8_t *buf, size_t size)
2280 {
2281     int32_t ret;
2282     uint16_t hdr_len;
2283     VirtioNetRscChain *chain;
2284     VirtioNetRscUnit unit;
2285 
2286     chain = (VirtioNetRscChain *)opq;
2287     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2288 
2289     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2290         + sizeof(tcp_header))) {
2291         return virtio_net_do_receive(nc, buf, size);
2292     }
2293 
2294     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2295     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2296                                                  unit.ip, buf, size)) {
2297         return virtio_net_do_receive(nc, buf, size);
2298     }
2299 
2300     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2301     if (ret == RSC_BYPASS) {
2302         return virtio_net_do_receive(nc, buf, size);
2303     } else if (ret == RSC_FINAL) {
2304         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2305                 ((hdr_len + sizeof(struct eth_header)) + 8),
2306                 VIRTIO_NET_IP6_ADDR_SIZE,
2307                 hdr_len + sizeof(struct eth_header)
2308                 + sizeof(struct ip6_header));
2309     }
2310 
2311     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2312 }
2313 
2314 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2315                                                       NetClientState *nc,
2316                                                       uint16_t proto)
2317 {
2318     VirtioNetRscChain *chain;
2319 
2320     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2321         return NULL;
2322     }
2323 
2324     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2325         if (chain->proto == proto) {
2326             return chain;
2327         }
2328     }
2329 
2330     chain = g_malloc(sizeof(*chain));
2331     chain->n = n;
2332     chain->proto = proto;
2333     if (proto == (uint16_t)ETH_P_IP) {
2334         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2335         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2336     } else {
2337         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2338         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2339     }
2340     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2341                                       virtio_net_rsc_purge, chain);
2342     memset(&chain->stat, 0, sizeof(chain->stat));
2343 
2344     QTAILQ_INIT(&chain->buffers);
2345     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2346 
2347     return chain;
2348 }
2349 
2350 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2351                                       const uint8_t *buf,
2352                                       size_t size)
2353 {
2354     uint16_t proto;
2355     VirtioNetRscChain *chain;
2356     struct eth_header *eth;
2357     VirtIONet *n;
2358 
2359     n = qemu_get_nic_opaque(nc);
2360     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2361         return virtio_net_do_receive(nc, buf, size);
2362     }
2363 
2364     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2365     proto = htons(eth->h_proto);
2366 
2367     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2368     if (chain) {
2369         chain->stat.received++;
2370         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2371             return virtio_net_rsc_receive4(chain, nc, buf, size);
2372         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2373             return virtio_net_rsc_receive6(chain, nc, buf, size);
2374         }
2375     }
2376     return virtio_net_do_receive(nc, buf, size);
2377 }
2378 
2379 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2380                                   size_t size)
2381 {
2382     VirtIONet *n = qemu_get_nic_opaque(nc);
2383     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2384         return virtio_net_rsc_receive(nc, buf, size);
2385     } else {
2386         return virtio_net_do_receive(nc, buf, size);
2387     }
2388 }
2389 
2390 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2391 
2392 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2393 {
2394     VirtIONet *n = qemu_get_nic_opaque(nc);
2395     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2396     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2397 
2398     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2399     virtio_notify(vdev, q->tx_vq);
2400 
2401     g_free(q->async_tx.elem);
2402     q->async_tx.elem = NULL;
2403 
2404     virtio_queue_set_notification(q->tx_vq, 1);
2405     virtio_net_flush_tx(q);
2406 }
2407 
2408 /* TX */
2409 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2410 {
2411     VirtIONet *n = q->n;
2412     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2413     VirtQueueElement *elem;
2414     int32_t num_packets = 0;
2415     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2416     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2417         return num_packets;
2418     }
2419 
2420     if (q->async_tx.elem) {
2421         virtio_queue_set_notification(q->tx_vq, 0);
2422         return num_packets;
2423     }
2424 
2425     for (;;) {
2426         ssize_t ret;
2427         unsigned int out_num;
2428         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2429         struct virtio_net_hdr_mrg_rxbuf mhdr;
2430 
2431         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2432         if (!elem) {
2433             break;
2434         }
2435 
2436         out_num = elem->out_num;
2437         out_sg = elem->out_sg;
2438         if (out_num < 1) {
2439             virtio_error(vdev, "virtio-net header not in first element");
2440             virtqueue_detach_element(q->tx_vq, elem, 0);
2441             g_free(elem);
2442             return -EINVAL;
2443         }
2444 
2445         if (n->has_vnet_hdr) {
2446             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2447                 n->guest_hdr_len) {
2448                 virtio_error(vdev, "virtio-net header incorrect");
2449                 virtqueue_detach_element(q->tx_vq, elem, 0);
2450                 g_free(elem);
2451                 return -EINVAL;
2452             }
2453             if (n->needs_vnet_hdr_swap) {
2454                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2455                 sg2[0].iov_base = &mhdr;
2456                 sg2[0].iov_len = n->guest_hdr_len;
2457                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2458                                    out_sg, out_num,
2459                                    n->guest_hdr_len, -1);
2460                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2461                     goto drop;
2462                 }
2463                 out_num += 1;
2464                 out_sg = sg2;
2465             }
2466         }
2467         /*
2468          * If host wants to see the guest header as is, we can
2469          * pass it on unchanged. Otherwise, copy just the parts
2470          * that host is interested in.
2471          */
2472         assert(n->host_hdr_len <= n->guest_hdr_len);
2473         if (n->host_hdr_len != n->guest_hdr_len) {
2474             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2475                                        out_sg, out_num,
2476                                        0, n->host_hdr_len);
2477             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2478                              out_sg, out_num,
2479                              n->guest_hdr_len, -1);
2480             out_num = sg_num;
2481             out_sg = sg;
2482         }
2483 
2484         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2485                                       out_sg, out_num, virtio_net_tx_complete);
2486         if (ret == 0) {
2487             virtio_queue_set_notification(q->tx_vq, 0);
2488             q->async_tx.elem = elem;
2489             return -EBUSY;
2490         }
2491 
2492 drop:
2493         virtqueue_push(q->tx_vq, elem, 0);
2494         virtio_notify(vdev, q->tx_vq);
2495         g_free(elem);
2496 
2497         if (++num_packets >= n->tx_burst) {
2498             break;
2499         }
2500     }
2501     return num_packets;
2502 }
2503 
2504 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2505 {
2506     VirtIONet *n = VIRTIO_NET(vdev);
2507     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2508 
2509     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2510         virtio_net_drop_tx_queue_data(vdev, vq);
2511         return;
2512     }
2513 
2514     /* This happens when device was stopped but VCPU wasn't. */
2515     if (!vdev->vm_running) {
2516         q->tx_waiting = 1;
2517         return;
2518     }
2519 
2520     if (q->tx_waiting) {
2521         virtio_queue_set_notification(vq, 1);
2522         timer_del(q->tx_timer);
2523         q->tx_waiting = 0;
2524         if (virtio_net_flush_tx(q) == -EINVAL) {
2525             return;
2526         }
2527     } else {
2528         timer_mod(q->tx_timer,
2529                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2530         q->tx_waiting = 1;
2531         virtio_queue_set_notification(vq, 0);
2532     }
2533 }
2534 
2535 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2536 {
2537     VirtIONet *n = VIRTIO_NET(vdev);
2538     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2539 
2540     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2541         virtio_net_drop_tx_queue_data(vdev, vq);
2542         return;
2543     }
2544 
2545     if (unlikely(q->tx_waiting)) {
2546         return;
2547     }
2548     q->tx_waiting = 1;
2549     /* This happens when device was stopped but VCPU wasn't. */
2550     if (!vdev->vm_running) {
2551         return;
2552     }
2553     virtio_queue_set_notification(vq, 0);
2554     qemu_bh_schedule(q->tx_bh);
2555 }
2556 
2557 static void virtio_net_tx_timer(void *opaque)
2558 {
2559     VirtIONetQueue *q = opaque;
2560     VirtIONet *n = q->n;
2561     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2562     /* This happens when device was stopped but BH wasn't. */
2563     if (!vdev->vm_running) {
2564         /* Make sure tx waiting is set, so we'll run when restarted. */
2565         assert(q->tx_waiting);
2566         return;
2567     }
2568 
2569     q->tx_waiting = 0;
2570 
2571     /* Just in case the driver is not ready on more */
2572     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2573         return;
2574     }
2575 
2576     virtio_queue_set_notification(q->tx_vq, 1);
2577     virtio_net_flush_tx(q);
2578 }
2579 
2580 static void virtio_net_tx_bh(void *opaque)
2581 {
2582     VirtIONetQueue *q = opaque;
2583     VirtIONet *n = q->n;
2584     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2585     int32_t ret;
2586 
2587     /* This happens when device was stopped but BH wasn't. */
2588     if (!vdev->vm_running) {
2589         /* Make sure tx waiting is set, so we'll run when restarted. */
2590         assert(q->tx_waiting);
2591         return;
2592     }
2593 
2594     q->tx_waiting = 0;
2595 
2596     /* Just in case the driver is not ready on more */
2597     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2598         return;
2599     }
2600 
2601     ret = virtio_net_flush_tx(q);
2602     if (ret == -EBUSY || ret == -EINVAL) {
2603         return; /* Notification re-enable handled by tx_complete or device
2604                  * broken */
2605     }
2606 
2607     /* If we flush a full burst of packets, assume there are
2608      * more coming and immediately reschedule */
2609     if (ret >= n->tx_burst) {
2610         qemu_bh_schedule(q->tx_bh);
2611         q->tx_waiting = 1;
2612         return;
2613     }
2614 
2615     /* If less than a full burst, re-enable notification and flush
2616      * anything that may have come in while we weren't looking.  If
2617      * we find something, assume the guest is still active and reschedule */
2618     virtio_queue_set_notification(q->tx_vq, 1);
2619     ret = virtio_net_flush_tx(q);
2620     if (ret == -EINVAL) {
2621         return;
2622     } else if (ret > 0) {
2623         virtio_queue_set_notification(q->tx_vq, 0);
2624         qemu_bh_schedule(q->tx_bh);
2625         q->tx_waiting = 1;
2626     }
2627 }
2628 
2629 static void virtio_net_add_queue(VirtIONet *n, int index)
2630 {
2631     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2632 
2633     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2634                                            virtio_net_handle_rx);
2635 
2636     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2637         n->vqs[index].tx_vq =
2638             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2639                              virtio_net_handle_tx_timer);
2640         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2641                                               virtio_net_tx_timer,
2642                                               &n->vqs[index]);
2643     } else {
2644         n->vqs[index].tx_vq =
2645             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2646                              virtio_net_handle_tx_bh);
2647         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2648     }
2649 
2650     n->vqs[index].tx_waiting = 0;
2651     n->vqs[index].n = n;
2652 }
2653 
2654 static void virtio_net_del_queue(VirtIONet *n, int index)
2655 {
2656     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2657     VirtIONetQueue *q = &n->vqs[index];
2658     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2659 
2660     qemu_purge_queued_packets(nc);
2661 
2662     virtio_del_queue(vdev, index * 2);
2663     if (q->tx_timer) {
2664         timer_del(q->tx_timer);
2665         timer_free(q->tx_timer);
2666         q->tx_timer = NULL;
2667     } else {
2668         qemu_bh_delete(q->tx_bh);
2669         q->tx_bh = NULL;
2670     }
2671     q->tx_waiting = 0;
2672     virtio_del_queue(vdev, index * 2 + 1);
2673 }
2674 
2675 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2676 {
2677     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2678     int old_num_queues = virtio_get_num_queues(vdev);
2679     int new_num_queues = new_max_queues * 2 + 1;
2680     int i;
2681 
2682     assert(old_num_queues >= 3);
2683     assert(old_num_queues % 2 == 1);
2684 
2685     if (old_num_queues == new_num_queues) {
2686         return;
2687     }
2688 
2689     /*
2690      * We always need to remove and add ctrl vq if
2691      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2692      * and then we only enter one of the following two loops.
2693      */
2694     virtio_del_queue(vdev, old_num_queues - 1);
2695 
2696     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2697         /* new_num_queues < old_num_queues */
2698         virtio_net_del_queue(n, i / 2);
2699     }
2700 
2701     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2702         /* new_num_queues > old_num_queues */
2703         virtio_net_add_queue(n, i / 2);
2704     }
2705 
2706     /* add ctrl_vq last */
2707     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2708 }
2709 
2710 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2711 {
2712     int max = multiqueue ? n->max_queues : 1;
2713 
2714     n->multiqueue = multiqueue;
2715     virtio_net_change_num_queues(n, max);
2716 
2717     virtio_net_set_queues(n);
2718 }
2719 
2720 static int virtio_net_post_load_device(void *opaque, int version_id)
2721 {
2722     VirtIONet *n = opaque;
2723     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2724     int i, link_down;
2725 
2726     trace_virtio_net_post_load_device();
2727     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2728                                virtio_vdev_has_feature(vdev,
2729                                                        VIRTIO_F_VERSION_1),
2730                                virtio_vdev_has_feature(vdev,
2731                                                        VIRTIO_NET_F_HASH_REPORT));
2732 
2733     /* MAC_TABLE_ENTRIES may be different from the saved image */
2734     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2735         n->mac_table.in_use = 0;
2736     }
2737 
2738     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2739         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2740     }
2741 
2742     /*
2743      * curr_guest_offloads will be later overwritten by the
2744      * virtio_set_features_nocheck call done from the virtio_load.
2745      * Here we make sure it is preserved and restored accordingly
2746      * in the virtio_net_post_load_virtio callback.
2747      */
2748     n->saved_guest_offloads = n->curr_guest_offloads;
2749 
2750     virtio_net_set_queues(n);
2751 
2752     /* Find the first multicast entry in the saved MAC filter */
2753     for (i = 0; i < n->mac_table.in_use; i++) {
2754         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2755             break;
2756         }
2757     }
2758     n->mac_table.first_multi = i;
2759 
2760     /* nc.link_down can't be migrated, so infer link_down according
2761      * to link status bit in n->status */
2762     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2763     for (i = 0; i < n->max_queues; i++) {
2764         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2765     }
2766 
2767     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2768         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2769         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2770                                   QEMU_CLOCK_VIRTUAL,
2771                                   virtio_net_announce_timer, n);
2772         if (n->announce_timer.round) {
2773             timer_mod(n->announce_timer.tm,
2774                       qemu_clock_get_ms(n->announce_timer.type));
2775         } else {
2776             qemu_announce_timer_del(&n->announce_timer, false);
2777         }
2778     }
2779 
2780     if (n->rss_data.enabled) {
2781         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2782                                     n->rss_data.indirections_len,
2783                                     sizeof(n->rss_data.key));
2784     } else {
2785         trace_virtio_net_rss_disable();
2786     }
2787     return 0;
2788 }
2789 
2790 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2791 {
2792     VirtIONet *n = VIRTIO_NET(vdev);
2793     /*
2794      * The actual needed state is now in saved_guest_offloads,
2795      * see virtio_net_post_load_device for detail.
2796      * Restore it back and apply the desired offloads.
2797      */
2798     n->curr_guest_offloads = n->saved_guest_offloads;
2799     if (peer_has_vnet_hdr(n)) {
2800         virtio_net_apply_guest_offloads(n);
2801     }
2802 
2803     return 0;
2804 }
2805 
2806 /* tx_waiting field of a VirtIONetQueue */
2807 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2808     .name = "virtio-net-queue-tx_waiting",
2809     .fields = (VMStateField[]) {
2810         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2811         VMSTATE_END_OF_LIST()
2812    },
2813 };
2814 
2815 static bool max_queues_gt_1(void *opaque, int version_id)
2816 {
2817     return VIRTIO_NET(opaque)->max_queues > 1;
2818 }
2819 
2820 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2821 {
2822     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2823                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2824 }
2825 
2826 static bool mac_table_fits(void *opaque, int version_id)
2827 {
2828     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2829 }
2830 
/* vmstate field test: exact negation of mac_table_fits() */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
2835 
2836 /* This temporary type is shared by all the WITH_TMP methods
2837  * although only some fields are used by each.
2838  */
2839 struct VirtIONetMigTmp {
2840     VirtIONet      *parent;
2841     VirtIONetQueue *vqs_1;
2842     uint16_t        curr_queues_1;
2843     uint8_t         has_ufo;
2844     uint32_t        has_vnet_hdr;
2845 };
2846 
2847 /* The 2nd and subsequent tx_waiting flags are loaded later than
2848  * the 1st entry in the queues and only if there's more than one
2849  * entry.  We use the tmp mechanism to calculate a temporary
2850  * pointer and count and also validate the count.
2851  */
2852 
2853 static int virtio_net_tx_waiting_pre_save(void *opaque)
2854 {
2855     struct VirtIONetMigTmp *tmp = opaque;
2856 
2857     tmp->vqs_1 = tmp->parent->vqs + 1;
2858     tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2859     if (tmp->parent->curr_queues == 0) {
2860         tmp->curr_queues_1 = 0;
2861     }
2862 
2863     return 0;
2864 }
2865 
2866 static int virtio_net_tx_waiting_pre_load(void *opaque)
2867 {
2868     struct VirtIONetMigTmp *tmp = opaque;
2869 
2870     /* Reuse the pointer setup from save */
2871     virtio_net_tx_waiting_pre_save(opaque);
2872 
2873     if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2874         error_report("virtio-net: curr_queues %x > max_queues %x",
2875             tmp->parent->curr_queues, tmp->parent->max_queues);
2876 
2877         return -EINVAL;
2878     }
2879 
2880     return 0; /* all good */
2881 }
2882 
2883 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2884     .name      = "virtio-net-tx_waiting",
2885     .pre_load  = virtio_net_tx_waiting_pre_load,
2886     .pre_save  = virtio_net_tx_waiting_pre_save,
2887     .fields    = (VMStateField[]) {
2888         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2889                                      curr_queues_1,
2890                                      vmstate_virtio_net_queue_tx_waiting,
2891                                      struct VirtIONetQueue),
2892         VMSTATE_END_OF_LIST()
2893     },
2894 };
2895 
2896 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2897  * flag set we need to check that we have it
2898  */
2899 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2900 {
2901     struct VirtIONetMigTmp *tmp = opaque;
2902 
2903     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2904         error_report("virtio-net: saved image requires TUN_F_UFO support");
2905         return -EINVAL;
2906     }
2907 
2908     return 0;
2909 }
2910 
2911 static int virtio_net_ufo_pre_save(void *opaque)
2912 {
2913     struct VirtIONetMigTmp *tmp = opaque;
2914 
2915     tmp->has_ufo = tmp->parent->has_ufo;
2916 
2917     return 0;
2918 }
2919 
2920 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2921     .name      = "virtio-net-ufo",
2922     .post_load = virtio_net_ufo_post_load,
2923     .pre_save  = virtio_net_ufo_pre_save,
2924     .fields    = (VMStateField[]) {
2925         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2926         VMSTATE_END_OF_LIST()
2927     },
2928 };
2929 
2930 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2931  * flag set we need to check that we have it
2932  */
2933 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2934 {
2935     struct VirtIONetMigTmp *tmp = opaque;
2936 
2937     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2938         error_report("virtio-net: saved image requires vnet_hdr=on");
2939         return -EINVAL;
2940     }
2941 
2942     return 0;
2943 }
2944 
2945 static int virtio_net_vnet_pre_save(void *opaque)
2946 {
2947     struct VirtIONetMigTmp *tmp = opaque;
2948 
2949     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2950 
2951     return 0;
2952 }
2953 
2954 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2955     .name      = "virtio-net-vnet",
2956     .post_load = virtio_net_vnet_post_load,
2957     .pre_save  = virtio_net_vnet_pre_save,
2958     .fields    = (VMStateField[]) {
2959         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2960         VMSTATE_END_OF_LIST()
2961     },
2962 };
2963 
2964 static bool virtio_net_rss_needed(void *opaque)
2965 {
2966     return VIRTIO_NET(opaque)->rss_data.enabled;
2967 }
2968 
2969 static const VMStateDescription vmstate_virtio_net_rss = {
2970     .name      = "virtio-net-device/rss",
2971     .version_id = 1,
2972     .minimum_version_id = 1,
2973     .needed = virtio_net_rss_needed,
2974     .fields = (VMStateField[]) {
2975         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
2976         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
2977         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
2978         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
2979         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
2980         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
2981         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
2982                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
2983         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
2984                                     rss_data.indirections_len, 0,
2985                                     vmstate_info_uint16, uint16_t),
2986         VMSTATE_END_OF_LIST()
2987     },
2988 };
2989 
2990 static const VMStateDescription vmstate_virtio_net_device = {
2991     .name = "virtio-net-device",
2992     .version_id = VIRTIO_NET_VM_VERSION,
2993     .minimum_version_id = VIRTIO_NET_VM_VERSION,
2994     .post_load = virtio_net_post_load_device,
2995     .fields = (VMStateField[]) {
2996         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2997         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2998                                vmstate_virtio_net_queue_tx_waiting,
2999                                VirtIONetQueue),
3000         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3001         VMSTATE_UINT16(status, VirtIONet),
3002         VMSTATE_UINT8(promisc, VirtIONet),
3003         VMSTATE_UINT8(allmulti, VirtIONet),
3004         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3005 
3006         /* Guarded pair: If it fits we load it, else we throw it away
3007          * - can happen if source has a larger MAC table.; post-load
3008          *  sets flags in this case.
3009          */
3010         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3011                                 0, mac_table_fits, mac_table.in_use,
3012                                  ETH_ALEN),
3013         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3014                                      mac_table.in_use, ETH_ALEN),
3015 
3016         /* Note: This is an array of uint32's that's always been saved as a
3017          * buffer; hold onto your endiannesses; it's actually used as a bitmap
3018          * but based on the uint.
3019          */
3020         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3021         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3022                          vmstate_virtio_net_has_vnet),
3023         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3024         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3025         VMSTATE_UINT8(alluni, VirtIONet),
3026         VMSTATE_UINT8(nomulti, VirtIONet),
3027         VMSTATE_UINT8(nouni, VirtIONet),
3028         VMSTATE_UINT8(nobcast, VirtIONet),
3029         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3030                          vmstate_virtio_net_has_ufo),
3031         VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3032                             vmstate_info_uint16_equal, uint16_t),
3033         VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3034         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3035                          vmstate_virtio_net_tx_waiting),
3036         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3037                             has_ctrl_guest_offloads),
3038         VMSTATE_END_OF_LIST()
3039    },
3040     .subsections = (const VMStateDescription * []) {
3041         &vmstate_virtio_net_rss,
3042         NULL
3043     }
3044 };
3045 
3046 static NetClientInfo net_virtio_info = {
3047     .type = NET_CLIENT_DRIVER_NIC,
3048     .size = sizeof(NICState),
3049     .can_receive = virtio_net_can_receive,
3050     .receive = virtio_net_receive,
3051     .link_status_changed = virtio_net_set_link_status,
3052     .query_rx_filter = virtio_net_query_rxfilter,
3053     .announce = virtio_net_announce,
3054 };
3055 
3056 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3057 {
3058     VirtIONet *n = VIRTIO_NET(vdev);
3059     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3060     assert(n->vhost_started);
3061     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3062 }
3063 
3064 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3065                                            bool mask)
3066 {
3067     VirtIONet *n = VIRTIO_NET(vdev);
3068     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3069     assert(n->vhost_started);
3070     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3071                              vdev, idx, mask);
3072 }
3073 
3074 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3075 {
3076     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3077 
3078     n->config_size = virtio_feature_get_config_size(feature_sizes,
3079                                                     host_features);
3080 }
3081 
3082 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3083                                    const char *type)
3084 {
3085     /*
3086      * The name can be NULL, the netclient name will be type.x.
3087      */
3088     assert(type != NULL);
3089 
3090     g_free(n->netclient_name);
3091     g_free(n->netclient_type);
3092     n->netclient_name = g_strdup(name);
3093     n->netclient_type = g_strdup(type);
3094 }
3095 
3096 static bool failover_unplug_primary(VirtIONet *n)
3097 {
3098     HotplugHandler *hotplug_ctrl;
3099     PCIDevice *pci_dev;
3100     Error *err = NULL;
3101 
3102     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3103     if (hotplug_ctrl) {
3104         pci_dev = PCI_DEVICE(n->primary_dev);
3105         pci_dev->partially_hotplugged = true;
3106         hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3107         if (err) {
3108             error_report_err(err);
3109             return false;
3110         }
3111     } else {
3112         return false;
3113     }
3114     return true;
3115 }
3116 
3117 static bool failover_replug_primary(VirtIONet *n, Error **errp)
3118 {
3119     Error *err = NULL;
3120     HotplugHandler *hotplug_ctrl;
3121     PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3122     BusState *primary_bus;
3123 
3124     if (!pdev->partially_hotplugged) {
3125         return true;
3126     }
3127     primary_bus = n->primary_dev->parent_bus;
3128     if (!primary_bus) {
3129         error_setg(errp, "virtio_net: couldn't find primary bus");
3130         return false;
3131     }
3132     qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort);
3133     qatomic_set(&n->failover_primary_hidden, false);
3134     hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3135     if (hotplug_ctrl) {
3136         hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3137         if (err) {
3138             goto out;
3139         }
3140         hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3141     }
3142 
3143 out:
3144     error_propagate(errp, err);
3145     return !err;
3146 }
3147 
3148 static void virtio_net_handle_migration_primary(VirtIONet *n,
3149                                                 MigrationState *s)
3150 {
3151     bool should_be_hidden;
3152     Error *err = NULL;
3153 
3154     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3155 
3156     if (!n->primary_dev) {
3157         n->primary_dev = failover_find_primary_device(n);
3158         if (!n->primary_dev) {
3159             return;
3160         }
3161     }
3162 
3163     if (migration_in_setup(s) && !should_be_hidden) {
3164         if (failover_unplug_primary(n)) {
3165             vmstate_unregister(VMSTATE_IF(n->primary_dev),
3166                                qdev_get_vmsd(n->primary_dev),
3167                                n->primary_dev);
3168             qapi_event_send_unplug_primary(n->primary_dev->id);
3169             qatomic_set(&n->failover_primary_hidden, true);
3170         } else {
3171             warn_report("couldn't unplug primary device");
3172         }
3173     } else if (migration_has_failed(s)) {
3174         /* We already unplugged the device let's plug it back */
3175         if (!failover_replug_primary(n, &err)) {
3176             if (err) {
3177                 error_report_err(err);
3178             }
3179         }
3180     }
3181 }
3182 
3183 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3184 {
3185     MigrationState *s = data;
3186     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3187     virtio_net_handle_migration_primary(n, s);
3188 }
3189 
3190 static bool failover_hide_primary_device(DeviceListener *listener,
3191                                          QemuOpts *device_opts)
3192 {
3193     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3194     const char *standby_id;
3195 
3196     if (!device_opts) {
3197         return false;
3198     }
3199     standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3200     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3201         return false;
3202     }
3203 
3204     /* failover_primary_hidden is set during feature negotiation */
3205     return qatomic_read(&n->failover_primary_hidden);
3206 }
3207 
3208 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3209 {
3210     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3211     VirtIONet *n = VIRTIO_NET(dev);
3212     NetClientState *nc;
3213     int i;
3214 
3215     if (n->net_conf.mtu) {
3216         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3217     }
3218 
3219     if (n->net_conf.duplex_str) {
3220         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3221             n->net_conf.duplex = DUPLEX_HALF;
3222         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3223             n->net_conf.duplex = DUPLEX_FULL;
3224         } else {
3225             error_setg(errp, "'duplex' must be 'half' or 'full'");
3226             return;
3227         }
3228         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3229     } else {
3230         n->net_conf.duplex = DUPLEX_UNKNOWN;
3231     }
3232 
3233     if (n->net_conf.speed < SPEED_UNKNOWN) {
3234         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3235         return;
3236     }
3237     if (n->net_conf.speed >= 0) {
3238         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3239     }
3240 
3241     if (n->failover) {
3242         n->primary_listener.hide_device = failover_hide_primary_device;
3243         qatomic_set(&n->failover_primary_hidden, true);
3244         device_listener_register(&n->primary_listener);
3245         n->migration_state.notify = virtio_net_migration_state_notifier;
3246         add_migration_state_change_notifier(&n->migration_state);
3247         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3248     }
3249 
3250     virtio_net_set_config_size(n, n->host_features);
3251     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3252 
3253     /*
3254      * We set a lower limit on RX queue size to what it always was.
3255      * Guests that want a smaller ring can always resize it without
3256      * help from us (using virtio 1 and up).
3257      */
3258     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3259         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3260         !is_power_of_2(n->net_conf.rx_queue_size)) {
3261         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3262                    "must be a power of 2 between %d and %d.",
3263                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3264                    VIRTQUEUE_MAX_SIZE);
3265         virtio_cleanup(vdev);
3266         return;
3267     }
3268 
3269     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3270         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3271         !is_power_of_2(n->net_conf.tx_queue_size)) {
3272         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3273                    "must be a power of 2 between %d and %d",
3274                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3275                    VIRTQUEUE_MAX_SIZE);
3276         virtio_cleanup(vdev);
3277         return;
3278     }
3279 
3280     n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3281     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3282         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3283                    "must be a positive integer less than %d.",
3284                    n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3285         virtio_cleanup(vdev);
3286         return;
3287     }
3288     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3289     n->curr_queues = 1;
3290     n->tx_timeout = n->net_conf.txtimer;
3291 
3292     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3293                        && strcmp(n->net_conf.tx, "bh")) {
3294         warn_report("virtio-net: "
3295                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3296                     n->net_conf.tx);
3297         error_printf("Defaulting to \"bh\"");
3298     }
3299 
3300     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3301                                     n->net_conf.tx_queue_size);
3302 
3303     for (i = 0; i < n->max_queues; i++) {
3304         virtio_net_add_queue(n, i);
3305     }
3306 
3307     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3308     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3309     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3310     n->status = VIRTIO_NET_S_LINK_UP;
3311     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3312                               QEMU_CLOCK_VIRTUAL,
3313                               virtio_net_announce_timer, n);
3314     n->announce_timer.round = 0;
3315 
3316     if (n->netclient_type) {
3317         /*
3318          * Happen when virtio_net_set_netclient_name has been called.
3319          */
3320         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3321                               n->netclient_type, n->netclient_name, n);
3322     } else {
3323         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3324                               object_get_typename(OBJECT(dev)), dev->id, n);
3325     }
3326 
3327     peer_test_vnet_hdr(n);
3328     if (peer_has_vnet_hdr(n)) {
3329         for (i = 0; i < n->max_queues; i++) {
3330             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3331         }
3332         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3333     } else {
3334         n->host_hdr_len = 0;
3335     }
3336 
3337     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3338 
3339     n->vqs[0].tx_waiting = 0;
3340     n->tx_burst = n->net_conf.txburst;
3341     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3342     n->promisc = 1; /* for compatibility */
3343 
3344     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3345 
3346     n->vlans = g_malloc0(MAX_VLAN >> 3);
3347 
3348     nc = qemu_get_queue(n->nic);
3349     nc->rxfilter_notify_enabled = 1;
3350 
3351    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3352         struct virtio_net_config netcfg = {};
3353         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3354         vhost_net_set_config(get_vhost_net(nc->peer),
3355             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3356     }
3357     QTAILQ_INIT(&n->rsc_chains);
3358     n->qdev = dev;
3359 
3360     net_rx_pkt_init(&n->rx_pkt, false);
3361 }
3362 
3363 static void virtio_net_device_unrealize(DeviceState *dev)
3364 {
3365     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3366     VirtIONet *n = VIRTIO_NET(dev);
3367     int i, max_queues;
3368 
3369     /* This will stop vhost backend if appropriate. */
3370     virtio_net_set_status(vdev, 0);
3371 
3372     g_free(n->netclient_name);
3373     n->netclient_name = NULL;
3374     g_free(n->netclient_type);
3375     n->netclient_type = NULL;
3376 
3377     g_free(n->mac_table.macs);
3378     g_free(n->vlans);
3379 
3380     if (n->failover) {
3381         device_listener_unregister(&n->primary_listener);
3382     }
3383 
3384     max_queues = n->multiqueue ? n->max_queues : 1;
3385     for (i = 0; i < max_queues; i++) {
3386         virtio_net_del_queue(n, i);
3387     }
3388     /* delete also control vq */
3389     virtio_del_queue(vdev, max_queues * 2);
3390     qemu_announce_timer_del(&n->announce_timer, false);
3391     g_free(n->vqs);
3392     qemu_del_nic(n->nic);
3393     virtio_net_rsc_cleanup(n);
3394     g_free(n->rss_data.indirections_table);
3395     net_rx_pkt_uninit(n->rx_pkt);
3396     virtio_cleanup(vdev);
3397 }
3398 
3399 static void virtio_net_instance_init(Object *obj)
3400 {
3401     VirtIONet *n = VIRTIO_NET(obj);
3402 
3403     /*
3404      * The default config_size is sizeof(struct virtio_net_config).
3405      * Can be overriden with virtio_net_set_config_size.
3406      */
3407     n->config_size = sizeof(struct virtio_net_config);
3408     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3409                                   "bootindex", "/ethernet-phy@0",
3410                                   DEVICE(n));
3411 }
3412 
3413 static int virtio_net_pre_save(void *opaque)
3414 {
3415     VirtIONet *n = opaque;
3416 
3417     /* At this point, backend must be stopped, otherwise
3418      * it might keep writing to memory. */
3419     assert(!n->vhost_started);
3420 
3421     return 0;
3422 }
3423 
3424 static bool primary_unplug_pending(void *opaque)
3425 {
3426     DeviceState *dev = opaque;
3427     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3428     VirtIONet *n = VIRTIO_NET(vdev);
3429 
3430     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3431         return false;
3432     }
3433     return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3434 }
3435 
3436 static bool dev_unplug_pending(void *opaque)
3437 {
3438     DeviceState *dev = opaque;
3439     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3440 
3441     return vdc->primary_unplug_pending(dev);
3442 }
3443 
3444 static const VMStateDescription vmstate_virtio_net = {
3445     .name = "virtio-net",
3446     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3447     .version_id = VIRTIO_NET_VM_VERSION,
3448     .fields = (VMStateField[]) {
3449         VMSTATE_VIRTIO_DEVICE,
3450         VMSTATE_END_OF_LIST()
3451     },
3452     .pre_save = virtio_net_pre_save,
3453     .dev_unplug_pending = dev_unplug_pending,
3454 };
3455 
3456 static Property virtio_net_properties[] = {
3457     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3458                     VIRTIO_NET_F_CSUM, true),
3459     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3460                     VIRTIO_NET_F_GUEST_CSUM, true),
3461     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3462     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3463                     VIRTIO_NET_F_GUEST_TSO4, true),
3464     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3465                     VIRTIO_NET_F_GUEST_TSO6, true),
3466     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3467                     VIRTIO_NET_F_GUEST_ECN, true),
3468     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3469                     VIRTIO_NET_F_GUEST_UFO, true),
3470     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3471                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3472     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3473                     VIRTIO_NET_F_HOST_TSO4, true),
3474     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3475                     VIRTIO_NET_F_HOST_TSO6, true),
3476     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3477                     VIRTIO_NET_F_HOST_ECN, true),
3478     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3479                     VIRTIO_NET_F_HOST_UFO, true),
3480     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3481                     VIRTIO_NET_F_MRG_RXBUF, true),
3482     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3483                     VIRTIO_NET_F_STATUS, true),
3484     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3485                     VIRTIO_NET_F_CTRL_VQ, true),
3486     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3487                     VIRTIO_NET_F_CTRL_RX, true),
3488     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3489                     VIRTIO_NET_F_CTRL_VLAN, true),
3490     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3491                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3492     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3493                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3494     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3495                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3496     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3497     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3498                     VIRTIO_NET_F_RSS, false),
3499     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3500                     VIRTIO_NET_F_HASH_REPORT, false),
3501     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3502                     VIRTIO_NET_F_RSC_EXT, false),
3503     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3504                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3505     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3506     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3507                        TX_TIMER_INTERVAL),
3508     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3509     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3510     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3511                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3512     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3513                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3514     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3515     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3516                      true),
3517     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3518     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3519     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3520     DEFINE_PROP_END_OF_LIST(),
3521 };
3522 
3523 static void virtio_net_class_init(ObjectClass *klass, void *data)
3524 {
3525     DeviceClass *dc = DEVICE_CLASS(klass);
3526     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3527 
3528     device_class_set_props(dc, virtio_net_properties);
3529     dc->vmsd = &vmstate_virtio_net;
3530     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3531     vdc->realize = virtio_net_device_realize;
3532     vdc->unrealize = virtio_net_device_unrealize;
3533     vdc->get_config = virtio_net_get_config;
3534     vdc->set_config = virtio_net_set_config;
3535     vdc->get_features = virtio_net_get_features;
3536     vdc->set_features = virtio_net_set_features;
3537     vdc->bad_features = virtio_net_bad_features;
3538     vdc->reset = virtio_net_reset;
3539     vdc->set_status = virtio_net_set_status;
3540     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3541     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3542     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3543     vdc->post_load = virtio_net_post_load_virtio;
3544     vdc->vmsd = &vmstate_virtio_net_device;
3545     vdc->primary_unplug_pending = primary_unplug_pending;
3546 }
3547 
3548 static const TypeInfo virtio_net_info = {
3549     .name = TYPE_VIRTIO_NET,
3550     .parent = TYPE_VIRTIO_DEVICE,
3551     .instance_size = sizeof(VirtIONet),
3552     .instance_init = virtio_net_instance_init,
3553     .class_init = virtio_net_class_init,
3554 };
3555 
3556 static void virtio_register_types(void)
3557 {
3558     type_register_static(&virtio_net_info);
3559 }
3560 
3561 type_init(virtio_register_types)
3562