xref: /openbmc/qemu/hw/net/virtio-net.c (revision c70fe3b1)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
53 
54 /* the queue size was previously fixed at this value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue sizes; with virtio-1, the guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* IP header-length field value (IHL) for an IPv4 header without options */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval. This value affects performance
78    significantly and should be tuned carefully: '300000' (300us) is the
79    recommended value for passing the WHQL test, while '50000' can double
80    netperf throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static const VirtIOConfigSizeParams cfg_size_params = {
110     .min_size = endof(struct virtio_net_config, mac),
111     .max_size = sizeof(struct virtio_net_config),
112     .feature_sizes = feature_sizes
113 };
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
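/*
 * Map a virtqueue index to its queue pair index.  RX and TX virtqueues
 * are interleaved, so pair N owns vq 2N (RX) and vq 2N + 1 (TX): vq
 * indexes 0 and 1, for example, both map to queue pair 0.
 */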
122 static int vq2q(int queue_index)
123 {
124     return queue_index / 2;
125 }
126 
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
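/*
 * Fill in the device config space for the guest.  The full structure is
 * assembled in host memory first and then truncated to n->config_size,
 * which depends on the negotiated features (see feature_sizes and
 * cfg_size_params above).
 */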
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143     VirtIONet *n = VIRTIO_NET(vdev);
144     struct virtio_net_config netcfg;
145     NetClientState *nc = qemu_get_queue(n->nic);
146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147 
148     int ret = 0;
149     memset(&netcfg, 0, sizeof(struct virtio_net_config));
150     virtio_stw_p(vdev, &netcfg.status, n->status);
151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155     netcfg.duplex = n->net_conf.duplex;
156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
162     memcpy(config, &netcfg, n->config_size);
163 
164     /*
165      * Is this VDPA? No peer means not VDPA: there's no way to
166      * disconnect/reconnect a VDPA peer.
167      */
168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170                                    n->config_size);
171         if (ret != -1) {
172             /*
173              * Some NIC/kernel combinations present 0 as the mac address.  As
174              * that is not a legal address, try to proceed with the
175              * address from the QEMU command line in the hope that the
176              * address has been configured correctly elsewhere - just not
177              * reported by the device.
178              */
179             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
180                 info_report("Zero hardware mac address detected. Ignoring.");
181                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
182             }
183             memcpy(config, &netcfg, n->config_size);
184         }
185     }
186 }
187 
188 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
189 {
190     VirtIONet *n = VIRTIO_NET(vdev);
191     struct virtio_net_config netcfg = {};
192     NetClientState *nc = qemu_get_queue(n->nic);
193 
194     memcpy(&netcfg, config, n->config_size);
195 
196     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
197         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
198         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
199         memcpy(n->mac, netcfg.mac, ETH_ALEN);
200         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
201     }
202 
203     /*
204      * Is this VDPA? No peer means not VDPA: there's no way to
205      * disconnect/reconnect a VDPA peer.
206      */
207     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
208         vhost_net_set_config(get_vhost_net(nc->peer),
209                              (uint8_t *)&netcfg, 0, n->config_size,
210                              VHOST_SET_CONFIG_TYPE_MASTER);
211     }
212 }
213 
214 static bool virtio_net_started(VirtIONet *n, uint8_t status)
215 {
216     VirtIODevice *vdev = VIRTIO_DEVICE(n);
217     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
218         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
219 }
220 
221 static void virtio_net_announce_notify(VirtIONet *net)
222 {
223     VirtIODevice *vdev = VIRTIO_DEVICE(net);
224     trace_virtio_net_announce_notify();
225 
226     net->status |= VIRTIO_NET_S_ANNOUNCE;
227     virtio_notify_config(vdev);
228 }
229 
230 static void virtio_net_announce_timer(void *opaque)
231 {
232     VirtIONet *n = opaque;
233     trace_virtio_net_announce_timer(n->announce_timer.round);
234 
235     n->announce_timer.round--;
236     virtio_net_announce_notify(n);
237 }
238 
239 static void virtio_net_announce(NetClientState *nc)
240 {
241     VirtIONet *n = qemu_get_nic_opaque(nc);
242     VirtIODevice *vdev = VIRTIO_DEVICE(n);
243 
244     /*
245      * Make sure the virtio migration announcement timer isn't running.
246      * If it is, let it trigger the announcement so that we do not cause
247      * confusion.
248      */
249     if (n->announce_timer.round) {
250         return;
251     }
252 
253     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
254         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
255         virtio_net_announce_notify(n);
256     }
257 }
258 
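/*
 * Start or stop the vhost backend so that it follows the device status:
 * it should run only while the guest driver is OK, the link is up and
 * the VM is running.  The early returns below make this a no-op when
 * the backend is already in the desired state.
 */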
259 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
260 {
261     VirtIODevice *vdev = VIRTIO_DEVICE(n);
262     NetClientState *nc = qemu_get_queue(n->nic);
263     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
264     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
265               n->max_ncs - n->max_queue_pairs : 0;
266 
267     if (!get_vhost_net(nc->peer)) {
268         return;
269     }
270 
271     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
272         !!n->vhost_started) {
273         return;
274     }
275     if (!n->vhost_started) {
276         int r, i;
277 
278         if (n->needs_vnet_hdr_swap) {
279             error_report("backend does not support %s vnet headers; "
280                          "falling back on userspace virtio",
281                          virtio_is_big_endian(vdev) ? "BE" : "LE");
282             return;
283         }
284 
285         /* Any packets outstanding? Purge them to avoid touching rings
286          * when vhost is running.
287          */
288         for (i = 0; i < queue_pairs; i++) {
289             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
290 
291             /* Purge both directions: TX and RX. */
292             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
293             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
294         }
295 
296         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
297             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
298             if (r < 0) {
299                 error_report("MTU of %u bytes is not supported by the "
300                              "backend", n->net_conf.mtu);
301 
302                 return;
303             }
304         }
305 
306         n->vhost_started = 1;
307         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
308         if (r < 0) {
309             error_report("unable to start vhost net: %d: "
310                          "falling back on userspace virtio", -r);
311             n->vhost_started = 0;
312         }
313     } else {
314         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
315         n->vhost_started = 0;
316     }
317 }
318 
319 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
320                                           NetClientState *peer,
321                                           bool enable)
322 {
323     if (virtio_is_big_endian(vdev)) {
324         return qemu_set_vnet_be(peer, enable);
325     } else {
326         return qemu_set_vnet_le(peer, enable);
327     }
328 }
329 
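/*
 * Apply the vnet header endianness to every queue pair's peer.  If a
 * peer fails while enabling, the peers already configured are rolled
 * back and true is returned, telling the caller to swap the headers in
 * QEMU instead (needs_vnet_hdr_swap).
 */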
330 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
331                                        int queue_pairs, bool enable)
332 {
333     int i;
334 
335     for (i = 0; i < queue_pairs; i++) {
336         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
337             enable) {
338             while (--i >= 0) {
339                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
340             }
341 
342             return true;
343         }
344     }
345 
346     return false;
347 }
348 
349 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
350 {
351     VirtIODevice *vdev = VIRTIO_DEVICE(n);
352     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
353 
354     if (virtio_net_started(n, status)) {
355         /* Before using the device, we tell the network backend about the
356          * endianness to use when parsing vnet headers. If the backend
357          * can't do it, we fall back to fixing the headers in the core
358          * virtio-net code.
359          */
360         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
361                                                             queue_pairs, true);
362     } else if (virtio_net_started(n, vdev->status)) {
363         /* After using the device, we need to reset the network backend to
364          * the default (guest native endianness), otherwise the guest may
365          * lose network connectivity if it is rebooted into a different
366          * endianness.
367          */
368         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
369     }
370 }
371 
372 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
373 {
374     unsigned int dropped = virtqueue_drop_all(vq);
375     if (dropped) {
376         virtio_notify(vdev, vq);
377     }
378 }
379 
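/*
 * Propagate a device status change to every queue pair: flush pending
 * packets for queues that just became active, and re-arm or cancel the
 * TX timer/bottom half depending on whether each queue is still in use.
 */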
380 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
381 {
382     VirtIONet *n = VIRTIO_NET(vdev);
383     VirtIONetQueue *q;
384     int i;
385     uint8_t queue_status;
386 
387     virtio_net_vnet_endian_status(n, status);
388     virtio_net_vhost_status(n, status);
389 
390     for (i = 0; i < n->max_queue_pairs; i++) {
391         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
392         bool queue_started;
393         q = &n->vqs[i];
394 
395         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
396             queue_status = 0;
397         } else {
398             queue_status = status;
399         }
400         queue_started =
401             virtio_net_started(n, queue_status) && !n->vhost_started;
402 
403         if (queue_started) {
404             qemu_flush_queued_packets(ncs);
405         }
406 
407         if (!q->tx_waiting) {
408             continue;
409         }
410 
411         if (queue_started) {
412             if (q->tx_timer) {
413                 timer_mod(q->tx_timer,
414                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
415             } else {
416                 qemu_bh_schedule(q->tx_bh);
417             }
418         } else {
419             if (q->tx_timer) {
420                 timer_del(q->tx_timer);
421             } else {
422                 qemu_bh_cancel(q->tx_bh);
423             }
424             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
425                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
426                 vdev->vm_running) {
427                 /* if tx is waiting, we likely have some packets in the tx
428                  * queue and notification disabled */
429                 q->tx_waiting = 0;
430                 virtio_queue_set_notification(q->tx_vq, 1);
431                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
432             }
433         }
434     }
435 }
436 
437 static void virtio_net_set_link_status(NetClientState *nc)
438 {
439     VirtIONet *n = qemu_get_nic_opaque(nc);
440     VirtIODevice *vdev = VIRTIO_DEVICE(n);
441     uint16_t old_status = n->status;
442 
443     if (nc->link_down)
444         n->status &= ~VIRTIO_NET_S_LINK_UP;
445     else
446         n->status |= VIRTIO_NET_S_LINK_UP;
447 
448     if (n->status != old_status)
449         virtio_notify_config(vdev);
450 
451     virtio_net_set_status(vdev, vdev->status);
452 }
453 
454 static void rxfilter_notify(NetClientState *nc)
455 {
456     VirtIONet *n = qemu_get_nic_opaque(nc);
457 
458     if (nc->rxfilter_notify_enabled) {
459         char *path = object_get_canonical_path(OBJECT(n->qdev));
460         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
461                                               n->netclient_name, path);
462         g_free(path);
463 
464         /* disable event notification to avoid events flooding */
465         nc->rxfilter_notify_enabled = 0;
466     }
467 }
468 
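/*
 * The VLAN filter is a bitmap of MAX_VLAN bits kept in 32-bit words:
 * VLAN id V lives at bit (V & 0x1f) of word (V >> 5).  Walk the bitmap
 * and build the QAPI list of configured ids, e.g. bit 3 of word 1
 * yields VLAN id (1 << 5) + 3 = 35.
 */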
469 static intList *get_vlan_table(VirtIONet *n)
470 {
471     intList *list;
472     int i, j;
473 
474     list = NULL;
475     for (i = 0; i < MAX_VLAN >> 5; i++) {
476         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
477             if (n->vlans[i] & (1U << j)) {
478                 QAPI_LIST_PREPEND(list, (i << 5) + j);
479             }
480         }
481     }
482 
483     return list;
484 }
485 
486 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
487 {
488     VirtIONet *n = qemu_get_nic_opaque(nc);
489     VirtIODevice *vdev = VIRTIO_DEVICE(n);
490     RxFilterInfo *info;
491     strList *str_list;
492     int i;
493 
494     info = g_malloc0(sizeof(*info));
495     info->name = g_strdup(nc->name);
496     info->promiscuous = n->promisc;
497 
498     if (n->nouni) {
499         info->unicast = RX_STATE_NONE;
500     } else if (n->alluni) {
501         info->unicast = RX_STATE_ALL;
502     } else {
503         info->unicast = RX_STATE_NORMAL;
504     }
505 
506     if (n->nomulti) {
507         info->multicast = RX_STATE_NONE;
508     } else if (n->allmulti) {
509         info->multicast = RX_STATE_ALL;
510     } else {
511         info->multicast = RX_STATE_NORMAL;
512     }
513 
514     info->broadcast_allowed = !n->nobcast;
515     info->multicast_overflow = n->mac_table.multi_overflow;
516     info->unicast_overflow = n->mac_table.uni_overflow;
517 
518     info->main_mac = qemu_mac_strdup_printf(n->mac);
519 
520     str_list = NULL;
521     for (i = 0; i < n->mac_table.first_multi; i++) {
522         QAPI_LIST_PREPEND(str_list,
523                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
524     }
525     info->unicast_table = str_list;
526 
527     str_list = NULL;
528     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
529         QAPI_LIST_PREPEND(str_list,
530                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
531     }
532     info->multicast_table = str_list;
533     info->vlan_table = get_vlan_table(n);
534 
535     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
536         info->vlan = RX_STATE_ALL;
537     } else if (!info->vlan_table) {
538         info->vlan = RX_STATE_NONE;
539     } else {
540         info->vlan = RX_STATE_NORMAL;
541     }
542 
543     /* enable event notification after query */
544     nc->rxfilter_notify_enabled = 1;
545 
546     return info;
547 }
548 
549 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
550 {
551     VirtIONet *n = VIRTIO_NET(vdev);
552     NetClientState *nc;
553 
554     /* validate queue_index and skip for cvq */
555     if (queue_index >= n->max_queue_pairs * 2) {
556         return;
557     }
558 
559     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
560 
561     if (!nc->peer) {
562         return;
563     }
564 
565     if (get_vhost_net(nc->peer) &&
566         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
567         vhost_net_virtqueue_reset(vdev, nc, queue_index);
568     }
569 
570     flush_or_purge_queued_packets(nc);
571 }
572 
573 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
574 {
575     VirtIONet *n = VIRTIO_NET(vdev);
576     NetClientState *nc;
577     int r;
578 
579     /* validate queue_index and skip for cvq */
580     if (queue_index >= n->max_queue_pairs * 2) {
581         return;
582     }
583 
584     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
585 
586     if (!nc->peer || !vdev->vhost_started) {
587         return;
588     }
589 
590     if (get_vhost_net(nc->peer) &&
591         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
592         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
593         if (r < 0) {
594             error_report("unable to restart vhost net virtqueue: %d, "
595                             "when resetting the queue", queue_index);
596         }
597     }
598 }
599 
600 static void virtio_net_reset(VirtIODevice *vdev)
601 {
602     VirtIONet *n = VIRTIO_NET(vdev);
603     int i;
604 
605     /* Reset back to compatibility mode */
606     n->promisc = 1;
607     n->allmulti = 0;
608     n->alluni = 0;
609     n->nomulti = 0;
610     n->nouni = 0;
611     n->nobcast = 0;
612     /* multiqueue is disabled by default */
613     n->curr_queue_pairs = 1;
614     timer_del(n->announce_timer.tm);
615     n->announce_timer.round = 0;
616     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
617 
618     /* Flush any MAC and VLAN filter table state */
619     n->mac_table.in_use = 0;
620     n->mac_table.first_multi = 0;
621     n->mac_table.multi_overflow = 0;
622     n->mac_table.uni_overflow = 0;
623     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
624     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
625     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
626     memset(n->vlans, 0, MAX_VLAN >> 3);
627 
628     /* Flush any async TX */
629     for (i = 0; i < n->max_queue_pairs; i++) {
630         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
631     }
632 }
633 
634 static void peer_test_vnet_hdr(VirtIONet *n)
635 {
636     NetClientState *nc = qemu_get_queue(n->nic);
637     if (!nc->peer) {
638         return;
639     }
640 
641     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
642 }
643 
644 static int peer_has_vnet_hdr(VirtIONet *n)
645 {
646     return n->has_vnet_hdr;
647 }
648 
649 static int peer_has_ufo(VirtIONet *n)
650 {
651     if (!peer_has_vnet_hdr(n))
652         return 0;
653 
654     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
655 
656     return n->has_ufo;
657 }
658 
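/*
 * Select the vnet header layout the guest will see.  With
 * VIRTIO_F_VERSION_1 the header is at least virtio_net_hdr_mrg_rxbuf
 * (virtio_net_hdr_v1_hash when hash reporting is on); for legacy
 * devices it depends on whether mergeable RX buffers were negotiated.
 * Peers that can handle the new length are told about it, keeping the
 * host and guest header sizes in sync.
 */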
659 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
660                                        int version_1, int hash_report)
661 {
662     int i;
663     NetClientState *nc;
664 
665     n->mergeable_rx_bufs = mergeable_rx_bufs;
666 
667     if (version_1) {
668         n->guest_hdr_len = hash_report ?
669             sizeof(struct virtio_net_hdr_v1_hash) :
670             sizeof(struct virtio_net_hdr_mrg_rxbuf);
671         n->rss_data.populate_hash = !!hash_report;
672     } else {
673         n->guest_hdr_len = n->mergeable_rx_bufs ?
674             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
675             sizeof(struct virtio_net_hdr);
676     }
677 
678     for (i = 0; i < n->max_queue_pairs; i++) {
679         nc = qemu_get_subqueue(n->nic, i);
680 
681         if (peer_has_vnet_hdr(n) &&
682             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
683             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
684             n->host_hdr_len = n->guest_hdr_len;
685         }
686     }
687 }
688 
689 static int virtio_net_max_tx_queue_size(VirtIONet *n)
690 {
691     NetClientState *peer = n->nic_conf.peers.ncs[0];
692 
693     /*
694      * Backends other than vhost-user and vhost-vdpa don't support
695      * TX queue sizes larger than the default.
696      */
697     if (!peer) {
698         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
699     }
700 
701     switch (peer->info->type) {
702     case NET_CLIENT_DRIVER_VHOST_USER:
703     case NET_CLIENT_DRIVER_VHOST_VDPA:
704         return VIRTQUEUE_MAX_SIZE;
705     default:
706         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
707     }
708 }
709 
710 static int peer_attach(VirtIONet *n, int index)
711 {
712     NetClientState *nc = qemu_get_subqueue(n->nic, index);
713 
714     if (!nc->peer) {
715         return 0;
716     }
717 
718     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
719         vhost_set_vring_enable(nc->peer, 1);
720     }
721 
722     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
723         return 0;
724     }
725 
726     if (n->max_queue_pairs == 1) {
727         return 0;
728     }
729 
730     return tap_enable(nc->peer);
731 }
732 
733 static int peer_detach(VirtIONet *n, int index)
734 {
735     NetClientState *nc = qemu_get_subqueue(n->nic, index);
736 
737     if (!nc->peer) {
738         return 0;
739     }
740 
741     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
742         vhost_set_vring_enable(nc->peer, 0);
743     }
744 
745     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
746         return 0;
747     }
748 
749     return tap_disable(nc->peer);
750 }
751 
752 static void virtio_net_set_queue_pairs(VirtIONet *n)
753 {
754     int i;
755     int r;
756 
757     if (n->nic->peer_deleted) {
758         return;
759     }
760 
761     for (i = 0; i < n->max_queue_pairs; i++) {
762         if (i < n->curr_queue_pairs) {
763             r = peer_attach(n, i);
764             assert(!r);
765         } else {
766             r = peer_detach(n, i);
767             assert(!r);
768         }
769     }
770 }
771 
772 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
773 
774 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
775                                         Error **errp)
776 {
777     VirtIONet *n = VIRTIO_NET(vdev);
778     NetClientState *nc = qemu_get_queue(n->nic);
779 
780     /* First, sync all features that virtio-net could possibly support */
781     features |= n->host_features;
782 
783     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
784 
785     if (!peer_has_vnet_hdr(n)) {
786         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
787         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
788         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
789         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
790 
791         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
792         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
793         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
794         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
795 
796         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
797     }
798 
799     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
800         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
801         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
802     }
803 
804     if (!get_vhost_net(nc->peer)) {
805         virtio_add_feature(&features, VIRTIO_F_RING_RESET);
806         return features;
807     }
808 
809     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
810         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
811     }
812     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
813     vdev->backend_features = features;
814 
815     if (n->mtu_bypass_backend &&
816             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
817         features |= (1ULL << VIRTIO_NET_F_MTU);
818     }
819 
820     return features;
821 }
822 
823 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
824 {
825     uint64_t features = 0;
826 
827     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
828      * but also these: */
829     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
830     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
831     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
832     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
833     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
834 
835     return features;
836 }
837 
838 static void virtio_net_apply_guest_offloads(VirtIONet *n)
839 {
840     qemu_set_offload(qemu_get_queue(n->nic)->peer,
841             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
842             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
843             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
844             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
846 }
847 
848 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
849 {
850     static const uint64_t guest_offloads_mask =
851         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
852         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
853         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
854         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
855         (1ULL << VIRTIO_NET_F_GUEST_UFO);
856 
857     return guest_offloads_mask & features;
858 }
859 
860 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
861 {
862     VirtIODevice *vdev = VIRTIO_DEVICE(n);
863     return virtio_net_guest_offloads_by_features(vdev->guest_features);
864 }
865 
866 typedef struct {
867     VirtIONet *n;
868     DeviceState *dev;
869 } FailoverDevice;
870 
871 /**
872  * Set the failover primary device
873  *
874  * @dev: device being checked against the failover pair id
875  * @opaque: FailoverDevice to fill in when the primary is found
876  * Returns: 1 to stop the bus walk once the primary is found, 0 otherwise
877  */
878 static int failover_set_primary(DeviceState *dev, void *opaque)
879 {
880     FailoverDevice *fdev = opaque;
881     PCIDevice *pci_dev = (PCIDevice *)
882         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
883 
884     if (!pci_dev) {
885         return 0;
886     }
887 
888     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
889         fdev->dev = dev;
890         return 1;
891     }
892 
893     return 0;
894 }
895 
896 /**
897  * Find the primary device for this failover virtio-net
898  *
899  * @n: VirtIONet device
900  * Returns: the primary DeviceState, or NULL if it is not found
901  */
902 static DeviceState *failover_find_primary_device(VirtIONet *n)
903 {
904     FailoverDevice fdev = {
905         .n = n,
906     };
907 
908     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
909                        NULL, NULL, &fdev);
910     return fdev.dev;
911 }
912 
913 static void failover_add_primary(VirtIONet *n, Error **errp)
914 {
915     Error *err = NULL;
916     DeviceState *dev = failover_find_primary_device(n);
917 
918     if (dev) {
919         return;
920     }
921 
922     if (!n->primary_opts) {
923         error_setg(errp, "Primary device not found");
924         error_append_hint(errp, "Virtio-net failover will not work. Make "
925                           "sure primary device has parameter"
926                           " failover_pair_id=%s\n", n->netclient_name);
927         return;
928     }
929 
930     dev = qdev_device_add_from_qdict(n->primary_opts,
931                                      n->primary_opts_from_json,
932                                      &err);
933     if (err) {
934         qobject_unref(n->primary_opts);
935         n->primary_opts = NULL;
936     } else {
937         object_unref(OBJECT(dev));
938     }
939     error_propagate(errp, err);
940 }
941 
942 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
943 {
944     VirtIONet *n = VIRTIO_NET(vdev);
945     Error *err = NULL;
946     int i;
947 
948     if (n->mtu_bypass_backend &&
949             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
950         features &= ~(1ULL << VIRTIO_NET_F_MTU);
951     }
952 
953     virtio_net_set_multiqueue(n,
954                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
955                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
956 
957     virtio_net_set_mrg_rx_bufs(n,
958                                virtio_has_feature(features,
959                                                   VIRTIO_NET_F_MRG_RXBUF),
960                                virtio_has_feature(features,
961                                                   VIRTIO_F_VERSION_1),
962                                virtio_has_feature(features,
963                                                   VIRTIO_NET_F_HASH_REPORT));
964 
965     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
966         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
967     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
968         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
969     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
970 
971     if (n->has_vnet_hdr) {
972         n->curr_guest_offloads =
973             virtio_net_guest_offloads_by_features(features);
974         virtio_net_apply_guest_offloads(n);
975     }
976 
977     for (i = 0; i < n->max_queue_pairs; i++) {
978         NetClientState *nc = qemu_get_subqueue(n->nic, i);
979 
980         if (!get_vhost_net(nc->peer)) {
981             continue;
982         }
983         vhost_net_ack_features(get_vhost_net(nc->peer), features);
984     }
985 
986     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
987         memset(n->vlans, 0, MAX_VLAN >> 3);
988     } else {
989         memset(n->vlans, 0xff, MAX_VLAN >> 3);
990     }
991 
992     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
993         qapi_event_send_failover_negotiated(n->netclient_name);
994         qatomic_set(&n->failover_primary_hidden, false);
995         failover_add_primary(n, &err);
996         if (err) {
997             if (!qtest_enabled()) {
998                 warn_report_err(err);
999             } else {
1000                 error_free(err);
1001             }
1002         }
1003     }
1004 }
1005 
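/*
 * VIRTIO_NET_CTRL_RX commands carry a single on/off byte that toggles
 * one of the receive-mode flags consulted later in receive_filter().
 */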
1006 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1007                                      struct iovec *iov, unsigned int iov_cnt)
1008 {
1009     uint8_t on;
1010     size_t s;
1011     NetClientState *nc = qemu_get_queue(n->nic);
1012 
1013     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1014     if (s != sizeof(on)) {
1015         return VIRTIO_NET_ERR;
1016     }
1017 
1018     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1019         n->promisc = on;
1020     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1021         n->allmulti = on;
1022     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1023         n->alluni = on;
1024     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1025         n->nomulti = on;
1026     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1027         n->nouni = on;
1028     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1029         n->nobcast = on;
1030     } else {
1031         return VIRTIO_NET_ERR;
1032     }
1033 
1034     rxfilter_notify(nc);
1035 
1036     return VIRTIO_NET_OK;
1037 }
1038 
1039 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1040                                      struct iovec *iov, unsigned int iov_cnt)
1041 {
1042     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1043     uint64_t offloads;
1044     size_t s;
1045 
1046     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1047         return VIRTIO_NET_ERR;
1048     }
1049 
1050     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1051     if (s != sizeof(offloads)) {
1052         return VIRTIO_NET_ERR;
1053     }
1054 
1055     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1056         uint64_t supported_offloads;
1057 
1058         offloads = virtio_ldq_p(vdev, &offloads);
1059 
1060         if (!n->has_vnet_hdr) {
1061             return VIRTIO_NET_ERR;
1062         }
1063 
1064         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1065             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1066         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1067             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1068         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1069 
1070         supported_offloads = virtio_net_supported_guest_offloads(n);
1071         if (offloads & ~supported_offloads) {
1072             return VIRTIO_NET_ERR;
1073         }
1074 
1075         n->curr_guest_offloads = offloads;
1076         virtio_net_apply_guest_offloads(n);
1077 
1078         return VIRTIO_NET_OK;
1079     } else {
1080         return VIRTIO_NET_ERR;
1081     }
1082 }
1083 
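/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac tables
 * back to back, unicast first and multicast second; each is a 32-bit
 * entry count followed by that many 6-byte MACs.  A table larger than
 * MAC_TABLE_ENTRIES is dropped and only raises its overflow flag.
 */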
1084 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1085                                  struct iovec *iov, unsigned int iov_cnt)
1086 {
1087     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1088     struct virtio_net_ctrl_mac mac_data;
1089     size_t s;
1090     NetClientState *nc = qemu_get_queue(n->nic);
1091 
1092     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1093         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1094             return VIRTIO_NET_ERR;
1095         }
1096         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1097         assert(s == sizeof(n->mac));
1098         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1099         rxfilter_notify(nc);
1100 
1101         return VIRTIO_NET_OK;
1102     }
1103 
1104     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1105         return VIRTIO_NET_ERR;
1106     }
1107 
1108     int in_use = 0;
1109     int first_multi = 0;
1110     uint8_t uni_overflow = 0;
1111     uint8_t multi_overflow = 0;
1112     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1113 
1114     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1115                    sizeof(mac_data.entries));
1116     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1117     if (s != sizeof(mac_data.entries)) {
1118         goto error;
1119     }
1120     iov_discard_front(&iov, &iov_cnt, s);
1121 
1122     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1123         goto error;
1124     }
1125 
1126     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1127         s = iov_to_buf(iov, iov_cnt, 0, macs,
1128                        mac_data.entries * ETH_ALEN);
1129         if (s != mac_data.entries * ETH_ALEN) {
1130             goto error;
1131         }
1132         in_use += mac_data.entries;
1133     } else {
1134         uni_overflow = 1;
1135     }
1136 
1137     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1138 
1139     first_multi = in_use;
1140 
1141     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1142                    sizeof(mac_data.entries));
1143     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1144     if (s != sizeof(mac_data.entries)) {
1145         goto error;
1146     }
1147 
1148     iov_discard_front(&iov, &iov_cnt, s);
1149 
1150     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1151         goto error;
1152     }
1153 
1154     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1155         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1156                        mac_data.entries * ETH_ALEN);
1157         if (s != mac_data.entries * ETH_ALEN) {
1158             goto error;
1159         }
1160         in_use += mac_data.entries;
1161     } else {
1162         multi_overflow = 1;
1163     }
1164 
1165     n->mac_table.in_use = in_use;
1166     n->mac_table.first_multi = first_multi;
1167     n->mac_table.uni_overflow = uni_overflow;
1168     n->mac_table.multi_overflow = multi_overflow;
1169     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1170     g_free(macs);
1171     rxfilter_notify(nc);
1172 
1173     return VIRTIO_NET_OK;
1174 
1175 error:
1176     g_free(macs);
1177     return VIRTIO_NET_ERR;
1178 }
1179 
1180 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1181                                         struct iovec *iov, unsigned int iov_cnt)
1182 {
1183     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1184     uint16_t vid;
1185     size_t s;
1186     NetClientState *nc = qemu_get_queue(n->nic);
1187 
1188     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1189     vid = virtio_lduw_p(vdev, &vid);
1190     if (s != sizeof(vid)) {
1191         return VIRTIO_NET_ERR;
1192     }
1193 
1194     if (vid >= MAX_VLAN)
1195         return VIRTIO_NET_ERR;
1196 
1197     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1198         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1199     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1200         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1201     else
1202         return VIRTIO_NET_ERR;
1203 
1204     rxfilter_notify(nc);
1205 
1206     return VIRTIO_NET_OK;
1207 }
1208 
1209 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1210                                       struct iovec *iov, unsigned int iov_cnt)
1211 {
1212     trace_virtio_net_handle_announce(n->announce_timer.round);
1213     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1214         n->status & VIRTIO_NET_S_ANNOUNCE) {
1215         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1216         if (n->announce_timer.round) {
1217             qemu_announce_timer_step(&n->announce_timer);
1218         }
1219         return VIRTIO_NET_OK;
1220     } else {
1221         return VIRTIO_NET_ERR;
1222     }
1223 }
1224 
1225 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1226 
1227 static void virtio_net_disable_rss(VirtIONet *n)
1228 {
1229     if (n->rss_data.enabled) {
1230         trace_virtio_net_rss_disable();
1231     }
1232     n->rss_data.enabled = false;
1233 
1234     virtio_net_detach_epbf_rss(n);
1235 }
1236 
1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1238 {
1239     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1240     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1241         return false;
1242     }
1243 
1244     return nc->info->set_steering_ebpf(nc, prog_fd);
1245 }
1246 
1247 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1248                                    struct EBPFRSSConfig *config)
1249 {
1250     config->redirect = data->redirect;
1251     config->populate_hash = data->populate_hash;
1252     config->hash_types = data->hash_types;
1253     config->indirections_len = data->indirections_len;
1254     config->default_queue = data->default_queue;
1255 }
1256 
1257 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1258 {
1259     struct EBPFRSSConfig config = {};
1260 
1261     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1262         return false;
1263     }
1264 
1265     rss_data_to_rss_config(&n->rss_data, &config);
1266 
1267     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1268                           n->rss_data.indirections_table, n->rss_data.key)) {
1269         return false;
1270     }
1271 
1272     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1273         return false;
1274     }
1275 
1276     return true;
1277 }
1278 
1279 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1280 {
1281     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1282 }
1283 
1284 static bool virtio_net_load_ebpf(VirtIONet *n)
1285 {
1286     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1287         /* backend doesn't support steering ebpf */
1288         return false;
1289     }
1290 
1291     return ebpf_rss_load(&n->ebpf_rss);
1292 }
1293 
1294 static void virtio_net_unload_ebpf(VirtIONet *n)
1295 {
1296     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1297     ebpf_rss_unload(&n->ebpf_rss);
1298 }
1299 
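/*
 * Parse a virtio_net_rss_config blob from the control queue: a fixed
 * header, a variable-length table of 16-bit queue indexes, then
 * max_tx_vq plus the hash key length and key bytes.  When do_rss is
 * false (hash reporting only) the indirection table length is forced
 * to 1.  Returns the number of queue pairs to use, or 0 on error.
 */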
1300 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1301                                       struct iovec *iov,
1302                                       unsigned int iov_cnt,
1303                                       bool do_rss)
1304 {
1305     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1306     struct virtio_net_rss_config cfg;
1307     size_t s, offset = 0, size_get;
1308     uint16_t queue_pairs, i;
1309     struct {
1310         uint16_t us;
1311         uint8_t b;
1312     } QEMU_PACKED temp;
1313     const char *err_msg = "";
1314     uint32_t err_value = 0;
1315 
1316     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1317         err_msg = "RSS is not negotiated";
1318         goto error;
1319     }
1320     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1321         err_msg = "Hash report is not negotiated";
1322         goto error;
1323     }
1324     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1325     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1326     if (s != size_get) {
1327         err_msg = "Short command buffer";
1328         err_value = (uint32_t)s;
1329         goto error;
1330     }
1331     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1332     n->rss_data.indirections_len =
1333         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1334     n->rss_data.indirections_len++;
1335     if (!do_rss) {
1336         n->rss_data.indirections_len = 1;
1337     }
1338     if (!is_power_of_2(n->rss_data.indirections_len)) {
1339         err_msg = "Invalid size of indirection table";
1340         err_value = n->rss_data.indirections_len;
1341         goto error;
1342     }
1343     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1344         err_msg = "Too large indirection table";
1345         err_value = n->rss_data.indirections_len;
1346         goto error;
1347     }
1348     n->rss_data.default_queue = do_rss ?
1349         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1350     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1351         err_msg = "Invalid default queue";
1352         err_value = n->rss_data.default_queue;
1353         goto error;
1354     }
1355     offset += size_get;
1356     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1357     g_free(n->rss_data.indirections_table);
1358     n->rss_data.indirections_table = g_malloc(size_get);
1359     if (!n->rss_data.indirections_table) {
1360         err_msg = "Can't allocate indirections table";
1361         err_value = n->rss_data.indirections_len;
1362         goto error;
1363     }
1364     s = iov_to_buf(iov, iov_cnt, offset,
1365                    n->rss_data.indirections_table, size_get);
1366     if (s != size_get) {
1367         err_msg = "Short indirection table buffer";
1368         err_value = (uint32_t)s;
1369         goto error;
1370     }
1371     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1372         uint16_t val = n->rss_data.indirections_table[i];
1373         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1374     }
1375     offset += size_get;
1376     size_get = sizeof(temp);
1377     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1378     if (s != size_get) {
1379         err_msg = "Can't get queue_pairs";
1380         err_value = (uint32_t)s;
1381         goto error;
1382     }
1383     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1384     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1385         err_msg = "Invalid number of queue_pairs";
1386         err_value = queue_pairs;
1387         goto error;
1388     }
1389     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1390         err_msg = "Invalid key size";
1391         err_value = temp.b;
1392         goto error;
1393     }
1394     if (!temp.b && n->rss_data.hash_types) {
1395         err_msg = "No key provided";
1396         err_value = 0;
1397         goto error;
1398     }
1399     if (!temp.b && !n->rss_data.hash_types) {
1400         virtio_net_disable_rss(n);
1401         return queue_pairs;
1402     }
1403     offset += size_get;
1404     size_get = temp.b;
1405     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1406     if (s != size_get) {
1407         err_msg = "Can get key buffer";
1408         err_value = (uint32_t)s;
1409         goto error;
1410     }
1411     n->rss_data.enabled = true;
1412 
1413     if (!n->rss_data.populate_hash) {
1414         if (!virtio_net_attach_epbf_rss(n)) {
1415             /* eBPF must be loaded for vhost */
1416             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1417                 warn_report("Can't load eBPF RSS for vhost");
1418                 goto error;
1419             }
1420             /* fallback to software RSS */
1421             warn_report("Can't load eBPF RSS - fallback to software RSS");
1422             n->rss_data.enabled_software_rss = true;
1423         }
1424     } else {
1425         /* use software RSS for hash populating */
1426         /* and detach eBPF if it was loaded before */
1427         virtio_net_detach_epbf_rss(n);
1428         n->rss_data.enabled_software_rss = true;
1429     }
1430 
1431     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1432                                 n->rss_data.indirections_len,
1433                                 temp.b);
1434     return queue_pairs;
1435 error:
1436     trace_virtio_net_rss_error(err_msg, err_value);
1437     virtio_net_disable_rss(n);
1438     return 0;
1439 }
1440 
1441 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1442                                 struct iovec *iov, unsigned int iov_cnt)
1443 {
1444     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1445     uint16_t queue_pairs;
1446     NetClientState *nc = qemu_get_queue(n->nic);
1447 
1448     virtio_net_disable_rss(n);
1449     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1450         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1451         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1452     }
1453     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1454         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1455     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1456         struct virtio_net_ctrl_mq mq;
1457         size_t s;
1458         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1459             return VIRTIO_NET_ERR;
1460         }
1461         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1462         if (s != sizeof(mq)) {
1463             return VIRTIO_NET_ERR;
1464         }
1465         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1466 
1467     } else {
1468         return VIRTIO_NET_ERR;
1469     }
1470 
1471     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1472         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1473         queue_pairs > n->max_queue_pairs ||
1474         !n->multiqueue) {
1475         return VIRTIO_NET_ERR;
1476     }
1477 
1478     n->curr_queue_pairs = queue_pairs;
1479     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1480         /*
1481          * Avoid updating the backend for a vdpa device: We're only interested
1482          * in updating the device model queues.
1483          */
1484         return VIRTIO_NET_OK;
1485     }
1486     /* stop the backend before changing the number of queue_pairs to
1487      * avoid handling a disabled queue */
1488     virtio_net_set_status(vdev, vdev->status);
1489     virtio_net_set_queue_pairs(n);
1490 
1491     return VIRTIO_NET_OK;
1492 }
1493 
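/*
 * Handle one control queue element: the out sg holds a
 * virtio_net_ctrl_hdr (class/cmd) followed by the command payload, and
 * the in sg receives the single ack byte written back for the guest.
 */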
1494 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1495                                   const struct iovec *in_sg, unsigned in_num,
1496                                   const struct iovec *out_sg,
1497                                   unsigned out_num)
1498 {
1499     VirtIONet *n = VIRTIO_NET(vdev);
1500     struct virtio_net_ctrl_hdr ctrl;
1501     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1502     size_t s;
1503     struct iovec *iov, *iov2;
1504 
1505     if (iov_size(in_sg, in_num) < sizeof(status) ||
1506         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1507         virtio_error(vdev, "virtio-net ctrl missing headers");
1508         return 0;
1509     }
1510 
1511     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1512     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1513     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1514     if (s != sizeof(ctrl)) {
1515         status = VIRTIO_NET_ERR;
1516     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1517         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1518     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1519         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1520     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1521         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1522     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1523         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1524     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1525         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1526     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1527         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1528     }
1529 
1530     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1531     assert(s == sizeof(status));
1532 
1533     g_free(iov2);
1534     return sizeof(status);
1535 }
1536 
1537 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1538 {
1539     VirtQueueElement *elem;
1540 
1541     for (;;) {
1542         size_t written;
1543         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1544         if (!elem) {
1545             break;
1546         }
1547 
1548         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1549                                              elem->out_sg, elem->out_num);
1550         if (written > 0) {
1551             virtqueue_push(vq, elem, written);
1552             virtio_notify(vdev, vq);
1553             g_free(elem);
1554         } else {
1555             virtqueue_detach_element(vq, elem, 0);
1556             g_free(elem);
1557             break;
1558         }
1559     }
1560 }
1561 
1562 /* RX */
1563 
1564 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1565 {
1566     VirtIONet *n = VIRTIO_NET(vdev);
1567     int queue_index = vq2q(virtio_get_queue_index(vq));
1568 
1569     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1570 }
1571 
1572 static bool virtio_net_can_receive(NetClientState *nc)
1573 {
1574     VirtIONet *n = qemu_get_nic_opaque(nc);
1575     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1576     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1577 
1578     if (!vdev->vm_running) {
1579         return false;
1580     }
1581 
1582     if (nc->queue_index >= n->curr_queue_pairs) {
1583         return false;
1584     }
1585 
1586     if (!virtio_queue_ready(q->rx_vq) ||
1587         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1588         return false;
1589     }
1590 
1591     return true;
1592 }
1593 
1594 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1595 {
1596     VirtIONet *n = q->n;
1597     if (virtio_queue_empty(q->rx_vq) ||
1598         (n->mergeable_rx_bufs &&
1599          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1600         virtio_queue_set_notification(q->rx_vq, 1);
1601 
1602         /* To avoid a race condition where the guest has made some buffers
1603          * available after the above check but before notification was
1604          * enabled, check for available buffers again.
1605          */
1606         if (virtio_queue_empty(q->rx_vq) ||
1607             (n->mergeable_rx_bufs &&
1608              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1609             return 0;
1610         }
1611     }
1612 
1613     virtio_queue_set_notification(q->rx_vq, 0);
1614     return 1;
1615 }
1616 
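/*
 * Byte-swap the 16-bit vnet header fields between guest and host byte
 * order; used on the data path when the backend could not be set to
 * the guest's endianness (needs_vnet_hdr_swap).
 */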
1617 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1618 {
1619     virtio_tswap16s(vdev, &hdr->hdr_len);
1620     virtio_tswap16s(vdev, &hdr->gso_size);
1621     virtio_tswap16s(vdev, &hdr->csum_start);
1622     virtio_tswap16s(vdev, &hdr->csum_offset);
1623 }
1624 
1625 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1626  * it never finds out that the packets don't have valid checksums.  This
1627  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1628  * fix this with Xen but it hasn't appeared in an upstream release of
1629  * dhclient yet.
1630  *
1631  * To avoid breaking existing guests, we catch udp packets and add
1632  * checksums.  This is terrible but it's better than hacking the guest
1633  * kernels.
1634  *
1635  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1636  * we should provide a mechanism to disable it to avoid polluting the host
1637  * cache.
1638  */
1639 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1640                                         uint8_t *buf, size_t size)
1641 {
1642     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1643         (size > 27 && size < 1500) && /* normal sized MTU */
1644         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1645         (buf[23] == 17) && /* ip.protocol == UDP */
1646         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1647         net_checksum_calculate(buf, size, CSUM_UDP);
1648         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1649     }
1650 }
1651 
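/* Copy the virtio-net header into the guest buffer: either the (fixed-up and,
 * if needed, byte-swapped) header taken from the incoming packet, or a
 * synthesized all-zero header when the peer supplies no vnet header.
 */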
1652 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1653                            const void *buf, size_t size)
1654 {
1655     if (n->has_vnet_hdr) {
1656         /* FIXME this cast is evil */
1657         void *wbuf = (void *)buf;
1658         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1659                                     size - n->host_hdr_len);
1660 
1661         if (n->needs_vnet_hdr_swap) {
1662             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1663         }
1664         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1665     } else {
1666         struct virtio_net_hdr hdr = {
1667             .flags = 0,
1668             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1669         };
1670         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1671     }
1672 }
1673 
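/* The receive filter: returns 1 to accept a packet and 0 to drop it, checking
 * promiscuous mode, the VLAN filter table and then the unicast or multicast
 * MAC tables in turn.
 */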
1674 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1675 {
1676     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1677     static const uint8_t vlan[] = {0x81, 0x00};
1678     uint8_t *ptr = (uint8_t *)buf;
1679     int i;
1680 
1681     if (n->promisc)
1682         return 1;
1683 
1684     ptr += n->host_hdr_len;
1685 
1686     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1687         int vid = lduw_be_p(ptr + 14) & 0xfff;
1688         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1689             return 0;
1690     }
1691 
1692     if (ptr[0] & 1) { /* multicast */
1693         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1694             return !n->nobcast;
1695         } else if (n->nomulti) {
1696             return 0;
1697         } else if (n->allmulti || n->mac_table.multi_overflow) {
1698             return 1;
1699         }
1700 
1701         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1702             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1703                 return 1;
1704             }
1705         }
1706     } else { /* unicast */
1707         if (n->nouni) {
1708             return 0;
1709         } else if (n->alluni || n->mac_table.uni_overflow) {
1710             return 1;
1711         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1712             return 1;
1713         }
1714 
1715         for (i = 0; i < n->mac_table.first_multi; i++) {
1716             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1717                 return 1;
1718             }
1719         }
1720     }
1721 
1722     return 0;
1723 }
1724 
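/* Map the parsed protocols onto the enabled hash-type bits: for example, a
 * TCPv4 packet with VIRTIO_NET_RSS_HASH_TYPE_TCPv4 enabled yields
 * NetPktRssIpV4Tcp.  Returns 0xff when no configured hash type applies.
 */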
1725 static uint8_t virtio_net_get_hash_type(bool isip4,
1726                                         bool isip6,
1727                                         bool isudp,
1728                                         bool istcp,
1729                                         uint32_t types)
1730 {
1731     if (isip4) {
1732         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1733             return NetPktRssIpV4Tcp;
1734         }
1735         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1736             return NetPktRssIpV4Udp;
1737         }
1738         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1739             return NetPktRssIpV4;
1740         }
1741     } else if (isip6) {
1742         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1743                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1744 
1745         if (istcp && (types & mask)) {
1746             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1747                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1748         }
1749         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1750         if (isudp && (types & mask)) {
1751             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1752                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1753         }
1754         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1755         if (types & mask) {
1756             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1757                 NetPktRssIpV6Ex : NetPktRssIpV6;
1758         }
1759     }
1760     return 0xff;
1761 }
1762 
1763 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1764                                    uint32_t hash)
1765 {
1766     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1767     hdr->hash_value = hash;
1768     hdr->hash_report = report;
1769 }
1770 
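/* Software RSS: hash the packet and consult the indirection table.
 * 'hash & (indirections_len - 1)' selects a table entry; the length derives
 * from the guest-supplied mask, so e.g. a 128-entry table uses the low 7
 * bits of the hash.  Returns the queue index the packet should be redirected
 * to, or -1 to deliver it on the current queue.
 */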
1771 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1772                                   size_t size)
1773 {
1774     VirtIONet *n = qemu_get_nic_opaque(nc);
1775     unsigned int index = nc->queue_index, new_index = index;
1776     struct NetRxPkt *pkt = n->rx_pkt;
1777     uint8_t net_hash_type;
1778     uint32_t hash;
1779     bool isip4, isip6, isudp, istcp;
1780     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1781         VIRTIO_NET_HASH_REPORT_IPv4,
1782         VIRTIO_NET_HASH_REPORT_TCPv4,
1783         VIRTIO_NET_HASH_REPORT_TCPv6,
1784         VIRTIO_NET_HASH_REPORT_IPv6,
1785         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1786         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1787         VIRTIO_NET_HASH_REPORT_UDPv4,
1788         VIRTIO_NET_HASH_REPORT_UDPv6,
1789         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1790     };
1791 
1792     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1793                              size - n->host_hdr_len);
1794     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1795     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1796         istcp = isudp = false;
1797     }
1798     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1799         istcp = isudp = false;
1800     }
1801     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1802                                              n->rss_data.hash_types);
1803     if (net_hash_type > NetPktRssIpV6UdpEx) {
1804         if (n->rss_data.populate_hash) {
1805             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1806         }
1807         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1808     }
1809 
1810     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1811 
1812     if (n->rss_data.populate_hash) {
1813         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1814     }
1815 
1816     if (n->rss_data.redirect) {
1817         new_index = hash & (n->rss_data.indirections_len - 1);
1818         new_index = n->rss_data.indirections_table[new_index];
1819     }
1820 
1821     return (index == new_index) ? -1 : new_index;
1822 }
1823 
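/* Deliver one packet into the guest's rx virtqueue, splitting it across as
 * many buffers as needed when mergeable rx buffers are in use.  Returns the
 * packet size on success (or when the filter drops it), 0 when the guest has
 * no buffers yet, and -1 on error.  With software RSS active the packet may
 * first be re-dispatched to another subqueue.
 */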
1824 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1825                                       size_t size, bool no_rss)
1826 {
1827     VirtIONet *n = qemu_get_nic_opaque(nc);
1828     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1829     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1830     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1831     size_t lens[VIRTQUEUE_MAX_SIZE];
1832     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1833     struct virtio_net_hdr_mrg_rxbuf mhdr;
1834     unsigned mhdr_cnt = 0;
1835     size_t offset, i, guest_offset, j;
1836     ssize_t err;
1837 
1838     if (!virtio_net_can_receive(nc)) {
1839         return -1;
1840     }
1841 
1842     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1843         int index = virtio_net_process_rss(nc, buf, size);
1844         if (index >= 0) {
1845             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1846             return virtio_net_receive_rcu(nc2, buf, size, true);
1847         }
1848     }
1849 
1850     /* hdr_len refers to the header we supply to the guest */
1851     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1852         return 0;
1853     }
1854 
1855     if (!receive_filter(n, buf, size))
1856         return size;
1857 
1858     offset = i = 0;
1859 
1860     while (offset < size) {
1861         VirtQueueElement *elem;
1862         int len, total;
1863         const struct iovec *sg;
1864 
1865         total = 0;
1866 
1867         if (i == VIRTQUEUE_MAX_SIZE) {
1868             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1869             err = size;
1870             goto err;
1871         }
1872 
1873         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1874         if (!elem) {
1875             if (i) {
1876                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1877                              "i %zd mergeable %d offset %zd, size %zd, "
1878                              "guest hdr len %zd, host hdr len %zd "
1879                              "guest features 0x%" PRIx64,
1880                              i, n->mergeable_rx_bufs, offset, size,
1881                              n->guest_hdr_len, n->host_hdr_len,
1882                              vdev->guest_features);
1883             }
1884             err = -1;
1885             goto err;
1886         }
1887 
1888         if (elem->in_num < 1) {
1889             virtio_error(vdev,
1890                          "virtio-net receive queue contains no in buffers");
1891             virtqueue_detach_element(q->rx_vq, elem, 0);
1892             g_free(elem);
1893             err = -1;
1894             goto err;
1895         }
1896 
1897         sg = elem->in_sg;
1898         if (i == 0) {
1899             assert(offset == 0);
1900             if (n->mergeable_rx_bufs) {
1901                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1902                                     sg, elem->in_num,
1903                                     offsetof(typeof(mhdr), num_buffers),
1904                                     sizeof(mhdr.num_buffers));
1905             }
1906 
1907             receive_header(n, sg, elem->in_num, buf, size);
1908             if (n->rss_data.populate_hash) {
1909                 offset = sizeof(mhdr);
1910                 iov_from_buf(sg, elem->in_num, offset,
1911                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1912             }
1913             offset = n->host_hdr_len;
1914             total += n->guest_hdr_len;
1915             guest_offset = n->guest_hdr_len;
1916         } else {
1917             guest_offset = 0;
1918         }
1919 
1920         /* copy in packet.  ugh */
1921         len = iov_from_buf(sg, elem->in_num, guest_offset,
1922                            buf + offset, size - offset);
1923         total += len;
1924         offset += len;
1925         /* If buffers can't be merged, at this point we
1926          * must have consumed the complete packet.
1927          * Otherwise, drop it. */
1928         if (!n->mergeable_rx_bufs && offset < size) {
1929             virtqueue_unpop(q->rx_vq, elem, total);
1930             g_free(elem);
1931             err = size;
1932             goto err;
1933         }
1934 
1935         elems[i] = elem;
1936         lens[i] = total;
1937         i++;
1938     }
1939 
1940     if (mhdr_cnt) {
1941         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1942         iov_from_buf(mhdr_sg, mhdr_cnt,
1943                      0,
1944                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1945     }
1946 
1947     for (j = 0; j < i; j++) {
1948         /* signal other side */
1949         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1950         g_free(elems[j]);
1951     }
1952 
1953     virtqueue_flush(q->rx_vq, i);
1954     virtio_notify(vdev, q->rx_vq);
1955 
1956     return size;
1957 
1958 err:
1959     for (j = 0; j < i; j++) {
1960         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1961         g_free(elems[j]);
1962     }
1963 
1964     return err;
1965 }
1966 
1967 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1968                                   size_t size)
1969 {
1970     RCU_READ_LOCK_GUARD();
1971 
1972     return virtio_net_receive_rcu(nc, buf, size, false);
1973 }
1974 
1975 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1976                                          const uint8_t *buf,
1977                                          VirtioNetRscUnit *unit)
1978 {
1979     uint16_t ip_hdrlen;
1980     struct ip_header *ip;
1981 
1982     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1983                               + sizeof(struct eth_header));
1984     unit->ip = (void *)ip;
1985     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1986     unit->ip_plen = &ip->ip_len;
1987     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1988     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1989     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1990 }
1991 
1992 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1993                                          const uint8_t *buf,
1994                                          VirtioNetRscUnit *unit)
1995 {
1996     struct ip6_header *ip6;
1997 
1998     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1999                                  + sizeof(struct eth_header));
2000     unit->ip = ip6;
2001     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2002     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2003                                         + sizeof(struct ip6_header));
2004     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2005 
2006     /* The payload length field differs between IPv4 and IPv6:
2007        the IPv6 value excludes the IP header itself */
2008     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2009 }
2010 
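/* Flush one cached segment to the guest, filling in the RSC info fields when
 * the segment was actually coalesced.  Returns whatever
 * virtio_net_do_receive() returned; 0 means the guest could not take it.
 */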
2011 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2012                                        VirtioNetRscSeg *seg)
2013 {
2014     int ret;
2015     struct virtio_net_hdr_v1 *h;
2016 
2017     h = (struct virtio_net_hdr_v1 *)seg->buf;
2018     h->flags = 0;
2019     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2020 
2021     if (seg->is_coalesced) {
2022         h->rsc.segments = seg->packets;
2023         h->rsc.dup_acks = seg->dup_ack;
2024         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2025         if (chain->proto == ETH_P_IP) {
2026             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2027         } else {
2028             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2029         }
2030     }
2031 
2032     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2033     QTAILQ_REMOVE(&chain->buffers, seg, next);
2034     g_free(seg->buf);
2035     g_free(seg);
2036 
2037     return ret;
2038 }
2039 
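/* Drain timer callback: flush every cached segment of the chain, counting
 * any delivery failures in the purge_failed statistic.
 */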
2040 static void virtio_net_rsc_purge(void *opq)
2041 {
2042     VirtioNetRscSeg *seg, *rn;
2043     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2044 
2045     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2046         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2047             chain->stat.purge_failed++;
2048             continue;
2049         }
2050     }
2051 
2052     chain->stat.timer++;
2053     if (!QTAILQ_EMPTY(&chain->buffers)) {
2054         timer_mod(chain->drain_timer,
2055               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2056     }
2057 }
2058 
2059 static void virtio_net_rsc_cleanup(VirtIONet *n)
2060 {
2061     VirtioNetRscChain *chain, *rn_chain;
2062     VirtioNetRscSeg *seg, *rn_seg;
2063 
2064     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2065         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2066             QTAILQ_REMOVE(&chain->buffers, seg, next);
2067             g_free(seg->buf);
2068             g_free(seg);
2069         }
2070 
2071         timer_free(chain->drain_timer);
2072         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2073         g_free(chain);
2074     }
2075 }
2076 
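/* Cache an incoming packet as a new segment at the tail of the chain.  The
 * buffer is oversized so that later packets can be coalesced into it, up to
 * the maximum TCP payload.
 */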
2077 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2078                                      NetClientState *nc,
2079                                      const uint8_t *buf, size_t size)
2080 {
2081     uint16_t hdr_len;
2082     VirtioNetRscSeg *seg;
2083 
2084     hdr_len = chain->n->guest_hdr_len;
2085     seg = g_new(VirtioNetRscSeg, 1);
2086     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2087         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2088     memcpy(seg->buf, buf, size);
2089     seg->size = size;
2090     seg->packets = 1;
2091     seg->dup_ack = 0;
2092     seg->is_coalesced = 0;
2093     seg->nc = nc;
2094 
2095     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2096     chain->stat.cache++;
2097 
2098     switch (chain->proto) {
2099     case ETH_P_IP:
2100         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2101         break;
2102     case ETH_P_IPV6:
2103         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2104         break;
2105     default:
2106         g_assert_not_reached();
2107     }
2108 }
2109 
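/* Handle a segment that carries no new payload: finalize on an out-of-window
 * or duplicated ack, but coalesce a pure window update into the cached
 * segment.
 */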
2110 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2111                                          VirtioNetRscSeg *seg,
2112                                          const uint8_t *buf,
2113                                          struct tcp_header *n_tcp,
2114                                          struct tcp_header *o_tcp)
2115 {
2116     uint32_t nack, oack;
2117     uint16_t nwin, owin;
2118 
2119     nack = htonl(n_tcp->th_ack);
2120     nwin = htons(n_tcp->th_win);
2121     oack = htonl(o_tcp->th_ack);
2122     owin = htons(o_tcp->th_win);
2123 
2124     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2125         chain->stat.ack_out_of_win++;
2126         return RSC_FINAL;
2127     } else if (nack == oack) {
2128         /* duplicated ack or window probe */
2129         if (nwin == owin) {
2130             /* duplicated ack; count it as a dup ack (WHQL expects at most 1) */
2131             chain->stat.dup_ack++;
2132             return RSC_FINAL;
2133         } else {
2134             /* Coalesce window update */
2135             o_tcp->th_win = n_tcp->th_win;
2136             chain->stat.win_update++;
2137             return RSC_COALESCE;
2138         }
2139     } else {
2140         /* pure ack, go to 'C', finalize */
2141         chain->stat.pure_ack++;
2142         return RSC_FINAL;
2143     }
2144 }
2145 
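/* Try to append the new unit's payload to the cached segment.  Returns
 * RSC_COALESCE on success, and RSC_FINAL when the cached segment has to be
 * flushed first (out-of-window/out-of-order data, oversized result, or a
 * pure ack that ends the run).
 */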
2146 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2147                                             VirtioNetRscSeg *seg,
2148                                             const uint8_t *buf,
2149                                             VirtioNetRscUnit *n_unit)
2150 {
2151     void *data;
2152     uint16_t o_ip_len;
2153     uint32_t nseq, oseq;
2154     VirtioNetRscUnit *o_unit;
2155 
2156     o_unit = &seg->unit;
2157     o_ip_len = htons(*o_unit->ip_plen);
2158     nseq = htonl(n_unit->tcp->th_seq);
2159     oseq = htonl(o_unit->tcp->th_seq);
2160 
2161     /* out of order or retransmitted. */
2162     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2163         chain->stat.data_out_of_win++;
2164         return RSC_FINAL;
2165     }
2166 
2167     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2168     if (nseq == oseq) {
2169         if ((o_unit->payload == 0) && n_unit->payload) {
2170             /* From no payload to payload: the normal case, not a dup ack etc. */
2171             chain->stat.data_after_pure_ack++;
2172             goto coalesce;
2173         } else {
2174             return virtio_net_rsc_handle_ack(chain, seg, buf,
2175                                              n_unit->tcp, o_unit->tcp);
2176         }
2177     } else if ((nseq - oseq) != o_unit->payload) {
2178         /* Not a consistent packet, out of order */
2179         chain->stat.data_out_of_order++;
2180         return RSC_FINAL;
2181     } else {
2182 coalesce:
2183         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2184             chain->stat.over_size++;
2185             return RSC_FINAL;
2186         }
2187 
2188         /* The data is in sequence; the payload length field differs between
2189            v4/v6, so use its value to update and record the new data length */
2190         o_unit->payload += n_unit->payload; /* update new data len */
2191 
2192         /* update field in ip header */
2193         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2194 
2195         /* Carry over the 'PUSH' flag: the WHQL test guide says 'PUSH' can be
2196            coalesced for Windows guests, while this may change the behavior for
2197            Linux guests (only if they use the RSC feature). */
2198         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2199 
2200         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2201         o_unit->tcp->th_win = n_unit->tcp->th_win;
2202 
2203         memmove(seg->buf + seg->size, data, n_unit->payload);
2204         seg->size += n_unit->payload;
2205         seg->packets++;
2206         chain->stat.coalesced++;
2207         return RSC_COALESCE;
2208     }
2209 }
2210 
2211 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2212                                         VirtioNetRscSeg *seg,
2213                                         const uint8_t *buf, size_t size,
2214                                         VirtioNetRscUnit *unit)
2215 {
2216     struct ip_header *ip1, *ip2;
2217 
2218     ip1 = (struct ip_header *)(unit->ip);
2219     ip2 = (struct ip_header *)(seg->unit.ip);
2220     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2221         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2222         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2223         chain->stat.no_match++;
2224         return RSC_NO_MATCH;
2225     }
2226 
2227     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2228 }
2229 
2230 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2231                                         VirtioNetRscSeg *seg,
2232                                         const uint8_t *buf, size_t size,
2233                                         VirtioNetRscUnit *unit)
2234 {
2235     struct ip6_header *ip1, *ip2;
2236 
2237     ip1 = (struct ip6_header *)(unit->ip);
2238     ip2 = (struct ip6_header *)(seg->unit.ip);
2239     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2240         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2241         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2242         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2243             chain->stat.no_match++;
2244             return RSC_NO_MATCH;
2245     }
2246 
2247     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2248 }
2249 
2250 /* Packets with 'SYN' should bypass; packets with other control flags should
2251  * be sent after draining the chain, to prevent out-of-order delivery */
2252 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2253                                          struct tcp_header *tcp)
2254 {
2255     uint16_t tcp_hdr;
2256     uint16_t tcp_flag;
2257 
2258     tcp_flag = htons(tcp->th_offset_flags);
2259     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2260     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2261     if (tcp_flag & TH_SYN) {
2262         chain->stat.tcp_syn++;
2263         return RSC_BYPASS;
2264     }
2265 
2266     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2267         chain->stat.tcp_ctrl_drain++;
2268         return RSC_FINAL;
2269     }
2270 
2271     if (tcp_hdr > sizeof(struct tcp_header)) {
2272         chain->stat.tcp_all_opt++;
2273         return RSC_FINAL;
2274     }
2275 
2276     return RSC_CANDIDATE;
2277 }
2278 
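/* Walk the chain looking for a segment this packet belongs to: coalesce on a
 * match, flush and deliver separately on RSC_FINAL, or cache the packet as a
 * new segment when nothing matches.  An empty chain caches the packet and
 * (re)arms the drain timer.
 */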
2279 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2280                                          NetClientState *nc,
2281                                          const uint8_t *buf, size_t size,
2282                                          VirtioNetRscUnit *unit)
2283 {
2284     int ret;
2285     VirtioNetRscSeg *seg, *nseg;
2286 
2287     if (QTAILQ_EMPTY(&chain->buffers)) {
2288         chain->stat.empty_cache++;
2289         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2290         timer_mod(chain->drain_timer,
2291               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2292         return size;
2293     }
2294 
2295     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2296         if (chain->proto == ETH_P_IP) {
2297             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2298         } else {
2299             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2300         }
2301 
2302         if (ret == RSC_FINAL) {
2303             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2304                 /* Send failed */
2305                 chain->stat.final_failed++;
2306                 return 0;
2307             }
2308 
2309             /* Send current packet */
2310             return virtio_net_do_receive(nc, buf, size);
2311         } else if (ret == RSC_NO_MATCH) {
2312             continue;
2313         } else {
2314             /* Coalesced; set the flag so the ipv4 checksum is recalculated */
2315             seg->is_coalesced = 1;
2316             return size;
2317         }
2318     }
2319 
2320     chain->stat.no_match_cache++;
2321     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2322     return size;
2323 }
2324 
2325 /* Drain a connection's cached data, to avoid out-of-order segments */
2326 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2327                                         NetClientState *nc,
2328                                         const uint8_t *buf, size_t size,
2329                                         uint16_t ip_start, uint16_t ip_size,
2330                                         uint16_t tcp_port)
2331 {
2332     VirtioNetRscSeg *seg, *nseg;
2333     uint32_t ppair1, ppair2;
2334 
2335     ppair1 = *(uint32_t *)(buf + tcp_port);
2336     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2337         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2338         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2339             || (ppair1 != ppair2)) {
2340             continue;
2341         }
2342         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2343             chain->stat.drain_failed++;
2344         }
2345 
2346         break;
2347     }
2348 
2349     return virtio_net_do_receive(nc, buf, size);
2350 }
2351 
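/* Decide whether an IPv4 packet is a coalescing candidate: plain 20-byte
 * header (no options), TCP only, DF set (no fragments), no ECN bits, and a
 * total length consistent with the received buffer.
 */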
2352 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2353                                             struct ip_header *ip,
2354                                             const uint8_t *buf, size_t size)
2355 {
2356     uint16_t ip_len;
2357 
2358     /* Not an ipv4 packet */
2359     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2360         chain->stat.ip_option++;
2361         return RSC_BYPASS;
2362     }
2363 
2364     /* Don't handle packets with ip option */
2365     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2366         chain->stat.ip_option++;
2367         return RSC_BYPASS;
2368     }
2369 
2370     if (ip->ip_p != IPPROTO_TCP) {
2371         chain->stat.bypass_not_tcp++;
2372         return RSC_BYPASS;
2373     }
2374 
2375     /* Don't handle packets with ip fragment */
2376     if (!(htons(ip->ip_off) & IP_DF)) {
2377         chain->stat.ip_frag++;
2378         return RSC_BYPASS;
2379     }
2380 
2381     /* Don't handle packets with ecn flag */
2382     if (IPTOS_ECN(ip->ip_tos)) {
2383         chain->stat.ip_ecn++;
2384         return RSC_BYPASS;
2385     }
2386 
2387     ip_len = htons(ip->ip_len);
2388     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2389         || ip_len > (size - chain->n->guest_hdr_len -
2390                      sizeof(struct eth_header))) {
2391         chain->stat.ip_hacked++;
2392         return RSC_BYPASS;
2393     }
2394 
2395     return RSC_CANDIDATE;
2396 }
2397 
2398 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2399                                       NetClientState *nc,
2400                                       const uint8_t *buf, size_t size)
2401 {
2402     int32_t ret;
2403     uint16_t hdr_len;
2404     VirtioNetRscUnit unit;
2405 
2406     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2407 
2408     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2409         + sizeof(struct tcp_header))) {
2410         chain->stat.bypass_not_tcp++;
2411         return virtio_net_do_receive(nc, buf, size);
2412     }
2413 
2414     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2415     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2416         != RSC_CANDIDATE) {
2417         return virtio_net_do_receive(nc, buf, size);
2418     }
2419 
2420     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2421     if (ret == RSC_BYPASS) {
2422         return virtio_net_do_receive(nc, buf, size);
2423     } else if (ret == RSC_FINAL) {
2424         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2425                 ((hdr_len + sizeof(struct eth_header)) + 12),
2426                 VIRTIO_NET_IP4_ADDR_SIZE,
2427                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2428     }
2429 
2430     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2431 }
2432 
2433 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2434                                             struct ip6_header *ip6,
2435                                             const uint8_t *buf, size_t size)
2436 {
2437     uint16_t ip_len;
2438 
2439     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2440         != IP_HEADER_VERSION_6) {
2441         return RSC_BYPASS;
2442     }
2443 
2444     /* Both extension headers (options) and the protocol are checked here */
2445     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2446         chain->stat.bypass_not_tcp++;
2447         return RSC_BYPASS;
2448     }
2449 
2450     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2451     if (ip_len < sizeof(struct tcp_header) ||
2452         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2453                   - sizeof(struct ip6_header))) {
2454         chain->stat.ip_hacked++;
2455         return RSC_BYPASS;
2456     }
2457 
2458     /* Don't handle packets with ecn flag */
2459     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2460         chain->stat.ip_ecn++;
2461         return RSC_BYPASS;
2462     }
2463 
2464     return RSC_CANDIDATE;
2465 }
2466 
2467 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2468                                       const uint8_t *buf, size_t size)
2469 {
2470     int32_t ret;
2471     uint16_t hdr_len;
2472     VirtioNetRscChain *chain;
2473     VirtioNetRscUnit unit;
2474 
2475     chain = (VirtioNetRscChain *)opq;
2476     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2477 
2478     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2479         + sizeof(struct tcp_header))) {
2480         return virtio_net_do_receive(nc, buf, size);
2481     }
2482 
2483     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2484     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2485                                                  unit.ip, buf, size)) {
2486         return virtio_net_do_receive(nc, buf, size);
2487     }
2488 
2489     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2490     if (ret == RSC_BYPASS) {
2491         return virtio_net_do_receive(nc, buf, size);
2492     } else if (ret == RSC_FINAL) {
2493         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2494                 ((hdr_len + sizeof(struct eth_header)) + 8),
2495                 VIRTIO_NET_IP6_ADDR_SIZE,
2496                 hdr_len + sizeof(struct eth_header)
2497                 + sizeof(struct ip6_header));
2498     }
2499 
2500     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2501 }
2502 
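/* Look up the RSC chain for an Ethernet protocol, creating the chain (with
 * its drain timer) on first use.  Only ETH_P_IP and ETH_P_IPV6 are chained;
 * anything else returns NULL.
 */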
2503 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2504                                                       NetClientState *nc,
2505                                                       uint16_t proto)
2506 {
2507     VirtioNetRscChain *chain;
2508 
2509     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2510         return NULL;
2511     }
2512 
2513     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2514         if (chain->proto == proto) {
2515             return chain;
2516         }
2517     }
2518 
2519     chain = g_malloc(sizeof(*chain));
2520     chain->n = n;
2521     chain->proto = proto;
2522     if (proto == (uint16_t)ETH_P_IP) {
2523         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2524         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2525     } else {
2526         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2527         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2528     }
2529     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2530                                       virtio_net_rsc_purge, chain);
2531     memset(&chain->stat, 0, sizeof(chain->stat));
2532 
2533     QTAILQ_INIT(&chain->buffers);
2534     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2535 
2536     return chain;
2537 }
2538 
2539 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2540                                       const uint8_t *buf,
2541                                       size_t size)
2542 {
2543     uint16_t proto;
2544     VirtioNetRscChain *chain;
2545     struct eth_header *eth;
2546     VirtIONet *n;
2547 
2548     n = qemu_get_nic_opaque(nc);
2549     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2550         return virtio_net_do_receive(nc, buf, size);
2551     }
2552 
2553     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2554     proto = htons(eth->h_proto);
2555 
2556     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2557     if (chain) {
2558         chain->stat.received++;
2559         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2560             return virtio_net_rsc_receive4(chain, nc, buf, size);
2561         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2562             return virtio_net_rsc_receive6(chain, nc, buf, size);
2563         }
2564     }
2565     return virtio_net_do_receive(nc, buf, size);
2566 }
2567 
2568 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2569                                   size_t size)
2570 {
2571     VirtIONet *n = qemu_get_nic_opaque(nc);
2572     if (n->rsc4_enabled || n->rsc6_enabled) {
2573         return virtio_net_rsc_receive(nc, buf, size);
2574     } else {
2575         return virtio_net_do_receive(nc, buf, size);
2576     }
2577 }
2578 
2579 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2580 
2581 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2582 {
2583     VirtIONet *n = qemu_get_nic_opaque(nc);
2584     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2585     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2586     int ret;
2587 
2588     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2589     virtio_notify(vdev, q->tx_vq);
2590 
2591     g_free(q->async_tx.elem);
2592     q->async_tx.elem = NULL;
2593 
2594     virtio_queue_set_notification(q->tx_vq, 1);
2595     ret = virtio_net_flush_tx(q);
2596     if (ret >= n->tx_burst) {
2597         /*
2598          * the flush has been stopped by tx_burst;
2599          * we will not receive a notification for the
2600          * remaining part, so re-schedule
2601          */
2602         virtio_queue_set_notification(q->tx_vq, 0);
2603         if (q->tx_bh) {
2604             qemu_bh_schedule(q->tx_bh);
2605         } else {
2606             timer_mod(q->tx_timer,
2607                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2608         }
2609         q->tx_waiting = 1;
2610     }
2611 }
2612 
2613 /* TX */
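/* Flush up to n->tx_burst packets from the tx virtqueue.  Returns the number
 * of packets sent, -EBUSY when an asynchronous send is still in flight, or
 * -EINVAL when a malformed element marked the device broken.
 */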
2614 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2615 {
2616     VirtIONet *n = q->n;
2617     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2618     VirtQueueElement *elem;
2619     int32_t num_packets = 0;
2620     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2621     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2622         return num_packets;
2623     }
2624 
2625     if (q->async_tx.elem) {
2626         virtio_queue_set_notification(q->tx_vq, 0);
2627         return num_packets;
2628     }
2629 
2630     for (;;) {
2631         ssize_t ret;
2632         unsigned int out_num;
2633         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2634         struct virtio_net_hdr_mrg_rxbuf mhdr;
2635 
2636         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2637         if (!elem) {
2638             break;
2639         }
2640 
2641         out_num = elem->out_num;
2642         out_sg = elem->out_sg;
2643         if (out_num < 1) {
2644             virtio_error(vdev, "virtio-net header not in first element");
2645             virtqueue_detach_element(q->tx_vq, elem, 0);
2646             g_free(elem);
2647             return -EINVAL;
2648         }
2649 
2650         if (n->has_vnet_hdr) {
2651             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2652                 n->guest_hdr_len) {
2653                 virtio_error(vdev, "virtio-net header incorrect");
2654                 virtqueue_detach_element(q->tx_vq, elem, 0);
2655                 g_free(elem);
2656                 return -EINVAL;
2657             }
2658             if (n->needs_vnet_hdr_swap) {
2659                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2660                 sg2[0].iov_base = &mhdr;
2661                 sg2[0].iov_len = n->guest_hdr_len;
2662                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2663                                    out_sg, out_num,
2664                                    n->guest_hdr_len, -1);
2665                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2666                     goto drop;
2667                 }
2668                 out_num += 1;
2669                 out_sg = sg2;
2670             }
2671         }
2672         /*
2673          * If the host wants to see the guest header as is, we can
2674          * pass it on unchanged. Otherwise, copy just the parts
2675          * that the host is interested in.
2676          */
2677         assert(n->host_hdr_len <= n->guest_hdr_len);
2678         if (n->host_hdr_len != n->guest_hdr_len) {
2679             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2680                                        out_sg, out_num,
2681                                        0, n->host_hdr_len);
2682             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2683                                out_sg, out_num,
2684                                n->guest_hdr_len, -1);
2685             out_num = sg_num;
2686             out_sg = sg;
2687         }
2688 
2689         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2690                                       out_sg, out_num, virtio_net_tx_complete);
2691         if (ret == 0) {
2692             virtio_queue_set_notification(q->tx_vq, 0);
2693             q->async_tx.elem = elem;
2694             return -EBUSY;
2695         }
2696 
2697 drop:
2698         virtqueue_push(q->tx_vq, elem, 0);
2699         virtio_notify(vdev, q->tx_vq);
2700         g_free(elem);
2701 
2702         if (++num_packets >= n->tx_burst) {
2703             break;
2704         }
2705     }
2706     return num_packets;
2707 }
2708 
2709 static void virtio_net_tx_timer(void *opaque);
2710 
2711 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2712 {
2713     VirtIONet *n = VIRTIO_NET(vdev);
2714     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2715 
2716     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2717         virtio_net_drop_tx_queue_data(vdev, vq);
2718         return;
2719     }
2720 
2721     /* This happens when the device was stopped but the VCPU wasn't. */
2722     if (!vdev->vm_running) {
2723         q->tx_waiting = 1;
2724         return;
2725     }
2726 
2727     if (q->tx_waiting) {
2728         /* We already have queued packets, immediately flush */
2729         timer_del(q->tx_timer);
2730         virtio_net_tx_timer(q);
2731     } else {
2732         /* re-arm timer to flush it (and more) on next tick */
2733         timer_mod(q->tx_timer,
2734                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2735         q->tx_waiting = 1;
2736         virtio_queue_set_notification(vq, 0);
2737     }
2738 }
2739 
2740 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2741 {
2742     VirtIONet *n = VIRTIO_NET(vdev);
2743     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2744 
2745     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2746         virtio_net_drop_tx_queue_data(vdev, vq);
2747         return;
2748     }
2749 
2750     if (unlikely(q->tx_waiting)) {
2751         return;
2752     }
2753     q->tx_waiting = 1;
2754     /* This happens when the device was stopped but the VCPU wasn't. */
2755     if (!vdev->vm_running) {
2756         return;
2757     }
2758     virtio_queue_set_notification(vq, 0);
2759     qemu_bh_schedule(q->tx_bh);
2760 }
2761 
2762 static void virtio_net_tx_timer(void *opaque)
2763 {
2764     VirtIONetQueue *q = opaque;
2765     VirtIONet *n = q->n;
2766     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2767     int ret;
2768 
2769     /* This happens when the device was stopped but the BH wasn't. */
2770     if (!vdev->vm_running) {
2771         /* Make sure tx waiting is set, so we'll run when restarted. */
2772         assert(q->tx_waiting);
2773         return;
2774     }
2775 
2776     q->tx_waiting = 0;
2777 
2778     /* Just in case the driver is not ready anymore */
2779     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2780         return;
2781     }
2782 
2783     ret = virtio_net_flush_tx(q);
2784     if (ret == -EBUSY || ret == -EINVAL) {
2785         return;
2786     }
2787     /*
2788      * If we flush a full burst of packets, assume there are
2789      * more coming and immediately rearm
2790      */
2791     if (ret >= n->tx_burst) {
2792         q->tx_waiting = 1;
2793         timer_mod(q->tx_timer,
2794                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2795         return;
2796     }
2797     /*
2798      * If less than a full burst, re-enable notification and flush
2799      * anything that may have come in while we weren't looking.  If
2800      * we find something, assume the guest is still active and rearm
2801      */
2802     virtio_queue_set_notification(q->tx_vq, 1);
2803     ret = virtio_net_flush_tx(q);
2804     if (ret > 0) {
2805         virtio_queue_set_notification(q->tx_vq, 0);
2806         q->tx_waiting = 1;
2807         timer_mod(q->tx_timer,
2808                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2809     }
2810 }
2811 
2812 static void virtio_net_tx_bh(void *opaque)
2813 {
2814     VirtIONetQueue *q = opaque;
2815     VirtIONet *n = q->n;
2816     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2817     int32_t ret;
2818 
2819     /* This happens when the device was stopped but the BH wasn't. */
2820     if (!vdev->vm_running) {
2821         /* Make sure tx waiting is set, so we'll run when restarted. */
2822         assert(q->tx_waiting);
2823         return;
2824     }
2825 
2826     q->tx_waiting = 0;
2827 
2828     /* Just in case the driver is not ready anymore */
2829     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2830         return;
2831     }
2832 
2833     ret = virtio_net_flush_tx(q);
2834     if (ret == -EBUSY || ret == -EINVAL) {
2835         return; /* Notification re-enable handled by tx_complete or device
2836                  * broken */
2837     }
2838 
2839     /* If we flush a full burst of packets, assume there are
2840      * more coming and immediately reschedule */
2841     if (ret >= n->tx_burst) {
2842         qemu_bh_schedule(q->tx_bh);
2843         q->tx_waiting = 1;
2844         return;
2845     }
2846 
2847     /* If less than a full burst, re-enable notification and flush
2848      * anything that may have come in while we weren't looking.  If
2849      * we find something, assume the guest is still active and reschedule */
2850     virtio_queue_set_notification(q->tx_vq, 1);
2851     ret = virtio_net_flush_tx(q);
2852     if (ret == -EINVAL) {
2853         return;
2854     } else if (ret > 0) {
2855         virtio_queue_set_notification(q->tx_vq, 0);
2856         qemu_bh_schedule(q->tx_bh);
2857         q->tx_waiting = 1;
2858     }
2859 }
2860 
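/* Create the rx/tx virtqueue pair for one queue index.  Depending on the
 * configured tx mode, transmission is driven either by a timer (re-armed
 * with tx_timeout ns) or by a bottom half scheduled on the guest's kick.
 */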
2861 static void virtio_net_add_queue(VirtIONet *n, int index)
2862 {
2863     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2864 
2865     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2866                                            virtio_net_handle_rx);
2867 
2868     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2869         n->vqs[index].tx_vq =
2870             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2871                              virtio_net_handle_tx_timer);
2872         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2873                                               virtio_net_tx_timer,
2874                                               &n->vqs[index]);
2875     } else {
2876         n->vqs[index].tx_vq =
2877             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2878                              virtio_net_handle_tx_bh);
2879         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2880     }
2881 
2882     n->vqs[index].tx_waiting = 0;
2883     n->vqs[index].n = n;
2884 }
2885 
2886 static void virtio_net_del_queue(VirtIONet *n, int index)
2887 {
2888     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2889     VirtIONetQueue *q = &n->vqs[index];
2890     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2891 
2892     qemu_purge_queued_packets(nc);
2893 
2894     virtio_del_queue(vdev, index * 2);
2895     if (q->tx_timer) {
2896         timer_free(q->tx_timer);
2897         q->tx_timer = NULL;
2898     } else {
2899         qemu_bh_delete(q->tx_bh);
2900         q->tx_bh = NULL;
2901     }
2902     q->tx_waiting = 0;
2903     virtio_del_queue(vdev, index * 2 + 1);
2904 }
2905 
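/* Resize the set of virtqueues when the number of queue pairs changes.  The
 * ctrl vq always sits last, so it is deleted first, queue pairs are then
 * added or removed as needed, and the ctrl vq is re-added at the new tail.
 */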
2906 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2907 {
2908     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2909     int old_num_queues = virtio_get_num_queues(vdev);
2910     int new_num_queues = new_max_queue_pairs * 2 + 1;
2911     int i;
2912 
2913     assert(old_num_queues >= 3);
2914     assert(old_num_queues % 2 == 1);
2915 
2916     if (old_num_queues == new_num_queues) {
2917         return;
2918     }
2919 
2920     /*
2921      * We always need to remove and add ctrl vq if
2922      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2923      * and then we only enter one of the following two loops.
2924      */
2925     virtio_del_queue(vdev, old_num_queues - 1);
2926 
2927     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2928         /* new_num_queues < old_num_queues */
2929         virtio_net_del_queue(n, i / 2);
2930     }
2931 
2932     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2933         /* new_num_queues > old_num_queues */
2934         virtio_net_add_queue(n, i / 2);
2935     }
2936 
2937     /* add ctrl_vq last */
2938     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2939 }
2940 
2941 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2942 {
2943     int max = multiqueue ? n->max_queue_pairs : 1;
2944 
2945     n->multiqueue = multiqueue;
2946     virtio_net_change_num_queue_pairs(n, max);
2947 
2948     virtio_net_set_queue_pairs(n);
2949 }
2950 
2951 static int virtio_net_post_load_device(void *opaque, int version_id)
2952 {
2953     VirtIONet *n = opaque;
2954     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2955     int i, link_down;
2956 
2957     trace_virtio_net_post_load_device();
2958     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2959                                virtio_vdev_has_feature(vdev,
2960                                                        VIRTIO_F_VERSION_1),
2961                                virtio_vdev_has_feature(vdev,
2962                                                        VIRTIO_NET_F_HASH_REPORT));
2963 
2964     /* MAC_TABLE_ENTRIES may be different from the saved image */
2965     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2966         n->mac_table.in_use = 0;
2967     }
2968 
2969     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2970         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2971     }
2972 
2973     /*
2974      * curr_guest_offloads will be later overwritten by the
2975      * virtio_set_features_nocheck call done from the virtio_load.
2976      * Here we make sure it is preserved and restored accordingly
2977      * in the virtio_net_post_load_virtio callback.
2978      */
2979     n->saved_guest_offloads = n->curr_guest_offloads;
2980 
2981     virtio_net_set_queue_pairs(n);
2982 
2983     /* Find the first multicast entry in the saved MAC filter */
2984     for (i = 0; i < n->mac_table.in_use; i++) {
2985         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2986             break;
2987         }
2988     }
2989     n->mac_table.first_multi = i;
2990 
2991     /* nc.link_down can't be migrated, so infer link_down according
2992      * to the link status bit in n->status */
2993     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2994     for (i = 0; i < n->max_queue_pairs; i++) {
2995         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2996     }
2997 
2998     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2999         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3000         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3001                                   QEMU_CLOCK_VIRTUAL,
3002                                   virtio_net_announce_timer, n);
3003         if (n->announce_timer.round) {
3004             timer_mod(n->announce_timer.tm,
3005                       qemu_clock_get_ms(n->announce_timer.type));
3006         } else {
3007             qemu_announce_timer_del(&n->announce_timer, false);
3008         }
3009     }
3010 
3011     if (n->rss_data.enabled) {
3012         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3013         if (!n->rss_data.populate_hash) {
3014             if (!virtio_net_attach_epbf_rss(n)) {
3015                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3016                     warn_report("Can't post-load eBPF RSS for vhost");
3017                 } else {
3018                     warn_report("Can't post-load eBPF RSS - "
3019                                 "fallback to software RSS");
3020                     n->rss_data.enabled_software_rss = true;
3021                 }
3022             }
3023         }
3024 
3025         trace_virtio_net_rss_enable(n->rss_data.hash_types,
3026                                     n->rss_data.indirections_len,
3027                                     sizeof(n->rss_data.key));
3028     } else {
3029         trace_virtio_net_rss_disable();
3030     }
3031     return 0;
3032 }
3033 
3034 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3035 {
3036     VirtIONet *n = VIRTIO_NET(vdev);
3037     /*
3038      * The actual needed state is now in saved_guest_offloads,
3039      * see virtio_net_post_load_device for detail.
3040      * Restore it back and apply the desired offloads.
3041      */
3042     n->curr_guest_offloads = n->saved_guest_offloads;
3043     if (peer_has_vnet_hdr(n)) {
3044         virtio_net_apply_guest_offloads(n);
3045     }
3046 
3047     return 0;
3048 }
3049 
3050 /* tx_waiting field of a VirtIONetQueue */
3051 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3052     .name = "virtio-net-queue-tx_waiting",
3053     .fields = (VMStateField[]) {
3054         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3055         VMSTATE_END_OF_LIST()
3056     },
3057 };
3058 
3059 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3060 {
3061     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3062 }
3063 
3064 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3065 {
3066     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3067                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3068 }
3069 
3070 static bool mac_table_fits(void *opaque, int version_id)
3071 {
3072     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3073 }
3074 
3075 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3076 {
3077     return !mac_table_fits(opaque, version_id);
3078 }
3079 
3080 /* This temporary type is shared by all the WITH_TMP methods
3081  * although only some fields are used by each.
3082  */
3083 struct VirtIONetMigTmp {
3084     VirtIONet      *parent;
3085     VirtIONetQueue *vqs_1;
3086     uint16_t        curr_queue_pairs_1;
3087     uint8_t         has_ufo;
3088     uint32_t        has_vnet_hdr;
3089 };
3090 
3091 /* The 2nd and subsequent tx_waiting flags are loaded later than
3092  * the 1st entry in the queue_pairs and only if there's more than one
3093  * entry.  We use the tmp mechanism to calculate a temporary
3094  * pointer and count and also validate the count.
3095  */
3096 
3097 static int virtio_net_tx_waiting_pre_save(void *opaque)
3098 {
3099     struct VirtIONetMigTmp *tmp = opaque;
3100 
3101     tmp->vqs_1 = tmp->parent->vqs + 1;
3102     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3103     if (tmp->parent->curr_queue_pairs == 0) {
3104         tmp->curr_queue_pairs_1 = 0;
3105     }
3106 
3107     return 0;
3108 }
3109 
3110 static int virtio_net_tx_waiting_pre_load(void *opaque)
3111 {
3112     struct VirtIONetMigTmp *tmp = opaque;
3113 
3114     /* Reuse the pointer setup from save */
3115     virtio_net_tx_waiting_pre_save(opaque);
3116 
3117     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3118         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3119             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3120 
3121         return -EINVAL;
3122     }
3123 
3124     return 0; /* all good */
3125 }
3126 
3127 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3128     .name      = "virtio-net-tx_waiting",
3129     .pre_load  = virtio_net_tx_waiting_pre_load,
3130     .pre_save  = virtio_net_tx_waiting_pre_save,
3131     .fields    = (VMStateField[]) {
3132         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3133                                      curr_queue_pairs_1,
3134                                      vmstate_virtio_net_queue_tx_waiting,
3135                                      struct VirtIONetQueue),
3136         VMSTATE_END_OF_LIST()
3137     },
3138 };
3139 
3140 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3141  * flag set we need to check that we have it
3142  */
3143 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3144 {
3145     struct VirtIONetMigTmp *tmp = opaque;
3146 
3147     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3148         error_report("virtio-net: saved image requires TUN_F_UFO support");
3149         return -EINVAL;
3150     }
3151 
3152     return 0;
3153 }
3154 
3155 static int virtio_net_ufo_pre_save(void *opaque)
3156 {
3157     struct VirtIONetMigTmp *tmp = opaque;
3158 
3159     tmp->has_ufo = tmp->parent->has_ufo;
3160 
3161     return 0;
3162 }
3163 
3164 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3165     .name      = "virtio-net-ufo",
3166     .post_load = virtio_net_ufo_post_load,
3167     .pre_save  = virtio_net_ufo_pre_save,
3168     .fields    = (VMStateField[]) {
3169         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3170         VMSTATE_END_OF_LIST()
3171     },
3172 };
3173 
3174 /* The 'has_vnet_hdr' flag is only tested; if the incoming stream has
3175  * the flag set, we need to check that the peer supports vnet headers.
3176  */
3177 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3178 {
3179     struct VirtIONetMigTmp *tmp = opaque;
3180 
3181     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3182         error_report("virtio-net: saved image requires vnet_hdr=on");
3183         return -EINVAL;
3184     }
3185 
3186     return 0;
3187 }
3188 
3189 static int virtio_net_vnet_pre_save(void *opaque)
3190 {
3191     struct VirtIONetMigTmp *tmp = opaque;
3192 
3193     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3194 
3195     return 0;
3196 }
3197 
3198 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3199     .name      = "virtio-net-vnet",
3200     .post_load = virtio_net_vnet_post_load,
3201     .pre_save  = virtio_net_vnet_pre_save,
3202     .fields    = (VMStateField[]) {
3203         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3204         VMSTATE_END_OF_LIST()
3205     },
3206 };
3207 
3208 static bool virtio_net_rss_needed(void *opaque)
3209 {
3210     return VIRTIO_NET(opaque)->rss_data.enabled;
3211 }
3212 
3213 static const VMStateDescription vmstate_virtio_net_rss = {
3214     .name      = "virtio-net-device/rss",
3215     .version_id = 1,
3216     .minimum_version_id = 1,
3217     .needed = virtio_net_rss_needed,
3218     .fields = (VMStateField[]) {
3219         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3220         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3221         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3222         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3223         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3224         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3225         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3226                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3227         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3228                                     rss_data.indirections_len, 0,
3229                                     vmstate_info_uint16, uint16_t),
3230         VMSTATE_END_OF_LIST()
3231     },
3232 };
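
/*
 * Being a subsection, this RSS state only goes on the wire when
 * virtio_net_rss_needed() returns true, so migration to and from
 * builds that predate RSS support keeps working while RSS is disabled.
 */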
3233 
3234 static const VMStateDescription vmstate_virtio_net_device = {
3235     .name = "virtio-net-device",
3236     .version_id = VIRTIO_NET_VM_VERSION,
3237     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3238     .post_load = virtio_net_post_load_device,
3239     .fields = (VMStateField[]) {
3240         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3241         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3242                                vmstate_virtio_net_queue_tx_waiting,
3243                                VirtIONetQueue),
3244         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3245         VMSTATE_UINT16(status, VirtIONet),
3246         VMSTATE_UINT8(promisc, VirtIONet),
3247         VMSTATE_UINT8(allmulti, VirtIONet),
3248         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3249 
3250         /* Guarded pair: if it fits we load it, else we throw it away
3251          * (this can happen if the source has a larger MAC table);
3252          * post-load sets the overflow flags in that case.
3253          */
3254         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3255                                  0, mac_table_fits, mac_table.in_use,
3256                                  ETH_ALEN),
3257         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3258                                      mac_table.in_use, ETH_ALEN),
3259 
3260         /* Note: this is an array of uint32_t that has always been saved
3261          * as a raw buffer, so mind the endianness; it is actually used
3262          * as a bitmap built on those uint32_t words.
3263          */
3264         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3265         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3266                          vmstate_virtio_net_has_vnet),
3267         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3268         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3269         VMSTATE_UINT8(alluni, VirtIONet),
3270         VMSTATE_UINT8(nomulti, VirtIONet),
3271         VMSTATE_UINT8(nouni, VirtIONet),
3272         VMSTATE_UINT8(nobcast, VirtIONet),
3273         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3274                          vmstate_virtio_net_has_ufo),
3275         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3276                             vmstate_info_uint16_equal, uint16_t),
3277         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3278         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3279                          vmstate_virtio_net_tx_waiting),
3280         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3281                             has_ctrl_guest_offloads),
3282         VMSTATE_END_OF_LIST()
3283     },
3284     .subsections = (const VMStateDescription * []) {
3285         &vmstate_virtio_net_rss,
3286         NULL
3287     }
3288 };
3289 
3290 static NetClientInfo net_virtio_info = {
3291     .type = NET_CLIENT_DRIVER_NIC,
3292     .size = sizeof(NICState),
3293     .can_receive = virtio_net_can_receive,
3294     .receive = virtio_net_receive,
3295     .link_status_changed = virtio_net_set_link_status,
3296     .query_rx_filter = virtio_net_query_rxfilter,
3297     .announce = virtio_net_announce,
3298 };
3299 
3300 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3301 {
3302     VirtIONet *n = VIRTIO_NET(vdev);
3303     NetClientState *nc;
3304     assert(n->vhost_started);
3305     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3306         /* Must guard against an invalid feature set and a bogus queue
3307          * index being set by a malicious guest, or slipping in through
3308          * a buggy migration stream.
3309          */
3310         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3311             qemu_log_mask(LOG_GUEST_ERROR,
3312                           "%s: bogus vq index ignored\n", __func__);
3313             return false;
3314         }
3315         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3316     } else {
3317         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3318     }
3319     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3320 }
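
/*
 * A reading aid for the index arithmetic above, not new behaviour:
 * without VIRTIO_NET_F_MQ the device exposes [rx0, tx0, ctrl], so
 * idx 2 can only be the control vq, whose peer is looked up at
 * subqueue index n->max_queue_pairs; data queues map via
 * vq2q(idx) == idx / 2, i.e. pair k owns vqs 2k and 2k + 1.
 */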
3321 
3322 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3323                                            bool mask)
3324 {
3325     VirtIONet *n = VIRTIO_NET(vdev);
3326     NetClientState *nc;
3327     assert(n->vhost_started);
3328     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3329         /* Must guard against an invalid feature set and a bogus queue
3330          * index being set by a malicious guest, or slipping in through
3331          * a buggy migration stream.
3332          */
3333         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3334             qemu_log_mask(LOG_GUEST_ERROR,
3335                           "%s: bogus vq index ignored\n", __func__);
3336             return;
3337         }
3338         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3339     } else {
3340         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3341     }
3342     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3343                              vdev, idx, mask);
3344 }
3345 
3346 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3347 {
3348     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3349 
3350     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3351 }
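
/*
 * Note: VIRTIO_NET_F_MAC is added to a local copy of host_features
 * only, ensuring that the size computation always accounts for the MAC
 * field that is unconditionally present in the config space.
 */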
3352 
3353 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3354                                    const char *type)
3355 {
3356     /*
3357      * The name may be NULL; in that case the netclient name defaults to type.x.
3358      */
3359     assert(type != NULL);
3360 
3361     g_free(n->netclient_name);
3362     g_free(n->netclient_type);
3363     n->netclient_name = g_strdup(name);
3364     n->netclient_type = g_strdup(type);
3365 }
3366 
3367 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3368 {
3369     HotplugHandler *hotplug_ctrl;
3370     PCIDevice *pci_dev;
3371     Error *err = NULL;
3372 
3373     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3374     if (hotplug_ctrl) {
3375         pci_dev = PCI_DEVICE(dev);
3376         pci_dev->partially_hotplugged = true;
3377         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3378         if (err) {
3379             error_report_err(err);
3380             return false;
3381         }
3382     } else {
3383         return false;
3384     }
3385     return true;
3386 }
3387 
3388 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3389                                     Error **errp)
3390 {
3391     Error *err = NULL;
3392     HotplugHandler *hotplug_ctrl;
3393     PCIDevice *pdev = PCI_DEVICE(dev);
3394     BusState *primary_bus;
3395 
3396     if (!pdev->partially_hotplugged) {
3397         return true;
3398     }
3399     primary_bus = dev->parent_bus;
3400     if (!primary_bus) {
3401         error_setg(errp, "virtio_net: couldn't find primary bus");
3402         return false;
3403     }
3404     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3405     qatomic_set(&n->failover_primary_hidden, false);
3406     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3407     if (hotplug_ctrl) {
3408         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3409         if (err) {
3410             goto out;
3411         }
3412         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3413     }
3414     pdev->partially_hotplugged = false;
3415 
3416 out:
3417     error_propagate(errp, err);
3418     return !err;
3419 }
3420 
3421 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3422 {
3423     bool should_be_hidden;
3424     Error *err = NULL;
3425     DeviceState *dev = failover_find_primary_device(n);
3426 
3427     if (!dev) {
3428         return;
3429     }
3430 
3431     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3432 
3433     if (migration_in_setup(s) && !should_be_hidden) {
3434         if (failover_unplug_primary(n, dev)) {
3435             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3436             qapi_event_send_unplug_primary(dev->id);
3437             qatomic_set(&n->failover_primary_hidden, true);
3438         } else {
3439             warn_report("couldn't unplug primary device");
3440         }
3441     } else if (migration_has_failed(s)) {
3442         /* We already unplugged the device; let's plug it back. */
3443         if (!failover_replug_primary(n, dev, &err)) {
3444             if (err) {
3445                 error_report_err(err);
3446             }
3447         }
3448     }
3449 }
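
/*
 * Informally, the failover sequence driven from here: (1) at migration
 * setup the primary gets an unplug request, its vmstate is unregistered
 * and UNPLUG_PRIMARY is emitted for management; (2) if migration fails,
 * failover_replug_primary() puts it back; (3) on success, feature
 * negotiation on the destination is what unhides and plugs the primary
 * there (handled elsewhere in this file).
 */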
3450 
3451 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3452 {
3453     MigrationState *s = data;
3454     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3455     virtio_net_handle_migration_primary(n, s);
3456 }
3457 
3458 static bool failover_hide_primary_device(DeviceListener *listener,
3459                                          const QDict *device_opts,
3460                                          bool from_json,
3461                                          Error **errp)
3462 {
3463     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3464     const char *standby_id;
3465 
3466     if (!device_opts) {
3467         return false;
3468     }
3469 
3470     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3471         return false;
3472     }
3473 
3474     if (!qdict_haskey(device_opts, "id")) {
3475         error_setg(errp, "Device with failover_pair_id needs to have id");
3476         return false;
3477     }
3478 
3479     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3480     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3481         return false;
3482     }
3483 
3484     /*
3485      * The hide helper can be called several times for a given device.
3486      * Check that there is only one primary per virtio-net device, but
3487      * don't clone the qdict again when the helper is called for the
3488      * same device.
3489      */
3490     if (n->primary_opts) {
3491         const char *old, *new;
3492         /* devices with failover_pair_id always have an id */
3493         old = qdict_get_str(n->primary_opts, "id");
3494         new = qdict_get_str(device_opts, "id");
3495         if (strcmp(old, new) != 0) {
3496             error_setg(errp, "Cannot attach more than one primary device to "
3497                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3498             return false;
3499         }
3500     } else {
3501         n->primary_opts = qdict_clone_shallow(device_opts);
3502         n->primary_opts_from_json = from_json;
3503     }
3504 
3505     /* failover_primary_hidden is set during feature negotiation */
3506     return qatomic_read(&n->failover_primary_hidden);
3507 }
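
/*
 * Illustrative command line for the pairing handled above (the ids and
 * the host address are examples only):
 *
 *   -device virtio-net-pci,netdev=hostnet0,id=standby0,failover=on
 *   -device vfio-pci,host=5e:00.0,id=primary0,failover_pair_id=standby0
 *
 * The listener keeps "primary0" hidden until the guest has negotiated
 * VIRTIO_NET_F_STANDBY.
 */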
3508 
3509 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3510 {
3511     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3512     VirtIONet *n = VIRTIO_NET(dev);
3513     NetClientState *nc;
3514     int i;
3515 
3516     if (n->net_conf.mtu) {
3517         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3518     }
3519 
3520     if (n->net_conf.duplex_str) {
3521         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3522             n->net_conf.duplex = DUPLEX_HALF;
3523         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3524             n->net_conf.duplex = DUPLEX_FULL;
3525         } else {
3526             error_setg(errp, "'duplex' must be 'half' or 'full'");
3527             return;
3528         }
3529         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3530     } else {
3531         n->net_conf.duplex = DUPLEX_UNKNOWN;
3532     }
3533 
3534     if (n->net_conf.speed < SPEED_UNKNOWN) {
3535         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3536         return;
3537     }
3538     if (n->net_conf.speed >= 0) {
3539         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3540     }
3541 
3542     if (n->failover) {
3543         n->primary_listener.hide_device = failover_hide_primary_device;
3544         qatomic_set(&n->failover_primary_hidden, true);
3545         device_listener_register(&n->primary_listener);
3546         n->migration_state.notify = virtio_net_migration_state_notifier;
3547         add_migration_state_change_notifier(&n->migration_state);
3548         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3549     }
3550 
3551     virtio_net_set_config_size(n, n->host_features);
3552     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3553 
3554     /*
3555      * We keep the lower limit on the RX queue size at its historical
3556      * value.  Guests that want a smaller ring can always resize it
3557      * themselves (with virtio 1 and up) without help from us.
3558      */
3559     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3560         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3561         !is_power_of_2(n->net_conf.rx_queue_size)) {
3562         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3563                    "must be a power of 2 between %d and %d.",
3564                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3565                    VIRTQUEUE_MAX_SIZE);
3566         virtio_cleanup(vdev);
3567         return;
3568     }
3569 
3570     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3571         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3572         !is_power_of_2(n->net_conf.tx_queue_size)) {
3573         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3574                    "must be a power of 2 between %d and %d",
3575                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3576                    VIRTQUEUE_MAX_SIZE);
3577         virtio_cleanup(vdev);
3578         return;
3579     }
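
    /*
     * For example, with VIRTQUEUE_MAX_SIZE at its usual value of 1024,
     * the accepted rx/tx queue sizes are exactly 256, 512 and 1024
     * (the powers of two within [256, 1024]).
     */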
3580 
3581     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3582 
3583     /*
3584      * Figure out the number of datapath queue pairs, since the backend
3585      * could provide the control queue via the peers as well.
3586      */
3587     if (n->nic_conf.peers.queues) {
3588         for (i = 0; i < n->max_ncs; i++) {
3589             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3590                 ++n->max_queue_pairs;
3591             }
3592         }
3593     }
3594     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3595 
3596     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3597         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3598                    "must be a positive integer no larger than %d.",
3599                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3600         virtio_cleanup(vdev);
3601         return;
3602     }
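    /*
     * The 2 * N + 1 above counts one rx and one tx virtqueue per pair
     * plus the single control vq; e.g. max_queue_pairs == 8 needs 17
     * virtqueues.
     */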
3603     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3604     n->curr_queue_pairs = 1;
3605     n->tx_timeout = n->net_conf.txtimer;
3606 
3607     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3608                        && strcmp(n->net_conf.tx, "bh")) {
3609         warn_report("virtio-net: "
3610                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3611                     n->net_conf.tx);
3612         error_printf("Defaulting to \"bh\"");
3613     }
3614 
3615     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3616                                     n->net_conf.tx_queue_size);
3617 
3618     for (i = 0; i < n->max_queue_pairs; i++) {
3619         virtio_net_add_queue(n, i);
3620     }
3621 
3622     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3623     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3624     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3625     n->status = VIRTIO_NET_S_LINK_UP;
3626     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3627                               QEMU_CLOCK_VIRTUAL,
3628                               virtio_net_announce_timer, n);
3629     n->announce_timer.round = 0;
3630 
3631     if (n->netclient_type) {
3632         /*
3633          * This happens when virtio_net_set_netclient_name() has been called.
3634          */
3635         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3636                               n->netclient_type, n->netclient_name, n);
3637     } else {
3638         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3639                               object_get_typename(OBJECT(dev)), dev->id, n);
3640     }
3641 
3642     for (i = 0; i < n->max_queue_pairs; i++) {
3643         n->nic->ncs[i].do_not_pad = true;
3644     }
3645 
3646     peer_test_vnet_hdr(n);
3647     if (peer_has_vnet_hdr(n)) {
3648         for (i = 0; i < n->max_queue_pairs; i++) {
3649             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3650         }
3651         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3652     } else {
3653         n->host_hdr_len = 0;
3654     }
3655 
3656     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3657 
3658     n->vqs[0].tx_waiting = 0;
3659     n->tx_burst = n->net_conf.txburst;
3660     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3661     n->promisc = 1; /* for compatibility */
3662 
3663     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3664 
3665     n->vlans = g_malloc0(MAX_VLAN >> 3);
3666 
3667     nc = qemu_get_queue(n->nic);
3668     nc->rxfilter_notify_enabled = 1;
3669 
3670     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3671         struct virtio_net_config netcfg = {};
3672         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3673         vhost_net_set_config(get_vhost_net(nc->peer),
3674             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3675     }
3676     QTAILQ_INIT(&n->rsc_chains);
3677     n->qdev = dev;
3678 
3679     net_rx_pkt_init(&n->rx_pkt, false);
3680 
3681     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3682         virtio_net_load_ebpf(n);
3683     }
3684 }
3685 
3686 static void virtio_net_device_unrealize(DeviceState *dev)
3687 {
3688     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3689     VirtIONet *n = VIRTIO_NET(dev);
3690     int i, max_queue_pairs;
3691 
3692     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3693         virtio_net_unload_ebpf(n);
3694     }
3695 
3696     /* This will stop the vhost backend if appropriate. */
3697     virtio_net_set_status(vdev, 0);
3698 
3699     g_free(n->netclient_name);
3700     n->netclient_name = NULL;
3701     g_free(n->netclient_type);
3702     n->netclient_type = NULL;
3703 
3704     g_free(n->mac_table.macs);
3705     g_free(n->vlans);
3706 
3707     if (n->failover) {
3708         qobject_unref(n->primary_opts);
3709         device_listener_unregister(&n->primary_listener);
3710         remove_migration_state_change_notifier(&n->migration_state);
3711     } else {
3712         assert(n->primary_opts == NULL);
3713     }
3714 
3715     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3716     for (i = 0; i < max_queue_pairs; i++) {
3717         virtio_net_del_queue(n, i);
3718     }
3719     /* also delete the control vq */
3720     virtio_del_queue(vdev, max_queue_pairs * 2);
3721     qemu_announce_timer_del(&n->announce_timer, false);
3722     g_free(n->vqs);
3723     qemu_del_nic(n->nic);
3724     virtio_net_rsc_cleanup(n);
3725     g_free(n->rss_data.indirections_table);
3726     net_rx_pkt_uninit(n->rx_pkt);
3727     virtio_cleanup(vdev);
3728 }
3729 
3730 static void virtio_net_instance_init(Object *obj)
3731 {
3732     VirtIONet *n = VIRTIO_NET(obj);
3733 
3734     /*
3735      * The default config_size is sizeof(struct virtio_net_config).
3736      * It can be overridden with virtio_net_set_config_size().
3737      */
3738     n->config_size = sizeof(struct virtio_net_config);
3739     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3740                                   "bootindex", "/ethernet-phy@0",
3741                                   DEVICE(n));
3742 
3743     ebpf_rss_init(&n->ebpf_rss);
3744 }
3745 
3746 static int virtio_net_pre_save(void *opaque)
3747 {
3748     VirtIONet *n = opaque;
3749 
3750     /* At this point the backend must be stopped; otherwise
3751      * it might keep writing to memory. */
3752     assert(!n->vhost_started);
3753 
3754     return 0;
3755 }
3756 
3757 static bool primary_unplug_pending(void *opaque)
3758 {
3759     DeviceState *dev = opaque;
3760     DeviceState *primary;
3761     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3762     VirtIONet *n = VIRTIO_NET(vdev);
3763 
3764     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3765         return false;
3766     }
3767     primary = failover_find_primary_device(n);
3768     return primary ? primary->pending_deleted_event : false;
3769 }
3770 
3771 static bool dev_unplug_pending(void *opaque)
3772 {
3773     DeviceState *dev = opaque;
3774     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3775 
3776     return vdc->primary_unplug_pending(dev);
3777 }
3778 
3779 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3780 {
3781     VirtIONet *n = VIRTIO_NET(vdev);
3782     NetClientState *nc = qemu_get_queue(n->nic);
3783     struct vhost_net *net = get_vhost_net(nc->peer);
3784     return &net->dev;
3785 }
3786 
3787 static const VMStateDescription vmstate_virtio_net = {
3788     .name = "virtio-net",
3789     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3790     .version_id = VIRTIO_NET_VM_VERSION,
3791     .fields = (VMStateField[]) {
3792         VMSTATE_VIRTIO_DEVICE,
3793         VMSTATE_END_OF_LIST()
3794     },
3795     .pre_save = virtio_net_pre_save,
3796     .dev_unplug_pending = dev_unplug_pending,
3797 };
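
/*
 * VMSTATE_VIRTIO_DEVICE defers to the virtio core's save/load, which in
 * turn applies the device-specific vmstate registered as vdc->vmsd in
 * the class init below, i.e. vmstate_virtio_net_device.
 */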
3798 
3799 static Property virtio_net_properties[] = {
3800     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3801                     VIRTIO_NET_F_CSUM, true),
3802     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3803                     VIRTIO_NET_F_GUEST_CSUM, true),
3804     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3805     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3806                     VIRTIO_NET_F_GUEST_TSO4, true),
3807     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3808                     VIRTIO_NET_F_GUEST_TSO6, true),
3809     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3810                     VIRTIO_NET_F_GUEST_ECN, true),
3811     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3812                     VIRTIO_NET_F_GUEST_UFO, true),
3813     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3814                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3815     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3816                     VIRTIO_NET_F_HOST_TSO4, true),
3817     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3818                     VIRTIO_NET_F_HOST_TSO6, true),
3819     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3820                     VIRTIO_NET_F_HOST_ECN, true),
3821     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3822                     VIRTIO_NET_F_HOST_UFO, true),
3823     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3824                     VIRTIO_NET_F_MRG_RXBUF, true),
3825     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3826                     VIRTIO_NET_F_STATUS, true),
3827     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3828                     VIRTIO_NET_F_CTRL_VQ, true),
3829     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3830                     VIRTIO_NET_F_CTRL_RX, true),
3831     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3832                     VIRTIO_NET_F_CTRL_VLAN, true),
3833     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3834                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3835     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3836                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3837     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3838                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3839     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3840     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3841                     VIRTIO_NET_F_RSS, false),
3842     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3843                     VIRTIO_NET_F_HASH_REPORT, false),
3844     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3845                     VIRTIO_NET_F_RSC_EXT, false),
3846     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3847                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3848     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3849     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3850                        TX_TIMER_INTERVAL),
3851     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3852     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3853     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3854                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3855     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3856                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3857     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3858     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3859                      true),
3860     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3861     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3862     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3863     DEFINE_PROP_END_OF_LIST(),
3864 };
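
/*
 * A hypothetical invocation exercising a few of these properties (the
 * netdev id and the values are examples only):
 *
 *   -netdev tap,id=net0,queues=4
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=512,speed=10000,duplex=full
 */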
3865 
3866 static void virtio_net_class_init(ObjectClass *klass, void *data)
3867 {
3868     DeviceClass *dc = DEVICE_CLASS(klass);
3869     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3870 
3871     device_class_set_props(dc, virtio_net_properties);
3872     dc->vmsd = &vmstate_virtio_net;
3873     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3874     vdc->realize = virtio_net_device_realize;
3875     vdc->unrealize = virtio_net_device_unrealize;
3876     vdc->get_config = virtio_net_get_config;
3877     vdc->set_config = virtio_net_set_config;
3878     vdc->get_features = virtio_net_get_features;
3879     vdc->set_features = virtio_net_set_features;
3880     vdc->bad_features = virtio_net_bad_features;
3881     vdc->reset = virtio_net_reset;
3882     vdc->queue_reset = virtio_net_queue_reset;
3883     vdc->queue_enable = virtio_net_queue_enable;
3884     vdc->set_status = virtio_net_set_status;
3885     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3886     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3887     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3888     vdc->post_load = virtio_net_post_load_virtio;
3889     vdc->vmsd = &vmstate_virtio_net_device;
3890     vdc->primary_unplug_pending = primary_unplug_pending;
3891     vdc->get_vhost = virtio_net_get_vhost;
3892 }
3893 
3894 static const TypeInfo virtio_net_info = {
3895     .name = TYPE_VIRTIO_NET,
3896     .parent = TYPE_VIRTIO_DEVICE,
3897     .instance_size = sizeof(VirtIONet),
3898     .instance_init = virtio_net_instance_init,
3899     .class_init = virtio_net_class_init,
3900 };
3901 
3902 static void virtio_register_types(void)
3903 {
3904     type_register_static(&virtio_net_info);
3905 }
3906 
3907 type_init(virtio_register_types)
3908