xref: /openbmc/qemu/hw/net/virtio-net.c (revision 6c187695)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue sizes; with virtio-1, guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* IPv4 header length (IHL) field value, in 32-bit words, without options */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval. This value affects performance
78    significantly and should be tuned carefully: '300000' (300us) is the
79    recommended value to pass the WHQL test, while '50000' can gain 2x netperf
80    throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static const VirtIOConfigSizeParams cfg_size_params = {
110     .min_size = endof(struct virtio_net_config, mac),
111     .max_size = sizeof(struct virtio_net_config),
112     .feature_sizes = feature_sizes
113 };
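
/*
 * Illustrative sketch (not part of the device code): realize is
 * expected to derive the guest-visible config size from the negotiated
 * features with the virtio_get_config_size() helper, e.g.
 *
 *   n->config_size = virtio_get_config_size(&cfg_size_params,
 *                                           n->host_features);
 *
 * so, for instance, a device without VIRTIO_NET_F_MQ or
 * VIRTIO_NET_F_MTU exposes a config space that ends at 'status'.
 */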
114 
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
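/*
 * Virtqueues come in RX/TX pairs: queue pair N uses virtqueue index
 * 2N for RX and 2N + 1 for TX, with the control virtqueue (if any)
 * last.  vq2q() maps a virtqueue index back to its queue pair.
 */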
122 static int vq2q(int queue_index)
123 {
124     return queue_index / 2;
125 }
126 
127 /* TODO
128  * - we could suppress RX interrupt if we were so inclined.
129  */
130 
131 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
132 {
133     VirtIONet *n = VIRTIO_NET(vdev);
134     struct virtio_net_config netcfg;
135     NetClientState *nc = qemu_get_queue(n->nic);
136     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
137 
138     int ret = 0;
139     memset(&netcfg, 0, sizeof(struct virtio_net_config));
140     virtio_stw_p(vdev, &netcfg.status, n->status);
141     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
142     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
143     memcpy(netcfg.mac, n->mac, ETH_ALEN);
144     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
145     netcfg.duplex = n->net_conf.duplex;
146     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
147     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
148                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
149                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
150     virtio_stl_p(vdev, &netcfg.supported_hash_types,
151                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
152     memcpy(config, &netcfg, n->config_size);
153 
154     /*
155      * Is this VDPA? No peer means not VDPA: there's no way to
156      * disconnect/reconnect a VDPA peer.
157      */
158     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
159         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
160                                    n->config_size);
161         if (ret != -1) {
162             /*
163              * Some NIC/kernel combinations present 0 as the mac address.  As
164              * that is not a legal address, try to proceed with the
165              * address from the QEMU command line in the hope that the
166              * address has been configured correctly elsewhere - just not
167              * reported by the device.
168              */
169             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
170                 info_report("Zero hardware mac address detected. Ignoring.");
171                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
172             }
173             memcpy(config, &netcfg, n->config_size);
174         }
175     }
176 }
177 
178 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
179 {
180     VirtIONet *n = VIRTIO_NET(vdev);
181     struct virtio_net_config netcfg = {};
182     NetClientState *nc = qemu_get_queue(n->nic);
183 
184     memcpy(&netcfg, config, n->config_size);
185 
186     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
187         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
188         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
189         memcpy(n->mac, netcfg.mac, ETH_ALEN);
190         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
191     }
192 
193     /*
194      * Is this VDPA? No peer means not VDPA: there's no way to
195      * disconnect/reconnect a VDPA peer.
196      */
197     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
198         vhost_net_set_config(get_vhost_net(nc->peer),
199                              (uint8_t *)&netcfg, 0, n->config_size,
200                              VHOST_SET_CONFIG_TYPE_MASTER);
201     }
202 }
203 
204 static bool virtio_net_started(VirtIONet *n, uint8_t status)
205 {
206     VirtIODevice *vdev = VIRTIO_DEVICE(n);
207     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
208         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
209 }
210 
211 static void virtio_net_announce_notify(VirtIONet *net)
212 {
213     VirtIODevice *vdev = VIRTIO_DEVICE(net);
214     trace_virtio_net_announce_notify();
215 
216     net->status |= VIRTIO_NET_S_ANNOUNCE;
217     virtio_notify_config(vdev);
218 }
219 
220 static void virtio_net_announce_timer(void *opaque)
221 {
222     VirtIONet *n = opaque;
223     trace_virtio_net_announce_timer(n->announce_timer.round);
224 
225     n->announce_timer.round--;
226     virtio_net_announce_notify(n);
227 }
228 
229 static void virtio_net_announce(NetClientState *nc)
230 {
231     VirtIONet *n = qemu_get_nic_opaque(nc);
232     VirtIODevice *vdev = VIRTIO_DEVICE(n);
233 
234     /*
235      * Make sure the virtio migration announcement timer isn't running.
236      * If it is, let it trigger the announcement so that we do not cause
237      * confusion.
238      */
239     if (n->announce_timer.round) {
240         return;
241     }
242 
243     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
244         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
245         virtio_net_announce_notify(n);
246     }
247 }
248 
249 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
250 {
251     VirtIODevice *vdev = VIRTIO_DEVICE(n);
252     NetClientState *nc = qemu_get_queue(n->nic);
253     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
254     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
255               n->max_ncs - n->max_queue_pairs : 0;
256 
257     if (!get_vhost_net(nc->peer)) {
258         return;
259     }
260 
261     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
262         !!n->vhost_started) {
263         return;
264     }
265     if (!n->vhost_started) {
266         int r, i;
267 
268         if (n->needs_vnet_hdr_swap) {
269             error_report("backend does not support %s vnet headers; "
270                          "falling back on userspace virtio",
271                          virtio_is_big_endian(vdev) ? "BE" : "LE");
272             return;
273         }
274 
275         /* Any packets outstanding? Purge them to avoid touching rings
276          * when vhost is running.
277          */
278         for (i = 0; i < queue_pairs; i++) {
279             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
280 
281             /* Purge both directions: TX and RX. */
282             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
283             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
284         }
285 
286         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
287             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
288             if (r < 0) {
289                 error_report("%u byte MTU not supported by the backend",
290                              n->net_conf.mtu);
291 
292                 return;
293             }
294         }
295 
296         n->vhost_started = 1;
297         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
298         if (r < 0) {
299             error_report("unable to start vhost net: %d: "
300                          "falling back on userspace virtio", -r);
301             n->vhost_started = 0;
302         }
303     } else {
304         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
305         n->vhost_started = 0;
306     }
307 }
308 
309 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
310                                           NetClientState *peer,
311                                           bool enable)
312 {
313     if (virtio_is_big_endian(vdev)) {
314         return qemu_set_vnet_be(peer, enable);
315     } else {
316         return qemu_set_vnet_le(peer, enable);
317     }
318 }
319 
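/*
 * Propagate the vnet header endianness to every queue pair's peer.
 * Returns true if a backend refused the setting (the device model must
 * then swap header fields itself); on failure, peers configured so far
 * are rolled back so the backends are never left in a mixed state.
 */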
320 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
321                                        int queue_pairs, bool enable)
322 {
323     int i;
324 
325     for (i = 0; i < queue_pairs; i++) {
326         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
327             enable) {
328             while (--i >= 0) {
329                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
330             }
331 
332             return true;
333         }
334     }
335 
336     return false;
337 }
338 
339 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
340 {
341     VirtIODevice *vdev = VIRTIO_DEVICE(n);
342     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
343 
344     if (virtio_net_started(n, status)) {
345         /* Before using the device, we tell the network backend about the
346          * endianness to use when parsing vnet headers. If the backend
347          * can't do it, we fall back to fixing the headers in the core
348          * virtio-net code.
349          */
350         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
351                                                             queue_pairs, true);
352     } else if (virtio_net_started(n, vdev->status)) {
353         /* After using the device, we need to reset the network backend to
354          * the default (guest native endianness), otherwise the guest may
355          * lose network connectivity if it is rebooted into a different
356          * endianness.
357          */
358         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
359     }
360 }
361 
362 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
363 {
364     unsigned int dropped = virtqueue_drop_all(vq);
365     if (dropped) {
366         virtio_notify(vdev, vq);
367     }
368 }
369 
370 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
371 {
372     VirtIONet *n = VIRTIO_NET(vdev);
373     VirtIONetQueue *q;
374     int i;
375     uint8_t queue_status;
376 
377     virtio_net_vnet_endian_status(n, status);
378     virtio_net_vhost_status(n, status);
379 
380     for (i = 0; i < n->max_queue_pairs; i++) {
381         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
382         bool queue_started;
383         q = &n->vqs[i];
384 
385         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
386             queue_status = 0;
387         } else {
388             queue_status = status;
389         }
390         queue_started =
391             virtio_net_started(n, queue_status) && !n->vhost_started;
392 
393         if (queue_started) {
394             qemu_flush_queued_packets(ncs);
395         }
396 
397         if (!q->tx_waiting) {
398             continue;
399         }
400 
401         if (queue_started) {
402             if (q->tx_timer) {
403                 timer_mod(q->tx_timer,
404                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
405             } else {
406                 qemu_bh_schedule(q->tx_bh);
407             }
408         } else {
409             if (q->tx_timer) {
410                 timer_del(q->tx_timer);
411             } else {
412                 qemu_bh_cancel(q->tx_bh);
413             }
414             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
415                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
416                 vdev->vm_running) {
417                 /* If tx is waiting, we likely have some packets in the
418                  * tx queue and have disabled notification. */
419                 q->tx_waiting = 0;
420                 virtio_queue_set_notification(q->tx_vq, 1);
421                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
422             }
423         }
424     }
425 }
426 
427 static void virtio_net_set_link_status(NetClientState *nc)
428 {
429     VirtIONet *n = qemu_get_nic_opaque(nc);
430     VirtIODevice *vdev = VIRTIO_DEVICE(n);
431     uint16_t old_status = n->status;
432 
433     if (nc->link_down)
434         n->status &= ~VIRTIO_NET_S_LINK_UP;
435     else
436         n->status |= VIRTIO_NET_S_LINK_UP;
437 
438     if (n->status != old_status)
439         virtio_notify_config(vdev);
440 
441     virtio_net_set_status(vdev, vdev->status);
442 }
443 
444 static void rxfilter_notify(NetClientState *nc)
445 {
446     VirtIONet *n = qemu_get_nic_opaque(nc);
447 
448     if (nc->rxfilter_notify_enabled) {
449         char *path = object_get_canonical_path(OBJECT(n->qdev));
450         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
451                                               n->netclient_name, path);
452         g_free(path);
453 
454         /* disable event notification to avoid events flooding */
455         nc->rxfilter_notify_enabled = 0;
456     }
457 }
458 
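/*
 * n->vlans is a 4096-bit bitmap kept as 32-bit words: word i, bit j
 * corresponds to VLAN id (i << 5) + j.  Rebuild the id list for the
 * QMP rx-filter query.
 */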
459 static intList *get_vlan_table(VirtIONet *n)
460 {
461     intList *list;
462     int i, j;
463 
464     list = NULL;
465     for (i = 0; i < MAX_VLAN >> 5; i++) {
466         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
467             if (n->vlans[i] & (1U << j)) {
468                 QAPI_LIST_PREPEND(list, (i << 5) + j);
469             }
470         }
471     }
472 
473     return list;
474 }
475 
476 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
477 {
478     VirtIONet *n = qemu_get_nic_opaque(nc);
479     VirtIODevice *vdev = VIRTIO_DEVICE(n);
480     RxFilterInfo *info;
481     strList *str_list;
482     int i;
483 
484     info = g_malloc0(sizeof(*info));
485     info->name = g_strdup(nc->name);
486     info->promiscuous = n->promisc;
487 
488     if (n->nouni) {
489         info->unicast = RX_STATE_NONE;
490     } else if (n->alluni) {
491         info->unicast = RX_STATE_ALL;
492     } else {
493         info->unicast = RX_STATE_NORMAL;
494     }
495 
496     if (n->nomulti) {
497         info->multicast = RX_STATE_NONE;
498     } else if (n->allmulti) {
499         info->multicast = RX_STATE_ALL;
500     } else {
501         info->multicast = RX_STATE_NORMAL;
502     }
503 
504     info->broadcast_allowed = !n->nobcast;
505     info->multicast_overflow = n->mac_table.multi_overflow;
506     info->unicast_overflow = n->mac_table.uni_overflow;
507 
508     info->main_mac = qemu_mac_strdup_printf(n->mac);
509 
510     str_list = NULL;
511     for (i = 0; i < n->mac_table.first_multi; i++) {
512         QAPI_LIST_PREPEND(str_list,
513                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
514     }
515     info->unicast_table = str_list;
516 
517     str_list = NULL;
518     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
519         QAPI_LIST_PREPEND(str_list,
520                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
521     }
522     info->multicast_table = str_list;
523     info->vlan_table = get_vlan_table(n);
524 
525     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
526         info->vlan = RX_STATE_ALL;
527     } else if (!info->vlan_table) {
528         info->vlan = RX_STATE_NONE;
529     } else {
530         info->vlan = RX_STATE_NORMAL;
531     }
532 
533     /* enable event notification after query */
534     nc->rxfilter_notify_enabled = 1;
535 
536     return info;
537 }
538 
539 static void virtio_net_reset(VirtIODevice *vdev)
540 {
541     VirtIONet *n = VIRTIO_NET(vdev);
542     int i;
543 
544     /* Reset back to compatibility mode */
545     n->promisc = 1;
546     n->allmulti = 0;
547     n->alluni = 0;
548     n->nomulti = 0;
549     n->nouni = 0;
550     n->nobcast = 0;
551     /* multiqueue is disabled by default */
552     n->curr_queue_pairs = 1;
553     timer_del(n->announce_timer.tm);
554     n->announce_timer.round = 0;
555     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
556 
557     /* Flush any MAC and VLAN filter table state */
558     n->mac_table.in_use = 0;
559     n->mac_table.first_multi = 0;
560     n->mac_table.multi_overflow = 0;
561     n->mac_table.uni_overflow = 0;
562     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
563     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
564     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
565     memset(n->vlans, 0, MAX_VLAN >> 3);
566 
567     /* Flush any async TX */
568     for (i = 0; i < n->max_queue_pairs; i++) {
569         NetClientState *nc = qemu_get_subqueue(n->nic, i);
570 
571         if (nc->peer) {
572             qemu_flush_or_purge_queued_packets(nc->peer, true);
573             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
574         }
575     }
576 }
577 
578 static void peer_test_vnet_hdr(VirtIONet *n)
579 {
580     NetClientState *nc = qemu_get_queue(n->nic);
581     if (!nc->peer) {
582         return;
583     }
584 
585     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
586 }
587 
588 static int peer_has_vnet_hdr(VirtIONet *n)
589 {
590     return n->has_vnet_hdr;
591 }
592 
593 static int peer_has_ufo(VirtIONet *n)
594 {
595     if (!peer_has_vnet_hdr(n))
596         return 0;
597 
598     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
599 
600     return n->has_ufo;
601 }
602 
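/*
 * Recompute the guest-visible vnet header length from the negotiated
 * features.  When the peer can parse the same layout, propagate it so
 * that host_hdr_len == guest_hdr_len and the data path does not have
 * to rewrite headers.
 */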
603 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
604                                        int version_1, int hash_report)
605 {
606     int i;
607     NetClientState *nc;
608 
609     n->mergeable_rx_bufs = mergeable_rx_bufs;
610 
611     if (version_1) {
612         n->guest_hdr_len = hash_report ?
613             sizeof(struct virtio_net_hdr_v1_hash) :
614             sizeof(struct virtio_net_hdr_mrg_rxbuf);
615         n->rss_data.populate_hash = !!hash_report;
616     } else {
617         n->guest_hdr_len = n->mergeable_rx_bufs ?
618             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
619             sizeof(struct virtio_net_hdr);
620     }
621 
622     for (i = 0; i < n->max_queue_pairs; i++) {
623         nc = qemu_get_subqueue(n->nic, i);
624 
625         if (peer_has_vnet_hdr(n) &&
626             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
627             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
628             n->host_hdr_len = n->guest_hdr_len;
629         }
630     }
631 }
632 
633 static int virtio_net_max_tx_queue_size(VirtIONet *n)
634 {
635     NetClientState *peer = n->nic_conf.peers.ncs[0];
636 
637     /*
638      * Backends other than vhost-user and vhost-vdpa don't support a queue
639      * size larger than the default.
640      */
641     if (!peer) {
642         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
643     }
644 
645     switch (peer->info->type) {
646     case NET_CLIENT_DRIVER_VHOST_USER:
647     case NET_CLIENT_DRIVER_VHOST_VDPA:
648         return VIRTQUEUE_MAX_SIZE;
649     default:
650         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
651     }
652 }
653 
654 static int peer_attach(VirtIONet *n, int index)
655 {
656     NetClientState *nc = qemu_get_subqueue(n->nic, index);
657 
658     if (!nc->peer) {
659         return 0;
660     }
661 
662     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
663         vhost_set_vring_enable(nc->peer, 1);
664     }
665 
666     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
667         return 0;
668     }
669 
670     if (n->max_queue_pairs == 1) {
671         return 0;
672     }
673 
674     return tap_enable(nc->peer);
675 }
676 
677 static int peer_detach(VirtIONet *n, int index)
678 {
679     NetClientState *nc = qemu_get_subqueue(n->nic, index);
680 
681     if (!nc->peer) {
682         return 0;
683     }
684 
685     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
686         vhost_set_vring_enable(nc->peer, 0);
687     }
688 
689     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
690         return 0;
691     }
692 
693     return tap_disable(nc->peer);
694 }
695 
696 static void virtio_net_set_queue_pairs(VirtIONet *n)
697 {
698     int i;
699     int r;
700 
701     if (n->nic->peer_deleted) {
702         return;
703     }
704 
705     for (i = 0; i < n->max_queue_pairs; i++) {
706         if (i < n->curr_queue_pairs) {
707             r = peer_attach(n, i);
708             assert(!r);
709         } else {
710             r = peer_detach(n, i);
711             assert(!r);
712         }
713     }
714 }
715 
716 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
717 
718 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
719                                         Error **errp)
720 {
721     VirtIONet *n = VIRTIO_NET(vdev);
722     NetClientState *nc = qemu_get_queue(n->nic);
723 
724     /* First, sync all features possibly supported by virtio-net */
725     features |= n->host_features;
726 
727     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
728 
729     if (!peer_has_vnet_hdr(n)) {
730         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
731         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
732         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
733         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
734 
735         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
736         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
737         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
738         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
739 
740         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
741     }
742 
743     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
744         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
745         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
746     }
747 
748     if (!get_vhost_net(nc->peer)) {
749         return features;
750     }
751 
752     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
753         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
754     }
755     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
756     vdev->backend_features = features;
757 
758     if (n->mtu_bypass_backend &&
759             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
760         features |= (1ULL << VIRTIO_NET_F_MTU);
761     }
762 
763     return features;
764 }
765 
766 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
767 {
768     uint64_t features = 0;
769 
770     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
771      * but also these: */
772     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
773     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
774     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
775     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
776     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
777 
778     return features;
779 }
780 
781 static void virtio_net_apply_guest_offloads(VirtIONet *n)
782 {
783     qemu_set_offload(qemu_get_queue(n->nic)->peer,
784             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
785             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
786             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
787             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
788             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
789 }
790 
791 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
792 {
793     static const uint64_t guest_offloads_mask =
794         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
795         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
796         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
797         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
798         (1ULL << VIRTIO_NET_F_GUEST_UFO);
799 
800     return guest_offloads_mask & features;
801 }
802 
803 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
804 {
805     VirtIODevice *vdev = VIRTIO_DEVICE(n);
806     return virtio_net_guest_offloads_by_features(vdev->guest_features);
807 }
808 
809 typedef struct {
810     VirtIONet *n;
811     DeviceState *dev;
812 } FailoverDevice;
813 
814 /**
815  * Set the failover primary device
816  *
817  * @dev: device being walked over
818  * @opaque: FailoverDevice to fill in with the matching device
819  * Returns: non-zero to stop the walk once the primary is found
820  */
821 static int failover_set_primary(DeviceState *dev, void *opaque)
822 {
823     FailoverDevice *fdev = opaque;
824     PCIDevice *pci_dev = (PCIDevice *)
825         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
826 
827     if (!pci_dev) {
828         return 0;
829     }
830 
831     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
832         fdev->dev = dev;
833         return 1;
834     }
835 
836     return 0;
837 }
838 
839 /**
840  * Find the primary device for this failover virtio-net
841  *
842  * @n: VirtIONet device
844  */
845 static DeviceState *failover_find_primary_device(VirtIONet *n)
846 {
847     FailoverDevice fdev = {
848         .n = n,
849     };
850 
851     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
852                        NULL, NULL, &fdev);
853     return fdev.dev;
854 }
855 
856 static void failover_add_primary(VirtIONet *n, Error **errp)
857 {
858     Error *err = NULL;
859     DeviceState *dev = failover_find_primary_device(n);
860 
861     if (dev) {
862         return;
863     }
864 
865     if (!n->primary_opts) {
866         error_setg(errp, "Primary device not found");
867         error_append_hint(errp, "Virtio-net failover will not work. Make "
868                           "sure primary device has parameter"
869                           " failover_pair_id=%s\n", n->netclient_name);
870         return;
871     }
872 
873     dev = qdev_device_add_from_qdict(n->primary_opts,
874                                      n->primary_opts_from_json,
875                                      &err);
876     if (err) {
877         qobject_unref(n->primary_opts);
878         n->primary_opts = NULL;
879     } else {
880         object_unref(OBJECT(dev));
881     }
882     error_propagate(errp, err);
883 }
884 
885 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
886 {
887     VirtIONet *n = VIRTIO_NET(vdev);
888     Error *err = NULL;
889     int i;
890 
891     if (n->mtu_bypass_backend &&
892             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
893         features &= ~(1ULL << VIRTIO_NET_F_MTU);
894     }
895 
896     virtio_net_set_multiqueue(n,
897                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
898                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
899 
900     virtio_net_set_mrg_rx_bufs(n,
901                                virtio_has_feature(features,
902                                                   VIRTIO_NET_F_MRG_RXBUF),
903                                virtio_has_feature(features,
904                                                   VIRTIO_F_VERSION_1),
905                                virtio_has_feature(features,
906                                                   VIRTIO_NET_F_HASH_REPORT));
907 
908     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
909         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
910     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
911         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
912     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
913 
914     if (n->has_vnet_hdr) {
915         n->curr_guest_offloads =
916             virtio_net_guest_offloads_by_features(features);
917         virtio_net_apply_guest_offloads(n);
918     }
919 
920     for (i = 0;  i < n->max_queue_pairs; i++) {
921         NetClientState *nc = qemu_get_subqueue(n->nic, i);
922 
923         if (!get_vhost_net(nc->peer)) {
924             continue;
925         }
926         vhost_net_ack_features(get_vhost_net(nc->peer), features);
927     }
928 
929     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
930         memset(n->vlans, 0, MAX_VLAN >> 3);
931     } else {
932         memset(n->vlans, 0xff, MAX_VLAN >> 3);
933     }
934 
935     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
936         qapi_event_send_failover_negotiated(n->netclient_name);
937         qatomic_set(&n->failover_primary_hidden, false);
938         failover_add_primary(n, &err);
939         if (err) {
940             if (!qtest_enabled()) {
941                 warn_report_err(err);
942             } else {
943                 error_free(err);
944             }
945         }
946     }
947 }
948 
949 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
950                                      struct iovec *iov, unsigned int iov_cnt)
951 {
952     uint8_t on;
953     size_t s;
954     NetClientState *nc = qemu_get_queue(n->nic);
955 
956     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
957     if (s != sizeof(on)) {
958         return VIRTIO_NET_ERR;
959     }
960 
961     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
962         n->promisc = on;
963     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
964         n->allmulti = on;
965     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
966         n->alluni = on;
967     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
968         n->nomulti = on;
969     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
970         n->nouni = on;
971     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
972         n->nobcast = on;
973     } else {
974         return VIRTIO_NET_ERR;
975     }
976 
977     rxfilter_notify(nc);
978 
979     return VIRTIO_NET_OK;
980 }
981 
982 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
983                                      struct iovec *iov, unsigned int iov_cnt)
984 {
985     VirtIODevice *vdev = VIRTIO_DEVICE(n);
986     uint64_t offloads;
987     size_t s;
988 
989     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
990         return VIRTIO_NET_ERR;
991     }
992 
993     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
994     if (s != sizeof(offloads)) {
995         return VIRTIO_NET_ERR;
996     }
997 
998     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
999         uint64_t supported_offloads;
1000 
1001         offloads = virtio_ldq_p(vdev, &offloads);
1002 
1003         if (!n->has_vnet_hdr) {
1004             return VIRTIO_NET_ERR;
1005         }
1006 
1007         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1008             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1009         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1010             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1011         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1012 
1013         supported_offloads = virtio_net_supported_guest_offloads(n);
1014         if (offloads & ~supported_offloads) {
1015             return VIRTIO_NET_ERR;
1016         }
1017 
1018         n->curr_guest_offloads = offloads;
1019         virtio_net_apply_guest_offloads(n);
1020 
1021         return VIRTIO_NET_OK;
1022     } else {
1023         return VIRTIO_NET_ERR;
1024     }
1025 }
1026 
1027 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1028                                  struct iovec *iov, unsigned int iov_cnt)
1029 {
1030     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1031     struct virtio_net_ctrl_mac mac_data;
1032     size_t s;
1033     NetClientState *nc = qemu_get_queue(n->nic);
1034 
1035     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1036         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1037             return VIRTIO_NET_ERR;
1038         }
1039         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1040         assert(s == sizeof(n->mac));
1041         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1042         rxfilter_notify(nc);
1043 
1044         return VIRTIO_NET_OK;
1045     }
1046 
1047     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1048         return VIRTIO_NET_ERR;
1049     }
1050 
1051     int in_use = 0;
1052     int first_multi = 0;
1053     uint8_t uni_overflow = 0;
1054     uint8_t multi_overflow = 0;
1055     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1056 
1057     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1058                    sizeof(mac_data.entries));
1059     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1060     if (s != sizeof(mac_data.entries)) {
1061         goto error;
1062     }
1063     iov_discard_front(&iov, &iov_cnt, s);
1064 
1065     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1066         goto error;
1067     }
1068 
1069     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1070         s = iov_to_buf(iov, iov_cnt, 0, macs,
1071                        mac_data.entries * ETH_ALEN);
1072         if (s != mac_data.entries * ETH_ALEN) {
1073             goto error;
1074         }
1075         in_use += mac_data.entries;
1076     } else {
1077         uni_overflow = 1;
1078     }
1079 
1080     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1081 
1082     first_multi = in_use;
1083 
1084     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1085                    sizeof(mac_data.entries));
1086     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1087     if (s != sizeof(mac_data.entries)) {
1088         goto error;
1089     }
1090 
1091     iov_discard_front(&iov, &iov_cnt, s);
1092 
1093     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1094         goto error;
1095     }
1096 
1097     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1098         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1099                        mac_data.entries * ETH_ALEN);
1100         if (s != mac_data.entries * ETH_ALEN) {
1101             goto error;
1102         }
1103         in_use += mac_data.entries;
1104     } else {
1105         multi_overflow = 1;
1106     }
1107 
1108     n->mac_table.in_use = in_use;
1109     n->mac_table.first_multi = first_multi;
1110     n->mac_table.uni_overflow = uni_overflow;
1111     n->mac_table.multi_overflow = multi_overflow;
1112     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1113     g_free(macs);
1114     rxfilter_notify(nc);
1115 
1116     return VIRTIO_NET_OK;
1117 
1118 error:
1119     g_free(macs);
1120     return VIRTIO_NET_ERR;
1121 }
1122 
1123 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1124                                         struct iovec *iov, unsigned int iov_cnt)
1125 {
1126     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1127     uint16_t vid;
1128     size_t s;
1129     NetClientState *nc = qemu_get_queue(n->nic);
1130 
1131     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1132     vid = virtio_lduw_p(vdev, &vid);
1133     if (s != sizeof(vid)) {
1134         return VIRTIO_NET_ERR;
1135     }
1136 
1137     if (vid >= MAX_VLAN)
1138         return VIRTIO_NET_ERR;
1139 
1140     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1141         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1142     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1143         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1144     else
1145         return VIRTIO_NET_ERR;
1146 
1147     rxfilter_notify(nc);
1148 
1149     return VIRTIO_NET_OK;
1150 }
1151 
1152 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1153                                       struct iovec *iov, unsigned int iov_cnt)
1154 {
1155     trace_virtio_net_handle_announce(n->announce_timer.round);
1156     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1157         n->status & VIRTIO_NET_S_ANNOUNCE) {
1158         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1159         if (n->announce_timer.round) {
1160             qemu_announce_timer_step(&n->announce_timer);
1161         }
1162         return VIRTIO_NET_OK;
1163     } else {
1164         return VIRTIO_NET_ERR;
1165     }
1166 }
1167 
1168 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1169 
1170 static void virtio_net_disable_rss(VirtIONet *n)
1171 {
1172     if (n->rss_data.enabled) {
1173         trace_virtio_net_rss_disable();
1174     }
1175     n->rss_data.enabled = false;
1176 
1177     virtio_net_detach_epbf_rss(n);
1178 }
1179 
1180 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1181 {
1182     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1183     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1184         return false;
1185     }
1186 
1187     return nc->info->set_steering_ebpf(nc, prog_fd);
1188 }
1189 
1190 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1191                                    struct EBPFRSSConfig *config)
1192 {
1193     config->redirect = data->redirect;
1194     config->populate_hash = data->populate_hash;
1195     config->hash_types = data->hash_types;
1196     config->indirections_len = data->indirections_len;
1197     config->default_queue = data->default_queue;
1198 }
1199 
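/*
 * Push the current RSS settings into the eBPF steering program and
 * attach it to the backend.  Returns false if the program isn't loaded
 * or the backend can't accept it; callers then fall back to software
 * RSS where possible.
 */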
1200 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1201 {
1202     struct EBPFRSSConfig config = {};
1203 
1204     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1205         return false;
1206     }
1207 
1208     rss_data_to_rss_config(&n->rss_data, &config);
1209 
1210     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1211                           n->rss_data.indirections_table, n->rss_data.key)) {
1212         return false;
1213     }
1214 
1215     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1216         return false;
1217     }
1218 
1219     return true;
1220 }
1221 
1222 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1223 {
1224     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1225 }
1226 
1227 static bool virtio_net_load_ebpf(VirtIONet *n)
1228 {
1229     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1230         /* backend doesn't support steering eBPF */
1231         return false;
1232     }
1233 
1234     return ebpf_rss_load(&n->ebpf_rss);
1235 }
1236 
1237 static void virtio_net_unload_ebpf(VirtIONet *n)
1238 {
1239     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1240     ebpf_rss_unload(&n->ebpf_rss);
1241 }
1242 
1243 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1244                                       struct iovec *iov,
1245                                       unsigned int iov_cnt,
1246                                       bool do_rss)
1247 {
1248     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1249     struct virtio_net_rss_config cfg;
1250     size_t s, offset = 0, size_get;
1251     uint16_t queue_pairs, i;
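    /*
     * 'temp' mirrors the max_tx_vq (uint16_t) and hash_key_length
     * (uint8_t) fields that follow the variable-length indirection
     * table in struct virtio_net_rss_config.
     */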
1252     struct {
1253         uint16_t us;
1254         uint8_t b;
1255     } QEMU_PACKED temp;
1256     const char *err_msg = "";
1257     uint32_t err_value = 0;
1258 
1259     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1260         err_msg = "RSS is not negotiated";
1261         goto error;
1262     }
1263     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1264         err_msg = "Hash report is not negotiated";
1265         goto error;
1266     }
1267     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1268     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1269     if (s != size_get) {
1270         err_msg = "Short command buffer";
1271         err_value = (uint32_t)s;
1272         goto error;
1273     }
1274     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1275     n->rss_data.indirections_len =
1276         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1277     n->rss_data.indirections_len++;
1278     if (!do_rss) {
1279         n->rss_data.indirections_len = 1;
1280     }
1281     if (!is_power_of_2(n->rss_data.indirections_len)) {
1282         err_msg = "Invalid size of indirection table";
1283         err_value = n->rss_data.indirections_len;
1284         goto error;
1285     }
1286     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1287         err_msg = "Too large indirection table";
1288         err_value = n->rss_data.indirections_len;
1289         goto error;
1290     }
1291     n->rss_data.default_queue = do_rss ?
1292         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1293     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1294         err_msg = "Invalid default queue";
1295         err_value = n->rss_data.default_queue;
1296         goto error;
1297     }
1298     offset += size_get;
1299     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1300     g_free(n->rss_data.indirections_table);
1301     n->rss_data.indirections_table = g_malloc(size_get);
1302     if (!n->rss_data.indirections_table) {
1303         err_msg = "Can't allocate indirections table";
1304         err_value = n->rss_data.indirections_len;
1305         goto error;
1306     }
1307     s = iov_to_buf(iov, iov_cnt, offset,
1308                    n->rss_data.indirections_table, size_get);
1309     if (s != size_get) {
1310         err_msg = "Short indirection table buffer";
1311         err_value = (uint32_t)s;
1312         goto error;
1313     }
1314     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1315         uint16_t val = n->rss_data.indirections_table[i];
1316         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1317     }
1318     offset += size_get;
1319     size_get = sizeof(temp);
1320     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1321     if (s != size_get) {
1322         err_msg = "Can't get queue_pairs";
1323         err_value = (uint32_t)s;
1324         goto error;
1325     }
1326     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1327     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1328         err_msg = "Invalid number of queue_pairs";
1329         err_value = queue_pairs;
1330         goto error;
1331     }
1332     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1333         err_msg = "Invalid key size";
1334         err_value = temp.b;
1335         goto error;
1336     }
1337     if (!temp.b && n->rss_data.hash_types) {
1338         err_msg = "No key provided";
1339         err_value = 0;
1340         goto error;
1341     }
1342     if (!temp.b && !n->rss_data.hash_types) {
1343         virtio_net_disable_rss(n);
1344         return queue_pairs;
1345     }
1346     offset += size_get;
1347     size_get = temp.b;
1348     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1349     if (s != size_get) {
1350         err_msg = "Can get key buffer";
1351         err_value = (uint32_t)s;
1352         goto error;
1353     }
1354     n->rss_data.enabled = true;
1355 
1356     if (!n->rss_data.populate_hash) {
1357         if (!virtio_net_attach_epbf_rss(n)) {
1358             /* eBPF must be loaded for vhost */
1359             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1360                 warn_report("Can't load eBPF RSS for vhost");
1361                 goto error;
1362             }
1363             /* fall back to software RSS */
1364             warn_report("Can't load eBPF RSS - falling back to software RSS");
1365             n->rss_data.enabled_software_rss = true;
1366         }
1367     } else {
1368         /* use software RSS for hash population */
1369         /* and detach eBPF if it was loaded before */
1370         virtio_net_detach_epbf_rss(n);
1371         n->rss_data.enabled_software_rss = true;
1372     }
1373 
1374     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1375                                 n->rss_data.indirections_len,
1376                                 temp.b);
1377     return queue_pairs;
1378 error:
1379     trace_virtio_net_rss_error(err_msg, err_value);
1380     virtio_net_disable_rss(n);
1381     return 0;
1382 }
1383 
1384 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1385                                 struct iovec *iov, unsigned int iov_cnt)
1386 {
1387     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1388     uint16_t queue_pairs;
1389     NetClientState *nc = qemu_get_queue(n->nic);
1390 
1391     virtio_net_disable_rss(n);
1392     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1393         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1394         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1395     }
1396     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1397         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1398     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1399         struct virtio_net_ctrl_mq mq;
1400         size_t s;
1401         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1402             return VIRTIO_NET_ERR;
1403         }
1404         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1405         if (s != sizeof(mq)) {
1406             return VIRTIO_NET_ERR;
1407         }
1408         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1409 
1410     } else {
1411         return VIRTIO_NET_ERR;
1412     }
1413 
1414     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1415         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1416         queue_pairs > n->max_queue_pairs ||
1417         !n->multiqueue) {
1418         return VIRTIO_NET_ERR;
1419     }
1420 
1421     n->curr_queue_pairs = queue_pairs;
1422     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1423         /*
1424          * Avoid updating the backend for a vdpa device: We're only interested
1425          * in updating the device model queues.
1426          */
1427         return VIRTIO_NET_OK;
1428     }
1429     /* stop the backend before changing the number of queue_pairs to
1430      * avoid handling a disabled queue */
1431     virtio_net_set_status(vdev, vdev->status);
1432     virtio_net_set_queue_pairs(n);
1433 
1434     return VIRTIO_NET_OK;
1435 }
1436 
1437 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1438                                   const struct iovec *in_sg, unsigned in_num,
1439                                   const struct iovec *out_sg,
1440                                   unsigned out_num)
1441 {
1442     VirtIONet *n = VIRTIO_NET(vdev);
1443     struct virtio_net_ctrl_hdr ctrl;
1444     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1445     size_t s;
1446     struct iovec *iov, *iov2;
1447 
1448     if (iov_size(in_sg, in_num) < sizeof(status) ||
1449         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1450         virtio_error(vdev, "virtio-net ctrl missing headers");
1451         return 0;
1452     }
1453 
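    /*
     * Work on a duplicate of out_sg: iov_discard_front() below modifies
     * the vector in place, and the caller's original must stay intact
     * for virtqueue_push().
     */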
1454     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1455     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1456     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1457     if (s != sizeof(ctrl)) {
1458         status = VIRTIO_NET_ERR;
1459     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1460         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1461     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1462         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1463     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1464         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1465     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1466         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1467     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1468         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1469     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1470         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1471     }
1472 
1473     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1474     assert(s == sizeof(status));
1475 
1476     g_free(iov2);
1477     return sizeof(status);
1478 }
1479 
1480 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1481 {
1482     VirtQueueElement *elem;
1483 
1484     for (;;) {
1485         size_t written;
1486         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1487         if (!elem) {
1488             break;
1489         }
1490 
1491         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1492                                              elem->out_sg, elem->out_num);
1493         if (written > 0) {
1494             virtqueue_push(vq, elem, written);
1495             virtio_notify(vdev, vq);
1496             g_free(elem);
1497         } else {
1498             virtqueue_detach_element(vq, elem, 0);
1499             g_free(elem);
1500             break;
1501         }
1502     }
1503 }
1504 
1505 /* RX */
1506 
1507 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1508 {
1509     VirtIONet *n = VIRTIO_NET(vdev);
1510     int queue_index = vq2q(virtio_get_queue_index(vq));
1511 
1512     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1513 }
1514 
1515 static bool virtio_net_can_receive(NetClientState *nc)
1516 {
1517     VirtIONet *n = qemu_get_nic_opaque(nc);
1518     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1519     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1520 
1521     if (!vdev->vm_running) {
1522         return false;
1523     }
1524 
1525     if (nc->queue_index >= n->curr_queue_pairs) {
1526         return false;
1527     }
1528 
1529     if (!virtio_queue_ready(q->rx_vq) ||
1530         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1531         return false;
1532     }
1533 
1534     return true;
1535 }
1536 
1537 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1538 {
1539     VirtIONet *n = q->n;
1540     if (virtio_queue_empty(q->rx_vq) ||
1541         (n->mergeable_rx_bufs &&
1542          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1543         virtio_queue_set_notification(q->rx_vq, 1);
1544 
1545         /* To avoid a race condition where the guest has made some buffers
1546          * available after the above check but before notification was
1547          * enabled, check for available buffers again.
1548          */
1549         if (virtio_queue_empty(q->rx_vq) ||
1550             (n->mergeable_rx_bufs &&
1551              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1552             return 0;
1553         }
1554     }
1555 
1556     virtio_queue_set_notification(q->rx_vq, 0);
1557     return 1;
1558 }
1559 
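/*
 * Byteswap the multi-byte vnet header fields for cross-endian guests
 * when the backend could not be switched to the guest's endianness
 * (see needs_vnet_hdr_swap).
 */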
1560 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1561 {
1562     virtio_tswap16s(vdev, &hdr->hdr_len);
1563     virtio_tswap16s(vdev, &hdr->gso_size);
1564     virtio_tswap16s(vdev, &hdr->csum_start);
1565     virtio_tswap16s(vdev, &hdr->csum_offset);
1566 }
1567 
1568 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1569  * it never finds out that the packets don't have valid checksums.  This
1570  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1571  * fix this with Xen but it hasn't appeared in an upstream release of
1572  * dhclient yet.
1573  *
1574  * To avoid breaking existing guests, we catch udp packets and add
1575  * checksums.  This is terrible but it's better than hacking the guest
1576  * kernels.
1577  *
1578  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1579  * we should provide a mechanism to disable it to avoid polluting the host
1580  * cache.
1581  */
1582 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1583                                         uint8_t *buf, size_t size)
1584 {
1585     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1586         (size > 27 && size < 1500) && /* normal sized MTU */
1587         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1588         (buf[23] == 17) && /* ip.protocol == UDP */
1589         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1590         net_checksum_calculate(buf, size, CSUM_UDP);
1591         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1592     }
1593 }
1594 
1595 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1596                            const void *buf, size_t size)
1597 {
1598     if (n->has_vnet_hdr) {
1599         /* FIXME this cast is evil */
1600         void *wbuf = (void *)buf;
1601         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1602                                     size - n->host_hdr_len);
1603 
1604         if (n->needs_vnet_hdr_swap) {
1605             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1606         }
1607         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1608     } else {
1609         struct virtio_net_hdr hdr = {
1610             .flags = 0,
1611             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1612         };
1613         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1614     }
1615 }
1616 
1617 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1618 {
1619     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1620     static const uint8_t vlan[] = {0x81, 0x00};
1621     uint8_t *ptr = (uint8_t *)buf;
1622     int i;
1623 
1624     if (n->promisc)
1625         return 1;
1626 
1627     ptr += n->host_hdr_len;
1628 
1629     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1630         int vid = lduw_be_p(ptr + 14) & 0xfff;
1631         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1632             return 0;
1633     }
1634 
1635     if (ptr[0] & 1) { /* multicast */
1636         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1637             return !n->nobcast;
1638         } else if (n->nomulti) {
1639             return 0;
1640         } else if (n->allmulti || n->mac_table.multi_overflow) {
1641             return 1;
1642         }
1643 
1644         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1645             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1646                 return 1;
1647             }
1648         }
1649     } else { /* unicast */
1650         if (n->nouni) {
1651             return 0;
1652         } else if (n->alluni || n->mac_table.uni_overflow) {
1653             return 1;
1654         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1655             return 1;
1656         }
1657 
1658         for (i = 0; i < n->mac_table.first_multi; i++) {
1659             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1660                 return 1;
1661             }
1662         }
1663     }
1664 
1665     return 0;
1666 }
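
/*
 * The VLAN filter above is a 4096-bit bitmap stored in uint32_t words;
 * as a sketch, membership of a 12-bit VLAN id works out to:
 *
 *     word = n->vlans[vid >> 5];           // 32 ids per word
 *     hit  = word & (1U << (vid & 0x1f));  // bit within that word
 *
 * e.g. vid 100 lands in word 3 (100 >> 5), bit 4 (100 & 0x1f).
 */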
1667 
1668 static uint8_t virtio_net_get_hash_type(bool isip4,
1669                                         bool isip6,
1670                                         bool isudp,
1671                                         bool istcp,
1672                                         uint32_t types)
1673 {
1674     if (isip4) {
1675         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1676             return NetPktRssIpV4Tcp;
1677         }
1678         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1679             return NetPktRssIpV4Udp;
1680         }
1681         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1682             return NetPktRssIpV4;
1683         }
1684     } else if (isip6) {
1685         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1686                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1687 
1688         if (istcp && (types & mask)) {
1689             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1690                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1691         }
1692         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1693         if (isudp && (types & mask)) {
1694             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1695                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1696         }
1697         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1698         if (types & mask) {
1699             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1700                 NetPktRssIpV6Ex : NetPktRssIpV6;
1701         }
1702     }
1703     return 0xff;
1704 }
1705 
1706 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1707                                    uint32_t hash)
1708 {
1709     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1710     hdr->hash_value = hash;
1711     hdr->hash_report = report;
1712 }
1713 
1714 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1715                                   size_t size)
1716 {
1717     VirtIONet *n = qemu_get_nic_opaque(nc);
1718     unsigned int index = nc->queue_index, new_index = index;
1719     struct NetRxPkt *pkt = n->rx_pkt;
1720     uint8_t net_hash_type;
1721     uint32_t hash;
1722     bool isip4, isip6, isudp, istcp;
1723     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1724         VIRTIO_NET_HASH_REPORT_IPv4,
1725         VIRTIO_NET_HASH_REPORT_TCPv4,
1726         VIRTIO_NET_HASH_REPORT_TCPv6,
1727         VIRTIO_NET_HASH_REPORT_IPv6,
1728         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1729         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1730         VIRTIO_NET_HASH_REPORT_UDPv4,
1731         VIRTIO_NET_HASH_REPORT_UDPv6,
1732         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1733     };
1734 
1735     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1736                              size - n->host_hdr_len);
1737     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1738     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1739         istcp = isudp = false;
1740     }
1741     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1742         istcp = isudp = false;
1743     }
1744     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1745                                              n->rss_data.hash_types);
1746     if (net_hash_type > NetPktRssIpV6UdpEx) {
1747         if (n->rss_data.populate_hash) {
1748             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1749         }
1750         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1751     }
1752 
1753     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1754 
1755     if (n->rss_data.populate_hash) {
1756         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1757     }
1758 
1759     if (n->rss_data.redirect) {
1760         new_index = hash & (n->rss_data.indirections_len - 1);
1761         new_index = n->rss_data.indirections_table[new_index];
1762     }
1763 
1764     return (index == new_index) ? -1 : new_index;
1765 }
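
/*
 * A sketch of the redirection step above, relying (as the code does) on
 * indirections_len being a power of two so that masking with len - 1
 * behaves like a modulo:
 *
 *     idx   = hash & (len - 1);         // pick an indirection slot
 *     queue = indirections_table[idx];  // the slot names an rx queue
 *
 * The function then returns -1 if the packet already sits on that
 * queue, or the new index so the caller can re-deliver it there.
 */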
1766 
1767 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1768                                       size_t size, bool no_rss)
1769 {
1770     VirtIONet *n = qemu_get_nic_opaque(nc);
1771     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1772     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1773     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1774     size_t lens[VIRTQUEUE_MAX_SIZE];
1775     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1776     struct virtio_net_hdr_mrg_rxbuf mhdr;
1777     unsigned mhdr_cnt = 0;
1778     size_t offset, i, guest_offset, j;
1779     ssize_t err;
1780 
1781     if (!virtio_net_can_receive(nc)) {
1782         return -1;
1783     }
1784 
1785     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1786         int index = virtio_net_process_rss(nc, buf, size);
1787         if (index >= 0) {
1788             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1789             return virtio_net_receive_rcu(nc2, buf, size, true);
1790         }
1791     }
1792 
1793     /* hdr_len refers to the header we supply to the guest */
1794     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1795         return 0;
1796     }
1797 
1798     if (!receive_filter(n, buf, size))
1799         return size;
1800 
1801     offset = i = 0;
1802 
1803     while (offset < size) {
1804         VirtQueueElement *elem;
1805         int len, total;
1806         const struct iovec *sg;
1807 
1808         total = 0;
1809 
1810         if (i == VIRTQUEUE_MAX_SIZE) {
1811             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1812             err = size;
1813             goto err;
1814         }
1815 
1816         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1817         if (!elem) {
1818             if (i) {
1819                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1820                              "i %zd mergeable %d offset %zd, size %zd, "
1821                              "guest hdr len %zd, host hdr len %zd "
1822                              "guest features 0x%" PRIx64,
1823                              i, n->mergeable_rx_bufs, offset, size,
1824                              n->guest_hdr_len, n->host_hdr_len,
1825                              vdev->guest_features);
1826             }
1827             err = -1;
1828             goto err;
1829         }
1830 
1831         if (elem->in_num < 1) {
1832             virtio_error(vdev,
1833                          "virtio-net receive queue contains no in buffers");
1834             virtqueue_detach_element(q->rx_vq, elem, 0);
1835             g_free(elem);
1836             err = -1;
1837             goto err;
1838         }
1839 
1840         sg = elem->in_sg;
1841         if (i == 0) {
1842             assert(offset == 0);
1843             if (n->mergeable_rx_bufs) {
1844                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1845                                     sg, elem->in_num,
1846                                     offsetof(typeof(mhdr), num_buffers),
1847                                     sizeof(mhdr.num_buffers));
1848             }
1849 
1850             receive_header(n, sg, elem->in_num, buf, size);
1851             if (n->rss_data.populate_hash) {
1852                 offset = sizeof(mhdr);
1853                 iov_from_buf(sg, elem->in_num, offset,
1854                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1855             }
1856             offset = n->host_hdr_len;
1857             total += n->guest_hdr_len;
1858             guest_offset = n->guest_hdr_len;
1859         } else {
1860             guest_offset = 0;
1861         }
1862 
1863         /* copy in packet.  ugh */
1864         len = iov_from_buf(sg, elem->in_num, guest_offset,
1865                            buf + offset, size - offset);
1866         total += len;
1867         offset += len;
1868         /* If buffers can't be merged, at this point we
1869          * must have consumed the complete packet.
1870          * Otherwise, drop it. */
1871         if (!n->mergeable_rx_bufs && offset < size) {
1872             virtqueue_unpop(q->rx_vq, elem, total);
1873             g_free(elem);
1874             err = size;
1875             goto err;
1876         }
1877 
1878         elems[i] = elem;
1879         lens[i] = total;
1880         i++;
1881     }
1882 
1883     if (mhdr_cnt) {
1884         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1885         iov_from_buf(mhdr_sg, mhdr_cnt,
1886                      0,
1887                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1888     }
1889 
1890     for (j = 0; j < i; j++) {
1891         /* signal other side */
1892         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1893         g_free(elems[j]);
1894     }
1895 
1896     virtqueue_flush(q->rx_vq, i);
1897     virtio_notify(vdev, q->rx_vq);
1898 
1899     return size;
1900 
1901 err:
1902     for (j = 0; j < i; j++) {
1903         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1904         g_free(elems[j]);
1905     }
1906 
1907     return err;
1908 }
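
/*
 * Mergeable rx buffers, sketched: num_buffers is only known once the
 * packet has been scattered across the guest's descriptor chains, so
 * the loop above first records where the field lives (mhdr_sg, via
 * iov_copy() over offsetof(..., num_buffers)) and back-patches it at
 * the end:
 *
 *     virtio_stw_p(vdev, &mhdr.num_buffers, i);  // i == chains used
 *     iov_from_buf(mhdr_sg, mhdr_cnt, 0,
 *                  &mhdr.num_buffers, sizeof mhdr.num_buffers);
 */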
1909 
1910 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1911                                   size_t size)
1912 {
1913     RCU_READ_LOCK_GUARD();
1914 
1915     return virtio_net_receive_rcu(nc, buf, size, false);
1916 }
1917 
1918 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1919                                          const uint8_t *buf,
1920                                          VirtioNetRscUnit *unit)
1921 {
1922     uint16_t ip_hdrlen;
1923     struct ip_header *ip;
1924 
1925     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1926                               + sizeof(struct eth_header));
1927     unit->ip = (void *)ip;
1928     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1929     unit->ip_plen = &ip->ip_len;
1930     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1931     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1932     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1933 }
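
/*
 * Worked example for the header-length arithmetic above: ip_ver_len and
 * th_offset_flags both store a length in 4-byte words, so
 * (x & 0xF) << 2 and (x & 0xF000) >> 10 (i.e. (x >> 12) * 4) both
 * convert words to bytes. For an option-less TCP header the top nibble
 * of th_offset_flags is 0x5, and 0x5000 >> 10 == 20 bytes.
 */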
1934 
1935 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1936                                          const uint8_t *buf,
1937                                          VirtioNetRscUnit *unit)
1938 {
1939     struct ip6_header *ip6;
1940 
1941     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1942                                  + sizeof(struct eth_header));
1943     unit->ip = ip6;
1944     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1945     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1946                                         + sizeof(struct ip6_header));
1947     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1948 
1949     /* There is a difference between the payload length in ipv4 and v6:
1950        the ip header itself is excluded from the ipv6 payload length */
1951     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1952 }
1953 
1954 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1955                                        VirtioNetRscSeg *seg)
1956 {
1957     int ret;
1958     struct virtio_net_hdr_v1 *h;
1959 
1960     h = (struct virtio_net_hdr_v1 *)seg->buf;
1961     h->flags = 0;
1962     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1963 
1964     if (seg->is_coalesced) {
1965         h->rsc.segments = seg->packets;
1966         h->rsc.dup_acks = seg->dup_ack;
1967         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1968         if (chain->proto == ETH_P_IP) {
1969             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1970         } else {
1971             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1972         }
1973     }
1974 
1975     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1976     QTAILQ_REMOVE(&chain->buffers, seg, next);
1977     g_free(seg->buf);
1978     g_free(seg);
1979 
1980     return ret;
1981 }
1982 
1983 static void virtio_net_rsc_purge(void *opq)
1984 {
1985     VirtioNetRscSeg *seg, *rn;
1986     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1987 
1988     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1989         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1990             chain->stat.purge_failed++;
1991             continue;
1992         }
1993     }
1994 
1995     chain->stat.timer++;
1996     if (!QTAILQ_EMPTY(&chain->buffers)) {
1997         timer_mod(chain->drain_timer,
1998               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1999     }
2000 }
2001 
2002 static void virtio_net_rsc_cleanup(VirtIONet *n)
2003 {
2004     VirtioNetRscChain *chain, *rn_chain;
2005     VirtioNetRscSeg *seg, *rn_seg;
2006 
2007     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2008         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2009             QTAILQ_REMOVE(&chain->buffers, seg, next);
2010             g_free(seg->buf);
2011             g_free(seg);
2012         }
2013 
2014         timer_free(chain->drain_timer);
2015         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2016         g_free(chain);
2017     }
2018 }
2019 
2020 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2021                                      NetClientState *nc,
2022                                      const uint8_t *buf, size_t size)
2023 {
2024     uint16_t hdr_len;
2025     VirtioNetRscSeg *seg;
2026 
2027     hdr_len = chain->n->guest_hdr_len;
2028     seg = g_new(VirtioNetRscSeg, 1);
2029     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2030         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2031     memcpy(seg->buf, buf, size);
2032     seg->size = size;
2033     seg->packets = 1;
2034     seg->dup_ack = 0;
2035     seg->is_coalesced = 0;
2036     seg->nc = nc;
2037 
2038     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2039     chain->stat.cache++;
2040 
2041     switch (chain->proto) {
2042     case ETH_P_IP:
2043         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2044         break;
2045     case ETH_P_IPV6:
2046         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2047         break;
2048     default:
2049         g_assert_not_reached();
2050     }
2051 }
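
/*
 * Note the allocation above: seg->buf is sized for the largest frame
 * the chain may grow to (headers plus VIRTIO_NET_MAX_TCP_PAYLOAD), not
 * for `size`, so later coalescing can append payload in place with a
 * plain memmove(seg->buf + seg->size, ...) and never reallocate.
 */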
2052 
2053 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2054                                          VirtioNetRscSeg *seg,
2055                                          const uint8_t *buf,
2056                                          struct tcp_header *n_tcp,
2057                                          struct tcp_header *o_tcp)
2058 {
2059     uint32_t nack, oack;
2060     uint16_t nwin, owin;
2061 
2062     nack = htonl(n_tcp->th_ack);
2063     nwin = htons(n_tcp->th_win);
2064     oack = htonl(o_tcp->th_ack);
2065     owin = htons(o_tcp->th_win);
2066 
2067     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2068         chain->stat.ack_out_of_win++;
2069         return RSC_FINAL;
2070     } else if (nack == oack) {
2071         /* duplicated ack or window probe */
2072         if (nwin == owin) {
2073             /* duplicated ack, bump the dup ack count; up to 1 for the whql test */
2074             chain->stat.dup_ack++;
2075             return RSC_FINAL;
2076         } else {
2077             /* Coalesce window update */
2078             o_tcp->th_win = n_tcp->th_win;
2079             chain->stat.win_update++;
2080             return RSC_COALESCE;
2081         }
2082     } else {
2083         /* pure ack, go to 'C', finalize */
2084         chain->stat.pure_ack++;
2085         return RSC_FINAL;
2086     }
2087 }
2088 
2089 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2090                                             VirtioNetRscSeg *seg,
2091                                             const uint8_t *buf,
2092                                             VirtioNetRscUnit *n_unit)
2093 {
2094     void *data;
2095     uint16_t o_ip_len;
2096     uint32_t nseq, oseq;
2097     VirtioNetRscUnit *o_unit;
2098 
2099     o_unit = &seg->unit;
2100     o_ip_len = htons(*o_unit->ip_plen);
2101     nseq = htonl(n_unit->tcp->th_seq);
2102     oseq = htonl(o_unit->tcp->th_seq);
2103 
2104     /* out of order or retransmitted. */
2105     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2106         chain->stat.data_out_of_win++;
2107         return RSC_FINAL;
2108     }
2109 
2110     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2111     if (nseq == oseq) {
2112         if ((o_unit->payload == 0) && n_unit->payload) {
2113             /* From no payload to payload: the normal case, not a dup ack etc. */
2114             chain->stat.data_after_pure_ack++;
2115             goto coalesce;
2116         } else {
2117             return virtio_net_rsc_handle_ack(chain, seg, buf,
2118                                              n_unit->tcp, o_unit->tcp);
2119         }
2120     } else if ((nseq - oseq) != o_unit->payload) {
2121         /* Not a consistent packet, out of order */
2122         chain->stat.data_out_of_order++;
2123         return RSC_FINAL;
2124     } else {
2125 coalesce:
2126         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2127             chain->stat.over_size++;
2128             return RSC_FINAL;
2129         }
2130 
2131         /* The right data has arrived; the payload length field differs between
2132            v4/v6, so use the field value to update and record the new data len */
2133         o_unit->payload += n_unit->payload; /* update new data len */
2134 
2135         /* update field in ip header */
2136         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2137 
2138         /* Carry over the latest 'PUSH' flag; the whql test guide says 'PUSH'
2139            can be coalesced for a windows guest, while this may change the
2140            behavior for a linux guest (only if it uses the RSC feature). */
2141         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2142 
2143         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2144         o_unit->tcp->th_win = n_unit->tcp->th_win;
2145 
2146         memmove(seg->buf + seg->size, data, n_unit->payload);
2147         seg->size += n_unit->payload;
2148         seg->packets++;
2149         chain->stat.coalesced++;
2150         return RSC_COALESCE;
2151     }
2152 }
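
/*
 * A note on the sequence arithmetic above: nseq and oseq are unsigned,
 * so (nseq - oseq) wraps and one comparison against
 * VIRTIO_NET_MAX_TCP_PAYLOAD rejects both far-ahead and behind
 * (retransmitted) segments. Sketch of the in-order continuation:
 *
 *     oseq = 1000, o_unit->payload = 100;
 *     nseq = 1100;              // nseq - oseq == o_unit->payload
 *
 * which is the case that reaches the coalesce label (alongside the
 * data-after-pure-ack goto).
 */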
2153 
2154 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2155                                         VirtioNetRscSeg *seg,
2156                                         const uint8_t *buf, size_t size,
2157                                         VirtioNetRscUnit *unit)
2158 {
2159     struct ip_header *ip1, *ip2;
2160 
2161     ip1 = (struct ip_header *)(unit->ip);
2162     ip2 = (struct ip_header *)(seg->unit.ip);
2163     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2164         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2165         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2166         chain->stat.no_match++;
2167         return RSC_NO_MATCH;
2168     }
2169 
2170     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2171 }
2172 
2173 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2174                                         VirtioNetRscSeg *seg,
2175                                         const uint8_t *buf, size_t size,
2176                                         VirtioNetRscUnit *unit)
2177 {
2178     struct ip6_header *ip1, *ip2;
2179 
2180     ip1 = (struct ip6_header *)(unit->ip);
2181     ip2 = (struct ip6_header *)(seg->unit.ip);
2182     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2183         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2184         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2185         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2186         chain->stat.no_match++;
2187         return RSC_NO_MATCH;
2188     }
2189 
2190     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2191 }
2192 
2193 /* Packets with 'SYN' should bypass; packets with other control flags
2194  * should be sent only after a drain, to prevent out-of-order delivery */
2195 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2196                                          struct tcp_header *tcp)
2197 {
2198     uint16_t tcp_hdr;
2199     uint16_t tcp_flag;
2200 
2201     tcp_flag = htons(tcp->th_offset_flags);
2202     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2203     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2204     if (tcp_flag & TH_SYN) {
2205         chain->stat.tcp_syn++;
2206         return RSC_BYPASS;
2207     }
2208 
2209     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2210         chain->stat.tcp_ctrl_drain++;
2211         return RSC_FINAL;
2212     }
2213 
2214     if (tcp_hdr > sizeof(struct tcp_header)) {
2215         chain->stat.tcp_all_opt++;
2216         return RSC_FINAL;
2217     }
2218 
2219     return RSC_CANDIDATE;
2220 }
2221 
2222 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2223                                          NetClientState *nc,
2224                                          const uint8_t *buf, size_t size,
2225                                          VirtioNetRscUnit *unit)
2226 {
2227     int ret;
2228     VirtioNetRscSeg *seg, *nseg;
2229 
2230     if (QTAILQ_EMPTY(&chain->buffers)) {
2231         chain->stat.empty_cache++;
2232         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2233         timer_mod(chain->drain_timer,
2234               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2235         return size;
2236     }
2237 
2238     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2239         if (chain->proto == ETH_P_IP) {
2240             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2241         } else {
2242             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2243         }
2244 
2245         if (ret == RSC_FINAL) {
2246             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2247                 /* Send failed */
2248                 chain->stat.final_failed++;
2249                 return 0;
2250             }
2251 
2252             /* Send current packet */
2253             return virtio_net_do_receive(nc, buf, size);
2254         } else if (ret == RSC_NO_MATCH) {
2255             continue;
2256         } else {
2257             /* Coalesced: set the flag so the cksum gets recalculated for ipv4 */
2258             seg->is_coalesced = 1;
2259             return size;
2260         }
2261     }
2262 
2263     chain->stat.no_match_cache++;
2264     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2265     return size;
2266 }
2267 
2268 /* Drain a connection's buffered data; this avoids out-of-order segments */
2269 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2270                                         NetClientState *nc,
2271                                         const uint8_t *buf, size_t size,
2272                                         uint16_t ip_start, uint16_t ip_size,
2273                                         uint16_t tcp_port)
2274 {
2275     VirtioNetRscSeg *seg, *nseg;
2276     uint32_t ppair1, ppair2;
2277 
2278     ppair1 = *(uint32_t *)(buf + tcp_port);
2279     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2280         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2281         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2282             || (ppair1 != ppair2)) {
2283             continue;
2284         }
2285         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2286             chain->stat.drain_failed++;
2287         }
2288 
2289         break;
2290     }
2291 
2292     return virtio_net_do_receive(nc, buf, size);
2293 }
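
/*
 * The ppair comparison above reads the TCP source and destination ports
 * as a single 32-bit load (they are adjacent at the start of the TCP
 * header), so a flow matches when both the address block and the port
 * pair are byte-identical; equality needs no byte-order conversion:
 *
 *     ppair1 = *(uint32_t *)(buf + tcp_port);       // sport|dport
 *     ppair2 = *(uint32_t *)(seg->buf + tcp_port);  // of a cached seg
 */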
2294 
2295 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2296                                             struct ip_header *ip,
2297                                             const uint8_t *buf, size_t size)
2298 {
2299     uint16_t ip_len;
2300 
2301     /* Not an ipv4 packet */
2302     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2303         chain->stat.ip_option++;
2304         return RSC_BYPASS;
2305     }
2306 
2307     /* Don't handle packets with ip option */
2308     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2309         chain->stat.ip_option++;
2310         return RSC_BYPASS;
2311     }
2312 
2313     if (ip->ip_p != IPPROTO_TCP) {
2314         chain->stat.bypass_not_tcp++;
2315         return RSC_BYPASS;
2316     }
2317 
2318     /* Don't handle packets with ip fragment */
2319     if (!(htons(ip->ip_off) & IP_DF)) {
2320         chain->stat.ip_frag++;
2321         return RSC_BYPASS;
2322     }
2323 
2324     /* Don't handle packets with ecn flag */
2325     if (IPTOS_ECN(ip->ip_tos)) {
2326         chain->stat.ip_ecn++;
2327         return RSC_BYPASS;
2328     }
2329 
2330     ip_len = htons(ip->ip_len);
2331     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2332         || ip_len > (size - chain->n->guest_hdr_len -
2333                      sizeof(struct eth_header))) {
2334         chain->stat.ip_hacked++;
2335         return RSC_BYPASS;
2336     }
2337 
2338     return RSC_CANDIDATE;
2339 }
2340 
2341 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2342                                       NetClientState *nc,
2343                                       const uint8_t *buf, size_t size)
2344 {
2345     int32_t ret;
2346     uint16_t hdr_len;
2347     VirtioNetRscUnit unit;
2348 
2349     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2350 
2351     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2352         + sizeof(struct tcp_header))) {
2353         chain->stat.bypass_not_tcp++;
2354         return virtio_net_do_receive(nc, buf, size);
2355     }
2356 
2357     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2358     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2359         != RSC_CANDIDATE) {
2360         return virtio_net_do_receive(nc, buf, size);
2361     }
2362 
2363     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2364     if (ret == RSC_BYPASS) {
2365         return virtio_net_do_receive(nc, buf, size);
2366     } else if (ret == RSC_FINAL) {
2367         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2368                 ((hdr_len + sizeof(struct eth_header)) + 12),
2369                 VIRTIO_NET_IP4_ADDR_SIZE,
2370                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2371     }
2372 
2373     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2374 }
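
/*
 * Offsets used above, as an illustrative summary: "+ 12" is where the
 * IPv4 source address starts inside the IP header, so the drain
 * compares saddr + daddr (8 bytes, VIRTIO_NET_IP4_ADDR_SIZE) from
 * there; the final argument is the offset of the TCP header, where the
 * port pair is read. The IPv6 path below does the same with "+ 8"
 * (ip6_src) and a 32-byte address block.
 */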
2375 
2376 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2377                                             struct ip6_header *ip6,
2378                                             const uint8_t *buf, size_t size)
2379 {
2380     uint16_t ip_len;
2381 
2382     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2383         != IP_HEADER_VERSION_6) {
2384         return RSC_BYPASS;
2385     }
2386 
2387     /* Both options and the protocol are checked by this test */
2388     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2389         chain->stat.bypass_not_tcp++;
2390         return RSC_BYPASS;
2391     }
2392 
2393     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2394     if (ip_len < sizeof(struct tcp_header) ||
2395         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2396                   - sizeof(struct ip6_header))) {
2397         chain->stat.ip_hacked++;
2398         return RSC_BYPASS;
2399     }
2400 
2401     /* Don't handle packets with ecn flag */
2402     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2403         chain->stat.ip_ecn++;
2404         return RSC_BYPASS;
2405     }
2406 
2407     return RSC_CANDIDATE;
2408 }
2409 
2410 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2411                                       const uint8_t *buf, size_t size)
2412 {
2413     int32_t ret;
2414     uint16_t hdr_len;
2415     VirtioNetRscChain *chain;
2416     VirtioNetRscUnit unit;
2417 
2418     chain = (VirtioNetRscChain *)opq;
2419     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2420 
2421     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2422         + sizeof(struct tcp_header))) {
2423         return virtio_net_do_receive(nc, buf, size);
2424     }
2425 
2426     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2427     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2428                                                  unit.ip, buf, size)) {
2429         return virtio_net_do_receive(nc, buf, size);
2430     }
2431 
2432     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2433     if (ret == RSC_BYPASS) {
2434         return virtio_net_do_receive(nc, buf, size);
2435     } else if (ret == RSC_FINAL) {
2436         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2437                 ((hdr_len + sizeof(struct eth_header)) + 8),
2438                 VIRTIO_NET_IP6_ADDR_SIZE,
2439                 hdr_len + sizeof(struct eth_header)
2440                 + sizeof(struct ip6_header));
2441     }
2442 
2443     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2444 }
2445 
2446 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2447                                                       NetClientState *nc,
2448                                                       uint16_t proto)
2449 {
2450     VirtioNetRscChain *chain;
2451 
2452     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2453         return NULL;
2454     }
2455 
2456     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2457         if (chain->proto == proto) {
2458             return chain;
2459         }
2460     }
2461 
2462     chain = g_malloc(sizeof(*chain));
2463     chain->n = n;
2464     chain->proto = proto;
2465     if (proto == (uint16_t)ETH_P_IP) {
2466         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2467         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2468     } else {
2469         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2470         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2471     }
2472     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2473                                       virtio_net_rsc_purge, chain);
2474     memset(&chain->stat, 0, sizeof(chain->stat));
2475 
2476     QTAILQ_INIT(&chain->buffers);
2477     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2478 
2479     return chain;
2480 }
2481 
2482 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2483                                       const uint8_t *buf,
2484                                       size_t size)
2485 {
2486     uint16_t proto;
2487     VirtioNetRscChain *chain;
2488     struct eth_header *eth;
2489     VirtIONet *n;
2490 
2491     n = qemu_get_nic_opaque(nc);
2492     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2493         return virtio_net_do_receive(nc, buf, size);
2494     }
2495 
2496     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2497     proto = htons(eth->h_proto);
2498 
2499     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2500     if (chain) {
2501         chain->stat.received++;
2502         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2503             return virtio_net_rsc_receive4(chain, nc, buf, size);
2504         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2505             return virtio_net_rsc_receive6(chain, nc, buf, size);
2506         }
2507     }
2508     return virtio_net_do_receive(nc, buf, size);
2509 }
2510 
2511 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2512                                   size_t size)
2513 {
2514     VirtIONet *n = qemu_get_nic_opaque(nc);
2515     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2516         return virtio_net_rsc_receive(nc, buf, size);
2517     } else {
2518         return virtio_net_do_receive(nc, buf, size);
2519     }
2520 }
2521 
2522 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2523 
2524 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2525 {
2526     VirtIONet *n = qemu_get_nic_opaque(nc);
2527     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2528     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2529 
2530     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2531     virtio_notify(vdev, q->tx_vq);
2532 
2533     g_free(q->async_tx.elem);
2534     q->async_tx.elem = NULL;
2535 
2536     virtio_queue_set_notification(q->tx_vq, 1);
2537     virtio_net_flush_tx(q);
2538 }
2539 
2540 /* TX */
2541 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2542 {
2543     VirtIONet *n = q->n;
2544     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2545     VirtQueueElement *elem;
2546     int32_t num_packets = 0;
2547     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2548     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2549         return num_packets;
2550     }
2551 
2552     if (q->async_tx.elem) {
2553         virtio_queue_set_notification(q->tx_vq, 0);
2554         return num_packets;
2555     }
2556 
2557     for (;;) {
2558         ssize_t ret;
2559         unsigned int out_num;
2560         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2561         struct virtio_net_hdr_mrg_rxbuf mhdr;
2562 
2563         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2564         if (!elem) {
2565             break;
2566         }
2567 
2568         out_num = elem->out_num;
2569         out_sg = elem->out_sg;
2570         if (out_num < 1) {
2571             virtio_error(vdev, "virtio-net header not in first element");
2572             virtqueue_detach_element(q->tx_vq, elem, 0);
2573             g_free(elem);
2574             return -EINVAL;
2575         }
2576 
2577         if (n->has_vnet_hdr) {
2578             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2579                 n->guest_hdr_len) {
2580                 virtio_error(vdev, "virtio-net header incorrect");
2581                 virtqueue_detach_element(q->tx_vq, elem, 0);
2582                 g_free(elem);
2583                 return -EINVAL;
2584             }
2585             if (n->needs_vnet_hdr_swap) {
2586                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2587                 sg2[0].iov_base = &mhdr;
2588                 sg2[0].iov_len = n->guest_hdr_len;
2589                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2590                                    out_sg, out_num,
2591                                    n->guest_hdr_len, -1);
2592                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2593                     goto drop;
2594                 }
2595                 out_num += 1;
2596                 out_sg = sg2;
2597             }
2598         }
2599         /*
2600          * If host wants to see the guest header as is, we can
2601          * pass it on unchanged. Otherwise, copy just the parts
2602          * that host is interested in.
2603          */
2604         assert(n->host_hdr_len <= n->guest_hdr_len);
2605         if (n->host_hdr_len != n->guest_hdr_len) {
2606             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2607                                        out_sg, out_num,
2608                                        0, n->host_hdr_len);
2609             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2610                              out_sg, out_num,
2611                              n->guest_hdr_len, -1);
2612             out_num = sg_num;
2613             out_sg = sg;
2614         }
2615 
2616         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2617                                       out_sg, out_num, virtio_net_tx_complete);
2618         if (ret == 0) {
2619             virtio_queue_set_notification(q->tx_vq, 0);
2620             q->async_tx.elem = elem;
2621             return -EBUSY;
2622         }
2623 
2624 drop:
2625         virtqueue_push(q->tx_vq, elem, 0);
2626         virtio_notify(vdev, q->tx_vq);
2627         g_free(elem);
2628 
2629         if (++num_packets >= n->tx_burst) {
2630             break;
2631         }
2632     }
2633     return num_packets;
2634 }
2635 
2636 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2637 {
2638     VirtIONet *n = VIRTIO_NET(vdev);
2639     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2640 
2641     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2642         virtio_net_drop_tx_queue_data(vdev, vq);
2643         return;
2644     }
2645 
2646     /* This happens when device was stopped but VCPU wasn't. */
2647     if (!vdev->vm_running) {
2648         q->tx_waiting = 1;
2649         return;
2650     }
2651 
2652     if (q->tx_waiting) {
2653         virtio_queue_set_notification(vq, 1);
2654         timer_del(q->tx_timer);
2655         q->tx_waiting = 0;
2656         if (virtio_net_flush_tx(q) == -EINVAL) {
2657             return;
2658         }
2659     } else {
2660         timer_mod(q->tx_timer,
2661                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2662         q->tx_waiting = 1;
2663         virtio_queue_set_notification(vq, 0);
2664     }
2665 }
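
/*
 * Sketch of the timer-based tx batching above: the first kick arms the
 * timer, sets tx_waiting and disables notifications; the queue is then
 * flushed either when the timer fires (virtio_net_tx_timer) or when a
 * second kick arrives while tx_waiting is still set. Roughly:
 *
 *     kick #1:                arm timer (+tx_timeout ns), notify off
 *     kick #2 (timer armed):  cancel timer, flush immediately
 *     timer fires:            flush whatever accumulated
 */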
2666 
2667 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2668 {
2669     VirtIONet *n = VIRTIO_NET(vdev);
2670     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2671 
2672     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2673         virtio_net_drop_tx_queue_data(vdev, vq);
2674         return;
2675     }
2676 
2677     if (unlikely(q->tx_waiting)) {
2678         return;
2679     }
2680     q->tx_waiting = 1;
2681     /* This happens when device was stopped but VCPU wasn't. */
2682     if (!vdev->vm_running) {
2683         return;
2684     }
2685     virtio_queue_set_notification(vq, 0);
2686     qemu_bh_schedule(q->tx_bh);
2687 }
2688 
2689 static void virtio_net_tx_timer(void *opaque)
2690 {
2691     VirtIONetQueue *q = opaque;
2692     VirtIONet *n = q->n;
2693     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2694     /* This happens when device was stopped but BH wasn't. */
2695     if (!vdev->vm_running) {
2696         /* Make sure tx waiting is set, so we'll run when restarted. */
2697         assert(q->tx_waiting);
2698         return;
2699     }
2700 
2701     q->tx_waiting = 0;
2702 
2703     /* Just in case the driver is not ready anymore */
2704     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2705         return;
2706     }
2707 
2708     virtio_queue_set_notification(q->tx_vq, 1);
2709     virtio_net_flush_tx(q);
2710 }
2711 
2712 static void virtio_net_tx_bh(void *opaque)
2713 {
2714     VirtIONetQueue *q = opaque;
2715     VirtIONet *n = q->n;
2716     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2717     int32_t ret;
2718 
2719     /* This happens when device was stopped but BH wasn't. */
2720     if (!vdev->vm_running) {
2721         /* Make sure tx waiting is set, so we'll run when restarted. */
2722         assert(q->tx_waiting);
2723         return;
2724     }
2725 
2726     q->tx_waiting = 0;
2727 
2728     /* Just in case the driver is not ready anymore */
2729     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2730         return;
2731     }
2732 
2733     ret = virtio_net_flush_tx(q);
2734     if (ret == -EBUSY || ret == -EINVAL) {
2735         return; /* Notification re-enable handled by tx_complete or device
2736                  * broken */
2737     }
2738 
2739     /* If we flush a full burst of packets, assume there are
2740      * more coming and immediately reschedule */
2741     if (ret >= n->tx_burst) {
2742         qemu_bh_schedule(q->tx_bh);
2743         q->tx_waiting = 1;
2744         return;
2745     }
2746 
2747     /* If less than a full burst, re-enable notification and flush
2748      * anything that may have come in while we weren't looking.  If
2749      * we find something, assume the guest is still active and reschedule */
2750     virtio_queue_set_notification(q->tx_vq, 1);
2751     ret = virtio_net_flush_tx(q);
2752     if (ret == -EINVAL) {
2753         return;
2754     } else if (ret > 0) {
2755         virtio_queue_set_notification(q->tx_vq, 0);
2756         qemu_bh_schedule(q->tx_bh);
2757         q->tx_waiting = 1;
2758     }
2759 }
2760 
2761 static void virtio_net_add_queue(VirtIONet *n, int index)
2762 {
2763     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2764 
2765     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2766                                            virtio_net_handle_rx);
2767 
2768     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2769         n->vqs[index].tx_vq =
2770             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2771                              virtio_net_handle_tx_timer);
2772         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2773                                               virtio_net_tx_timer,
2774                                               &n->vqs[index]);
2775     } else {
2776         n->vqs[index].tx_vq =
2777             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2778                              virtio_net_handle_tx_bh);
2779         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2780     }
2781 
2782     n->vqs[index].tx_waiting = 0;
2783     n->vqs[index].n = n;
2784 }
2785 
2786 static void virtio_net_del_queue(VirtIONet *n, int index)
2787 {
2788     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2789     VirtIONetQueue *q = &n->vqs[index];
2790     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2791 
2792     qemu_purge_queued_packets(nc);
2793 
2794     virtio_del_queue(vdev, index * 2);
2795     if (q->tx_timer) {
2796         timer_free(q->tx_timer);
2797         q->tx_timer = NULL;
2798     } else {
2799         qemu_bh_delete(q->tx_bh);
2800         q->tx_bh = NULL;
2801     }
2802     q->tx_waiting = 0;
2803     virtio_del_queue(vdev, index * 2 + 1);
2804 }
2805 
2806 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2807 {
2808     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2809     int old_num_queues = virtio_get_num_queues(vdev);
2810     int new_num_queues = new_max_queue_pairs * 2 + 1;
2811     int i;
2812 
2813     assert(old_num_queues >= 3);
2814     assert(old_num_queues % 2 == 1);
2815 
2816     if (old_num_queues == new_num_queues) {
2817         return;
2818     }
2819 
2820     /*
2821      * We always need to remove and add ctrl vq if
2822      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2823      * and then we only enter one of the following two loops.
2824      */
2825     virtio_del_queue(vdev, old_num_queues - 1);
2826 
2827     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2828         /* new_num_queues < old_num_queues */
2829         virtio_net_del_queue(n, i / 2);
2830     }
2831 
2832     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2833         /* new_num_queues > old_num_queues */
2834         virtio_net_add_queue(n, i / 2);
2835     }
2836 
2837     /* add ctrl_vq last */
2838     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2839 }
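
/*
 * Queue layout assumed by the index arithmetic here and in
 * virtio_net_del_queue() (rx = index * 2, tx = index * 2 + 1):
 *
 *     vq 0: rx0    vq 1: tx0
 *     vq 2: rx1    vq 3: tx1
 *     ...
 *     vq 2N: ctrl            // always last, hence the del/re-add dance
 *
 * so new_num_queues == new_max_queue_pairs * 2 + 1 and the two loops
 * above walk only the pair range that actually changes.
 */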
2840 
2841 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2842 {
2843     int max = multiqueue ? n->max_queue_pairs : 1;
2844 
2845     n->multiqueue = multiqueue;
2846     virtio_net_change_num_queue_pairs(n, max);
2847 
2848     virtio_net_set_queue_pairs(n);
2849 }
2850 
2851 static int virtio_net_post_load_device(void *opaque, int version_id)
2852 {
2853     VirtIONet *n = opaque;
2854     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2855     int i, link_down;
2856 
2857     trace_virtio_net_post_load_device();
2858     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2859                                virtio_vdev_has_feature(vdev,
2860                                                        VIRTIO_F_VERSION_1),
2861                                virtio_vdev_has_feature(vdev,
2862                                                        VIRTIO_NET_F_HASH_REPORT));
2863 
2864     /* MAC_TABLE_ENTRIES may be different from the saved image */
2865     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2866         n->mac_table.in_use = 0;
2867     }
2868 
2869     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2870         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2871     }
2872 
2873     /*
2874      * curr_guest_offloads will be later overwritten by the
2875      * virtio_set_features_nocheck call done from the virtio_load.
2876      * Here we make sure it is preserved and restored accordingly
2877      * in the virtio_net_post_load_virtio callback.
2878      */
2879     n->saved_guest_offloads = n->curr_guest_offloads;
2880 
2881     virtio_net_set_queue_pairs(n);
2882 
2883     /* Find the first multicast entry in the saved MAC filter */
2884     for (i = 0; i < n->mac_table.in_use; i++) {
2885         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2886             break;
2887         }
2888     }
2889     n->mac_table.first_multi = i;
2890 
2891     /* nc.link_down can't be migrated, so infer link_down according
2892      * to link status bit in n->status */
2893     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2894     for (i = 0; i < n->max_queue_pairs; i++) {
2895         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2896     }
2897 
2898     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2899         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2900         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2901                                   QEMU_CLOCK_VIRTUAL,
2902                                   virtio_net_announce_timer, n);
2903         if (n->announce_timer.round) {
2904             timer_mod(n->announce_timer.tm,
2905                       qemu_clock_get_ms(n->announce_timer.type));
2906         } else {
2907             qemu_announce_timer_del(&n->announce_timer, false);
2908         }
2909     }
2910 
2911     if (n->rss_data.enabled) {
2912         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2913         if (!n->rss_data.populate_hash) {
2914             if (!virtio_net_attach_epbf_rss(n)) {
2915                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2916                     warn_report("Can't post-load eBPF RSS for vhost");
2917                 } else {
2918                     warn_report("Can't post-load eBPF RSS - "
2919                                 "fallback to software RSS");
2920                     n->rss_data.enabled_software_rss = true;
2921                 }
2922             }
2923         }
2924 
2925         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2926                                     n->rss_data.indirections_len,
2927                                     sizeof(n->rss_data.key));
2928     } else {
2929         trace_virtio_net_rss_disable();
2930     }
2931     return 0;
2932 }
2933 
2934 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2935 {
2936     VirtIONet *n = VIRTIO_NET(vdev);
2937     /*
2938      * The actual needed state is now in saved_guest_offloads,
2939      * see virtio_net_post_load_device for detail.
2940      * Restore it back and apply the desired offloads.
2941      */
2942     n->curr_guest_offloads = n->saved_guest_offloads;
2943     if (peer_has_vnet_hdr(n)) {
2944         virtio_net_apply_guest_offloads(n);
2945     }
2946 
2947     return 0;
2948 }
2949 
2950 /* tx_waiting field of a VirtIONetQueue */
2951 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2952     .name = "virtio-net-queue-tx_waiting",
2953     .fields = (VMStateField[]) {
2954         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2955         VMSTATE_END_OF_LIST()
2956     },
2957 };
2958 
2959 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2960 {
2961     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2962 }
2963 
2964 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2965 {
2966     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2967                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2968 }
2969 
2970 static bool mac_table_fits(void *opaque, int version_id)
2971 {
2972     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2973 }
2974 
2975 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2976 {
2977     return !mac_table_fits(opaque, version_id);
2978 }
2979 
2980 /* This temporary type is shared by all the WITH_TMP methods
2981  * although only some fields are used by each.
2982  */
2983 struct VirtIONetMigTmp {
2984     VirtIONet      *parent;
2985     VirtIONetQueue *vqs_1;
2986     uint16_t        curr_queue_pairs_1;
2987     uint8_t         has_ufo;
2988     uint32_t        has_vnet_hdr;
2989 };
2990 
2991 /* The 2nd and subsequent tx_waiting flags are loaded later than
2992  * the 1st entry in the queue_pairs and only if there's more than one
2993  * entry.  We use the tmp mechanism to calculate a temporary
2994  * pointer and count and also validate the count.
2995  */
2996 
2997 static int virtio_net_tx_waiting_pre_save(void *opaque)
2998 {
2999     struct VirtIONetMigTmp *tmp = opaque;
3000 
3001     tmp->vqs_1 = tmp->parent->vqs + 1;
3002     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3003     if (tmp->parent->curr_queue_pairs == 0) {
3004         tmp->curr_queue_pairs_1 = 0;
3005     }
3006 
3007     return 0;
3008 }
3009 
3010 static int virtio_net_tx_waiting_pre_load(void *opaque)
3011 {
3012     struct VirtIONetMigTmp *tmp = opaque;
3013 
3014     /* Reuse the pointer setup from save */
3015     virtio_net_tx_waiting_pre_save(opaque);
3016 
3017     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3018         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3019             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3020 
3021         return -EINVAL;
3022     }
3023 
3024     return 0; /* all good */
3025 }
3026 
3027 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3028     .name      = "virtio-net-tx_waiting",
3029     .pre_load  = virtio_net_tx_waiting_pre_load,
3030     .pre_save  = virtio_net_tx_waiting_pre_save,
3031     .fields    = (VMStateField[]) {
3032         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3033                                      curr_queue_pairs_1,
3034                                      vmstate_virtio_net_queue_tx_waiting,
3035                                      struct VirtIONetQueue),
3036         VMSTATE_END_OF_LIST()
3037     },
3038 };
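
/*
 * Usage sketch for the tmp mechanism: VMSTATE_WITH_TMP (used in the
 * device vmstate below) allocates a struct VirtIONetMigTmp around
 * save/load, points tmp->parent at the VirtIONet, and then runs this
 * description, which lets pre_save/pre_load derive the vqs_1 pointer
 * and the curr_queue_pairs_1 count that the VARRAY field expects.
 */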
3039 
3040 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3041  * flag set we need to check that we have it
3042  */
3043 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3044 {
3045     struct VirtIONetMigTmp *tmp = opaque;
3046 
3047     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3048         error_report("virtio-net: saved image requires TUN_F_UFO support");
3049         return -EINVAL;
3050     }
3051 
3052     return 0;
3053 }
3054 
3055 static int virtio_net_ufo_pre_save(void *opaque)
3056 {
3057     struct VirtIONetMigTmp *tmp = opaque;
3058 
3059     tmp->has_ufo = tmp->parent->has_ufo;
3060 
3061     return 0;
3062 }
3063 
3064 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3065     .name      = "virtio-net-ufo",
3066     .post_load = virtio_net_ufo_post_load,
3067     .pre_save  = virtio_net_ufo_pre_save,
3068     .fields    = (VMStateField[]) {
3069         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3070         VMSTATE_END_OF_LIST()
3071     },
3072 };
3073 
3074 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3075  * flag set we need to check that we have it
3076  */
3077 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3078 {
3079     struct VirtIONetMigTmp *tmp = opaque;
3080 
3081     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3082         error_report("virtio-net: saved image requires vnet_hdr=on");
3083         return -EINVAL;
3084     }
3085 
3086     return 0;
3087 }
3088 
3089 static int virtio_net_vnet_pre_save(void *opaque)
3090 {
3091     struct VirtIONetMigTmp *tmp = opaque;
3092 
3093     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3094 
3095     return 0;
3096 }
3097 
3098 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3099     .name      = "virtio-net-vnet",
3100     .post_load = virtio_net_vnet_post_load,
3101     .pre_save  = virtio_net_vnet_pre_save,
3102     .fields    = (VMStateField[]) {
3103         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3104         VMSTATE_END_OF_LIST()
3105     },
3106 };
3107 
3108 static bool virtio_net_rss_needed(void *opaque)
3109 {
3110     return VIRTIO_NET(opaque)->rss_data.enabled;
3111 }
3112 
3113 static const VMStateDescription vmstate_virtio_net_rss = {
3114     .name      = "virtio-net-device/rss",
3115     .version_id = 1,
3116     .minimum_version_id = 1,
3117     .needed = virtio_net_rss_needed,
3118     .fields = (VMStateField[]) {
3119         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3120         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3121         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3122         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3123         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3124         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3125         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3126                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3127         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3128                                     rss_data.indirections_len, 0,
3129                                     vmstate_info_uint16, uint16_t),
3130         VMSTATE_END_OF_LIST()
3131     },
3132 };
3133 
3134 static const VMStateDescription vmstate_virtio_net_device = {
3135     .name = "virtio-net-device",
3136     .version_id = VIRTIO_NET_VM_VERSION,
3137     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3138     .post_load = virtio_net_post_load_device,
3139     .fields = (VMStateField[]) {
3140         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3141         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3142                                vmstate_virtio_net_queue_tx_waiting,
3143                                VirtIONetQueue),
3144         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3145         VMSTATE_UINT16(status, VirtIONet),
3146         VMSTATE_UINT8(promisc, VirtIONet),
3147         VMSTATE_UINT8(allmulti, VirtIONet),
3148         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3149 
3150         /* Guarded pair: if it fits we load it, else we throw it away
3151          * - this can happen if the source has a larger MAC table; post-load
3152          * sets flags in this case.
3153          */
3154         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3155                                  0, mac_table_fits, mac_table.in_use,
3156                                  ETH_ALEN),
3157         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3158                                      mac_table.in_use, ETH_ALEN),
3159 
3160         /* Note: this is an array of uint32_t values that has always been
3161          * saved as a raw buffer, so no endianness conversion is applied;
3162          * it is actually used as a bitmap built on those uints.
3163          */
3164         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3165         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3166                          vmstate_virtio_net_has_vnet),
3167         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3168         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3169         VMSTATE_UINT8(alluni, VirtIONet),
3170         VMSTATE_UINT8(nomulti, VirtIONet),
3171         VMSTATE_UINT8(nouni, VirtIONet),
3172         VMSTATE_UINT8(nobcast, VirtIONet),
3173         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3174                          vmstate_virtio_net_has_ufo),
3175         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3176                             vmstate_info_uint16_equal, uint16_t),
3177         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3178         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3179                          vmstate_virtio_net_tx_waiting),
3180         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3181                             has_ctrl_guest_offloads),
3182         VMSTATE_END_OF_LIST()
3183     },
3184     .subsections = (const VMStateDescription * []) {
3185         &vmstate_virtio_net_rss,
3186         NULL
3187     }
3188 };
3189 
3190 static NetClientInfo net_virtio_info = {
3191     .type = NET_CLIENT_DRIVER_NIC,
3192     .size = sizeof(NICState),
3193     .can_receive = virtio_net_can_receive,
3194     .receive = virtio_net_receive,
3195     .link_status_changed = virtio_net_set_link_status,
3196     .query_rx_filter = virtio_net_query_rxfilter,
3197     .announce = virtio_net_announce,
3198 };
3199 
3200 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3201 {
3202     VirtIONet *n = VIRTIO_NET(vdev);
3203     NetClientState *nc;
3204     assert(n->vhost_started);
3205     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3206         /* Must guard against invalid features and a bogus queue index
3207          * being set by a malicious guest, or slipping in through a
3208          * buggy migration stream.
3209          */
3210         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3211             qemu_log_mask(LOG_GUEST_ERROR,
3212                           "%s: bogus vq index ignored\n", __func__);
3213             return false;
3214         }
3215         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3216     } else {
3217         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3218     }
3219     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3220 }
3221 
3222 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3223                                            bool mask)
3224 {
3225     VirtIONet *n = VIRTIO_NET(vdev);
3226     NetClientState *nc;
3227     assert(n->vhost_started);
3228     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3229         /* Must guard against invalid features and a bogus queue index
3230          * being set by a malicious guest, or slipping in through a
3231          * buggy migration stream.
3232          */
3233         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3234             qemu_log_mask(LOG_GUEST_ERROR,
3235                           "%s: bogus vq index ignored\n", __func__);
3236             return;
3237         }
3238         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3239     } else {
3240         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3241     }
3242     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3243                              vdev, idx, mask);
3244 }
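
     /*
      * For reference, the virtqueue index layout the two callbacks above
      * rely on (vq2q() is simply idx / 2):
      *
      *   without VIRTIO_NET_F_MQ: 0 = rx0, 1 = tx0, 2 = ctrl
      *   with VIRTIO_NET_F_MQ:    2n = rx(n), 2n + 1 = tx(n),
      *                            2 * max_queue_pairs = ctrl
      *
      * Hence the special-casing of idx == 2: it is the control queue only
      * when MQ is off, and its NetClientState is the subqueue at index
      * max_queue_pairs.
      */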
3245 
3246 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3247 {
3248     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3249 
3250     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3251 }
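
     /*
      * host_features is passed by value, so forcing VIRTIO_NET_F_MAC above
      * only affects the size computation: the mac field stays visible in
      * config space even if the feature ends up not being negotiated,
      * while the feature bits actually offered to the guest are untouched.
      */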
3252 
3253 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3254                                    const char *type)
3255 {
3256     /*
3257      * The name can be NULL; in that case the netclient name will be type.x.
3258      */
3259     assert(type != NULL);
3260 
3261     g_free(n->netclient_name);
3262     g_free(n->netclient_type);
3263     n->netclient_name = g_strdup(name);
3264     n->netclient_type = g_strdup(type);
3265 }
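
     /*
      * Transports use this to name the netclient after themselves; for
      * example, a PCI transport would do roughly:
      *
      *     virtio_net_set_netclient_name(&dev->vdev, qdev->id,
      *                                   object_get_typename(OBJECT(qdev)));
      *
      * It has to run before realize, which checks netclient_type to decide
      * how to create the NIC.
      */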
3266 
3267 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3268 {
3269     HotplugHandler *hotplug_ctrl;
3270     PCIDevice *pci_dev;
3271     Error *err = NULL;
3272 
3273     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3274     if (hotplug_ctrl) {
3275         pci_dev = PCI_DEVICE(dev);
3276         pci_dev->partially_hotplugged = true;
3277         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3278         if (err) {
3279             error_report_err(err);
3280             return false;
3281         }
3282     } else {
3283         return false;
3284     }
3285     return true;
3286 }
3287 
3288 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3289                                     Error **errp)
3290 {
3291     Error *err = NULL;
3292     HotplugHandler *hotplug_ctrl;
3293     PCIDevice *pdev = PCI_DEVICE(dev);
3294     BusState *primary_bus;
3295 
3296     if (!pdev->partially_hotplugged) {
3297         return true;
3298     }
3299     primary_bus = dev->parent_bus;
3300     if (!primary_bus) {
3301         error_setg(errp, "virtio_net: couldn't find primary bus");
3302         return false;
3303     }
3304     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3305     qatomic_set(&n->failover_primary_hidden, false);
3306     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3307     if (hotplug_ctrl) {
3308         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3309         if (err) {
3310             goto out;
3311         }
3312         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3313     }
3314     pdev->partially_hotplugged = false;
3315 
3316 out:
3317     error_propagate(errp, err);
3318     return !err;
3319 }
3320 
3321 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3322 {
3323     bool should_be_hidden;
3324     Error *err = NULL;
3325     DeviceState *dev = failover_find_primary_device(n);
3326 
3327     if (!dev) {
3328         return;
3329     }
3330 
3331     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3332 
3333     if (migration_in_setup(s) && !should_be_hidden) {
3334         if (failover_unplug_primary(n, dev)) {
3335             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3336             qapi_event_send_unplug_primary(dev->id);
3337             qatomic_set(&n->failover_primary_hidden, true);
3338         } else {
3339             warn_report("couldn't unplug primary device");
3340         }
3341     } else if (migration_has_failed(s)) {
3342         /* We already unplugged the device; let's plug it back. */
3343         if (!failover_replug_primary(n, dev, &err)) {
3344             if (err) {
3345                 error_report_err(err);
3346             }
3347         }
3348     }
3349 }
3350 
3351 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3352 {
3353     MigrationState *s = data;
3354     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3355     virtio_net_handle_migration_primary(n, s);
3356 }
3357 
3358 static bool failover_hide_primary_device(DeviceListener *listener,
3359                                          const QDict *device_opts,
3360                                          bool from_json,
3361                                          Error **errp)
3362 {
3363     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3364     const char *standby_id;
3365 
3366     if (!device_opts) {
3367         return false;
3368     }
3369 
3370     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3371         return false;
3372     }
3373 
3374     if (!qdict_haskey(device_opts, "id")) {
3375         error_setg(errp, "Device with failover_pair_id needs to have id");
3376         return false;
3377     }
3378 
3379     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3380     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3381         return false;
3382     }
3383 
3384     /*
3385      * The hide helper can be called several times for a given device.
3386      * Check that there is only one primary per virtio-net device, but
3387      * don't clone the qdict again if the helper is re-invoked for the
3388      * same device.
3389      */
3390     if (n->primary_opts) {
3391         const char *old, *new;
3392         /* devices with failover_pair_id always have an id */
3393         old = qdict_get_str(n->primary_opts, "id");
3394         new = qdict_get_str(device_opts, "id");
3395         if (strcmp(old, new) != 0) {
3396             error_setg(errp, "Cannot attach more than one primary device to "
3397                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3398             return false;
3399         }
3400     } else {
3401         n->primary_opts = qdict_clone_shallow(device_opts);
3402         n->primary_opts_from_json = from_json;
3403     }
3404 
3405     /* failover_primary_hidden is set during feature negotiation */
3406     return qatomic_read(&n->failover_primary_hidden);
3407 }
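
     /*
      * To illustrate the failover plumbing above, a minimal (illustrative
      * only) command line pairing a primary VFIO device with this standby
      * virtio-net device could look like:
      *
      *   -device virtio-net-pci,netdev=net0,id=standby0,failover=on
      *   -device vfio-pci,host=5e:00.2,id=primary0,failover_pair_id=standby0
      *
      * The hide helper keeps the primary unrealized until the guest acks
      * VIRTIO_NET_F_STANDBY; the migration notifier above then unplugs the
      * primary when migration starts and replugs it if migration fails.
      */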
3408 
3409 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3410 {
3411     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3412     VirtIONet *n = VIRTIO_NET(dev);
3413     NetClientState *nc;
3414     int i;
3415 
3416     if (n->net_conf.mtu) {
3417         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3418     }
3419 
3420     if (n->net_conf.duplex_str) {
3421         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3422             n->net_conf.duplex = DUPLEX_HALF;
3423         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3424             n->net_conf.duplex = DUPLEX_FULL;
3425         } else {
3426             error_setg(errp, "'duplex' must be 'half' or 'full'");
3427             return;
3428         }
3429         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3430     } else {
3431         n->net_conf.duplex = DUPLEX_UNKNOWN;
3432     }
3433 
3434     if (n->net_conf.speed < SPEED_UNKNOWN) {
3435         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3436         return;
3437     }
3438     if (n->net_conf.speed >= 0) {
3439         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3440     }
3441 
3442     if (n->failover) {
3443         n->primary_listener.hide_device = failover_hide_primary_device;
3444         qatomic_set(&n->failover_primary_hidden, true);
3445         device_listener_register(&n->primary_listener);
3446         n->migration_state.notify = virtio_net_migration_state_notifier;
3447         add_migration_state_change_notifier(&n->migration_state);
3448         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3449     }
3450 
3451     virtio_net_set_config_size(n, n->host_features);
3452     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3453 
3454     /*
3455      * We set the lower limit on the RX queue size to its historical value.
3456      * Guests that want a smaller ring can always resize it without
3457      * help from us (using virtio 1 and up).
3458      */
3459     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3460         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3461         !is_power_of_2(n->net_conf.rx_queue_size)) {
3462         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3463                    "must be a power of 2 between %d and %d.",
3464                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3465                    VIRTQUEUE_MAX_SIZE);
3466         virtio_cleanup(vdev);
3467         return;
3468     }
3469 
3470     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3471         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3472         !is_power_of_2(n->net_conf.tx_queue_size)) {
3473         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3474                    "must be a power of 2 between %d and %d",
3475                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3476                    VIRTQUEUE_MAX_SIZE);
3477         virtio_cleanup(vdev);
3478         return;
3479     }
3480 
3481     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3482 
3483     /*
3484      * Figure out the datapath queue pairs since the backend could
3485      * provide a control queue via its peers as well.
3486      */
3487     if (n->nic_conf.peers.queues) {
3488         for (i = 0; i < n->max_ncs; i++) {
3489             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3490                 ++n->max_queue_pairs;
3491             }
3492         }
3493     }
3494     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3495 
3496     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3497         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3498                    "must be a positive integer less than or equal to %d.",
3499                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3500         virtio_cleanup(vdev);
3501         return;
3502     }
3503     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3504     n->curr_queue_pairs = 1;
3505     n->tx_timeout = n->net_conf.txtimer;
3506 
3507     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3508                        && strcmp(n->net_conf.tx, "bh")) {
3509         warn_report("virtio-net: "
3510                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3511                     n->net_conf.tx);
3512         error_printf("Defaulting to \"bh\"\n");
3513     }
3514 
3515     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3516                                     n->net_conf.tx_queue_size);
3517 
3518     for (i = 0; i < n->max_queue_pairs; i++) {
3519         virtio_net_add_queue(n, i);
3520     }
3521 
3522     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3523     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3524     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3525     n->status = VIRTIO_NET_S_LINK_UP;
3526     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3527                               QEMU_CLOCK_VIRTUAL,
3528                               virtio_net_announce_timer, n);
3529     n->announce_timer.round = 0;
3530 
3531     if (n->netclient_type) {
3532         /*
3533          * This happens when virtio_net_set_netclient_name() has been called.
3534          */
3535         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3536                               n->netclient_type, n->netclient_name, n);
3537     } else {
3538         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3539                               object_get_typename(OBJECT(dev)), dev->id, n);
3540     }
3541 
3542     for (i = 0; i < n->max_queue_pairs; i++) {
3543         n->nic->ncs[i].do_not_pad = true;
3544     }
3545 
3546     peer_test_vnet_hdr(n);
3547     if (peer_has_vnet_hdr(n)) {
3548         for (i = 0; i < n->max_queue_pairs; i++) {
3549             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3550         }
3551         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3552     } else {
3553         n->host_hdr_len = 0;
3554     }
3555 
3556     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3557 
3558     n->vqs[0].tx_waiting = 0;
3559     n->tx_burst = n->net_conf.txburst;
3560     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3561     n->promisc = 1; /* for compatibility */
3562 
3563     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3564 
3565     n->vlans = g_malloc0(MAX_VLAN >> 3);
3566 
3567     nc = qemu_get_queue(n->nic);
3568     nc->rxfilter_notify_enabled = 1;
3569 
3570     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3571         struct virtio_net_config netcfg = {};
3572         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3573         vhost_net_set_config(get_vhost_net(nc->peer),
3574             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3575     }
3576     QTAILQ_INIT(&n->rsc_chains);
3577     n->qdev = dev;
3578 
3579     net_rx_pkt_init(&n->rx_pkt, false);
3580 
3581     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3582         virtio_net_load_ebpf(n);
3583     }
3584 }
3585 
3586 static void virtio_net_device_unrealize(DeviceState *dev)
3587 {
3588     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3589     VirtIONet *n = VIRTIO_NET(dev);
3590     int i, max_queue_pairs;
3591 
3592     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3593         virtio_net_unload_ebpf(n);
3594     }
3595 
3596     /* This will stop vhost backend if appropriate. */
3597     virtio_net_set_status(vdev, 0);
3598 
3599     g_free(n->netclient_name);
3600     n->netclient_name = NULL;
3601     g_free(n->netclient_type);
3602     n->netclient_type = NULL;
3603 
3604     g_free(n->mac_table.macs);
3605     g_free(n->vlans);
3606 
3607     if (n->failover) {
3608         qobject_unref(n->primary_opts);
3609         device_listener_unregister(&n->primary_listener);
3610         remove_migration_state_change_notifier(&n->migration_state);
3611     } else {
3612         assert(n->primary_opts == NULL);
3613     }
3614 
3615     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3616     for (i = 0; i < max_queue_pairs; i++) {
3617         virtio_net_del_queue(n, i);
3618     }
3619     /* also delete the control vq */
3620     virtio_del_queue(vdev, max_queue_pairs * 2);
3621     qemu_announce_timer_del(&n->announce_timer, false);
3622     g_free(n->vqs);
3623     qemu_del_nic(n->nic);
3624     virtio_net_rsc_cleanup(n);
3625     g_free(n->rss_data.indirections_table);
3626     net_rx_pkt_uninit(n->rx_pkt);
3627     virtio_cleanup(vdev);
3628 }
3629 
3630 static void virtio_net_instance_init(Object *obj)
3631 {
3632     VirtIONet *n = VIRTIO_NET(obj);
3633 
3634     /*
3635      * The default config_size is sizeof(struct virtio_net_config).
3636      * It can be overridden with virtio_net_set_config_size().
3637      */
3638     n->config_size = sizeof(struct virtio_net_config);
3639     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3640                                   "bootindex", "/ethernet-phy@0",
3641                                   DEVICE(n));
3642 
3643     ebpf_rss_init(&n->ebpf_rss);
3644 }
3645 
3646 static int virtio_net_pre_save(void *opaque)
3647 {
3648     VirtIONet *n = opaque;
3649 
3650     /* At this point, the backend must be stopped; otherwise
3651      * it might keep writing to memory. */
3652     assert(!n->vhost_started);
3653 
3654     return 0;
3655 }
3656 
3657 static bool primary_unplug_pending(void *opaque)
3658 {
3659     DeviceState *dev = opaque;
3660     DeviceState *primary;
3661     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3662     VirtIONet *n = VIRTIO_NET(vdev);
3663 
3664     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3665         return false;
3666     }
3667     primary = failover_find_primary_device(n);
3668     return primary ? primary->pending_deleted_event : false;
3669 }
3670 
3671 static bool dev_unplug_pending(void *opaque)
3672 {
3673     DeviceState *dev = opaque;
3674     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3675 
3676     return vdc->primary_unplug_pending(dev);
3677 }
3678 
3679 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3680 {
3681     VirtIONet *n = VIRTIO_NET(vdev);
3682     NetClientState *nc = qemu_get_queue(n->nic);
3683     struct vhost_net *net = get_vhost_net(nc->peer);
3684     return &net->dev;
3685 }
3686 
3687 static const VMStateDescription vmstate_virtio_net = {
3688     .name = "virtio-net",
3689     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3690     .version_id = VIRTIO_NET_VM_VERSION,
3691     .fields = (VMStateField[]) {
3692         VMSTATE_VIRTIO_DEVICE,
3693         VMSTATE_END_OF_LIST()
3694     },
3695     .pre_save = virtio_net_pre_save,
3696     .dev_unplug_pending = dev_unplug_pending,
3697 };
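
     /*
      * Note the two-level layout: vmstate_virtio_net above is the outer,
      * transport-visible description registered through dc->vmsd, and its
      * single VMSTATE_VIRTIO_DEVICE field defers to the common virtio
      * save/load code, which in turn pulls in vmstate_virtio_net_device
      * via vdc->vmsd (both hooked up in virtio_net_class_init() below).
      */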
3698 
3699 static Property virtio_net_properties[] = {
3700     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3701                     VIRTIO_NET_F_CSUM, true),
3702     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3703                     VIRTIO_NET_F_GUEST_CSUM, true),
3704     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3705     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3706                     VIRTIO_NET_F_GUEST_TSO4, true),
3707     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3708                     VIRTIO_NET_F_GUEST_TSO6, true),
3709     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3710                     VIRTIO_NET_F_GUEST_ECN, true),
3711     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3712                     VIRTIO_NET_F_GUEST_UFO, true),
3713     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3714                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3715     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3716                     VIRTIO_NET_F_HOST_TSO4, true),
3717     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3718                     VIRTIO_NET_F_HOST_TSO6, true),
3719     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3720                     VIRTIO_NET_F_HOST_ECN, true),
3721     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3722                     VIRTIO_NET_F_HOST_UFO, true),
3723     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3724                     VIRTIO_NET_F_MRG_RXBUF, true),
3725     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3726                     VIRTIO_NET_F_STATUS, true),
3727     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3728                     VIRTIO_NET_F_CTRL_VQ, true),
3729     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3730                     VIRTIO_NET_F_CTRL_RX, true),
3731     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3732                     VIRTIO_NET_F_CTRL_VLAN, true),
3733     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3734                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3735     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3736                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3737     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3738                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3739     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3740     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3741                     VIRTIO_NET_F_RSS, false),
3742     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3743                     VIRTIO_NET_F_HASH_REPORT, false),
3744     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3745                     VIRTIO_NET_F_RSC_EXT, false),
3746     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3747                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3748     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3749     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3750                        TX_TIMER_INTERVAL),
3751     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3752     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3753     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3754                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3755     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3756                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3757     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3758     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3759                      true),
3760     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3761     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3762     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3763     DEFINE_PROP_END_OF_LIST(),
3764 };
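
     /*
      * All of the above are ordinary qdev properties; as a hypothetical
      * example, enabling multiqueue with larger rings from the command
      * line could look like:
      *
      *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024,tx_queue_size=1024
      */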
3765 
3766 static void virtio_net_class_init(ObjectClass *klass, void *data)
3767 {
3768     DeviceClass *dc = DEVICE_CLASS(klass);
3769     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3770 
3771     device_class_set_props(dc, virtio_net_properties);
3772     dc->vmsd = &vmstate_virtio_net;
3773     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3774     vdc->realize = virtio_net_device_realize;
3775     vdc->unrealize = virtio_net_device_unrealize;
3776     vdc->get_config = virtio_net_get_config;
3777     vdc->set_config = virtio_net_set_config;
3778     vdc->get_features = virtio_net_get_features;
3779     vdc->set_features = virtio_net_set_features;
3780     vdc->bad_features = virtio_net_bad_features;
3781     vdc->reset = virtio_net_reset;
3782     vdc->set_status = virtio_net_set_status;
3783     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3784     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3785     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3786     vdc->post_load = virtio_net_post_load_virtio;
3787     vdc->vmsd = &vmstate_virtio_net_device;
3788     vdc->primary_unplug_pending = primary_unplug_pending;
3789     vdc->get_vhost = virtio_net_get_vhost;
3790 }
3791 
3792 static const TypeInfo virtio_net_info = {
3793     .name = TYPE_VIRTIO_NET,
3794     .parent = TYPE_VIRTIO_DEVICE,
3795     .instance_size = sizeof(VirtIONet),
3796     .instance_init = virtio_net_instance_init,
3797     .class_init = virtio_net_class_init,
3798 };
3799 
3800 static void virtio_register_types(void)
3801 {
3802     type_register_static(&virtio_net_info);
3803 }
3804 
3805 type_init(virtio_register_types)
3806