/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "sysemu/replay.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue sizes; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in the IP header, without options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval. This value affects performance
 * a lot and should be tuned carefully: 300000 (300us) is the recommended
 * value to pass the WHQL test, while 50000 can gain 2x netperf throughput
 * with tso/gso/gro off.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

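/*
 * Virtqueues come in RX/TX pairs: even indices are RX, odd indices are TX,
 * so e.g. virtqueues 0/1 belong to queue pair 0 and 2/3 to queue pair 1.
 */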
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

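/*
 * Flush packets queued on the peer's incoming queue, purging anything that
 * cannot be delivered; no async TX may still be outstanding at this point.
 */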
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

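/*
 * Returns true if the endianness could not be applied to every peer; when
 * enabling fails partway through, the peers already configured are rolled
 * back.
 */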
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back on fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
                                 virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                replay_bh_schedule_event(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid event flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

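/*
 * The VLAN filter is a bitmap of MAX_VLAN bits kept in 32-bit words:
 * VLAN id vid lives in word (vid >> 5), bit (vid & 0x1f), so e.g.
 * VLAN 100 is word 3, bit 4.
 */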
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue %d "
                         "when resetting the queue", queue_index);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
        n->rss_data.populate_hash = false;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

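/*
 * Enable the backend queue backing a device queue pair: vhost-user peers
 * have their vring enabled, multiqueue tap peers have the matching queue
 * enabled.
 */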
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all the features the device could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
     * being enabled. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to notify its new location with vDPA devices that do
     * not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being checked as a primary candidate
 * @opaque: FailoverDevice to fill in when the primary is found
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up-to-date so that it
         * does not miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

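    /*
     * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
     * back to back, unicast entries first and then multicast entries, each
     * block prefixed with its entry count.
     */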
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

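/*
 * Hand the RSS steering eBPF program to the backend; a prog_fd of -1
 * detaches whatever program is currently installed.
 */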
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key,
                          NULL)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

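/*
 * Apply the current rss_data: prefer eBPF steering in the backend; fall
 * back to software RSS in the device model when hash population is
 * requested or the program cannot be attached (non-vhost only).
 */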
static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (n->rss_data.populate_hash) {
            virtio_net_detach_ebpf_rss(n);
        } else if (!virtio_net_attach_ebpf_rss(n)) {
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
            } else {
                warn_report("Can't load eBPF RSS - fallback to software RSS");
                n->rss_data.enabled_software_rss = true;
            }
        }

        trace_virtio_net_rss_enable(n,
                                    n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        return;
    }

    n->rss_data.enabled = false;
    virtio_net_commit_rss_config(n);
}

static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
    int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
    bool ret = true;
    int i = 0;

    if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
        error_setg(errp, "Expected %d file descriptors but got %d",
                   EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
        return false;
    }

    for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
        fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
        if (fds[i] < 0) {
            ret = false;
            goto exit;
        }
    }

    ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);

exit:
    if (!ret) {
        for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
            close(fds[i]);
        }
    }

    return ret;
}

static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    bool ret = false;

    if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
        if (n->ebpf_rss_fds) {
            ret = virtio_net_load_ebpf_fds(n, errp);
        } else {
            ret = ebpf_rss_load(&n->ebpf_rss, errp);
        }
    }

    return ret;
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

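/*
 * Parse a virtio_net_rss_config command: the fixed header, the indirection
 * table, then a packed u16 max_tx_vq plus u8 hash key length (the 'temp'
 * struct below), and finally the hash key itself.
 */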
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /*
     * Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue.
     */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

1648 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1649 {
1650     int opaque;
1651     unsigned int in_bytes;
1652     VirtIONet *n = q->n;
1653 
1654     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1655         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1656                                            bufsize, 0);
1657         /* Buffers are sufficient; disable notification */
1658         if (bufsize <= in_bytes) {
1659             break;
1660         }
1661 
1662         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1663             /* Guest has added some buffers, try again */
1664             continue;
1665         } else {
1666             return 0;
1667         }
1668     }
1669 
1670     virtio_queue_set_notification(q->rx_vq, 0);
1671 
1672     return 1;
1673 }
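
     /*
      * The loop above implements the usual notification race fix: when the
      * rx queue looks short of 'bufsize' bytes, notifications are re-enabled
      * and availability is re-checked, since the guest may have added
      * buffers in the meantime; only once enough space is confirmed are
      * notifications switched off again for the fast path.
      */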
1674 
1675 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1676 {
1677     virtio_tswap16s(vdev, &hdr->hdr_len);
1678     virtio_tswap16s(vdev, &hdr->gso_size);
1679     virtio_tswap16s(vdev, &hdr->csum_start);
1680     virtio_tswap16s(vdev, &hdr->csum_offset);
1681 }
1682 
1683 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1684  * it never finds out that the packets don't have valid checksums.  This
1685  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1686  * fix this with Xen but it hasn't appeared in an upstream release of
1687  * dhclient yet.
1688  *
1689  * To avoid breaking existing guests, we catch udp packets and add
1690  * checksums.  This is terrible but it's better than hacking the guest
1691  * kernels.
1692  *
1693  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1694  * we should provide a mechanism to disable it to avoid polluting the host
1695  * cache.
1696  */
1697 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1698                                         uint8_t *buf, size_t size)
1699 {
1700     size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
1701                        sizeof(struct udp_header);
1702 
1703     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1704         (size >= csum_size && size < 1500) && /* normal sized MTU */
1705         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1706         (buf[23] == 17) && /* ip.protocol == UDP */
1707         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1708         net_checksum_calculate(buf, size, CSUM_UDP);
1709         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1710     }
1711 }
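
     /*
      * The magic offsets above assume an untagged Ethernet frame with a
      * default 20-byte IPv4 header:
      *
      *   buf[12..13]  ethertype                  (0x0800 == IPv4)
      *   buf[23]      IP protocol  == 14 + 9     (17 == UDP)
      *   buf[34..35]  UDP src port == 14 + 20    (67 == bootps)
      *
      * so only server-originated DHCP replies are patched; VLAN-tagged or
      * option-bearing packets simply fall through unmodified.
      */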
1712 
1713 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1714                            const void *buf, size_t size)
1715 {
1716     if (n->has_vnet_hdr) {
1717         /* FIXME this cast is evil */
1718         void *wbuf = (void *)buf;
1719         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1720                                     size - n->host_hdr_len);
1721 
1722         if (n->needs_vnet_hdr_swap) {
1723             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1724         }
1725         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1726     } else {
1727         struct virtio_net_hdr hdr = {
1728             .flags = 0,
1729             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1730         };
1731         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1732     }
1733 }
1734 
1735 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1736 {
1737     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1738     static const uint8_t vlan[] = {0x81, 0x00};
1739     uint8_t *ptr = (uint8_t *)buf;
1740     int i;
1741 
1742     if (n->promisc)
1743         return 1;
1744 
1745     ptr += n->host_hdr_len;
1746 
1747     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1748         int vid = lduw_be_p(ptr + 14) & 0xfff;
1749         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1750             return 0;
1751     }
1752 
1753     if (ptr[0] & 1) { /* multicast */
1754         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1755             return !n->nobcast;
1756         } else if (n->nomulti) {
1757             return 0;
1758         } else if (n->allmulti || n->mac_table.multi_overflow) {
1759             return 1;
1760         }
1761 
1762         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1763             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1764                 return 1;
1765             }
1766         }
1767     } else { /* unicast */
1768         if (n->nouni) {
1769             return 0;
1770         } else if (n->alluni || n->mac_table.uni_overflow) {
1771             return 1;
1772         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1773             return 1;
1774         }
1775 
1776         for (i = 0; i < n->mac_table.first_multi; i++) {
1777             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1778                 return 1;
1779             }
1780         }
1781     }
1782 
1783     return 0;
1784 }
1785 
1786 static uint8_t virtio_net_get_hash_type(bool hasip4,
1787                                         bool hasip6,
1788                                         EthL4HdrProto l4hdr_proto,
1789                                         uint32_t types)
1790 {
1791     if (hasip4) {
1792         switch (l4hdr_proto) {
1793         case ETH_L4_HDR_PROTO_TCP:
1794             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1795                 return NetPktRssIpV4Tcp;
1796             }
1797             break;
1798 
1799         case ETH_L4_HDR_PROTO_UDP:
1800             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1801                 return NetPktRssIpV4Udp;
1802             }
1803             break;
1804 
1805         default:
1806             break;
1807         }
1808 
1809         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1810             return NetPktRssIpV4;
1811         }
1812     } else if (hasip6) {
1813         switch (l4hdr_proto) {
1814         case ETH_L4_HDR_PROTO_TCP:
1815             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1816                 return NetPktRssIpV6TcpEx;
1817             }
1818             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1819                 return NetPktRssIpV6Tcp;
1820             }
1821             break;
1822 
1823         case ETH_L4_HDR_PROTO_UDP:
1824             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1825                 return NetPktRssIpV6UdpEx;
1826             }
1827             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1828                 return NetPktRssIpV6Udp;
1829             }
1830             break;
1831 
1832         default:
1833             break;
1834         }
1835 
1836         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1837             return NetPktRssIpV6Ex;
1838         }
1839         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1840             return NetPktRssIpV6;
1841         }
1842     }
1843     return 0xff;
1844 }
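
     /*
      * Selection order above: the most specific enabled hash type wins
      * (TCP/UDP with extension headers first, then plain TCP/UDP, then bare
      * IP); 0xff signals "no usable hash type", so the caller falls back to
      * the default queue or to no redirection at all.
      */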
1845 
1846 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1847                                   size_t size,
1848                                   struct virtio_net_hdr_v1_hash *hdr)
1849 {
1850     VirtIONet *n = qemu_get_nic_opaque(nc);
1851     unsigned int index = nc->queue_index, new_index = index;
1852     struct NetRxPkt *pkt = n->rx_pkt;
1853     uint8_t net_hash_type;
1854     uint32_t hash;
1855     bool hasip4, hasip6;
1856     EthL4HdrProto l4hdr_proto;
1857     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1858         VIRTIO_NET_HASH_REPORT_IPv4,
1859         VIRTIO_NET_HASH_REPORT_TCPv4,
1860         VIRTIO_NET_HASH_REPORT_TCPv6,
1861         VIRTIO_NET_HASH_REPORT_IPv6,
1862         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1863         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1864         VIRTIO_NET_HASH_REPORT_UDPv4,
1865         VIRTIO_NET_HASH_REPORT_UDPv6,
1866         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1867     };
1868     struct iovec iov = {
1869         .iov_base = (void *)buf,
1870         .iov_len = size
1871     };
1872 
1873     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1874     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1875     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1876                                              n->rss_data.hash_types);
1877     if (net_hash_type > NetPktRssIpV6UdpEx) {
1878         if (n->rss_data.populate_hash) {
1879             hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
1880             hdr->hash_report = 0;
1881         }
1882         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1883     }
1884 
1885     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1886 
1887     if (n->rss_data.populate_hash) {
1888         hdr->hash_value = hash;
1889         hdr->hash_report = reports[net_hash_type];
1890     }
1891 
1892     if (n->rss_data.redirect) {
1893         new_index = hash & (n->rss_data.indirections_len - 1);
1894         new_index = n->rss_data.indirections_table[new_index];
1895     }
1896 
1897     return (index == new_index) ? -1 : new_index;
1898 }
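
     /*
      * Redirection sketch with made-up numbers: given a 128-entry
      * indirection table (indirections_len is a power of two here) and
      * hash == 0x00fedcba,
      *
      *   new_index = 0x00fedcba & 127;         -> 58
      *   new_index = indirections_table[58];   -> e.g. 3
      *
      * the caller then re-enters receive on subqueue 3 (mod
      * curr_queue_pairs), unless the packet already arrived there, in which
      * case -1 ("keep the current queue") is returned.
      */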
1899 
1900 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1901                                       size_t size, bool no_rss)
1902 {
1903     VirtIONet *n = qemu_get_nic_opaque(nc);
1904     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1905     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1906     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1907     size_t lens[VIRTQUEUE_MAX_SIZE];
1908     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1909     struct virtio_net_hdr_v1_hash extra_hdr;
1910     unsigned mhdr_cnt = 0;
1911     size_t offset, i, guest_offset, j;
1912     ssize_t err;
1913 
1914     if (!virtio_net_can_receive(nc)) {
1915         return -1;
1916     }
1917 
1918     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1919         int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1920         if (index >= 0) {
1921             NetClientState *nc2 =
1922                 qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1923             return virtio_net_receive_rcu(nc2, buf, size, true);
1924         }
1925     }
1926 
1927     /* hdr_len refers to the header we supply to the guest */
1928     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1929         return 0;
1930     }
1931 
1932     if (!receive_filter(n, buf, size))
1933         return size;
1934 
1935     offset = i = 0;
1936 
1937     while (offset < size) {
1938         VirtQueueElement *elem;
1939         int len, total;
1940         const struct iovec *sg;
1941 
1942         total = 0;
1943 
1944         if (i == VIRTQUEUE_MAX_SIZE) {
1945             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1946             err = size;
1947             goto err;
1948         }
1949 
1950         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1951         if (!elem) {
1952             if (i) {
1953                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1954                              "i %zd mergeable %d offset %zd, size %zd, "
1955                              "guest hdr len %zd, host hdr len %zd "
1956                              "guest features 0x%" PRIx64,
1957                              i, n->mergeable_rx_bufs, offset, size,
1958                              n->guest_hdr_len, n->host_hdr_len,
1959                              vdev->guest_features);
1960             }
1961             err = -1;
1962             goto err;
1963         }
1964 
1965         if (elem->in_num < 1) {
1966             virtio_error(vdev,
1967                          "virtio-net receive queue contains no in buffers");
1968             virtqueue_detach_element(q->rx_vq, elem, 0);
1969             g_free(elem);
1970             err = -1;
1971             goto err;
1972         }
1973 
1974         sg = elem->in_sg;
1975         if (i == 0) {
1976             assert(offset == 0);
1977             if (n->mergeable_rx_bufs) {
1978                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1979                                     sg, elem->in_num,
1980                                     offsetof(typeof(extra_hdr), hdr.num_buffers),
1981                                     sizeof(extra_hdr.hdr.num_buffers));
1982             }
1983 
1984             receive_header(n, sg, elem->in_num, buf, size);
1985             if (n->rss_data.populate_hash) {
1986                 offset = offsetof(typeof(extra_hdr), hash_value);
1987                 iov_from_buf(sg, elem->in_num, offset,
1988                              (char *)&extra_hdr + offset,
1989                              sizeof(extra_hdr.hash_value) +
1990                              sizeof(extra_hdr.hash_report));
1991             }
1992             offset = n->host_hdr_len;
1993             total += n->guest_hdr_len;
1994             guest_offset = n->guest_hdr_len;
1995         } else {
1996             guest_offset = 0;
1997         }
1998 
1999         /* copy in packet.  ugh */
2000         len = iov_from_buf(sg, elem->in_num, guest_offset,
2001                            buf + offset, size - offset);
2002         total += len;
2003         offset += len;
2004         /* If buffers can't be merged, at this point we
2005          * must have consumed the complete packet.
2006          * Otherwise, drop it. */
2007         if (!n->mergeable_rx_bufs && offset < size) {
2008             virtqueue_unpop(q->rx_vq, elem, total);
2009             g_free(elem);
2010             err = size;
2011             goto err;
2012         }
2013 
2014         elems[i] = elem;
2015         lens[i] = total;
2016         i++;
2017     }
2018 
2019     if (mhdr_cnt) {
2020         virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
2021         iov_from_buf(mhdr_sg, mhdr_cnt,
2022                      0,
2023                      &extra_hdr.hdr.num_buffers,
2024                      sizeof extra_hdr.hdr.num_buffers);
2025     }
2026 
2027     for (j = 0; j < i; j++) {
2028         /* signal other side */
2029         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2030         g_free(elems[j]);
2031     }
2032 
2033     virtqueue_flush(q->rx_vq, i);
2034     virtio_notify(vdev, q->rx_vq);
2035 
2036     return size;
2037 
2038 err:
2039     for (j = 0; j < i; j++) {
2040         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2041         g_free(elems[j]);
2042     }
2043 
2044     return err;
2045 }
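
     /*
      * With mergeable rx buffers the descriptor count is only known after
      * the whole packet has been copied, so the first pass merely records
      * the scatter entries covering hdr.num_buffers (mhdr_sg) and the final
      * count 'i' is patched in afterwards, just before the chains are
      * filled and flushed to the guest.
      */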
2046 
2047 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2048                                   size_t size)
2049 {
2050     RCU_READ_LOCK_GUARD();
2051 
2052     return virtio_net_receive_rcu(nc, buf, size, false);
2053 }
2054 
2055 /*
2056  * Accessors to read and write the IP packet data length field. This
2057  * is a potentially unaligned network-byte-order 16 bit unsigned integer
2058  * pointed to by unit->ip_plen.
2059  */
2060 static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2061 {
2062     return lduw_be_p(unit->ip_plen);
2063 }
2064 
2065 static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
2066 {
2067     stw_be_p(unit->ip_plen, l);
2068 }
2069 
2070 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2071                                          const uint8_t *buf,
2072                                          VirtioNetRscUnit *unit)
2073 {
2074     uint16_t ip_hdrlen;
2075     struct ip_header *ip;
2076 
2077     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2078                               + sizeof(struct eth_header));
2079     unit->ip = (void *)ip;
2080     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2081     unit->ip_plen = &ip->ip_len;
2082     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2083     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2084     unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
2085 }
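
     /*
      * th_offset_flags keeps the TCP data offset, in 32-bit words, in its
      * top 4 bits: "(x & 0xF000) >> 12" would yield words and "<< 2" would
      * convert to bytes, so the combined ">> 10" above does both at once.
      * E.g. an option-less 20-byte header has offset 5: 0x5000 >> 10 == 20.
      */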
2086 
2087 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2088                                          const uint8_t *buf,
2089                                          VirtioNetRscUnit *unit)
2090 {
2091     struct ip6_header *ip6;
2092 
2093     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2094                                  + sizeof(struct eth_header));
2095     unit->ip = ip6;
2096     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2097     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2098                                         + sizeof(struct ip6_header));
2099     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2100 
2101     /* Unlike IPv4, the IPv6 payload length field does not include the
2102        IP header itself */
2103     unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
2104 }
2105 
2106 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2107                                        VirtioNetRscSeg *seg)
2108 {
2109     int ret;
2110     struct virtio_net_hdr_v1 *h;
2111 
2112     h = (struct virtio_net_hdr_v1 *)seg->buf;
2113     h->flags = 0;
2114     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2115 
2116     if (seg->is_coalesced) {
2117         h->rsc.segments = seg->packets;
2118         h->rsc.dup_acks = seg->dup_ack;
2119         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2120         if (chain->proto == ETH_P_IP) {
2121             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2122         } else {
2123             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2124         }
2125     }
2126 
2127     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2128     QTAILQ_REMOVE(&chain->buffers, seg, next);
2129     g_free(seg->buf);
2130     g_free(seg);
2131 
2132     return ret;
2133 }
2134 
2135 static void virtio_net_rsc_purge(void *opq)
2136 {
2137     VirtioNetRscSeg *seg, *rn;
2138     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2139 
2140     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2141         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2142             chain->stat.purge_failed++;
2143             continue;
2144         }
2145     }
2146 
2147     chain->stat.timer++;
2148     if (!QTAILQ_EMPTY(&chain->buffers)) {
2149         timer_mod(chain->drain_timer,
2150               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2151     }
2152 }
2153 
2154 static void virtio_net_rsc_cleanup(VirtIONet *n)
2155 {
2156     VirtioNetRscChain *chain, *rn_chain;
2157     VirtioNetRscSeg *seg, *rn_seg;
2158 
2159     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2160         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2161             QTAILQ_REMOVE(&chain->buffers, seg, next);
2162             g_free(seg->buf);
2163             g_free(seg);
2164         }
2165 
2166         timer_free(chain->drain_timer);
2167         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2168         g_free(chain);
2169     }
2170 }
2171 
2172 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2173                                      NetClientState *nc,
2174                                      const uint8_t *buf, size_t size)
2175 {
2176     uint16_t hdr_len;
2177     VirtioNetRscSeg *seg;
2178 
2179     hdr_len = chain->n->guest_hdr_len;
2180     seg = g_new(VirtioNetRscSeg, 1);
2181     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2182         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2183     memcpy(seg->buf, buf, size);
2184     seg->size = size;
2185     seg->packets = 1;
2186     seg->dup_ack = 0;
2187     seg->is_coalesced = 0;
2188     seg->nc = nc;
2189 
2190     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2191     chain->stat.cache++;
2192 
2193     switch (chain->proto) {
2194     case ETH_P_IP:
2195         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2196         break;
2197     case ETH_P_IPV6:
2198         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2199         break;
2200     default:
2201         g_assert_not_reached();
2202     }
2203 }
2204 
2205 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2206                                          VirtioNetRscSeg *seg,
2207                                          const uint8_t *buf,
2208                                          struct tcp_header *n_tcp,
2209                                          struct tcp_header *o_tcp)
2210 {
2211     uint32_t nack, oack;
2212     uint16_t nwin, owin;
2213 
2214     nack = htonl(n_tcp->th_ack);
2215     nwin = htons(n_tcp->th_win);
2216     oack = htonl(o_tcp->th_ack);
2217     owin = htons(o_tcp->th_win);
2218 
2219     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2220         chain->stat.ack_out_of_win++;
2221         return RSC_FINAL;
2222     } else if (nack == oack) {
2223         /* duplicated ack or window probe */
2224         if (nwin == owin) {
2225             /* duplicated ack; count it (the WHQL test expects up to 1) */
2226             chain->stat.dup_ack++;
2227             return RSC_FINAL;
2228         } else {
2229             /* Coalesce window update */
2230             o_tcp->th_win = n_tcp->th_win;
2231             chain->stat.win_update++;
2232             return RSC_COALESCE;
2233         }
2234     } else {
2235         /* pure ack, go to 'C', finalize */
2236         chain->stat.pure_ack++;
2237         return RSC_FINAL;
2238     }
2239 }
2240 
2241 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2242                                             VirtioNetRscSeg *seg,
2243                                             const uint8_t *buf,
2244                                             VirtioNetRscUnit *n_unit)
2245 {
2246     void *data;
2247     uint16_t o_ip_len;
2248     uint32_t nseq, oseq;
2249     VirtioNetRscUnit *o_unit;
2250 
2251     o_unit = &seg->unit;
2252     o_ip_len = read_unit_ip_len(o_unit);
2253     nseq = htonl(n_unit->tcp->th_seq);
2254     oseq = htonl(o_unit->tcp->th_seq);
2255 
2256     /* out of order or retransmitted. */
2257     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2258         chain->stat.data_out_of_win++;
2259         return RSC_FINAL;
2260     }
2261 
2262     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2263     if (nseq == oseq) {
2264         if ((o_unit->payload == 0) && n_unit->payload) {
2265             /* From no payload to payload: the normal case, not a dup ack etc. */
2266             chain->stat.data_after_pure_ack++;
2267             goto coalesce;
2268         } else {
2269             return virtio_net_rsc_handle_ack(chain, seg, buf,
2270                                              n_unit->tcp, o_unit->tcp);
2271         }
2272     } else if ((nseq - oseq) != o_unit->payload) {
2273         /* Not a consistent packet, out of order */
2274         chain->stat.data_out_of_order++;
2275         return RSC_FINAL;
2276     } else {
2277 coalesce:
2278         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2279             chain->stat.over_size++;
2280             return RSC_FINAL;
2281         }
2282 
2283         /* The data lines up as expected; the payload length field differs
2284            between v4 and v6, so use its value to update the new data len */
2285         o_unit->payload += n_unit->payload; /* update new data len */
2286 
2287         /* update field in ip header */
2288         write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);
2289 
2290         /* Carry over the 'PUSH' flag: the WHQL test guide says 'PUSH' can
2291            be coalesced for Windows guests, while this may change the
2292            behavior of Linux guests (only if they use the RSC feature). */
2293         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2294 
2295         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2296         o_unit->tcp->th_win = n_unit->tcp->th_win;
2297 
2298         memmove(seg->buf + seg->size, data, n_unit->payload);
2299         seg->size += n_unit->payload;
2300         seg->packets++;
2301         chain->stat.coalesced++;
2302         return RSC_COALESCE;
2303     }
2304 }
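
     /*
      * Contiguity by example (hypothetical sequence numbers): with
      * oseq == 1000 and a cached payload of 100 bytes, only nseq == 1100
      * may be appended; nseq == 1000 is handled as a pure ack or window
      * update, and any other delta finalizes the segment as out of order.
      */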
2305 
2306 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2307                                         VirtioNetRscSeg *seg,
2308                                         const uint8_t *buf, size_t size,
2309                                         VirtioNetRscUnit *unit)
2310 {
2311     struct ip_header *ip1, *ip2;
2312 
2313     ip1 = (struct ip_header *)(unit->ip);
2314     ip2 = (struct ip_header *)(seg->unit.ip);
2315     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2316         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2317         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2318         chain->stat.no_match++;
2319         return RSC_NO_MATCH;
2320     }
2321 
2322     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2323 }
2324 
2325 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2326                                         VirtioNetRscSeg *seg,
2327                                         const uint8_t *buf, size_t size,
2328                                         VirtioNetRscUnit *unit)
2329 {
2330     struct ip6_header *ip1, *ip2;
2331 
2332     ip1 = (struct ip6_header *)(unit->ip);
2333     ip2 = (struct ip6_header *)(seg->unit.ip);
2334     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2335         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2336         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2337         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2338             chain->stat.no_match++;
2339             return RSC_NO_MATCH;
2340     }
2341 
2342     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2343 }
2344 
2345 /* Packets with 'SYN' should bypass; packets with other control flags should
2346  * only be sent after draining, to prevent reordering */
2347 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2348                                          struct tcp_header *tcp)
2349 {
2350     uint16_t tcp_hdr;
2351     uint16_t tcp_flag;
2352 
2353     tcp_flag = htons(tcp->th_offset_flags);
2354     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2355     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2356     if (tcp_flag & TH_SYN) {
2357         chain->stat.tcp_syn++;
2358         return RSC_BYPASS;
2359     }
2360 
2361     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2362         chain->stat.tcp_ctrl_drain++;
2363         return RSC_FINAL;
2364     }
2365 
2366     if (tcp_hdr > sizeof(struct tcp_header)) {
2367         chain->stat.tcp_all_opt++;
2368         return RSC_FINAL;
2369     }
2370 
2371     return RSC_CANDIDATE;
2372 }
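
     /*
      * In short: SYN segments bypass coalescing entirely; FIN/URG/RST/ECE/CWR
      * and any segment carrying TCP options drain the chain first; only
      * plain data/ack segments remain candidates for coalescing.
      */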
2373 
2374 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2375                                          NetClientState *nc,
2376                                          const uint8_t *buf, size_t size,
2377                                          VirtioNetRscUnit *unit)
2378 {
2379     int ret;
2380     VirtioNetRscSeg *seg, *nseg;
2381 
2382     if (QTAILQ_EMPTY(&chain->buffers)) {
2383         chain->stat.empty_cache++;
2384         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2385         timer_mod(chain->drain_timer,
2386               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2387         return size;
2388     }
2389 
2390     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2391         if (chain->proto == ETH_P_IP) {
2392             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2393         } else {
2394             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2395         }
2396 
2397         if (ret == RSC_FINAL) {
2398             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2399                 /* Send failed */
2400                 chain->stat.final_failed++;
2401                 return 0;
2402             }
2403 
2404             /* Send current packet */
2405             return virtio_net_do_receive(nc, buf, size);
2406         } else if (ret == RSC_NO_MATCH) {
2407             continue;
2408         } else {
2409             /* Coalesced; mark the flag so the cksum is recalculated for ipv4 */
2410             seg->is_coalesced = 1;
2411             return size;
2412         }
2413     }
2414 
2415     chain->stat.no_match_cache++;
2416     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2417     return size;
2418 }
2419 
2420 /* Drain a connection's pending data, to avoid out-of-order segments */
2421 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2422                                         NetClientState *nc,
2423                                         const uint8_t *buf, size_t size,
2424                                         uint16_t ip_start, uint16_t ip_size,
2425                                         uint16_t tcp_port)
2426 {
2427     VirtioNetRscSeg *seg, *nseg;
2428     uint32_t ppair1, ppair2;
2429 
2430     ppair1 = *(uint32_t *)(buf + tcp_port);
2431     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2432         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2433         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2434             || (ppair1 != ppair2)) {
2435             continue;
2436         }
2437         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2438             chain->stat.drain_failed++;
2439         }
2440 
2441         break;
2442     }
2443 
2444     return virtio_net_do_receive(nc, buf, size);
2445 }
2446 
2447 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2448                                             struct ip_header *ip,
2449                                             const uint8_t *buf, size_t size)
2450 {
2451     uint16_t ip_len;
2452 
2453     /* Not an ipv4 packet */
2454     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2455         chain->stat.ip_option++;
2456         return RSC_BYPASS;
2457     }
2458 
2459     /* Don't handle packets with ip option */
2460     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2461         chain->stat.ip_option++;
2462         return RSC_BYPASS;
2463     }
2464 
2465     if (ip->ip_p != IPPROTO_TCP) {
2466         chain->stat.bypass_not_tcp++;
2467         return RSC_BYPASS;
2468     }
2469 
2470     /* Don't handle packets with ip fragment */
2471     if (!(htons(ip->ip_off) & IP_DF)) {
2472         chain->stat.ip_frag++;
2473         return RSC_BYPASS;
2474     }
2475 
2476     /* Don't handle packets with ecn flag */
2477     if (IPTOS_ECN(ip->ip_tos)) {
2478         chain->stat.ip_ecn++;
2479         return RSC_BYPASS;
2480     }
2481 
2482     ip_len = htons(ip->ip_len);
2483     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2484         || ip_len > (size - chain->n->guest_hdr_len -
2485                      sizeof(struct eth_header))) {
2486         chain->stat.ip_hacked++;
2487         return RSC_BYPASS;
2488     }
2489 
2490     return RSC_CANDIDATE;
2491 }
2492 
2493 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2494                                       NetClientState *nc,
2495                                       const uint8_t *buf, size_t size)
2496 {
2497     int32_t ret;
2498     uint16_t hdr_len;
2499     VirtioNetRscUnit unit;
2500 
2501     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2502 
2503     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2504         + sizeof(struct tcp_header))) {
2505         chain->stat.bypass_not_tcp++;
2506         return virtio_net_do_receive(nc, buf, size);
2507     }
2508 
2509     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2510     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2511         != RSC_CANDIDATE) {
2512         return virtio_net_do_receive(nc, buf, size);
2513     }
2514 
2515     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2516     if (ret == RSC_BYPASS) {
2517         return virtio_net_do_receive(nc, buf, size);
2518     } else if (ret == RSC_FINAL) {
2519         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2520                 ((hdr_len + sizeof(struct eth_header)) + 12),
2521                 VIRTIO_NET_IP4_ADDR_SIZE,
2522                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2523     }
2524 
2525     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2526 }
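
     /*
      * The flow-matching offsets above index into the raw buffer: for IPv4
      * the source address sits 12 bytes into the IP header, so ip_start is
      * hdr_len + eth + 12 and ip_size (8) covers saddr + daddr, while
      * tcp_port points at the first 4 bytes of the TCP header (source and
      * destination port), which drain_flow compares as a single 32-bit load.
      */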
2527 
2528 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2529                                             struct ip6_header *ip6,
2530                                             const uint8_t *buf, size_t size)
2531 {
2532     uint16_t ip_len;
2533 
2534     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2535         != IP_HEADER_VERSION_6) {
2536         return RSC_BYPASS;
2537     }
2538 
2539     /* This rejects both extension headers (options) and non-TCP packets */
2540     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2541         chain->stat.bypass_not_tcp++;
2542         return RSC_BYPASS;
2543     }
2544 
2545     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2546     if (ip_len < sizeof(struct tcp_header) ||
2547         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2548                   - sizeof(struct ip6_header))) {
2549         chain->stat.ip_hacked++;
2550         return RSC_BYPASS;
2551     }
2552 
2553     /* Don't handle packets with ecn flag */
2554     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2555         chain->stat.ip_ecn++;
2556         return RSC_BYPASS;
2557     }
2558 
2559     return RSC_CANDIDATE;
2560 }
2561 
2562 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2563                                       const uint8_t *buf, size_t size)
2564 {
2565     int32_t ret;
2566     uint16_t hdr_len;
2567     VirtioNetRscChain *chain;
2568     VirtioNetRscUnit unit;
2569 
2570     chain = opq;
2571     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2572 
2573     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2574         + sizeof(struct tcp_header))) {
2575         return virtio_net_do_receive(nc, buf, size);
2576     }
2577 
2578     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2579     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2580                                                  unit.ip, buf, size)) {
2581         return virtio_net_do_receive(nc, buf, size);
2582     }
2583 
2584     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2585     if (ret == RSC_BYPASS) {
2586         return virtio_net_do_receive(nc, buf, size);
2587     } else if (ret == RSC_FINAL) {
2588         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2589                 ((hdr_len + sizeof(struct eth_header)) + 8),
2590                 VIRTIO_NET_IP6_ADDR_SIZE,
2591                 hdr_len + sizeof(struct eth_header)
2592                 + sizeof(struct ip6_header));
2593     }
2594 
2595     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2596 }
2597 
2598 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2599                                                       NetClientState *nc,
2600                                                       uint16_t proto)
2601 {
2602     VirtioNetRscChain *chain;
2603 
2604     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2605         return NULL;
2606     }
2607 
2608     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2609         if (chain->proto == proto) {
2610             return chain;
2611         }
2612     }
2613 
2614     chain = g_malloc(sizeof(*chain));
2615     chain->n = n;
2616     chain->proto = proto;
2617     if (proto == (uint16_t)ETH_P_IP) {
2618         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2619         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2620     } else {
2621         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2622         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2623     }
2624     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2625                                       virtio_net_rsc_purge, chain);
2626     memset(&chain->stat, 0, sizeof(chain->stat));
2627 
2628     QTAILQ_INIT(&chain->buffers);
2629     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2630 
2631     return chain;
2632 }
2633 
2634 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2635                                       const uint8_t *buf,
2636                                       size_t size)
2637 {
2638     uint16_t proto;
2639     VirtioNetRscChain *chain;
2640     struct eth_header *eth;
2641     VirtIONet *n;
2642 
2643     n = qemu_get_nic_opaque(nc);
2644     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2645         return virtio_net_do_receive(nc, buf, size);
2646     }
2647 
2648     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2649     proto = htons(eth->h_proto);
2650 
2651     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2652     if (chain) {
2653         chain->stat.received++;
2654         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2655             return virtio_net_rsc_receive4(chain, nc, buf, size);
2656         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2657             return virtio_net_rsc_receive6(chain, nc, buf, size);
2658         }
2659     }
2660     return virtio_net_do_receive(nc, buf, size);
2661 }
2662 
2663 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2664                                   size_t size)
2665 {
2666     VirtIONet *n = qemu_get_nic_opaque(nc);
2667     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2668         return virtio_net_rsc_receive(nc, buf, size);
2669     } else {
2670         return virtio_net_do_receive(nc, buf, size);
2671     }
2672 }
2673 
2674 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2675 
2676 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2677 {
2678     VirtIONet *n = qemu_get_nic_opaque(nc);
2679     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2680     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2681     int ret;
2682 
2683     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2684     virtio_notify(vdev, q->tx_vq);
2685 
2686     g_free(q->async_tx.elem);
2687     q->async_tx.elem = NULL;
2688 
2689     virtio_queue_set_notification(q->tx_vq, 1);
2690     ret = virtio_net_flush_tx(q);
2691     if (ret >= n->tx_burst) {
2692         /*
2693          * the flush has been stopped by tx_burst;
2694          * we will not receive a notification for the
2695          * remaining part, so re-schedule
2696          */
2697         virtio_queue_set_notification(q->tx_vq, 0);
2698         if (q->tx_bh) {
2699             replay_bh_schedule_event(q->tx_bh);
2700         } else {
2701             timer_mod(q->tx_timer,
2702                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2703         }
2704         q->tx_waiting = 1;
2705     }
2706 }
2707 
2708 /* TX */
2709 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2710 {
2711     VirtIONet *n = q->n;
2712     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2713     VirtQueueElement *elem;
2714     int32_t num_packets = 0;
2715     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2716     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2717         return num_packets;
2718     }
2719 
2720     if (q->async_tx.elem) {
2721         virtio_queue_set_notification(q->tx_vq, 0);
2722         return num_packets;
2723     }
2724 
2725     for (;;) {
2726         ssize_t ret;
2727         unsigned int out_num;
2728         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2729         struct virtio_net_hdr vhdr;
2730 
2731         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2732         if (!elem) {
2733             break;
2734         }
2735 
2736         out_num = elem->out_num;
2737         out_sg = elem->out_sg;
2738         if (out_num < 1) {
2739             virtio_error(vdev, "virtio-net header not in first element");
2740             goto detach;
2741         }
2742 
2743         if (n->needs_vnet_hdr_swap) {
2744             if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2745                 sizeof(vhdr)) {
2746                 virtio_error(vdev, "virtio-net header incorrect");
2747                 goto detach;
2748             }
2749             virtio_net_hdr_swap(vdev, &vhdr);
2750             sg2[0].iov_base = &vhdr;
2751             sg2[0].iov_len = sizeof(vhdr);
2752             out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2753                                sizeof(vhdr), -1);
2754             if (out_num == VIRTQUEUE_MAX_SIZE) {
2755                 goto drop;
2756             }
2757             out_num += 1;
2758             out_sg = sg2;
2759         }
2760         /*
2761          * If host wants to see the guest header as is, we can
2762          * pass it on unchanged. Otherwise, copy just the parts
2763          * that host is interested in.
2764          */
2765         assert(n->host_hdr_len <= n->guest_hdr_len);
2766         if (n->host_hdr_len != n->guest_hdr_len) {
2767             if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2768                 virtio_error(vdev, "virtio-net header is invalid");
2769                 goto detach;
2770             }
2771             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2772                                        out_sg, out_num,
2773                                        0, n->host_hdr_len);
2774             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2775                              out_sg, out_num,
2776                              n->guest_hdr_len, -1);
2777             out_num = sg_num;
2778             out_sg = sg;
2779 
2780             if (out_num < 1) {
2781                 virtio_error(vdev, "virtio-net nothing to send");
2782                 goto detach;
2783             }
2784         }
2785 
2786         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2787                                       out_sg, out_num, virtio_net_tx_complete);
2788         if (ret == 0) {
2789             virtio_queue_set_notification(q->tx_vq, 0);
2790             q->async_tx.elem = elem;
2791             return -EBUSY;
2792         }
2793 
2794 drop:
2795         virtqueue_push(q->tx_vq, elem, 0);
2796         virtio_notify(vdev, q->tx_vq);
2797         g_free(elem);
2798 
2799         if (++num_packets >= n->tx_burst) {
2800             break;
2801         }
2802     }
2803     return num_packets;
2804 
2805 detach:
2806     virtqueue_detach_element(q->tx_vq, elem, 0);
2807     g_free(elem);
2808     return -EINVAL;
2809 }
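
     /*
      * Header-splicing example: if the guest supplies the 12-byte mergeable
      * header but the backend only understands the basic 10-byte
      * virtio_net_hdr, the two iov_copy() calls above rebuild the frame as
      * [first host_hdr_len header bytes][payload after guest_hdr_len],
      * silently dropping the guest-only bytes in between.
      */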
2810 
2811 static void virtio_net_tx_timer(void *opaque);
2812 
2813 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2814 {
2815     VirtIONet *n = VIRTIO_NET(vdev);
2816     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2817 
2818     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2819         virtio_net_drop_tx_queue_data(vdev, vq);
2820         return;
2821     }
2822 
2823     /* This happens when device was stopped but VCPU wasn't. */
2824     if (!vdev->vm_running) {
2825         q->tx_waiting = 1;
2826         return;
2827     }
2828 
2829     if (q->tx_waiting) {
2830         /* We already have queued packets, immediately flush */
2831         timer_del(q->tx_timer);
2832         virtio_net_tx_timer(q);
2833     } else {
2834         /* re-arm timer to flush it (and more) on next tick */
2835         timer_mod(q->tx_timer,
2836                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2837         q->tx_waiting = 1;
2838         virtio_queue_set_notification(vq, 0);
2839     }
2840 }
2841 
2842 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2843 {
2844     VirtIONet *n = VIRTIO_NET(vdev);
2845     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2846 
2847     if (unlikely(n->vhost_started)) {
2848         return;
2849     }
2850 
2851     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2852         virtio_net_drop_tx_queue_data(vdev, vq);
2853         return;
2854     }
2855 
2856     if (unlikely(q->tx_waiting)) {
2857         return;
2858     }
2859     q->tx_waiting = 1;
2860     /* This happens when device was stopped but VCPU wasn't. */
2861     if (!vdev->vm_running) {
2862         return;
2863     }
2864     virtio_queue_set_notification(vq, 0);
2865     replay_bh_schedule_event(q->tx_bh);
2866 }
2867 
2868 static void virtio_net_tx_timer(void *opaque)
2869 {
2870     VirtIONetQueue *q = opaque;
2871     VirtIONet *n = q->n;
2872     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2873     int ret;
2874 
2875     /* This happens when device was stopped but BH wasn't. */
2876     if (!vdev->vm_running) {
2877         /* Make sure tx waiting is set, so we'll run when restarted. */
2878         assert(q->tx_waiting);
2879         return;
2880     }
2881 
2882     q->tx_waiting = 0;
2883 
2884     /* Just in case the driver is not ready any more */
2885     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2886         return;
2887     }
2888 
2889     ret = virtio_net_flush_tx(q);
2890     if (ret == -EBUSY || ret == -EINVAL) {
2891         return;
2892     }
2893     /*
2894      * If we flush a full burst of packets, assume there are
2895      * more coming and immediately rearm
2896      */
2897     if (ret >= n->tx_burst) {
2898         q->tx_waiting = 1;
2899         timer_mod(q->tx_timer,
2900                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2901         return;
2902     }
2903     /*
2904      * If less than a full burst, re-enable notification and flush
2905      * anything that may have come in while we weren't looking.  If
2906      * we find something, assume the guest is still active and rearm
2907      */
2908     virtio_queue_set_notification(q->tx_vq, 1);
2909     ret = virtio_net_flush_tx(q);
2910     if (ret > 0) {
2911         virtio_queue_set_notification(q->tx_vq, 0);
2912         q->tx_waiting = 1;
2913         timer_mod(q->tx_timer,
2914                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2915     }
2916 }
2917 
2918 static void virtio_net_tx_bh(void *opaque)
2919 {
2920     VirtIONetQueue *q = opaque;
2921     VirtIONet *n = q->n;
2922     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2923     int32_t ret;
2924 
2925     /* This happens when device was stopped but BH wasn't. */
2926     if (!vdev->vm_running) {
2927         /* Make sure tx waiting is set, so we'll run when restarted. */
2928         assert(q->tx_waiting);
2929         return;
2930     }
2931 
2932     q->tx_waiting = 0;
2933 
2934     /* Just in case the driver is not ready any more */
2935     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2936         return;
2937     }
2938 
2939     ret = virtio_net_flush_tx(q);
2940     if (ret == -EBUSY || ret == -EINVAL) {
2941         return; /* Notification re-enable handled by tx_complete or device
2942                  * broken */
2943     }
2944 
2945     /* If we flush a full burst of packets, assume there are
2946      * more coming and immediately reschedule */
2947     if (ret >= n->tx_burst) {
2948         replay_bh_schedule_event(q->tx_bh);
2949         q->tx_waiting = 1;
2950         return;
2951     }
2952 
2953     /* If less than a full burst, re-enable notification and flush
2954      * anything that may have come in while we weren't looking.  If
2955      * we find something, assume the guest is still active and reschedule */
2956     virtio_queue_set_notification(q->tx_vq, 1);
2957     ret = virtio_net_flush_tx(q);
2958     if (ret == -EINVAL) {
2959         return;
2960     } else if (ret > 0) {
2961         virtio_queue_set_notification(q->tx_vq, 0);
2962         replay_bh_schedule_event(q->tx_bh);
2963         q->tx_waiting = 1;
2964     }
2965 }
2966 
2967 static void virtio_net_add_queue(VirtIONet *n, int index)
2968 {
2969     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2970 
2971     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2972                                            virtio_net_handle_rx);
2973 
2974     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2975         n->vqs[index].tx_vq =
2976             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2977                              virtio_net_handle_tx_timer);
2978         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2979                                               virtio_net_tx_timer,
2980                                               &n->vqs[index]);
2981     } else {
2982         n->vqs[index].tx_vq =
2983             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2984                              virtio_net_handle_tx_bh);
2985         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2986                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2987     }
2988 
2989     n->vqs[index].tx_waiting = 0;
2990     n->vqs[index].n = n;
2991 }
2992 
2993 static void virtio_net_del_queue(VirtIONet *n, int index)
2994 {
2995     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2996     VirtIONetQueue *q = &n->vqs[index];
2997     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2998 
2999     qemu_purge_queued_packets(nc);
3000 
3001     virtio_del_queue(vdev, index * 2);
3002     if (q->tx_timer) {
3003         timer_free(q->tx_timer);
3004         q->tx_timer = NULL;
3005     } else {
3006         qemu_bh_delete(q->tx_bh);
3007         q->tx_bh = NULL;
3008     }
3009     q->tx_waiting = 0;
3010     virtio_del_queue(vdev, index * 2 + 1);
3011 }
3012 
3013 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
3014 {
3015     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3016     int old_num_queues = virtio_get_num_queues(vdev);
3017     int new_num_queues = new_max_queue_pairs * 2 + 1;
3018     int i;
3019 
3020     assert(old_num_queues >= 3);
3021     assert(old_num_queues % 2 == 1);
3022 
3023     if (old_num_queues == new_num_queues) {
3024         return;
3025     }
3026 
3027     /*
3028      * We always need to remove and add ctrl vq if
3029      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3030      * and then we only enter one of the following two loops.
3031      */
3032     virtio_del_queue(vdev, old_num_queues - 1);
3033 
3034     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3035         /* new_num_queues < old_num_queues */
3036         virtio_net_del_queue(n, i / 2);
3037     }
3038 
3039     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3040         /* new_num_queues > old_num_queues */
3041         virtio_net_add_queue(n, i / 2);
3042     }
3043 
3044     /* add ctrl_vq last */
3045     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3046 }
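
     /*
      * Renumbering example: with 2 queue pairs the layout is
      * [rx0, tx0, rx1, tx1, ctrl] (5 queues).  Growing to 3 pairs deletes
      * ctrl at index 4, adds the new pair at indices 4/5, and re-adds ctrl
      * at index 6, preserving the rx/tx interleaving that vq2q() expects.
      */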
3047 
3048 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3049 {
3050     int max = multiqueue ? n->max_queue_pairs : 1;
3051 
3052     n->multiqueue = multiqueue;
3053     virtio_net_change_num_queue_pairs(n, max);
3054 
3055     virtio_net_set_queue_pairs(n);
3056 }
3057 
3058 static int virtio_net_post_load_device(void *opaque, int version_id)
3059 {
3060     VirtIONet *n = opaque;
3061     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3062     int i, link_down;
3063 
3064     trace_virtio_net_post_load_device();
3065     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3066                                virtio_vdev_has_feature(vdev,
3067                                                        VIRTIO_F_VERSION_1),
3068                                virtio_vdev_has_feature(vdev,
3069                                                        VIRTIO_NET_F_HASH_REPORT));
3070 
3071     /* MAC_TABLE_ENTRIES may be different from the saved image */
3072     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3073         n->mac_table.in_use = 0;
3074     }
3075 
3076     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3077         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3078     }
3079 
3080     /*
3081      * curr_guest_offloads will be later overwritten by the
3082      * virtio_set_features_nocheck call done from the virtio_load.
3083      * Here we make sure it is preserved and restored accordingly
3084      * in the virtio_net_post_load_virtio callback.
3085      */
3086     n->saved_guest_offloads = n->curr_guest_offloads;
3087 
3088     virtio_net_set_queue_pairs(n);
3089 
3090     /* Find the first multicast entry in the saved MAC filter */
3091     for (i = 0; i < n->mac_table.in_use; i++) {
3092         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3093             break;
3094         }
3095     }
3096     n->mac_table.first_multi = i;
3097 
3098     /* nc.link_down can't be migrated, so infer link_down from the
3099      * link status bit in n->status */
3100     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3101     for (i = 0; i < n->max_queue_pairs; i++) {
3102         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3103     }
3104 
3105     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3106         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3107         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3108                                   QEMU_CLOCK_VIRTUAL,
3109                                   virtio_net_announce_timer, n);
3110         if (n->announce_timer.round) {
3111             timer_mod(n->announce_timer.tm,
3112                       qemu_clock_get_ms(n->announce_timer.type));
3113         } else {
3114             qemu_announce_timer_del(&n->announce_timer, false);
3115         }
3116     }
3117 
3118     virtio_net_commit_rss_config(n);
3119     return 0;
3120 }
3121 
3122 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3123 {
3124     VirtIONet *n = VIRTIO_NET(vdev);
3125     /*
3126      * The actual needed state is now in saved_guest_offloads,
3127      * see virtio_net_post_load_device for detail.
3128      * Restore it back and apply the desired offloads.
3129      */
3130     n->curr_guest_offloads = n->saved_guest_offloads;
3131     if (peer_has_vnet_hdr(n)) {
3132         virtio_net_apply_guest_offloads(n);
3133     }
3134 
3135     return 0;
3136 }
3137 
3138 /* tx_waiting field of a VirtIONetQueue */
3139 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3140     .name = "virtio-net-queue-tx_waiting",
3141     .fields = (const VMStateField[]) {
3142         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3143         VMSTATE_END_OF_LIST()
3144     },
3145 };
3146 
3147 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3148 {
3149     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3150 }
3151 
3152 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3153 {
3154     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3155                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3156 }
3157 
3158 static bool mac_table_fits(void *opaque, int version_id)
3159 {
3160     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3161 }
3162 
3163 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3164 {
3165     return !mac_table_fits(opaque, version_id);
3166 }
3167 
3168 /* This temporary type is shared by all the WITH_TMP methods
3169  * although only some fields are used by each.
3170  */
3171 struct VirtIONetMigTmp {
3172     VirtIONet      *parent;
3173     VirtIONetQueue *vqs_1;
3174     uint16_t        curr_queue_pairs_1;
3175     uint8_t         has_ufo;
3176     uint32_t        has_vnet_hdr;
3177 };
3178 
3179 /* The 2nd and subsequent tx_waiting flags are loaded later than
3180  * the 1st entry in the queue_pairs, and only if there is more than
3181  * one entry.  We use the tmp mechanism to compute a temporary
3182  * pointer and count, and also to validate the count.
3183  */
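     /*
      * For example, with curr_queue_pairs == 3, vqs_1 points at vqs[1]
      * and curr_queue_pairs_1 == 2, so this section carries the
      * tx_waiting flags of queue pairs 1 and 2, while vqs[0] travels in
      * the main device section via VMSTATE_STRUCT_POINTER.
      */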
3184 
3185 static int virtio_net_tx_waiting_pre_save(void *opaque)
3186 {
3187     struct VirtIONetMigTmp *tmp = opaque;
3188 
3189     tmp->vqs_1 = tmp->parent->vqs + 1;
3190     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3191     if (tmp->parent->curr_queue_pairs == 0) {
3192         tmp->curr_queue_pairs_1 = 0;
3193     }
3194 
3195     return 0;
3196 }
3197 
3198 static int virtio_net_tx_waiting_pre_load(void *opaque)
3199 {
3200     struct VirtIONetMigTmp *tmp = opaque;
3201 
3202     /* Reuse the pointer setup from save */
3203     virtio_net_tx_waiting_pre_save(opaque);
3204 
3205     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3206         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3207             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3208 
3209         return -EINVAL;
3210     }
3211 
3212     return 0; /* all good */
3213 }
3214 
3215 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3216     .name      = "virtio-net-tx_waiting",
3217     .pre_load  = virtio_net_tx_waiting_pre_load,
3218     .pre_save  = virtio_net_tx_waiting_pre_save,
3219     .fields    = (const VMStateField[]) {
3220         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3221                                      curr_queue_pairs_1,
3222                                      vmstate_virtio_net_queue_tx_waiting,
3223                                      struct VirtIONetQueue),
3224         VMSTATE_END_OF_LIST()
3225     },
3226 };
3227 
3228 /* The 'has_ufo' flag is just tested; if the incoming stream has the
3229  * flag set, we must check that our peer supports UFO.
3230  */
3231 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3232 {
3233     struct VirtIONetMigTmp *tmp = opaque;
3234 
3235     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3236         error_report("virtio-net: saved image requires TUN_F_UFO support");
3237         return -EINVAL;
3238     }
3239 
3240     return 0;
3241 }
3242 
3243 static int virtio_net_ufo_pre_save(void *opaque)
3244 {
3245     struct VirtIONetMigTmp *tmp = opaque;
3246 
3247     tmp->has_ufo = tmp->parent->has_ufo;
3248 
3249     return 0;
3250 }
3251 
3252 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3253     .name      = "virtio-net-ufo",
3254     .post_load = virtio_net_ufo_post_load,
3255     .pre_save  = virtio_net_ufo_pre_save,
3256     .fields    = (const VMStateField[]) {
3257         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3258         VMSTATE_END_OF_LIST()
3259     },
3260 };
3261 
3262 /* The 'has_vnet_hdr' flag is just tested; if the incoming stream has
3263  * the flag set, we must check that our peer supports the vnet header.
3264  */
3265 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3266 {
3267     struct VirtIONetMigTmp *tmp = opaque;
3268 
3269     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3270         error_report("virtio-net: saved image requires vnet_hdr=on");
3271         return -EINVAL;
3272     }
3273 
3274     return 0;
3275 }
3276 
3277 static int virtio_net_vnet_pre_save(void *opaque)
3278 {
3279     struct VirtIONetMigTmp *tmp = opaque;
3280 
3281     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3282 
3283     return 0;
3284 }
3285 
3286 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3287     .name      = "virtio-net-vnet",
3288     .post_load = virtio_net_vnet_post_load,
3289     .pre_save  = virtio_net_vnet_pre_save,
3290     .fields    = (const VMStateField[]) {
3291         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3292         VMSTATE_END_OF_LIST()
3293     },
3294 };
3295 
3296 static bool virtio_net_rss_needed(void *opaque)
3297 {
3298     return VIRTIO_NET(opaque)->rss_data.enabled;
3299 }
3300 
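     /*
      * Subsection: only sent when RSS is enabled (see .needed), so
      * streams from guests that do not use RSS remain loadable by
      * destinations without this section.
      */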
3301 static const VMStateDescription vmstate_virtio_net_rss = {
3302     .name      = "virtio-net-device/rss",
3303     .version_id = 1,
3304     .minimum_version_id = 1,
3305     .needed = virtio_net_rss_needed,
3306     .fields = (const VMStateField[]) {
3307         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3308         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3309         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3310         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3311         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3312         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3313         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3314                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3315         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3316                                     rss_data.indirections_len, 0,
3317                                     vmstate_info_uint16, uint16_t),
3318         VMSTATE_END_OF_LIST()
3319     },
3320 };
3321 
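     /*
      * Main device section, registered as vdc->vmsd; the outer
      * "virtio-net" VMSD below wraps it via VMSTATE_VIRTIO_DEVICE.
      */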
3322 static const VMStateDescription vmstate_virtio_net_device = {
3323     .name = "virtio-net-device",
3324     .version_id = VIRTIO_NET_VM_VERSION,
3325     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3326     .post_load = virtio_net_post_load_device,
3327     .fields = (const VMStateField[]) {
3328         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3329         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3330                                vmstate_virtio_net_queue_tx_waiting,
3331                                VirtIONetQueue),
3332         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3333         VMSTATE_UINT16(status, VirtIONet),
3334         VMSTATE_UINT8(promisc, VirtIONet),
3335         VMSTATE_UINT8(allmulti, VirtIONet),
3336         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3337 
3338         /* Guarded pair: if it fits we load it, else we throw it away
3339          * - this can happen if the source has a larger MAC table; the
3340          * post-load hook sets flags in this case.
3341          */
3342         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3343                                  0, mac_table_fits, mac_table.in_use,
3344                                  ETH_ALEN),
3345         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3346                                      mac_table.in_use, ETH_ALEN),
3347 
3348         /* Note: this is an array of uint32_t values that has always been
3349          * saved as a raw byte buffer, so mind the endianness; it is
3350          * actually used as a bitmap built from those uint32_t words.
3351          */
3352         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3353         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3354                          vmstate_virtio_net_has_vnet),
3355         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3356         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3357         VMSTATE_UINT8(alluni, VirtIONet),
3358         VMSTATE_UINT8(nomulti, VirtIONet),
3359         VMSTATE_UINT8(nouni, VirtIONet),
3360         VMSTATE_UINT8(nobcast, VirtIONet),
3361         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3362                          vmstate_virtio_net_has_ufo),
3363         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3364                             vmstate_info_uint16_equal, uint16_t),
3365         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3366         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3367                          vmstate_virtio_net_tx_waiting),
3368         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3369                             has_ctrl_guest_offloads),
3370         VMSTATE_END_OF_LIST()
3371     },
3372     .subsections = (const VMStateDescription * const []) {
3373         &vmstate_virtio_net_rss,
3374         NULL
3375     }
3376 };
3377 
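     /* Callbacks invoked by the net core on virtio-net's NetClientState. */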
3378 static NetClientInfo net_virtio_info = {
3379     .type = NET_CLIENT_DRIVER_NIC,
3380     .size = sizeof(NICState),
3381     .can_receive = virtio_net_can_receive,
3382     .receive = virtio_net_receive,
3383     .link_status_changed = virtio_net_set_link_status,
3384     .query_rx_filter = virtio_net_query_rxfilter,
3385     .announce = virtio_net_announce,
3386 };
3387 
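     /*
      * Data virtqueues come in rx/tx pairs (rx = 2n, tx = 2n + 1) with
      * the ctrl vq after them, so in single-queue mode the ctrl vq index
      * is 2; the idx == 2 special case below maps it to the ctrl
      * NetClientState stored at subqueue max_queue_pairs.
      */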
3388 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3389 {
3390     VirtIONet *n = VIRTIO_NET(vdev);
3391     NetClientState *nc;
3392     assert(n->vhost_started);
3393     if (!n->multiqueue && idx == 2) {
3394         /* Must guard against invalid features and a bogus queue index
3395          * set by a malicious guest or slipped in through a buggy
3396          * migration stream.
3397          */
3398         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3399             qemu_log_mask(LOG_GUEST_ERROR,
3400                           "%s: bogus vq index ignored\n", __func__);
3401             return false;
3402         }
3403         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3404     } else {
3405         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3406     }
3407     /*
3408      * Check for the configuration interrupt: VIRTIO_CONFIG_IRQ_IDX (-1)
3409      * is used as its index.  If the backend does not support the
3410      * config interrupt, this returns false.
3411      */
3412 
3413     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3414         return vhost_net_config_pending(get_vhost_net(nc->peer));
3415     }
3416     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3417 }
3418 
3419 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3420                                            bool mask)
3421 {
3422     VirtIONet *n = VIRTIO_NET(vdev);
3423     NetClientState *nc;
3424     assert(n->vhost_started);
3425     if (!n->multiqueue && idx == 2) {
3426         /* Must guard against invalid features and a bogus queue index
3427          * set by a malicious guest or slipped in through a buggy
3428          * migration stream.
3429          */
3430         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3431             qemu_log_mask(LOG_GUEST_ERROR,
3432                           "%s: bogus vq index ignored\n", __func__);
3433             return;
3434         }
3435         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3436     } else {
3437         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3438     }
3439     /*
3440      * Check for the configuration interrupt: VIRTIO_CONFIG_IRQ_IDX (-1)
3441      * is used as its index.  If the backend does not support the
3442      * config interrupt, simply return.
3443      */
3444 
3445     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3446         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3447         return;
3448     }
3449     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3450 }
3451 
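     /*
      * Size the config space from the offered features.  VIRTIO_NET_F_MAC
      * is forced on (in a local copy only) because the mac field is
      * always present in the config layout.
      */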
3452 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3453 {
3454     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3455 
3456     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3457 }
3458 
3459 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3460                                    const char *type)
3461 {
3462     /*
3463      * The name may be NULL; the netclient name will then be type.x.
3464      */
3465     assert(type != NULL);
3466 
3467     g_free(n->netclient_name);
3468     g_free(n->netclient_type);
3469     n->netclient_name = g_strdup(name);
3470     n->netclient_type = g_strdup(type);
3471 }
3472 
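     /*
      * Failover: request hot-unplug of the primary device through its
      * hotplug handler.  The device is marked partially_hotplugged so
      * that failover_replug_primary() can plug it back if migration
      * fails.
      */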
3473 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3474 {
3475     HotplugHandler *hotplug_ctrl;
3476     PCIDevice *pci_dev;
3477     Error *err = NULL;
3478 
3479     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3480     if (hotplug_ctrl) {
3481         pci_dev = PCI_DEVICE(dev);
3482         pci_dev->partially_hotplugged = true;
3483         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3484         if (err) {
3485             error_report_err(err);
3486             return false;
3487         }
3488     } else {
3489         return false;
3490     }
3491     return true;
3492 }
3493 
3494 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3495                                     Error **errp)
3496 {
3497     Error *err = NULL;
3498     HotplugHandler *hotplug_ctrl;
3499     PCIDevice *pdev = PCI_DEVICE(dev);
3500     BusState *primary_bus;
3501 
3502     if (!pdev->partially_hotplugged) {
3503         return true;
3504     }
3505     primary_bus = dev->parent_bus;
3506     if (!primary_bus) {
3507         error_setg(errp, "virtio_net: couldn't find primary bus");
3508         return false;
3509     }
3510     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3511     qatomic_set(&n->failover_primary_hidden, false);
3512     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3513     if (hotplug_ctrl) {
3514         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3515         if (err) {
3516             goto out;
3517         }
3518         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3519     }
3520     pdev->partially_hotplugged = false;
3521 
3522 out:
3523     error_propagate(errp, err);
3524     return !err;
3525 }
3526 
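     /*
      * React to migration state changes: unplug the failover primary on
      * precopy setup, and plug it back if the migration fails.
      */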
3527 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3528 {
3529     bool should_be_hidden;
3530     Error *err = NULL;
3531     DeviceState *dev = failover_find_primary_device(n);
3532 
3533     if (!dev) {
3534         return;
3535     }
3536 
3537     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3538 
3539     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3540         if (failover_unplug_primary(n, dev)) {
3541             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3542             qapi_event_send_unplug_primary(dev->id);
3543             qatomic_set(&n->failover_primary_hidden, true);
3544         } else {
3545             warn_report("couldn't unplug primary device");
3546         }
3547     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3548         /* We already unplugged the device; let's plug it back. */
3549         if (!failover_replug_primary(n, dev, &err)) {
3550             if (err) {
3551                 error_report_err(err);
3552             }
3553         }
3554     }
3555 }
3556 
3557 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3558                                                MigrationEvent *e, Error **errp)
3559 {
3560     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3561     virtio_net_handle_migration_primary(n, e);
3562     return 0;
3563 }
3564 
3565 static bool failover_hide_primary_device(DeviceListener *listener,
3566                                          const QDict *device_opts,
3567                                          bool from_json,
3568                                          Error **errp)
3569 {
3570     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3571     const char *standby_id;
3572 
3573     if (!device_opts) {
3574         return false;
3575     }
3576 
3577     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3578         return false;
3579     }
3580 
3581     if (!qdict_haskey(device_opts, "id")) {
3582         error_setg(errp, "Device with failover_pair_id needs to have id");
3583         return false;
3584     }
3585 
3586     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3587     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3588         return false;
3589     }
3590 
3591     /*
3592      * The hide helper can be called several times for a given device.
3593      * Check that there is only one primary per virtio-net device, but
3594      * don't clone the qdict again if the helper is called for the
3595      * same device.
3596      */
3597     if (n->primary_opts) {
3598         const char *old, *new;
3599         /* devices with failover_pair_id always have an id */
3600         old = qdict_get_str(n->primary_opts, "id");
3601         new = qdict_get_str(device_opts, "id");
3602         if (strcmp(old, new) != 0) {
3603             error_setg(errp, "Cannot attach more than one primary device to "
3604                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3605             return false;
3606         }
3607     } else {
3608         n->primary_opts = qdict_clone_shallow(device_opts);
3609         n->primary_opts_from_json = from_json;
3610     }
3611 
3612     /* failover_primary_hidden is set during feature negotiation */
3613     return qatomic_read(&n->failover_primary_hidden);
3614 }
3615 
3616 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3617 {
3618     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3619     VirtIONet *n = VIRTIO_NET(dev);
3620     NetClientState *nc;
3621     int i;
3622 
3623     if (n->net_conf.mtu) {
3624         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3625     }
3626 
3627     if (n->net_conf.duplex_str) {
3628         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3629             n->net_conf.duplex = DUPLEX_HALF;
3630         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3631             n->net_conf.duplex = DUPLEX_FULL;
3632         } else {
3633             error_setg(errp, "'duplex' must be 'half' or 'full'");
3634             return;
3635         }
3636         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3637     } else {
3638         n->net_conf.duplex = DUPLEX_UNKNOWN;
3639     }
3640 
3641     if (n->net_conf.speed < SPEED_UNKNOWN) {
3642         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3643         return;
3644     }
3645     if (n->net_conf.speed >= 0) {
3646         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3647     }
3648 
3649     if (n->failover) {
3650         n->primary_listener.hide_device = failover_hide_primary_device;
3651         qatomic_set(&n->failover_primary_hidden, true);
3652         device_listener_register(&n->primary_listener);
3653         migration_add_notifier(&n->migration_state,
3654                                virtio_net_migration_state_notifier);
3655         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3656     }
3657 
3658     virtio_net_set_config_size(n, n->host_features);
3659     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3660 
3661     /*
3662      * We set a lower limit on the RX queue size to what it has always
3663      * been.  Guests that want a smaller ring can always resize it
3664      * without help from us (using virtio 1 and up).
3665      */
3666     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3667         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3668         !is_power_of_2(n->net_conf.rx_queue_size)) {
3669         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3670                    "must be a power of 2 between %d and %d.",
3671                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3672                    VIRTQUEUE_MAX_SIZE);
3673         virtio_cleanup(vdev);
3674         return;
3675     }
3676 
3677     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3678         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3679         !is_power_of_2(n->net_conf.tx_queue_size)) {
3680         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3681                    "must be a power of 2 between %d and %d",
3682                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3683                    virtio_net_max_tx_queue_size(n));
3684         virtio_cleanup(vdev);
3685         return;
3686     }
3687 
3688     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3689 
3690     /*
3691      * Figure out the datapath queue pairs since the backend could
3692      * provide the control queue via peers as well.
3693      */
3694     if (n->nic_conf.peers.queues) {
3695         for (i = 0; i < n->max_ncs; i++) {
3696             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3697                 ++n->max_queue_pairs;
3698             }
3699         }
3700     }
3701     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3702 
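         /* Each queue pair needs an RX and a TX virtqueue, plus one more
          * for the ctrl vq, hence the 2 * max_queue_pairs + 1 bound
          * checked against VIRTIO_QUEUE_MAX. */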
3703     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3704         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3705                    "must be a positive integer less than %d.",
3706                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3707         virtio_cleanup(vdev);
3708         return;
3709     }
3710     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3711     n->curr_queue_pairs = 1;
3712     n->tx_timeout = n->net_conf.txtimer;
3713 
3714     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3715                        && strcmp(n->net_conf.tx, "bh")) {
3716         warn_report("virtio-net: "
3717                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3718                     n->net_conf.tx);
3719         error_printf("Defaulting to \"bh\"\n");
3720     }
3721 
3722     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3723                                     n->net_conf.tx_queue_size);
3724 
3725     virtio_net_add_queue(n, 0);
3726 
3727     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3728     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3729     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3730     n->status = VIRTIO_NET_S_LINK_UP;
3731     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3732                               QEMU_CLOCK_VIRTUAL,
3733                               virtio_net_announce_timer, n);
3734     n->announce_timer.round = 0;
3735 
3736     if (n->netclient_type) {
3737         /*
3738          * This happens when virtio_net_set_netclient_name() was called.
3739          */
3740         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3741                               n->netclient_type, n->netclient_name,
3742                               &dev->mem_reentrancy_guard, n);
3743     } else {
3744         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3745                               object_get_typename(OBJECT(dev)), dev->id,
3746                               &dev->mem_reentrancy_guard, n);
3747     }
3748 
3749     for (i = 0; i < n->max_queue_pairs; i++) {
3750         n->nic->ncs[i].do_not_pad = true;
3751     }
3752 
3753     peer_test_vnet_hdr(n);
3754     if (peer_has_vnet_hdr(n)) {
3755         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3756     } else {
3757         n->host_hdr_len = 0;
3758     }
3759 
3760     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3761 
3762     n->vqs[0].tx_waiting = 0;
3763     n->tx_burst = n->net_conf.txburst;
3764     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3765     n->promisc = 1; /* for compatibility */
3766 
3767     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3768 
3769     n->vlans = g_malloc0(MAX_VLAN >> 3);
3770 
3771     nc = qemu_get_queue(n->nic);
3772     nc->rxfilter_notify_enabled = 1;
3773 
3774     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3775         struct virtio_net_config netcfg = {};
3776         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3777         vhost_net_set_config(get_vhost_net(nc->peer),
3778             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3779     }
3780     QTAILQ_INIT(&n->rsc_chains);
3781     n->qdev = dev;
3782 
3783     net_rx_pkt_init(&n->rx_pkt);
3784 
3785     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3786         Error *err = NULL;
3787         if (!virtio_net_load_ebpf(n, &err)) {
3788             /*
3789              * If user explicitly gave QEMU RSS FDs to use, then
3790              * failing to use them must be considered a fatal
3791              * error. If no RSS FDs were provided, QEMU is trying
3792              * eBPF on a "best effort" basis only, so report a
3793              * warning and allow fallback to software RSS.
3794              */
3795             if (n->ebpf_rss_fds) {
3796                 error_propagate(errp, err);
3797             } else {
3798                 warn_report("unable to load eBPF RSS: %s",
3799                             error_get_pretty(err));
3800                 error_free(err);
3801             }
3802         }
3803     }
3804 }
3805 
3806 static void virtio_net_device_unrealize(DeviceState *dev)
3807 {
3808     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3809     VirtIONet *n = VIRTIO_NET(dev);
3810     int i, max_queue_pairs;
3811 
3812     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3813         virtio_net_unload_ebpf(n);
3814     }
3815 
3816     /* This will stop the vhost backend if appropriate. */
3817     virtio_net_set_status(vdev, 0);
3818 
3819     g_free(n->netclient_name);
3820     n->netclient_name = NULL;
3821     g_free(n->netclient_type);
3822     n->netclient_type = NULL;
3823 
3824     g_free(n->mac_table.macs);
3825     g_free(n->vlans);
3826 
3827     if (n->failover) {
3828         qobject_unref(n->primary_opts);
3829         device_listener_unregister(&n->primary_listener);
3830         migration_remove_notifier(&n->migration_state);
3831     } else {
3832         assert(n->primary_opts == NULL);
3833     }
3834 
3835     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3836     for (i = 0; i < max_queue_pairs; i++) {
3837         virtio_net_del_queue(n, i);
3838     }
3839     /* also delete the control vq */
3840     virtio_del_queue(vdev, max_queue_pairs * 2);
3841     qemu_announce_timer_del(&n->announce_timer, false);
3842     g_free(n->vqs);
3843     qemu_del_nic(n->nic);
3844     virtio_net_rsc_cleanup(n);
3845     g_free(n->rss_data.indirections_table);
3846     net_rx_pkt_uninit(n->rx_pkt);
3847     virtio_cleanup(vdev);
3848 }
3849 
3850 static void virtio_net_reset(VirtIODevice *vdev)
3851 {
3852     VirtIONet *n = VIRTIO_NET(vdev);
3853     int i;
3854 
3855     /* Reset back to compatibility mode */
3856     n->promisc = 1;
3857     n->allmulti = 0;
3858     n->alluni = 0;
3859     n->nomulti = 0;
3860     n->nouni = 0;
3861     n->nobcast = 0;
3862     /* multiqueue is disabled by default */
3863     n->curr_queue_pairs = 1;
3864     timer_del(n->announce_timer.tm);
3865     n->announce_timer.round = 0;
3866     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3867 
3868     /* Flush any MAC and VLAN filter table state */
3869     n->mac_table.in_use = 0;
3870     n->mac_table.first_multi = 0;
3871     n->mac_table.multi_overflow = 0;
3872     n->mac_table.uni_overflow = 0;
3873     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3874     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3875     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3876     memset(n->vlans, 0, MAX_VLAN >> 3);
3877 
3878     /* Flush any async TX */
3879     for (i = 0; i < n->max_queue_pairs; i++) {
3880         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
3881     }
3882 
3883     virtio_net_disable_rss(n);
3884 }
3885 
3886 static void virtio_net_instance_init(Object *obj)
3887 {
3888     VirtIONet *n = VIRTIO_NET(obj);
3889 
3890     /*
3891      * The default config_size is sizeof(struct virtio_net_config).
3892      * Can be overridden with virtio_net_set_config_size.
3893      */
3894     n->config_size = sizeof(struct virtio_net_config);
3895     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3896                                   "bootindex", "/ethernet-phy@0",
3897                                   DEVICE(n));
3898 
3899     ebpf_rss_init(&n->ebpf_rss);
3900 }
3901 
3902 static int virtio_net_pre_save(void *opaque)
3903 {
3904     VirtIONet *n = opaque;
3905 
3906     /* At this point, the backend must be stopped, otherwise
3907      * it might keep writing to memory. */
3908     assert(!n->vhost_started);
3909 
3910     return 0;
3911 }
3912 
3913 static bool primary_unplug_pending(void *opaque)
3914 {
3915     DeviceState *dev = opaque;
3916     DeviceState *primary;
3917     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3918     VirtIONet *n = VIRTIO_NET(vdev);
3919 
3920     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3921         return false;
3922     }
3923     primary = failover_find_primary_device(n);
3924     return primary ? primary->pending_deleted_event : false;
3925 }
3926 
3927 static bool dev_unplug_pending(void *opaque)
3928 {
3929     DeviceState *dev = opaque;
3930     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3931 
3932     return vdc->primary_unplug_pending(dev);
3933 }
3934 
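     /*
      * Return the vhost_dev backing the default queue's peer, or NULL
      * when the NIC, its queue, or a vhost backend is missing.
      */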
3935 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3936 {
3937     VirtIONet *n = VIRTIO_NET(vdev);
3938     NetClientState *nc;
3939     struct vhost_net *net;
3940 
3941     if (!n->nic) {
3942         return NULL;
3943     }
3944 
3945     nc = qemu_get_queue(n->nic);
3946     if (!nc) {
3947         return NULL;
3948     }
3949 
3950     net = get_vhost_net(nc->peer);
3951     if (!net) {
3952         return NULL;
3953     }
3954 
3955     return &net->dev;
3956 }
3957 
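     /*
      * Outer wrapper section registered as dc->vmsd; the device fields
      * themselves live in vmstate_virtio_net_device (vdc->vmsd) and are
      * pulled in through VMSTATE_VIRTIO_DEVICE.
      */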
3958 static const VMStateDescription vmstate_virtio_net = {
3959     .name = "virtio-net",
3960     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3961     .version_id = VIRTIO_NET_VM_VERSION,
3962     .fields = (const VMStateField[]) {
3963         VMSTATE_VIRTIO_DEVICE,
3964         VMSTATE_END_OF_LIST()
3965     },
3966     .pre_save = virtio_net_pre_save,
3967     .dev_unplug_pending = dev_unplug_pending,
3968 };
3969 
3970 static Property virtio_net_properties[] = {
3971     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3972                     VIRTIO_NET_F_CSUM, true),
3973     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3974                     VIRTIO_NET_F_GUEST_CSUM, true),
3975     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3976     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3977                     VIRTIO_NET_F_GUEST_TSO4, true),
3978     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3979                     VIRTIO_NET_F_GUEST_TSO6, true),
3980     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3981                     VIRTIO_NET_F_GUEST_ECN, true),
3982     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3983                     VIRTIO_NET_F_GUEST_UFO, true),
3984     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3985                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3986     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3987                     VIRTIO_NET_F_HOST_TSO4, true),
3988     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3989                     VIRTIO_NET_F_HOST_TSO6, true),
3990     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3991                     VIRTIO_NET_F_HOST_ECN, true),
3992     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3993                     VIRTIO_NET_F_HOST_UFO, true),
3994     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3995                     VIRTIO_NET_F_MRG_RXBUF, true),
3996     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3997                     VIRTIO_NET_F_STATUS, true),
3998     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3999                     VIRTIO_NET_F_CTRL_VQ, true),
4000     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
4001                     VIRTIO_NET_F_CTRL_RX, true),
4002     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4003                     VIRTIO_NET_F_CTRL_VLAN, true),
4004     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4005                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4006     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4007                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4008     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4009                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4010     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4011     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4012                     VIRTIO_NET_F_RSS, false),
4013     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4014                     VIRTIO_NET_F_HASH_REPORT, false),
4015     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4016                       ebpf_rss_fds, qdev_prop_string, char*),
4017     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4018                     VIRTIO_NET_F_RSC_EXT, false),
4019     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4020                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4021     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4022     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4023                        TX_TIMER_INTERVAL),
4024     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4025     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4026     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4027                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4028     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4029                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4030     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4031     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4032                      true),
4033     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4034     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4035     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4036     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4037                       VIRTIO_NET_F_GUEST_USO4, true),
4038     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4039                       VIRTIO_NET_F_GUEST_USO6, true),
4040     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4041                       VIRTIO_NET_F_HOST_USO, true),
4042     DEFINE_PROP_END_OF_LIST(),
4043 };
4044 
4045 static void virtio_net_class_init(ObjectClass *klass, void *data)
4046 {
4047     DeviceClass *dc = DEVICE_CLASS(klass);
4048     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4049 
4050     device_class_set_props(dc, virtio_net_properties);
4051     dc->vmsd = &vmstate_virtio_net;
4052     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4053     vdc->realize = virtio_net_device_realize;
4054     vdc->unrealize = virtio_net_device_unrealize;
4055     vdc->get_config = virtio_net_get_config;
4056     vdc->set_config = virtio_net_set_config;
4057     vdc->get_features = virtio_net_get_features;
4058     vdc->set_features = virtio_net_set_features;
4059     vdc->bad_features = virtio_net_bad_features;
4060     vdc->reset = virtio_net_reset;
4061     vdc->queue_reset = virtio_net_queue_reset;
4062     vdc->queue_enable = virtio_net_queue_enable;
4063     vdc->set_status = virtio_net_set_status;
4064     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4065     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4066     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4067     vdc->post_load = virtio_net_post_load_virtio;
4068     vdc->vmsd = &vmstate_virtio_net_device;
4069     vdc->primary_unplug_pending = primary_unplug_pending;
4070     vdc->get_vhost = virtio_net_get_vhost;
4071     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4072 }
4073 
4074 static const TypeInfo virtio_net_info = {
4075     .name = TYPE_VIRTIO_NET,
4076     .parent = TYPE_VIRTIO_DEVICE,
4077     .instance_size = sizeof(VirtIONet),
4078     .instance_init = virtio_net_instance_init,
4079     .class_init = virtio_net_class_init,
4080 };
4081 
4082 static void virtio_register_types(void)
4083 {
4084     type_register_static(&virtio_net_info);
4085 }
4086 
4087 type_init(virtio_register_types)
4088