xref: /openbmc/qemu/hw/net/virtio-net.c (revision 00b69f1d867ddcf8884c92f5647b424088e754e4)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "sysemu/replay.h"
44 #include "trace.h"
45 #include "monitor/qdev.h"
46 #include "monitor/monitor.h"
47 #include "hw/pci/pci_device.h"
48 #include "net_rx_pkt.h"
49 #include "hw/virtio/vhost.h"
50 #include "sysemu/qtest.h"
51 
52 #define VIRTIO_NET_VM_VERSION    11
53 
54 /* previously fixed value */
55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 
58 /* for now, only allow larger queue sizes; with virtio-1, the guest can downsize */
59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 
62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
63 
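/* Masks for the TCP offset/flags word: the low six flag bits and the
 * data-offset (header length) field, as used by the RSC code. */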
64 #define VIRTIO_NET_TCP_FLAG         0x3F
65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
66 
67 /* IPv4 max payload, 16 bits in the header */
68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 
71 /* header length value in the IP header, without options */
72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 
74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 
77 /* Purge coalesced packets timer interval. This value affects performance
78    significantly and should be tuned carefully: '300000' (300us) is the
79    recommended value for passing the WHQL test; '50000' can gain 2x netperf
80    throughput with tso/gso/gro 'off'. */
81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 
83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 
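/* Per-feature config space sizes: each entry records how far into
 * struct virtio_net_config a driver may access once the corresponding
 * feature bit(s) have been negotiated. */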
93 static const VirtIOFeature feature_sizes[] = {
94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
95      .end = endof(struct virtio_net_config, mac)},
96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
97      .end = endof(struct virtio_net_config, status)},
98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
101      .end = endof(struct virtio_net_config, mtu)},
102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
103      .end = endof(struct virtio_net_config, duplex)},
104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
105      .end = endof(struct virtio_net_config, supported_hash_types)},
106     {}
107 };
108 
109 static const VirtIOConfigSizeParams cfg_size_params = {
110     .min_size = endof(struct virtio_net_config, mac),
111     .max_size = sizeof(struct virtio_net_config),
112     .feature_sizes = feature_sizes
113 };
114 
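/* Return the VirtIONetQueue backing this net client's queue index. */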
115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116 {
117     VirtIONet *n = qemu_get_nic_opaque(nc);
118 
119     return &n->vqs[nc->queue_index];
120 }
121 
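/* Map a virtqueue index to its queue pair index; each pair consists of
 * an RX virtqueue followed by a TX virtqueue. */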
122 static int vq2q(int queue_index)
123 {
124     return queue_index / 2;
125 }
126 
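/* Flush the peer's queued packets, purging those that cannot be sent;
 * any async TX completion must already have run by this point. */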
127 static void flush_or_purge_queued_packets(NetClientState *nc)
128 {
129     if (!nc->peer) {
130         return;
131     }
132 
133     qemu_flush_or_purge_queued_packets(nc->peer, true);
134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
135 }
136 
137 /* TODO
138  * - we could suppress RX interrupt if we were so inclined.
139  */
140 
141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
142 {
143     VirtIONet *n = VIRTIO_NET(vdev);
144     struct virtio_net_config netcfg;
145     NetClientState *nc = qemu_get_queue(n->nic);
146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
147 
148     int ret = 0;
149     memset(&netcfg, 0, sizeof(struct virtio_net_config));
150     virtio_stw_p(vdev, &netcfg.status, n->status);
151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
155     netcfg.duplex = n->net_conf.duplex;
156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
162     memcpy(config, &netcfg, n->config_size);
163 
164     /*
165      * Is this VDPA? No peer means not VDPA: there's no way to
166      * disconnect/reconnect a VDPA peer.
167      */
168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
170                                    n->config_size);
171         if (ret == -1) {
172             return;
173         }
174 
175         /*
176          * Some NIC/kernel combinations present 0 as the mac address.  As that
177          * is not a legal address, try to proceed with the address from the
178          * QEMU command line in the hope that the address has been configured
179          * correctly elsewhere - just not reported by the device.
180          */
181         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
182             info_report("Zero hardware mac address detected. Ignoring.");
183             memcpy(netcfg.mac, n->mac, ETH_ALEN);
184         }
185 
186         netcfg.status |= virtio_tswap16(vdev,
187                                         n->status & VIRTIO_NET_S_ANNOUNCE);
188         memcpy(config, &netcfg, n->config_size);
189     }
190 }
191 
192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
193 {
194     VirtIONet *n = VIRTIO_NET(vdev);
195     struct virtio_net_config netcfg = {};
196     NetClientState *nc = qemu_get_queue(n->nic);
197 
198     memcpy(&netcfg, config, n->config_size);
199 
200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
205     }
206 
207     /*
208      * Is this VDPA? No peer means not VDPA: there's no way to
209      * disconnect/reconnect a VDPA peer.
210      */
211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
212         vhost_net_set_config(get_vhost_net(nc->peer),
213                              (uint8_t *)&netcfg, 0, n->config_size,
214                              VHOST_SET_CONFIG_TYPE_FRONTEND);
215     }
216 }
217 
218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
219 {
220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 }
224 
225 static void virtio_net_announce_notify(VirtIONet *net)
226 {
227     VirtIODevice *vdev = VIRTIO_DEVICE(net);
228     trace_virtio_net_announce_notify();
229 
230     net->status |= VIRTIO_NET_S_ANNOUNCE;
231     virtio_notify_config(vdev);
232 }
233 
234 static void virtio_net_announce_timer(void *opaque)
235 {
236     VirtIONet *n = opaque;
237     trace_virtio_net_announce_timer(n->announce_timer.round);
238 
239     n->announce_timer.round--;
240     virtio_net_announce_notify(n);
241 }
242 
243 static void virtio_net_announce(NetClientState *nc)
244 {
245     VirtIONet *n = qemu_get_nic_opaque(nc);
246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
247 
248     /*
249      * Make sure the virtio migration announcement timer isn't running.
250      * If it is, let it trigger announcement so that we do not cause
251      * confusion.
252      */
253     if (n->announce_timer.round) {
254         return;
255     }
256 
257     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
258         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
259         virtio_net_announce_notify(n);
260     }
261 }
262 
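/* Start or stop the vhost backend so that it follows the device status,
 * the link state and the VM run state, falling back to userspace virtio
 * when vhost cannot be started. */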
263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
264 {
265     VirtIODevice *vdev = VIRTIO_DEVICE(n);
266     NetClientState *nc = qemu_get_queue(n->nic);
267     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
268     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
269               n->max_ncs - n->max_queue_pairs : 0;
270 
271     if (!get_vhost_net(nc->peer)) {
272         return;
273     }
274 
275     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
276         !!n->vhost_started) {
277         return;
278     }
279     if (!n->vhost_started) {
280         int r, i;
281 
282         if (n->needs_vnet_hdr_swap) {
283             error_report("backend does not support %s vnet headers; "
284                          "falling back on userspace virtio",
285                          virtio_is_big_endian(vdev) ? "BE" : "LE");
286             return;
287         }
288 
289         /* Any packets outstanding? Purge them to avoid touching rings
290          * when vhost is running.
291          */
292         for (i = 0; i < queue_pairs; i++) {
293             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
294 
295             /* Purge both directions: TX and RX. */
296             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
297             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298         }
299 
300         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
301             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
302             if (r < 0) {
303                 error_report("%u-byte MTU not supported by the backend",
304                              n->net_conf.mtu);
305 
306                 return;
307             }
308         }
309 
310         n->vhost_started = 1;
311         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
312         if (r < 0) {
313             error_report("unable to start vhost net: %d: "
314                          "falling back on userspace virtio", -r);
315             n->vhost_started = 0;
316         }
317     } else {
318         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
319         n->vhost_started = 0;
320     }
321 }
322 
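/* Tell one backend queue which endianness to use when parsing vnet
 * headers, matching the device's current endianness. */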
323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
324                                           NetClientState *peer,
325                                           bool enable)
326 {
327     if (virtio_is_big_endian(vdev)) {
328         return qemu_set_vnet_be(peer, enable);
329     } else {
330         return qemu_set_vnet_le(peer, enable);
331     }
332 }
333 
334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
335                                        int queue_pairs, bool enable)
336 {
337     int i;
338 
339     for (i = 0; i < queue_pairs; i++) {
340         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
341             enable) {
342             while (--i >= 0) {
343                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344             }
345 
346             return true;
347         }
348     }
349 
350     return false;
351 }
352 
353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
354 {
355     VirtIODevice *vdev = VIRTIO_DEVICE(n);
356     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
357 
358     if (virtio_net_started(n, status)) {
359         /* Before using the device, we tell the network backend about the
360          * endianness to use when parsing vnet headers. If the backend
361          * can't do it, we fall back to fixing the headers in the core
362          * virtio-net code.
363          */
364         n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
365                                  virtio_net_set_vnet_endian(vdev, n->nic->ncs,
366                                                             queue_pairs, true);
367     } else if (virtio_net_started(n, vdev->status)) {
368         /* After using the device, we need to reset the network backend to
369          * the default (guest native endianness), otherwise the guest may
370          * lose network connectivity if it is rebooted into a different
371          * endianness.
372          */
373         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
374     }
375 }
376 
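/* Drop all buffers queued on a TX virtqueue and notify the guest so
 * that it can reclaim them. */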
377 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
378 {
379     unsigned int dropped = virtqueue_drop_all(vq);
380     if (dropped) {
381         virtio_notify(vdev, vq);
382     }
383 }
384 
385 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
386 {
387     VirtIONet *n = VIRTIO_NET(vdev);
388     VirtIONetQueue *q;
389     int i;
390     uint8_t queue_status;
391 
392     virtio_net_vnet_endian_status(n, status);
393     virtio_net_vhost_status(n, status);
394 
395     for (i = 0; i < n->max_queue_pairs; i++) {
396         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
397         bool queue_started;
398         q = &n->vqs[i];
399 
400         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
401             queue_status = 0;
402         } else {
403             queue_status = status;
404         }
405         queue_started =
406             virtio_net_started(n, queue_status) && !n->vhost_started;
407 
408         if (queue_started) {
409             qemu_flush_queued_packets(ncs);
410         }
411 
412         if (!q->tx_waiting) {
413             continue;
414         }
415 
416         if (queue_started) {
417             if (q->tx_timer) {
418                 timer_mod(q->tx_timer,
419                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
420             } else {
421                 replay_bh_schedule_event(q->tx_bh);
422             }
423         } else {
424             if (q->tx_timer) {
425                 timer_del(q->tx_timer);
426             } else {
427                 qemu_bh_cancel(q->tx_bh);
428             }
429             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
430                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
431                 vdev->vm_running) {
432                 /* If tx is waiting, we likely have some packets in the
433                  * tx queue and have disabled notification. */
434                 q->tx_waiting = 0;
435                 virtio_queue_set_notification(q->tx_vq, 1);
436                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
437             }
438         }
439     }
440 }
441 
442 static void virtio_net_set_link_status(NetClientState *nc)
443 {
444     VirtIONet *n = qemu_get_nic_opaque(nc);
445     VirtIODevice *vdev = VIRTIO_DEVICE(n);
446     uint16_t old_status = n->status;
447 
448     if (nc->link_down)
449         n->status &= ~VIRTIO_NET_S_LINK_UP;
450     else
451         n->status |= VIRTIO_NET_S_LINK_UP;
452 
453     if (n->status != old_status)
454         virtio_notify_config(vdev);
455 
456     virtio_net_set_status(vdev, vdev->status);
457 }
458 
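/* Emit the NIC_RX_FILTER_CHANGED QMP event, then suppress further
 * events until the filter is queried again (virtio_net_query_rxfilter). */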
459 static void rxfilter_notify(NetClientState *nc)
460 {
461     VirtIONet *n = qemu_get_nic_opaque(nc);
462 
463     if (nc->rxfilter_notify_enabled) {
464         char *path = object_get_canonical_path(OBJECT(n->qdev));
465         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
466         g_free(path);
467 
468         /* disable event notification to avoid events flooding */
469         nc->rxfilter_notify_enabled = 0;
470     }
471 }
472 
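/* Build a list of the VLAN ids currently set in the n->vlans bitmap. */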
473 static intList *get_vlan_table(VirtIONet *n)
474 {
475     intList *list;
476     int i, j;
477 
478     list = NULL;
479     for (i = 0; i < MAX_VLAN >> 5; i++) {
480         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
481             if (n->vlans[i] & (1U << j)) {
482                 QAPI_LIST_PREPEND(list, (i << 5) + j);
483             }
484         }
485     }
486 
487     return list;
488 }
489 
490 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
491 {
492     VirtIONet *n = qemu_get_nic_opaque(nc);
493     VirtIODevice *vdev = VIRTIO_DEVICE(n);
494     RxFilterInfo *info;
495     strList *str_list;
496     int i;
497 
498     info = g_malloc0(sizeof(*info));
499     info->name = g_strdup(nc->name);
500     info->promiscuous = n->promisc;
501 
502     if (n->nouni) {
503         info->unicast = RX_STATE_NONE;
504     } else if (n->alluni) {
505         info->unicast = RX_STATE_ALL;
506     } else {
507         info->unicast = RX_STATE_NORMAL;
508     }
509 
510     if (n->nomulti) {
511         info->multicast = RX_STATE_NONE;
512     } else if (n->allmulti) {
513         info->multicast = RX_STATE_ALL;
514     } else {
515         info->multicast = RX_STATE_NORMAL;
516     }
517 
518     info->broadcast_allowed = n->nobcast;
519     info->multicast_overflow = n->mac_table.multi_overflow;
520     info->unicast_overflow = n->mac_table.uni_overflow;
521 
522     info->main_mac = qemu_mac_strdup_printf(n->mac);
523 
524     str_list = NULL;
525     for (i = 0; i < n->mac_table.first_multi; i++) {
526         QAPI_LIST_PREPEND(str_list,
527                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
528     }
529     info->unicast_table = str_list;
530 
531     str_list = NULL;
532     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
533         QAPI_LIST_PREPEND(str_list,
534                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
535     }
536     info->multicast_table = str_list;
537     info->vlan_table = get_vlan_table(n);
538 
539     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
540         info->vlan = RX_STATE_ALL;
541     } else if (!info->vlan_table) {
542         info->vlan = RX_STATE_NONE;
543     } else {
544         info->vlan = RX_STATE_NORMAL;
545     }
546 
547     /* enable event notification after query */
548     nc->rxfilter_notify_enabled = 1;
549 
550     return info;
551 }
552 
553 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
554 {
555     VirtIONet *n = VIRTIO_NET(vdev);
556     NetClientState *nc;
557 
558     /* validate queue_index and skip for cvq */
559     if (queue_index >= n->max_queue_pairs * 2) {
560         return;
561     }
562 
563     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
564 
565     if (!nc->peer) {
566         return;
567     }
568 
569     if (get_vhost_net(nc->peer) &&
570         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
571         vhost_net_virtqueue_reset(vdev, nc, queue_index);
572     }
573 
574     flush_or_purge_queued_packets(nc);
575 }
576 
577 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
578 {
579     VirtIONet *n = VIRTIO_NET(vdev);
580     NetClientState *nc;
581     int r;
582 
583     /* validate queue_index and skip for cvq */
584     if (queue_index >= n->max_queue_pairs * 2) {
585         return;
586     }
587 
588     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
589 
590     if (!nc->peer || !vdev->vhost_started) {
591         return;
592     }
593 
594     if (get_vhost_net(nc->peer) &&
595         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
596         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
597         if (r < 0) {
598             error_report("unable to restart vhost net virtqueue %d "
599                          "when resetting the queue", queue_index);
600         }
601     }
602 }
603 
604 static void peer_test_vnet_hdr(VirtIONet *n)
605 {
606     NetClientState *nc = qemu_get_queue(n->nic);
607     if (!nc->peer) {
608         return;
609     }
610 
611     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
612 }
613 
614 static int peer_has_vnet_hdr(VirtIONet *n)
615 {
616     return n->has_vnet_hdr;
617 }
618 
619 static int peer_has_ufo(VirtIONet *n)
620 {
621     if (!peer_has_vnet_hdr(n))
622         return 0;
623 
624     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
625 
626     return n->has_ufo;
627 }
628 
629 static int peer_has_uso(VirtIONet *n)
630 {
631     if (!peer_has_vnet_hdr(n)) {
632         return 0;
633     }
634 
635     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
636 }
637 
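/* Recompute the guest-visible vnet header length from the negotiated
 * features (mergeable buffers, VIRTIO 1.0, hash report) and propagate
 * it to peers that support that header length. */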
638 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
639                                        int version_1, int hash_report)
640 {
641     int i;
642     NetClientState *nc;
643 
644     n->mergeable_rx_bufs = mergeable_rx_bufs;
645 
646     if (version_1) {
647         n->guest_hdr_len = hash_report ?
648             sizeof(struct virtio_net_hdr_v1_hash) :
649             sizeof(struct virtio_net_hdr_mrg_rxbuf);
650         n->rss_data.populate_hash = !!hash_report;
651     } else {
652         n->guest_hdr_len = n->mergeable_rx_bufs ?
653             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
654             sizeof(struct virtio_net_hdr);
655         n->rss_data.populate_hash = false;
656     }
657 
658     for (i = 0; i < n->max_queue_pairs; i++) {
659         nc = qemu_get_subqueue(n->nic, i);
660 
661         if (peer_has_vnet_hdr(n) &&
662             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
663             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
664             n->host_hdr_len = n->guest_hdr_len;
665         }
666     }
667 }
668 
669 static int virtio_net_max_tx_queue_size(VirtIONet *n)
670 {
671     NetClientState *peer = n->nic_conf.peers.ncs[0];
672 
673     /*
674      * Backends other than vhost-user or vhost-vdpa don't support a TX
675      * queue size larger than the default.
676      */
677     if (!peer) {
678         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
679     }
680 
681     switch (peer->info->type) {
682     case NET_CLIENT_DRIVER_VHOST_USER:
683     case NET_CLIENT_DRIVER_VHOST_VDPA:
684         return VIRTQUEUE_MAX_SIZE;
685     default:
686         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
687     }
688 }
689 
690 static int peer_attach(VirtIONet *n, int index)
691 {
692     NetClientState *nc = qemu_get_subqueue(n->nic, index);
693 
694     if (!nc->peer) {
695         return 0;
696     }
697 
698     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
699         vhost_set_vring_enable(nc->peer, 1);
700     }
701 
702     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
703         return 0;
704     }
705 
706     if (n->max_queue_pairs == 1) {
707         return 0;
708     }
709 
710     return tap_enable(nc->peer);
711 }
712 
713 static int peer_detach(VirtIONet *n, int index)
714 {
715     NetClientState *nc = qemu_get_subqueue(n->nic, index);
716 
717     if (!nc->peer) {
718         return 0;
719     }
720 
721     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
722         vhost_set_vring_enable(nc->peer, 0);
723     }
724 
725     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
726         return 0;
727     }
728 
729     return tap_disable(nc->peer);
730 }
731 
732 static void virtio_net_set_queue_pairs(VirtIONet *n)
733 {
734     int i;
735     int r;
736 
737     if (n->nic->peer_deleted) {
738         return;
739     }
740 
741     for (i = 0; i < n->max_queue_pairs; i++) {
742         if (i < n->curr_queue_pairs) {
743             r = peer_attach(n, i);
744             assert(!r);
745         } else {
746             r = peer_detach(n, i);
747             assert(!r);
748         }
749     }
750 }
751 
752 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
753 
754 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
755                                         Error **errp)
756 {
757     VirtIONet *n = VIRTIO_NET(vdev);
758     NetClientState *nc = qemu_get_queue(n->nic);
759 
760     /* First, sync all features that virtio-net may possibly support */
761     features |= n->host_features;
762 
763     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
764 
765     if (!peer_has_vnet_hdr(n)) {
766         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
767         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
768         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
769         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
770 
771         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
772         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
773         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
774         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
775 
776         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
777         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
778         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
779 
780         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
781     }
782 
783     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
784         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
785         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
786     }
787 
788     if (!peer_has_uso(n)) {
789         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
790         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
791         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
792     }
793 
794     if (!get_vhost_net(nc->peer)) {
795         return features;
796     }
797 
798     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
799         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
800     }
801     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
802     vdev->backend_features = features;
803 
804     if (n->mtu_bypass_backend &&
805             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
806         features |= (1ULL << VIRTIO_NET_F_MTU);
807     }
808 
809     /*
810      * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
811      * being enabled. This happens in the vDPA case.
812      *
813      * Make sure the feature set is not incoherent, as the driver could refuse
814      * to start otherwise.
815      *
816      * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
817      * helping the guest notify its new location with vDPA devices that do not
818      * support it.
819      */
820     if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
821         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
822     }
823 
824     return features;
825 }
826 
827 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
828 {
829     uint64_t features = 0;
830 
831     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
832      * but also these: */
833     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
834     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
835     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
836     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
837     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
838 
839     return features;
840 }
841 
842 static void virtio_net_apply_guest_offloads(VirtIONet *n)
843 {
844     qemu_set_offload(qemu_get_queue(n->nic)->peer,
845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
846             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
847             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
848             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
849             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
850             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
851             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
852 }
853 
854 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
855 {
856     static const uint64_t guest_offloads_mask =
857         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
858         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
859         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
860         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
861         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
862         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
863         (1ULL << VIRTIO_NET_F_GUEST_USO6);
864 
865     return guest_offloads_mask & features;
866 }
867 
868 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
869 {
870     VirtIODevice *vdev = VIRTIO_DEVICE(n);
871     return virtio_net_guest_offloads_by_features(vdev->guest_features);
872 }
873 
874 typedef struct {
875     VirtIONet *n;
876     DeviceState *dev;
877 } FailoverDevice;
878 
879 /**
880  * Set the failover primary device
881  *
882  * @dev: device being visited by the bus walk
883  * @opaque: FailoverDevice to fill in when the primary is found
884  * Returns: 1 (stop walking) once the primary is found, 0 otherwise
885  */
886 static int failover_set_primary(DeviceState *dev, void *opaque)
887 {
888     FailoverDevice *fdev = opaque;
889     PCIDevice *pci_dev = (PCIDevice *)
890         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
891 
892     if (!pci_dev) {
893         return 0;
894     }
895 
896     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
897         fdev->dev = dev;
898         return 1;
899     }
900 
901     return 0;
902 }
903 
904 /**
905  * Find the primary device for this failover virtio-net
906  *
907  * @n: VirtIONet device
908  * Returns: the primary DeviceState, or NULL if it is not found
909  */
910 static DeviceState *failover_find_primary_device(VirtIONet *n)
911 {
912     FailoverDevice fdev = {
913         .n = n,
914     };
915 
916     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
917                        NULL, NULL, &fdev);
918     return fdev.dev;
919 }
920 
921 static void failover_add_primary(VirtIONet *n, Error **errp)
922 {
923     Error *err = NULL;
924     DeviceState *dev = failover_find_primary_device(n);
925 
926     if (dev) {
927         return;
928     }
929 
930     if (!n->primary_opts) {
931         error_setg(errp, "Primary device not found");
932         error_append_hint(errp, "Virtio-net failover will not work. Make "
933                           "sure primary device has parameter"
934                           " failover_pair_id=%s\n", n->netclient_name);
935         return;
936     }
937 
938     dev = qdev_device_add_from_qdict(n->primary_opts,
939                                      n->primary_opts_from_json,
940                                      &err);
941     if (err) {
942         qobject_unref(n->primary_opts);
943         n->primary_opts = NULL;
944     } else {
945         object_unref(OBJECT(dev));
946     }
947     error_propagate(errp, err);
948 }
949 
950 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
951 {
952     VirtIONet *n = VIRTIO_NET(vdev);
953     Error *err = NULL;
954     int i;
955 
956     if (n->mtu_bypass_backend &&
957             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
958         features &= ~(1ULL << VIRTIO_NET_F_MTU);
959     }
960 
961     virtio_net_set_multiqueue(n,
962                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
963                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
964 
965     virtio_net_set_mrg_rx_bufs(n,
966                                virtio_has_feature(features,
967                                                   VIRTIO_NET_F_MRG_RXBUF),
968                                virtio_has_feature(features,
969                                                   VIRTIO_F_VERSION_1),
970                                virtio_has_feature(features,
971                                                   VIRTIO_NET_F_HASH_REPORT));
972 
973     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
974         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
975     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
976         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
977     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
978 
979     if (n->has_vnet_hdr) {
980         n->curr_guest_offloads =
981             virtio_net_guest_offloads_by_features(features);
982         virtio_net_apply_guest_offloads(n);
983     }
984 
985     for (i = 0; i < n->max_queue_pairs; i++) {
986         NetClientState *nc = qemu_get_subqueue(n->nic, i);
987 
988         if (!get_vhost_net(nc->peer)) {
989             continue;
990         }
991         vhost_net_ack_features(get_vhost_net(nc->peer), features);
992 
993         /*
994          * Keep acked_features in NetVhostUserState up-to-date so it
995          * doesn't miss any features configured by the guest virtio driver.
996          */
997         vhost_net_save_acked_features(nc->peer);
998     }
999 
1000     if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1001         memset(n->vlans, 0xff, MAX_VLAN >> 3);
1002     }
1003 
1004     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1005         qapi_event_send_failover_negotiated(n->netclient_name);
1006         qatomic_set(&n->failover_primary_hidden, false);
1007         failover_add_primary(n, &err);
1008         if (err) {
1009             if (!qtest_enabled()) {
1010                 warn_report_err(err);
1011             } else {
1012                 error_free(err);
1013             }
1014         }
1015     }
1016 }
1017 
1018 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1019                                      struct iovec *iov, unsigned int iov_cnt)
1020 {
1021     uint8_t on;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024 
1025     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1026     if (s != sizeof(on)) {
1027         return VIRTIO_NET_ERR;
1028     }
1029 
1030     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1031         n->promisc = on;
1032     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1033         n->allmulti = on;
1034     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1035         n->alluni = on;
1036     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1037         n->nomulti = on;
1038     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1039         n->nouni = on;
1040     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1041         n->nobcast = on;
1042     } else {
1043         return VIRTIO_NET_ERR;
1044     }
1045 
1046     rxfilter_notify(nc);
1047 
1048     return VIRTIO_NET_OK;
1049 }
1050 
1051 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1052                                      struct iovec *iov, unsigned int iov_cnt)
1053 {
1054     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1055     uint64_t offloads;
1056     size_t s;
1057 
1058     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1059         return VIRTIO_NET_ERR;
1060     }
1061 
1062     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1063     if (s != sizeof(offloads)) {
1064         return VIRTIO_NET_ERR;
1065     }
1066 
1067     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1068         uint64_t supported_offloads;
1069 
1070         offloads = virtio_ldq_p(vdev, &offloads);
1071 
1072         if (!n->has_vnet_hdr) {
1073             return VIRTIO_NET_ERR;
1074         }
1075 
1076         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1077             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1078         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1079             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1080         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1081 
1082         supported_offloads = virtio_net_supported_guest_offloads(n);
1083         if (offloads & ~supported_offloads) {
1084             return VIRTIO_NET_ERR;
1085         }
1086 
1087         n->curr_guest_offloads = offloads;
1088         virtio_net_apply_guest_offloads(n);
1089 
1090         return VIRTIO_NET_OK;
1091     } else {
1092         return VIRTIO_NET_ERR;
1093     }
1094 }
1095 
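/* VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks:
 * a unicast list followed by a multicast list, each prefixed with its
 * entry count. */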
1096 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1097                                  struct iovec *iov, unsigned int iov_cnt)
1098 {
1099     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1100     struct virtio_net_ctrl_mac mac_data;
1101     size_t s;
1102     NetClientState *nc = qemu_get_queue(n->nic);
1103 
1104     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1105         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1106             return VIRTIO_NET_ERR;
1107         }
1108         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1109         assert(s == sizeof(n->mac));
1110         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1111         rxfilter_notify(nc);
1112 
1113         return VIRTIO_NET_OK;
1114     }
1115 
1116     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1117         return VIRTIO_NET_ERR;
1118     }
1119 
1120     int in_use = 0;
1121     int first_multi = 0;
1122     uint8_t uni_overflow = 0;
1123     uint8_t multi_overflow = 0;
1124     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1125 
1126     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1127                    sizeof(mac_data.entries));
1128     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1129     if (s != sizeof(mac_data.entries)) {
1130         goto error;
1131     }
1132     iov_discard_front(&iov, &iov_cnt, s);
1133 
1134     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1135         goto error;
1136     }
1137 
1138     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1139         s = iov_to_buf(iov, iov_cnt, 0, macs,
1140                        mac_data.entries * ETH_ALEN);
1141         if (s != mac_data.entries * ETH_ALEN) {
1142             goto error;
1143         }
1144         in_use += mac_data.entries;
1145     } else {
1146         uni_overflow = 1;
1147     }
1148 
1149     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1150 
1151     first_multi = in_use;
1152 
1153     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1154                    sizeof(mac_data.entries));
1155     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1156     if (s != sizeof(mac_data.entries)) {
1157         goto error;
1158     }
1159 
1160     iov_discard_front(&iov, &iov_cnt, s);
1161 
1162     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1163         goto error;
1164     }
1165 
1166     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1167         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1168                        mac_data.entries * ETH_ALEN);
1169         if (s != mac_data.entries * ETH_ALEN) {
1170             goto error;
1171         }
1172         in_use += mac_data.entries;
1173     } else {
1174         multi_overflow = 1;
1175     }
1176 
1177     n->mac_table.in_use = in_use;
1178     n->mac_table.first_multi = first_multi;
1179     n->mac_table.uni_overflow = uni_overflow;
1180     n->mac_table.multi_overflow = multi_overflow;
1181     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1182     g_free(macs);
1183     rxfilter_notify(nc);
1184 
1185     return VIRTIO_NET_OK;
1186 
1187 error:
1188     g_free(macs);
1189     return VIRTIO_NET_ERR;
1190 }
1191 
1192 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1193                                         struct iovec *iov, unsigned int iov_cnt)
1194 {
1195     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1196     uint16_t vid;
1197     size_t s;
1198     NetClientState *nc = qemu_get_queue(n->nic);
1199 
1200     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1201     vid = virtio_lduw_p(vdev, &vid);
1202     if (s != sizeof(vid)) {
1203         return VIRTIO_NET_ERR;
1204     }
1205 
1206     if (vid >= MAX_VLAN)
1207         return VIRTIO_NET_ERR;
1208 
1209     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1210         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1211     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1212         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1213     else
1214         return VIRTIO_NET_ERR;
1215 
1216     rxfilter_notify(nc);
1217 
1218     return VIRTIO_NET_OK;
1219 }
1220 
1221 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1222                                       struct iovec *iov, unsigned int iov_cnt)
1223 {
1224     trace_virtio_net_handle_announce(n->announce_timer.round);
1225     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1226         n->status & VIRTIO_NET_S_ANNOUNCE) {
1227         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1228         if (n->announce_timer.round) {
1229             qemu_announce_timer_step(&n->announce_timer);
1230         }
1231         return VIRTIO_NET_OK;
1232     } else {
1233         return VIRTIO_NET_ERR;
1234     }
1235 }
1236 
1237 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1238 {
1239     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1240     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1241         return false;
1242     }
1243 
1244     return nc->info->set_steering_ebpf(nc, prog_fd);
1245 }
1246 
1247 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1248                                    struct EBPFRSSConfig *config)
1249 {
1250     config->redirect = data->redirect;
1251     config->populate_hash = data->populate_hash;
1252     config->hash_types = data->hash_types;
1253     config->indirections_len = data->indirections_len;
1254     config->default_queue = data->default_queue;
1255 }
1256 
1257 static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
1258 {
1259     struct EBPFRSSConfig config = {};
1260 
1261     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1262         return false;
1263     }
1264 
1265     rss_data_to_rss_config(&n->rss_data, &config);
1266 
1267     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1268                           n->rss_data.indirections_table, n->rss_data.key,
1269                           NULL)) {
1270         return false;
1271     }
1272 
1273     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1274         return false;
1275     }
1276 
1277     return true;
1278 }
1279 
1280 static void virtio_net_detach_ebpf_rss(VirtIONet *n)
1281 {
1282     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1283 }
1284 
1285 static void virtio_net_commit_rss_config(VirtIONet *n)
1286 {
1287     if (n->rss_data.enabled) {
1288         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
1289         if (n->rss_data.populate_hash) {
1290             virtio_net_detach_ebpf_rss(n);
1291         } else if (!virtio_net_attach_ebpf_rss(n)) {
1292             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1293                 warn_report("Can't load eBPF RSS for vhost");
1294             } else {
1295                 warn_report("Can't load eBPF RSS - fallback to software RSS");
1296                 n->rss_data.enabled_software_rss = true;
1297             }
1298         }
1299 
1300         trace_virtio_net_rss_enable(n->rss_data.hash_types,
1301                                     n->rss_data.indirections_len,
1302                                     sizeof(n->rss_data.key));
1303     } else {
1304         virtio_net_detach_ebpf_rss(n);
1305         trace_virtio_net_rss_disable();
1306     }
1307 }
1308 
1309 static void virtio_net_disable_rss(VirtIONet *n)
1310 {
1311     if (!n->rss_data.enabled) {
1312         return;
1313     }
1314 
1315     n->rss_data.enabled = false;
1316     virtio_net_commit_rss_config(n);
1317 }
1318 
1319 static bool virtio_net_load_ebpf_fds(VirtIONet *n)
1320 {
1321     int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1322     bool ret = true;
1323     int i = 0;
1324 
1325     if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1326         warn_report("Expected %d file descriptors but got %d",
1327                     EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1328         return false;
1329     }
1330 
1331     for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1332         fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i],
1333                                   &error_warn);
1334         if (fds[i] < 0) {
1335             ret = false;
1336             goto exit;
1337         }
1338     }
1339 
1340     ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], NULL);
1341 
1342 exit:
1343     if (!ret) {
1344         for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1345             close(fds[i]);
1346         }
1347     }
1348 
1349     return ret;
1350 }
1351 
1352 static bool virtio_net_load_ebpf(VirtIONet *n)
1353 {
1354     bool ret = false;
1355 
1356     if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1357         if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) {
1358             ret = ebpf_rss_load(&n->ebpf_rss, NULL);
1359         }
1360     }
1361 
1362     return ret;
1363 }
1364 
1365 static void virtio_net_unload_ebpf(VirtIONet *n)
1366 {
1367     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1368     ebpf_rss_unload(&n->ebpf_rss);
1369 }
1370 
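/* Parse a virtio_net_rss_config from the control queue (with do_rss ==
 * false, only the hash-report fields are used) and return the number of
 * queue pairs to use, or 0 on error. */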
1371 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1372                                       struct iovec *iov,
1373                                       unsigned int iov_cnt,
1374                                       bool do_rss)
1375 {
1376     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1377     struct virtio_net_rss_config cfg;
1378     size_t s, offset = 0, size_get;
1379     uint16_t queue_pairs, i;
1380     struct {
1381         uint16_t us;
1382         uint8_t b;
1383     } QEMU_PACKED temp;
1384     const char *err_msg = "";
1385     uint32_t err_value = 0;
1386 
1387     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1388         err_msg = "RSS is not negotiated";
1389         goto error;
1390     }
1391     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1392         err_msg = "Hash report is not negotiated";
1393         goto error;
1394     }
1395     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1396     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1397     if (s != size_get) {
1398         err_msg = "Short command buffer";
1399         err_value = (uint32_t)s;
1400         goto error;
1401     }
1402     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1403     n->rss_data.indirections_len =
1404         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1405     n->rss_data.indirections_len++;
1406     if (!do_rss) {
1407         n->rss_data.indirections_len = 1;
1408     }
1409     if (!is_power_of_2(n->rss_data.indirections_len)) {
1410         err_msg = "Invalid size of indirection table";
1411         err_value = n->rss_data.indirections_len;
1412         goto error;
1413     }
1414     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1415         err_msg = "Too large indirection table";
1416         err_value = n->rss_data.indirections_len;
1417         goto error;
1418     }
1419     n->rss_data.default_queue = do_rss ?
1420         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1421     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1422         err_msg = "Invalid default queue";
1423         err_value = n->rss_data.default_queue;
1424         goto error;
1425     }
1426     offset += size_get;
1427     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1428     g_free(n->rss_data.indirections_table);
1429     n->rss_data.indirections_table = g_malloc(size_get);
1430     if (!n->rss_data.indirections_table) {
1431         err_msg = "Can't allocate indirections table";
1432         err_value = n->rss_data.indirections_len;
1433         goto error;
1434     }
1435     s = iov_to_buf(iov, iov_cnt, offset,
1436                    n->rss_data.indirections_table, size_get);
1437     if (s != size_get) {
1438         err_msg = "Short indirection table buffer";
1439         err_value = (uint32_t)s;
1440         goto error;
1441     }
1442     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1443         uint16_t val = n->rss_data.indirections_table[i];
1444         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1445     }
1446     offset += size_get;
1447     size_get = sizeof(temp);
1448     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1449     if (s != size_get) {
1450         err_msg = "Can't get queue_pairs";
1451         err_value = (uint32_t)s;
1452         goto error;
1453     }
1454     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1455     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1456         err_msg = "Invalid number of queue_pairs";
1457         err_value = queue_pairs;
1458         goto error;
1459     }
1460     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1461         err_msg = "Invalid key size";
1462         err_value = temp.b;
1463         goto error;
1464     }
1465     if (!temp.b && n->rss_data.hash_types) {
1466         err_msg = "No key provided";
1467         err_value = 0;
1468         goto error;
1469     }
1470     if (!temp.b && !n->rss_data.hash_types) {
1471         virtio_net_disable_rss(n);
1472         return queue_pairs;
1473     }
1474     offset += size_get;
1475     size_get = temp.b;
1476     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1477     if (s != size_get) {
1478         err_msg = "Can get key buffer";
1479         err_value = (uint32_t)s;
1480         goto error;
1481     }
1482     n->rss_data.enabled = true;
1483     virtio_net_commit_rss_config(n);
1484     return queue_pairs;
1485 error:
1486     trace_virtio_net_rss_error(err_msg, err_value);
1487     virtio_net_disable_rss(n);
1488     return 0;
1489 }
1490 
1491 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1492                                 struct iovec *iov, unsigned int iov_cnt)
1493 {
1494     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1495     uint16_t queue_pairs;
1496     NetClientState *nc = qemu_get_queue(n->nic);
1497 
1498     virtio_net_disable_rss(n);
1499     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1500         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1501         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1502     }
1503     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1504         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1505     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1506         struct virtio_net_ctrl_mq mq;
1507         size_t s;
1508         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1509             return VIRTIO_NET_ERR;
1510         }
1511         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1512         if (s != sizeof(mq)) {
1513             return VIRTIO_NET_ERR;
1514         }
1515         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1516 
1517     } else {
1518         return VIRTIO_NET_ERR;
1519     }
1520 
1521     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1522         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1523         queue_pairs > n->max_queue_pairs ||
1524         !n->multiqueue) {
1525         return VIRTIO_NET_ERR;
1526     }
1527 
1528     n->curr_queue_pairs = queue_pairs;
1529     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1530         /*
1531          * Avoid updating the backend for a vdpa device: We're only interested
1532          * in updating the device model queues.
1533          */
1534         return VIRTIO_NET_OK;
1535     }
1536     /* Stop the backend before changing the number of queue_pairs to
1537      * avoid handling a disabled queue. */
1538     virtio_net_set_status(vdev, vdev->status);
1539     virtio_net_set_queue_pairs(n);
1540 
1541     return VIRTIO_NET_OK;
1542 }
1543 
1544 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1545                                   const struct iovec *in_sg, unsigned in_num,
1546                                   const struct iovec *out_sg,
1547                                   unsigned out_num)
1548 {
1549     VirtIONet *n = VIRTIO_NET(vdev);
1550     struct virtio_net_ctrl_hdr ctrl;
1551     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1552     size_t s;
1553     struct iovec *iov, *iov2;
1554 
1555     if (iov_size(in_sg, in_num) < sizeof(status) ||
1556         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1557         virtio_error(vdev, "virtio-net ctrl missing headers");
1558         return 0;
1559     }
1560 
1561     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1562     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1563     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1564     if (s != sizeof(ctrl)) {
1565         status = VIRTIO_NET_ERR;
1566     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1567         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1568     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1569         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1570     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1571         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1572     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1573         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1574     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1575         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1576     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1577         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1578     }
1579 
1580     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1581     assert(s == sizeof(status));
1582 
1583     g_free(iov2);
1584     return sizeof(status);
1585 }
1586 
1587 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1588 {
1589     VirtQueueElement *elem;
1590 
1591     for (;;) {
1592         size_t written;
1593         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1594         if (!elem) {
1595             break;
1596         }
1597 
1598         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1599                                              elem->out_sg, elem->out_num);
1600         if (written > 0) {
1601             virtqueue_push(vq, elem, written);
1602             virtio_notify(vdev, vq);
1603             g_free(elem);
1604         } else {
1605             virtqueue_detach_element(vq, elem, 0);
1606             g_free(elem);
1607             break;
1608         }
1609     }
1610 }
1611 
1612 /* RX */
1613 
1614 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1615 {
1616     VirtIONet *n = VIRTIO_NET(vdev);
1617     int queue_index = vq2q(virtio_get_queue_index(vq));
1618 
1619     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1620 }
1621 
1622 static bool virtio_net_can_receive(NetClientState *nc)
1623 {
1624     VirtIONet *n = qemu_get_nic_opaque(nc);
1625     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1626     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1627 
1628     if (!vdev->vm_running) {
1629         return false;
1630     }
1631 
1632     if (nc->queue_index >= n->curr_queue_pairs) {
1633         return false;
1634     }
1635 
1636     if (!virtio_queue_ready(q->rx_vq) ||
1637         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1638         return false;
1639     }
1640 
1641     return true;
1642 }
1643 
1644 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1645 {
1646     int opaque;
1647     unsigned int in_bytes;
1648     VirtIONet *n = q->n;
1649 
1650     while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
1651         opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
1652                                            bufsize, 0);
1653         /* Enough buffer space available, disable notification */
1654         if (bufsize <= in_bytes) {
1655             break;
1656         }
1657 
1658         if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
1659             /* Guest has added some buffers, try again */
1660             continue;
1661         } else {
1662             return 0;
1663         }
1664     }
1665 
1666     virtio_queue_set_notification(q->rx_vq, 0);
1667 
1668     return 1;
1669 }
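
/*
 * The loop above is the canonical "check, enable notification, re-check"
 * pattern that closes the race with a guest adding buffers concurrently.
 * Schematically (simplified names, not the real API):
 *
 *     while (!enough_buffers(vq)) {
 *         opaque = snapshot_avail(vq);             // remember ring state
 *         if (!enable_notification_and_check(vq, opaque)) {
 *             return 0;                            // nothing new: wait for a kick
 *         }
 *         // buffers arrived between the size test and re-enabling
 *         // notifications, so loop and measure again
 *     }
 *     disable_notification(vq);                    // we'll poll from here on
 *
 * Without the re-check, a buffer added after the size test but before
 * notifications were re-enabled would never generate a guest kick and
 * the packet would be stuck until some unrelated event.
 */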
1670 
1671 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1672 {
1673     virtio_tswap16s(vdev, &hdr->hdr_len);
1674     virtio_tswap16s(vdev, &hdr->gso_size);
1675     virtio_tswap16s(vdev, &hdr->csum_start);
1676     virtio_tswap16s(vdev, &hdr->csum_offset);
1677 }
1678 
1679 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1680  * it never finds out that the packets don't have valid checksums.  This
1681  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1682  * fix this with Xen but it hasn't appeared in an upstream release of
1683  * dhclient yet.
1684  *
1685  * To avoid breaking existing guests, we catch udp packets and add
1686  * checksums.  This is terrible but it's better than hacking the guest
1687  * kernels.
1688  *
1689  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1690  * we should provide a mechanism to disable it to avoid polluting the host
1691  * cache.
1692  */
1693 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1694                                         uint8_t *buf, size_t size)
1695 {
1696     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1697         (size > 27 && size < 1500) && /* normal sized MTU */
1698         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1699         (buf[23] == 17) && /* ip.protocol == UDP */
1700         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1701         net_checksum_calculate(buf, size, CSUM_UDP);
1702         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1703     }
1704 }
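
/*
 * The magic numbers above assume an untagged Ethernet frame carrying an
 * option-less 20-byte IPv4 header, so the offsets work out as:
 *
 *     buf[12..13]  ethertype        last 2 bytes of the 14-byte Ethernet header
 *     buf[23]      IP protocol      14 + 9  (offset of the protocol field)
 *     buf[34..35]  UDP source port  14 + 20 (first field of the UDP header)
 *
 * so 0x0800 / 17 / 67 selects exactly "IPv4, UDP, sent from the
 * BOOTP/DHCP server port".  A VLAN tag or IP options shift these
 * offsets, making such packets fail the match and skip the workaround.
 */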
1705 
1706 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1707                            const void *buf, size_t size)
1708 {
1709     if (n->has_vnet_hdr) {
1710         /* FIXME this cast is evil */
1711         void *wbuf = (void *)buf;
1712         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1713                                     size - n->host_hdr_len);
1714 
1715         if (n->needs_vnet_hdr_swap) {
1716             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1717         }
1718         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1719     } else {
1720         struct virtio_net_hdr hdr = {
1721             .flags = 0,
1722             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1723         };
1724         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1725     }
1726 }
1727 
1728 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1729 {
1730     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1731     static const uint8_t vlan[] = {0x81, 0x00};
1732     uint8_t *ptr = (uint8_t *)buf;
1733     int i;
1734 
1735     if (n->promisc)
1736         return 1;
1737 
1738     ptr += n->host_hdr_len;
1739 
1740     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1741         int vid = lduw_be_p(ptr + 14) & 0xfff;
1742         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1743             return 0;
1744     }
1745 
1746     if (ptr[0] & 1) { /* multicast */
1747         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1748             return !n->nobcast;
1749         } else if (n->nomulti) {
1750             return 0;
1751         } else if (n->allmulti || n->mac_table.multi_overflow) {
1752             return 1;
1753         }
1754 
1755         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1756             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1757                 return 1;
1758             }
1759         }
1760     } else { /* unicast */
1761         if (n->nouni) {
1762             return 0;
1763         } else if (n->alluni || n->mac_table.uni_overflow) {
1764             return 1;
1765         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1766             return 1;
1767         }
1768 
1769         for (i = 0; i < n->mac_table.first_multi; i++) {
1770             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1771                 return 1;
1772             }
1773         }
1774     }
1775 
1776     return 0;
1777 }
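
/*
 * The VLAN filter consulted above is a 4096-bit bitmap kept as 32-bit
 * words: vlans[vid >> 5] selects the word, 1 << (vid & 0x1f) the bit.
 * Worked example for VID 100:
 *
 *     100 >> 5   == 3     // word index
 *     100 & 0x1f == 4     // bit index within that word
 *
 * so VID 100 passes iff bit 4 of n->vlans[3] is set.  The multicast
 * test ptr[0] & 1 relies on the Ethernet I/G bit: the least-significant
 * bit of the first address octet is 1 for all group (multicast and
 * broadcast) destinations.
 */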
1778 
1779 static uint8_t virtio_net_get_hash_type(bool hasip4,
1780                                         bool hasip6,
1781                                         EthL4HdrProto l4hdr_proto,
1782                                         uint32_t types)
1783 {
1784     if (hasip4) {
1785         switch (l4hdr_proto) {
1786         case ETH_L4_HDR_PROTO_TCP:
1787             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1788                 return NetPktRssIpV4Tcp;
1789             }
1790             break;
1791 
1792         case ETH_L4_HDR_PROTO_UDP:
1793             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1794                 return NetPktRssIpV4Udp;
1795             }
1796             break;
1797 
1798         default:
1799             break;
1800         }
1801 
1802         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1803             return NetPktRssIpV4;
1804         }
1805     } else if (hasip6) {
1806         switch (l4hdr_proto) {
1807         case ETH_L4_HDR_PROTO_TCP:
1808             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1809                 return NetPktRssIpV6TcpEx;
1810             }
1811             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1812                 return NetPktRssIpV6Tcp;
1813             }
1814             break;
1815 
1816         case ETH_L4_HDR_PROTO_UDP:
1817             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1818                 return NetPktRssIpV6UdpEx;
1819             }
1820             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1821                 return NetPktRssIpV6Udp;
1822             }
1823             break;
1824 
1825         default:
1826             break;
1827         }
1828 
1829         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1830             return NetPktRssIpV6Ex;
1831         }
1832         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1833             return NetPktRssIpV6;
1834         }
1835     }
1836     return 0xff;
1837 }
1838 
1839 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1840                                   size_t size,
1841                                   struct virtio_net_hdr_v1_hash *hdr)
1842 {
1843     VirtIONet *n = qemu_get_nic_opaque(nc);
1844     unsigned int index = nc->queue_index, new_index = index;
1845     struct NetRxPkt *pkt = n->rx_pkt;
1846     uint8_t net_hash_type;
1847     uint32_t hash;
1848     bool hasip4, hasip6;
1849     EthL4HdrProto l4hdr_proto;
1850     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1851         VIRTIO_NET_HASH_REPORT_IPv4,
1852         VIRTIO_NET_HASH_REPORT_TCPv4,
1853         VIRTIO_NET_HASH_REPORT_TCPv6,
1854         VIRTIO_NET_HASH_REPORT_IPv6,
1855         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1856         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1857         VIRTIO_NET_HASH_REPORT_UDPv4,
1858         VIRTIO_NET_HASH_REPORT_UDPv6,
1859         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1860     };
1861     struct iovec iov = {
1862         .iov_base = (void *)buf,
1863         .iov_len = size
1864     };
1865 
1866     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1867     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1868     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1869                                              n->rss_data.hash_types);
1870     if (net_hash_type > NetPktRssIpV6UdpEx) {
1871         if (n->rss_data.populate_hash) {
1872             hdr->hash_value = 0;
1873             hdr->hash_report = VIRTIO_NET_HASH_REPORT_NONE;
1874         }
1875         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1876     }
1877 
1878     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1879 
1880     if (n->rss_data.populate_hash) {
1881         hdr->hash_value = hash;
1882         hdr->hash_report = reports[net_hash_type];
1883     }
1884 
1885     if (n->rss_data.redirect) {
1886         new_index = hash & (n->rss_data.indirections_len - 1);
1887         new_index = n->rss_data.indirections_table[new_index];
1888     }
1889 
1890     return (index == new_index) ? -1 : new_index;
1891 }
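
/*
 * The redirection step above maps the 32-bit hash onto a queue in two
 * stages; the bitmask only behaves like a modulo because the device
 * rejects indirection tables whose length is not a power of two.
 * Minimal worked example with an 8-entry table (values illustrative):
 *
 *     uint16_t table[8] = { 0, 1, 2, 3, 0, 1, 2, 3 };
 *     uint32_t hash     = 0x00c0ffee;
 *
 *     unsigned slot  = hash & (8 - 1);   // 0x...e & 7 == 6
 *     unsigned queue = table[slot];      // queue 2
 *
 * Returning -1 ("stay on the current queue") lets the caller deliver
 * directly instead of recursing into virtio_net_receive_rcu() again.
 */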
1892 
1893 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1894                                       size_t size, bool no_rss)
1895 {
1896     VirtIONet *n = qemu_get_nic_opaque(nc);
1897     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1898     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1899     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1900     size_t lens[VIRTQUEUE_MAX_SIZE];
1901     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1902     struct virtio_net_hdr_v1_hash extra_hdr;
1903     unsigned mhdr_cnt = 0;
1904     size_t offset, i, guest_offset, j;
1905     ssize_t err;
1906 
1907     if (!virtio_net_can_receive(nc)) {
1908         return -1;
1909     }
1910 
1911     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1912         int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
1913         if (index >= 0) {
1914             NetClientState *nc2 =
1915                 qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
1916             return virtio_net_receive_rcu(nc2, buf, size, true);
1917         }
1918     }
1919 
1920     /* hdr_len refers to the header we supply to the guest */
1921     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1922         return 0;
1923     }
1924 
1925     if (!receive_filter(n, buf, size))
1926         return size;
1927 
1928     offset = i = 0;
1929 
1930     while (offset < size) {
1931         VirtQueueElement *elem;
1932         int len, total;
1933         const struct iovec *sg;
1934 
1935         total = 0;
1936 
1937         if (i == VIRTQUEUE_MAX_SIZE) {
1938             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1939             err = size;
1940             goto err;
1941         }
1942 
1943         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1944         if (!elem) {
1945             if (i) {
1946                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1947                              "i %zd mergeable %d offset %zd, size %zd, "
1948                              "guest hdr len %zd, host hdr len %zd "
1949                              "guest features 0x%" PRIx64,
1950                              i, n->mergeable_rx_bufs, offset, size,
1951                              n->guest_hdr_len, n->host_hdr_len,
1952                              vdev->guest_features);
1953             }
1954             err = -1;
1955             goto err;
1956         }
1957 
1958         if (elem->in_num < 1) {
1959             virtio_error(vdev,
1960                          "virtio-net receive queue contains no in buffers");
1961             virtqueue_detach_element(q->rx_vq, elem, 0);
1962             g_free(elem);
1963             err = -1;
1964             goto err;
1965         }
1966 
1967         sg = elem->in_sg;
1968         if (i == 0) {
1969             assert(offset == 0);
1970             if (n->mergeable_rx_bufs) {
1971                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1972                                     sg, elem->in_num,
1973                                     offsetof(typeof(extra_hdr), hdr.num_buffers),
1974                                     sizeof(extra_hdr.hdr.num_buffers));
1975             }
1976 
1977             receive_header(n, sg, elem->in_num, buf, size);
1978             if (n->rss_data.populate_hash) {
1979                 offset = offsetof(typeof(extra_hdr), hash_value);
1980                 iov_from_buf(sg, elem->in_num, offset,
1981                              (char *)&extra_hdr + offset,
1982                              sizeof(extra_hdr.hash_value) +
1983                              sizeof(extra_hdr.hash_report));
1984             }
1985             offset = n->host_hdr_len;
1986             total += n->guest_hdr_len;
1987             guest_offset = n->guest_hdr_len;
1988         } else {
1989             guest_offset = 0;
1990         }
1991 
1992         /* copy in packet.  ugh */
1993         len = iov_from_buf(sg, elem->in_num, guest_offset,
1994                            buf + offset, size - offset);
1995         total += len;
1996         offset += len;
1997         /* If buffers can't be merged, at this point we
1998          * must have consumed the complete packet.
1999          * Otherwise, drop it. */
2000         if (!n->mergeable_rx_bufs && offset < size) {
2001             virtqueue_unpop(q->rx_vq, elem, total);
2002             g_free(elem);
2003             err = size;
2004             goto err;
2005         }
2006 
2007         elems[i] = elem;
2008         lens[i] = total;
2009         i++;
2010     }
2011 
2012     if (mhdr_cnt) {
2013         virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
2014         iov_from_buf(mhdr_sg, mhdr_cnt,
2015                      0,
2016                      &extra_hdr.hdr.num_buffers,
2017                      sizeof extra_hdr.hdr.num_buffers);
2018     }
2019 
2020     for (j = 0; j < i; j++) {
2021         /* signal other side */
2022         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2023         g_free(elems[j]);
2024     }
2025 
2026     virtqueue_flush(q->rx_vq, i);
2027     virtio_notify(vdev, q->rx_vq);
2028 
2029     return size;
2030 
2031 err:
2032     for (j = 0; j < i; j++) {
2033         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2034         g_free(elems[j]);
2035     }
2036 
2037     return err;
2038 }
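
/*
 * A note on the mergeable-buffer dance above: hdr.num_buffers cannot be
 * known until the whole packet has been scattered, so the first pass
 * only records where that 16-bit field lives inside the first element
 * (mhdr_sg, captured with iov_copy() at the field's byte offset), and
 * the count is patched in afterwards:
 *
 *     virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);  // i == elements used
 *     iov_from_buf(mhdr_sg, mhdr_cnt, 0, ...);            // write it back
 *
 * Likewise the elements are only virtqueue_fill()ed and flushed once
 * the packet is complete, so the error path can still detach every
 * element without the guest ever observing a partial buffer chain.
 */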
2039 
2040 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2041                                   size_t size)
2042 {
2043     RCU_READ_LOCK_GUARD();
2044 
2045     return virtio_net_receive_rcu(nc, buf, size, false);
2046 }
2047 
2048 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2049                                          const uint8_t *buf,
2050                                          VirtioNetRscUnit *unit)
2051 {
2052     uint16_t ip_hdrlen;
2053     struct ip_header *ip;
2054 
2055     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2056                               + sizeof(struct eth_header));
2057     unit->ip = (void *)ip;
2058     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2059     unit->ip_plen = &ip->ip_len;
2060     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2061     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2062     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2063 }
2064 
2065 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2066                                          const uint8_t *buf,
2067                                          VirtioNetRscUnit *unit)
2068 {
2069     struct ip6_header *ip6;
2070 
2071     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2072                                  + sizeof(struct eth_header));
2073     unit->ip = ip6;
2074     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2075     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2076                                         + sizeof(struct ip6_header));
2077     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2078 
2079     /* The payload length differs between IPv4 and IPv6: the IPv6 value
2080        already excludes the IP header, so only the TCP header is subtracted */
2081     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2082 }
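
/*
 * Both extractors rely on the same bit arithmetic on network-order
 * fields:
 *
 *     ip_hdrlen  = (ip->ip_ver_len & 0xF) << 2;
 *         // IHL counts 32-bit words; x4 converts to bytes (5 -> 20)
 *
 *     tcp_hdrlen = (htons(th_offset_flags) & 0xF000) >> 10;
 *         // the data offset occupies the top 4 bits: >> 12 isolates
 *         // it, << 2 scales words to bytes, folded into one >> 10
 *         // (e.g. 0x5000 >> 10 == 20, a TCP header without options)
 */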
2083 
2084 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2085                                        VirtioNetRscSeg *seg)
2086 {
2087     int ret;
2088     struct virtio_net_hdr_v1 *h;
2089 
2090     h = (struct virtio_net_hdr_v1 *)seg->buf;
2091     h->flags = 0;
2092     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2093 
2094     if (seg->is_coalesced) {
2095         h->rsc.segments = seg->packets;
2096         h->rsc.dup_acks = seg->dup_ack;
2097         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2098         if (chain->proto == ETH_P_IP) {
2099             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2100         } else {
2101             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2102         }
2103     }
2104 
2105     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2106     QTAILQ_REMOVE(&chain->buffers, seg, next);
2107     g_free(seg->buf);
2108     g_free(seg);
2109 
2110     return ret;
2111 }
2112 
2113 static void virtio_net_rsc_purge(void *opq)
2114 {
2115     VirtioNetRscSeg *seg, *rn;
2116     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2117 
2118     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2119         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2120             chain->stat.purge_failed++;
2121             continue;
2122         }
2123     }
2124 
2125     chain->stat.timer++;
2126     if (!QTAILQ_EMPTY(&chain->buffers)) {
2127         timer_mod(chain->drain_timer,
2128               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2129     }
2130 }
2131 
2132 static void virtio_net_rsc_cleanup(VirtIONet *n)
2133 {
2134     VirtioNetRscChain *chain, *rn_chain;
2135     VirtioNetRscSeg *seg, *rn_seg;
2136 
2137     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2138         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2139             QTAILQ_REMOVE(&chain->buffers, seg, next);
2140             g_free(seg->buf);
2141             g_free(seg);
2142         }
2143 
2144         timer_free(chain->drain_timer);
2145         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2146         g_free(chain);
2147     }
2148 }
2149 
2150 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2151                                      NetClientState *nc,
2152                                      const uint8_t *buf, size_t size)
2153 {
2154     uint16_t hdr_len;
2155     VirtioNetRscSeg *seg;
2156 
2157     hdr_len = chain->n->guest_hdr_len;
2158     seg = g_new(VirtioNetRscSeg, 1);
2159     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2160         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2161     memcpy(seg->buf, buf, size);
2162     seg->size = size;
2163     seg->packets = 1;
2164     seg->dup_ack = 0;
2165     seg->is_coalesced = 0;
2166     seg->nc = nc;
2167 
2168     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2169     chain->stat.cache++;
2170 
2171     switch (chain->proto) {
2172     case ETH_P_IP:
2173         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2174         break;
2175     case ETH_P_IPV6:
2176         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2177         break;
2178     default:
2179         g_assert_not_reached();
2180     }
2181 }
2182 
2183 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2184                                          VirtioNetRscSeg *seg,
2185                                          const uint8_t *buf,
2186                                          struct tcp_header *n_tcp,
2187                                          struct tcp_header *o_tcp)
2188 {
2189     uint32_t nack, oack;
2190     uint16_t nwin, owin;
2191 
2192     nack = htonl(n_tcp->th_ack);
2193     nwin = htons(n_tcp->th_win);
2194     oack = htonl(o_tcp->th_ack);
2195     owin = htons(o_tcp->th_win);
2196 
2197     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2198         chain->stat.ack_out_of_win++;
2199         return RSC_FINAL;
2200     } else if (nack == oack) {
2201         /* duplicate ack or window probe */
2202         if (nwin == owin) {
2203             /* duplicate ack; bump dup-ack count (WHQL allows up to 1) */
2204             chain->stat.dup_ack++;
2205             return RSC_FINAL;
2206         } else {
2207             /* Coalesce window update */
2208             o_tcp->th_win = n_tcp->th_win;
2209             chain->stat.win_update++;
2210             return RSC_COALESCE;
2211         }
2212     } else {
2213         /* pure ack, finalize the cached segment */
2214         chain->stat.pure_ack++;
2215         return RSC_FINAL;
2216     }
2217 }
2218 
2219 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2220                                             VirtioNetRscSeg *seg,
2221                                             const uint8_t *buf,
2222                                             VirtioNetRscUnit *n_unit)
2223 {
2224     void *data;
2225     uint16_t o_ip_len;
2226     uint32_t nseq, oseq;
2227     VirtioNetRscUnit *o_unit;
2228 
2229     o_unit = &seg->unit;
2230     o_ip_len = htons(*o_unit->ip_plen);
2231     nseq = htonl(n_unit->tcp->th_seq);
2232     oseq = htonl(o_unit->tcp->th_seq);
2233 
2234     /* out of order or retransmitted. */
2235     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2236         chain->stat.data_out_of_win++;
2237         return RSC_FINAL;
2238     }
2239 
2240     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2241     if (nseq == oseq) {
2242         if ((o_unit->payload == 0) && n_unit->payload) {
2243             /* From no payload to payload: the normal case, not a dup ack */
2244             chain->stat.data_after_pure_ack++;
2245             goto coalesce;
2246         } else {
2247             return virtio_net_rsc_handle_ack(chain, seg, buf,
2248                                              n_unit->tcp, o_unit->tcp);
2249         }
2250     } else if ((nseq - oseq) != o_unit->payload) {
2251         /* Not a consistent packet, out of order */
2252         chain->stat.data_out_of_order++;
2253         return RSC_FINAL;
2254     } else {
2255 coalesce:
2256         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2257             chain->stat.over_size++;
2258             return RSC_FINAL;
2259         }
2260 
2261         /* The data is contiguous. The payload length field differs between
2262            v4 and v6, so update it through ip_plen and record the new length */
2263         o_unit->payload += n_unit->payload; /* update new data len */
2264 
2265         /* update field in ip header */
2266         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2267 
2268         /* Take the newer segment's flags (notably 'PSH'): the WHQL test
2269            guide says 'PUSH' can be coalesced for Windows guests, though this
2270            may change behavior for Linux guests (only if they use RSC). */
2271         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2272 
2273         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2274         o_unit->tcp->th_win = n_unit->tcp->th_win;
2275 
2276         memmove(seg->buf + seg->size, data, n_unit->payload);
2277         seg->size += n_unit->payload;
2278         seg->packets++;
2279         chain->stat.coalesced++;
2280         return RSC_COALESCE;
2281     }
2282 }
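
/*
 * The sequence checks above exploit unsigned 32-bit wraparound:
 * (nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD rejects both "too far
 * ahead" and "behind" (a retransmit makes the subtraction wrap to a
 * huge value).  The contiguity test then reads, with the cached
 * segment ending where the new one should begin:
 *
 *     oseq = 1000, o_unit->payload = 500
 *     nseq = 1500  ->  nseq - oseq == 500  ->  contiguous, coalesce
 *     nseq = 1700  ->  nseq - oseq == 700  ->  gap, data_out_of_order
 */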
2283 
2284 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2285                                         VirtioNetRscSeg *seg,
2286                                         const uint8_t *buf, size_t size,
2287                                         VirtioNetRscUnit *unit)
2288 {
2289     struct ip_header *ip1, *ip2;
2290 
2291     ip1 = (struct ip_header *)(unit->ip);
2292     ip2 = (struct ip_header *)(seg->unit.ip);
2293     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2294         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2295         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2296         chain->stat.no_match++;
2297         return RSC_NO_MATCH;
2298     }
2299 
2300     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2301 }
2302 
2303 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2304                                         VirtioNetRscSeg *seg,
2305                                         const uint8_t *buf, size_t size,
2306                                         VirtioNetRscUnit *unit)
2307 {
2308     struct ip6_header *ip1, *ip2;
2309 
2310     ip1 = (struct ip6_header *)(unit->ip);
2311     ip2 = (struct ip6_header *)(seg->unit.ip);
2312     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2313         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2314         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2315         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2316             chain->stat.no_match++;
2317             return RSC_NO_MATCH;
2318     }
2319 
2320     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2321 }
2322 
2323 /* Packets with 'SYN' bypass coalescing; packets with other control flags
2324  * are sent only after the flow is drained, to prevent reordering */
2325 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2326                                          struct tcp_header *tcp)
2327 {
2328     uint16_t tcp_hdr;
2329     uint16_t tcp_flag;
2330 
2331     tcp_flag = htons(tcp->th_offset_flags);
2332     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2333     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2334     if (tcp_flag & TH_SYN) {
2335         chain->stat.tcp_syn++;
2336         return RSC_BYPASS;
2337     }
2338 
2339     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2340         chain->stat.tcp_ctrl_drain++;
2341         return RSC_FINAL;
2342     }
2343 
2344     if (tcp_hdr > sizeof(struct tcp_header)) {
2345         chain->stat.tcp_all_opt++;
2346         return RSC_FINAL;
2347     }
2348 
2349     return RSC_CANDIDATE;
2350 }
2351 
2352 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2353                                          NetClientState *nc,
2354                                          const uint8_t *buf, size_t size,
2355                                          VirtioNetRscUnit *unit)
2356 {
2357     int ret;
2358     VirtioNetRscSeg *seg, *nseg;
2359 
2360     if (QTAILQ_EMPTY(&chain->buffers)) {
2361         chain->stat.empty_cache++;
2362         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2363         timer_mod(chain->drain_timer,
2364               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2365         return size;
2366     }
2367 
2368     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2369         if (chain->proto == ETH_P_IP) {
2370             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2371         } else {
2372             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2373         }
2374 
2375         if (ret == RSC_FINAL) {
2376             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2377                 /* Send failed */
2378                 chain->stat.final_failed++;
2379                 return 0;
2380             }
2381 
2382             /* Send current packet */
2383             return virtio_net_do_receive(nc, buf, size);
2384         } else if (ret == RSC_NO_MATCH) {
2385             continue;
2386         } else {
2387             /* Coalesced; mark the flag so the cksum is calculated for ipv4 */
2388             seg->is_coalesced = 1;
2389             return size;
2390         }
2391     }
2392 
2393     chain->stat.no_match_cache++;
2394     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2395     return size;
2396 }
2397 
2398 /* Drain a connection's cached data, to avoid out-of-order segments */
2399 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2400                                         NetClientState *nc,
2401                                         const uint8_t *buf, size_t size,
2402                                         uint16_t ip_start, uint16_t ip_size,
2403                                         uint16_t tcp_port)
2404 {
2405     VirtioNetRscSeg *seg, *nseg;
2406     uint32_t ppair1, ppair2;
2407 
2408     ppair1 = *(uint32_t *)(buf + tcp_port);
2409     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2410         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2411         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2412             || (ppair1 != ppair2)) {
2413             continue;
2414         }
2415         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2416             chain->stat.drain_failed++;
2417         }
2418 
2419         break;
2420     }
2421 
2422     return virtio_net_do_receive(nc, buf, size);
2423 }
2424 
2425 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2426                                             struct ip_header *ip,
2427                                             const uint8_t *buf, size_t size)
2428 {
2429     uint16_t ip_len;
2430 
2431     /* Not an ipv4 packet */
2432     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2433         chain->stat.ip_option++;
2434         return RSC_BYPASS;
2435     }
2436 
2437     /* Don't handle packets with ip option */
2438     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2439         chain->stat.ip_option++;
2440         return RSC_BYPASS;
2441     }
2442 
2443     if (ip->ip_p != IPPROTO_TCP) {
2444         chain->stat.bypass_not_tcp++;
2445         return RSC_BYPASS;
2446     }
2447 
2448     /* Don't handle packets with ip fragment */
2449     if (!(htons(ip->ip_off) & IP_DF)) {
2450         chain->stat.ip_frag++;
2451         return RSC_BYPASS;
2452     }
2453 
2454     /* Don't handle packets with ecn flag */
2455     if (IPTOS_ECN(ip->ip_tos)) {
2456         chain->stat.ip_ecn++;
2457         return RSC_BYPASS;
2458     }
2459 
2460     ip_len = htons(ip->ip_len);
2461     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2462         || ip_len > (size - chain->n->guest_hdr_len -
2463                      sizeof(struct eth_header))) {
2464         chain->stat.ip_hacked++;
2465         return RSC_BYPASS;
2466     }
2467 
2468     return RSC_CANDIDATE;
2469 }
2470 
2471 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2472                                       NetClientState *nc,
2473                                       const uint8_t *buf, size_t size)
2474 {
2475     int32_t ret;
2476     uint16_t hdr_len;
2477     VirtioNetRscUnit unit;
2478 
2479     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2480 
2481     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2482         + sizeof(struct tcp_header))) {
2483         chain->stat.bypass_not_tcp++;
2484         return virtio_net_do_receive(nc, buf, size);
2485     }
2486 
2487     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2488     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2489         != RSC_CANDIDATE) {
2490         return virtio_net_do_receive(nc, buf, size);
2491     }
2492 
2493     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2494     if (ret == RSC_BYPASS) {
2495         return virtio_net_do_receive(nc, buf, size);
2496     } else if (ret == RSC_FINAL) {
2497         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2498                 ((hdr_len + sizeof(struct eth_header)) + 12),
2499                 VIRTIO_NET_IP4_ADDR_SIZE,
2500                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2501     }
2502 
2503     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2504 }
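
/*
 * The constants handed to virtio_net_rsc_drain_flow() above identify
 * the flow by raw byte offsets into the frame:
 *
 *     ip_start = hdr_len + 14 + 12   // IPv4 saddr; daddr follows, so
 *                                    // ip_size == 8 covers both
 *     tcp_port = hdr_len + 14 + 20   // sport and dport, compared as a
 *                                    // single 32-bit load
 *
 * i.e. the classic (saddr, daddr, sport, dport) 4-tuple matched with
 * one memcmp() plus one integer compare.  The IPv6 path below does the
 * same with saddr at offset 8 into the IPv6 header and 32 address bytes.
 */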
2505 
2506 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2507                                             struct ip6_header *ip6,
2508                                             const uint8_t *buf, size_t size)
2509 {
2510     uint16_t ip_len;
2511 
2512     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2513         != IP_HEADER_VERSION_6) {
2514         return RSC_BYPASS;
2515     }
2516 
2517     /* Checks both options and protocol: any extension header changes nxt */
2518     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2519         chain->stat.bypass_not_tcp++;
2520         return RSC_BYPASS;
2521     }
2522 
2523     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2524     if (ip_len < sizeof(struct tcp_header) ||
2525         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2526                   - sizeof(struct ip6_header))) {
2527         chain->stat.ip_hacked++;
2528         return RSC_BYPASS;
2529     }
2530 
2531     /* Don't handle packets with ecn flag */
2532     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2533         chain->stat.ip_ecn++;
2534         return RSC_BYPASS;
2535     }
2536 
2537     return RSC_CANDIDATE;
2538 }
2539 
2540 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2541                                       const uint8_t *buf, size_t size)
2542 {
2543     int32_t ret;
2544     uint16_t hdr_len;
2545     VirtioNetRscChain *chain;
2546     VirtioNetRscUnit unit;
2547 
2548     chain = opq;
2549     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2550 
2551     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2552         + sizeof(tcp_header))) {
2553         return virtio_net_do_receive(nc, buf, size);
2554     }
2555 
2556     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2557     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2558                                                  unit.ip, buf, size)) {
2559         return virtio_net_do_receive(nc, buf, size);
2560     }
2561 
2562     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2563     if (ret == RSC_BYPASS) {
2564         return virtio_net_do_receive(nc, buf, size);
2565     } else if (ret == RSC_FINAL) {
2566         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2567                 ((hdr_len + sizeof(struct eth_header)) + 8),
2568                 VIRTIO_NET_IP6_ADDR_SIZE,
2569                 hdr_len + sizeof(struct eth_header)
2570                 + sizeof(struct ip6_header));
2571     }
2572 
2573     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2574 }
2575 
2576 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2577                                                       NetClientState *nc,
2578                                                       uint16_t proto)
2579 {
2580     VirtioNetRscChain *chain;
2581 
2582     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2583         return NULL;
2584     }
2585 
2586     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2587         if (chain->proto == proto) {
2588             return chain;
2589         }
2590     }
2591 
2592     chain = g_malloc(sizeof(*chain));
2593     chain->n = n;
2594     chain->proto = proto;
2595     if (proto == (uint16_t)ETH_P_IP) {
2596         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2597         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2598     } else {
2599         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2600         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2601     }
2602     chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2603                                       virtio_net_rsc_purge, chain);
2604     memset(&chain->stat, 0, sizeof(chain->stat));
2605 
2606     QTAILQ_INIT(&chain->buffers);
2607     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2608 
2609     return chain;
2610 }
2611 
2612 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2613                                       const uint8_t *buf,
2614                                       size_t size)
2615 {
2616     uint16_t proto;
2617     VirtioNetRscChain *chain;
2618     struct eth_header *eth;
2619     VirtIONet *n;
2620 
2621     n = qemu_get_nic_opaque(nc);
2622     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2623         return virtio_net_do_receive(nc, buf, size);
2624     }
2625 
2626     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2627     proto = htons(eth->h_proto);
2628 
2629     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2630     if (chain) {
2631         chain->stat.received++;
2632         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2633             return virtio_net_rsc_receive4(chain, nc, buf, size);
2634         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2635             return virtio_net_rsc_receive6(chain, nc, buf, size);
2636         }
2637     }
2638     return virtio_net_do_receive(nc, buf, size);
2639 }
2640 
2641 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2642                                   size_t size)
2643 {
2644     VirtIONet *n = qemu_get_nic_opaque(nc);
2645     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2646         return virtio_net_rsc_receive(nc, buf, size);
2647     } else {
2648         return virtio_net_do_receive(nc, buf, size);
2649     }
2650 }
2651 
2652 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2653 
2654 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2655 {
2656     VirtIONet *n = qemu_get_nic_opaque(nc);
2657     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2658     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2659     int ret;
2660 
2661     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2662     virtio_notify(vdev, q->tx_vq);
2663 
2664     g_free(q->async_tx.elem);
2665     q->async_tx.elem = NULL;
2666 
2667     virtio_queue_set_notification(q->tx_vq, 1);
2668     ret = virtio_net_flush_tx(q);
2669     if (ret >= n->tx_burst) {
2670         /*
2671          * the flush has been stopped by tx_burst;
2672          * we will not receive a notification for the
2673          * remaining part, so re-schedule
2674          */
2675         virtio_queue_set_notification(q->tx_vq, 0);
2676         if (q->tx_bh) {
2677             replay_bh_schedule_event(q->tx_bh);
2678         } else {
2679             timer_mod(q->tx_timer,
2680                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2681         }
2682         q->tx_waiting = 1;
2683     }
2684 }
2685 
2686 /* TX */
2687 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2688 {
2689     VirtIONet *n = q->n;
2690     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2691     VirtQueueElement *elem;
2692     int32_t num_packets = 0;
2693     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2694     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2695         return num_packets;
2696     }
2697 
2698     if (q->async_tx.elem) {
2699         virtio_queue_set_notification(q->tx_vq, 0);
2700         return num_packets;
2701     }
2702 
2703     for (;;) {
2704         ssize_t ret;
2705         unsigned int out_num;
2706         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2707         struct virtio_net_hdr vhdr;
2708 
2709         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2710         if (!elem) {
2711             break;
2712         }
2713 
2714         out_num = elem->out_num;
2715         out_sg = elem->out_sg;
2716         if (out_num < 1) {
2717             virtio_error(vdev, "virtio-net header not in first element");
2718             goto detach;
2719         }
2720 
2721         if (n->needs_vnet_hdr_swap) {
2722             if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
2723                 sizeof(vhdr)) {
2724                 virtio_error(vdev, "virtio-net header incorrect");
2725                 goto detach;
2726             }
2727             virtio_net_hdr_swap(vdev, &vhdr);
2728             sg2[0].iov_base = &vhdr;
2729             sg2[0].iov_len = sizeof(vhdr);
2730             out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
2731                                sizeof(vhdr), -1);
2732             if (out_num == VIRTQUEUE_MAX_SIZE) {
2733                 goto drop;
2734             }
2735             out_num += 1;
2736             out_sg = sg2;
2737         }
2738         /*
2739          * If host wants to see the guest header as is, we can
2740          * pass it on unchanged. Otherwise, copy just the parts
2741          * that host is interested in.
2742          */
2743         assert(n->host_hdr_len <= n->guest_hdr_len);
2744         if (n->host_hdr_len != n->guest_hdr_len) {
2745             if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
2746                 virtio_error(vdev, "virtio-net header is invalid");
2747                 goto detach;
2748             }
2749             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2750                                        out_sg, out_num,
2751                                        0, n->host_hdr_len);
2752             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2753                              out_sg, out_num,
2754                              n->guest_hdr_len, -1);
2755             out_num = sg_num;
2756             out_sg = sg;
2757 
2758             if (out_num < 1) {
2759                 virtio_error(vdev, "virtio-net nothing to send");
2760                 goto detach;
2761             }
2762         }
2763 
2764         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2765                                       out_sg, out_num, virtio_net_tx_complete);
2766         if (ret == 0) {
2767             virtio_queue_set_notification(q->tx_vq, 0);
2768             q->async_tx.elem = elem;
2769             return -EBUSY;
2770         }
2771 
2772 drop:
2773         virtqueue_push(q->tx_vq, elem, 0);
2774         virtio_notify(vdev, q->tx_vq);
2775         g_free(elem);
2776 
2777         if (++num_packets >= n->tx_burst) {
2778             break;
2779         }
2780     }
2781     return num_packets;
2782 
2783 detach:
2784     virtqueue_detach_element(q->tx_vq, elem, 0);
2785     g_free(elem);
2786     return -EINVAL;
2787 }
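
/*
 * Two iovec manipulations above deserve spelling out.  In the byte-swap
 * case the guest-visible header is never modified; a swapped local copy
 * is spliced in front of the rest of the chain:
 *
 *     sg2[0]   -> &vhdr (local, already swapped)
 *     sg2[1..] =  iov_copy(..., out_sg, out_num, sizeof(vhdr), -1);
 *                 // offset skips the original header; -1 (SIZE_MAX)
 *                 // means "copy everything after it"
 *
 * In the short-host-header case, two iov_copy() calls rebuild the chain
 * keeping byte ranges [0, host_hdr_len) and [guest_hdr_len, end), so
 * the tail of the guest header, which the backend would not understand,
 * is silently dropped.
 */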
2788 
2789 static void virtio_net_tx_timer(void *opaque);
2790 
2791 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2792 {
2793     VirtIONet *n = VIRTIO_NET(vdev);
2794     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2795 
2796     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2797         virtio_net_drop_tx_queue_data(vdev, vq);
2798         return;
2799     }
2800 
2801     /* This happens when device was stopped but VCPU wasn't. */
2802     if (!vdev->vm_running) {
2803         q->tx_waiting = 1;
2804         return;
2805     }
2806 
2807     if (q->tx_waiting) {
2808         /* We already have queued packets, immediately flush */
2809         timer_del(q->tx_timer);
2810         virtio_net_tx_timer(q);
2811     } else {
2812         /* re-arm timer to flush it (and more) on next tick */
2813         timer_mod(q->tx_timer,
2814                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2815         q->tx_waiting = 1;
2816         virtio_queue_set_notification(vq, 0);
2817     }
2818 }
2819 
2820 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2821 {
2822     VirtIONet *n = VIRTIO_NET(vdev);
2823     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2824 
2825     if (unlikely(n->vhost_started)) {
2826         return;
2827     }
2828 
2829     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2830         virtio_net_drop_tx_queue_data(vdev, vq);
2831         return;
2832     }
2833 
2834     if (unlikely(q->tx_waiting)) {
2835         return;
2836     }
2837     q->tx_waiting = 1;
2838     /* This happens when device was stopped but VCPU wasn't. */
2839     if (!vdev->vm_running) {
2840         return;
2841     }
2842     virtio_queue_set_notification(vq, 0);
2843     replay_bh_schedule_event(q->tx_bh);
2844 }
2845 
2846 static void virtio_net_tx_timer(void *opaque)
2847 {
2848     VirtIONetQueue *q = opaque;
2849     VirtIONet *n = q->n;
2850     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2851     int ret;
2852 
2853     /* This happens when device was stopped but BH wasn't. */
2854     if (!vdev->vm_running) {
2855         /* Make sure tx waiting is set, so we'll run when restarted. */
2856         assert(q->tx_waiting);
2857         return;
2858     }
2859 
2860     q->tx_waiting = 0;
2861 
2862     /* Just in case the driver is not ready any more */
2863     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2864         return;
2865     }
2866 
2867     ret = virtio_net_flush_tx(q);
2868     if (ret == -EBUSY || ret == -EINVAL) {
2869         return;
2870     }
2871     /*
2872      * If we flush a full burst of packets, assume there are
2873      * more coming and immediately rearm
2874      */
2875     if (ret >= n->tx_burst) {
2876         q->tx_waiting = 1;
2877         timer_mod(q->tx_timer,
2878                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2879         return;
2880     }
2881     /*
2882      * If less than a full burst, re-enable notification and flush
2883      * anything that may have come in while we weren't looking.  If
2884      * we find something, assume the guest is still active and rearm
2885      */
2886     virtio_queue_set_notification(q->tx_vq, 1);
2887     ret = virtio_net_flush_tx(q);
2888     if (ret > 0) {
2889         virtio_queue_set_notification(q->tx_vq, 0);
2890         q->tx_waiting = 1;
2891         timer_mod(q->tx_timer,
2892                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2893     }
2894 }
2895 
2896 static void virtio_net_tx_bh(void *opaque)
2897 {
2898     VirtIONetQueue *q = opaque;
2899     VirtIONet *n = q->n;
2900     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2901     int32_t ret;
2902 
2903     /* This happens when device was stopped but BH wasn't. */
2904     if (!vdev->vm_running) {
2905         /* Make sure tx waiting is set, so we'll run when restarted. */
2906         assert(q->tx_waiting);
2907         return;
2908     }
2909 
2910     q->tx_waiting = 0;
2911 
2912     /* Just in case the driver is not ready any more */
2913     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2914         return;
2915     }
2916 
2917     ret = virtio_net_flush_tx(q);
2918     if (ret == -EBUSY || ret == -EINVAL) {
2919         return; /* Notification re-enable handled by tx_complete or device
2920                  * broken */
2921     }
2922 
2923     /* If we flush a full burst of packets, assume there are
2924      * more coming and immediately reschedule */
2925     if (ret >= n->tx_burst) {
2926         replay_bh_schedule_event(q->tx_bh);
2927         q->tx_waiting = 1;
2928         return;
2929     }
2930 
2931     /* If less than a full burst, re-enable notification and flush
2932      * anything that may have come in while we weren't looking.  If
2933      * we find something, assume the guest is still active and reschedule */
2934     virtio_queue_set_notification(q->tx_vq, 1);
2935     ret = virtio_net_flush_tx(q);
2936     if (ret == -EINVAL) {
2937         return;
2938     } else if (ret > 0) {
2939         virtio_queue_set_notification(q->tx_vq, 0);
2940         replay_bh_schedule_event(q->tx_bh);
2941         q->tx_waiting = 1;
2942     }
2943 }
2944 
2945 static void virtio_net_add_queue(VirtIONet *n, int index)
2946 {
2947     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2948 
2949     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2950                                            virtio_net_handle_rx);
2951 
2952     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2953         n->vqs[index].tx_vq =
2954             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2955                              virtio_net_handle_tx_timer);
2956         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2957                                               virtio_net_tx_timer,
2958                                               &n->vqs[index]);
2959     } else {
2960         n->vqs[index].tx_vq =
2961             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2962                              virtio_net_handle_tx_bh);
2963         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2964                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2965     }
2966 
2967     n->vqs[index].tx_waiting = 0;
2968     n->vqs[index].n = n;
2969 }
2970 
2971 static void virtio_net_del_queue(VirtIONet *n, int index)
2972 {
2973     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2974     VirtIONetQueue *q = &n->vqs[index];
2975     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2976 
2977     qemu_purge_queued_packets(nc);
2978 
2979     virtio_del_queue(vdev, index * 2);
2980     if (q->tx_timer) {
2981         timer_free(q->tx_timer);
2982         q->tx_timer = NULL;
2983     } else {
2984         qemu_bh_delete(q->tx_bh);
2985         q->tx_bh = NULL;
2986     }
2987     q->tx_waiting = 0;
2988     virtio_del_queue(vdev, index * 2 + 1);
2989 }
2990 
2991 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2992 {
2993     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2994     int old_num_queues = virtio_get_num_queues(vdev);
2995     int new_num_queues = new_max_queue_pairs * 2 + 1;
2996     int i;
2997 
2998     assert(old_num_queues >= 3);
2999     assert(old_num_queues % 2 == 1);
3000 
3001     if (old_num_queues == new_num_queues) {
3002         return;
3003     }
3004 
3005     /*
3006      * We always need to remove and add ctrl vq if
3007      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3008      * and then we only enter one of the following two loops.
3009      */
3010     virtio_del_queue(vdev, old_num_queues - 1);
3011 
3012     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3013         /* new_num_queues < old_num_queues */
3014         virtio_net_del_queue(n, i / 2);
3015     }
3016 
3017     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3018         /* new_num_queues > old_num_queues */
3019         virtio_net_add_queue(n, i / 2);
3020     }
3021 
3022     /* add ctrl_vq last */
3023     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3024 }
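
/*
 * The virtqueue index layout assumed throughout (vq2q(), index * 2,
 * index * 2 + 1) interleaves rx/tx per pair and keeps ctrl last; with
 * 2 queue pairs, 2 * 2 + 1 == 5 queues:
 *
 *     vq 0: rx0    vq 1: tx0
 *     vq 2: rx1    vq 3: tx1
 *     vq 4: ctrl
 *
 * which is why the resize above deletes the ctrl vq first, adds or
 * removes data queues two at a time, and re-adds ctrl at the new tail
 * index.
 */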
3025 
3026 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3027 {
3028     int max = multiqueue ? n->max_queue_pairs : 1;
3029 
3030     n->multiqueue = multiqueue;
3031     virtio_net_change_num_queue_pairs(n, max);
3032 
3033     virtio_net_set_queue_pairs(n);
3034 }
3035 
3036 static int virtio_net_post_load_device(void *opaque, int version_id)
3037 {
3038     VirtIONet *n = opaque;
3039     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3040     int i, link_down;
3041 
3042     trace_virtio_net_post_load_device();
3043     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3044                                virtio_vdev_has_feature(vdev,
3045                                                        VIRTIO_F_VERSION_1),
3046                                virtio_vdev_has_feature(vdev,
3047                                                        VIRTIO_NET_F_HASH_REPORT));
3048 
3049     /* MAC_TABLE_ENTRIES may be different from the saved image */
3050     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3051         n->mac_table.in_use = 0;
3052     }
3053 
3054     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3055         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3056     }
3057 
3058     /*
3059      * curr_guest_offloads will be later overwritten by the
3060      * virtio_set_features_nocheck call done from the virtio_load.
3061      * Here we make sure it is preserved and restored accordingly
3062      * in the virtio_net_post_load_virtio callback.
3063      */
3064     n->saved_guest_offloads = n->curr_guest_offloads;
3065 
3066     virtio_net_set_queue_pairs(n);
3067 
3068     /* Find the first multicast entry in the saved MAC filter */
3069     for (i = 0; i < n->mac_table.in_use; i++) {
3070         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3071             break;
3072         }
3073     }
3074     n->mac_table.first_multi = i;
3075 
3076     /* nc.link_down can't be migrated, so infer link_down according
3077      * to the link status bit in n->status */
3078     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3079     for (i = 0; i < n->max_queue_pairs; i++) {
3080         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3081     }
3082 
3083     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3084         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3085         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3086                                   QEMU_CLOCK_VIRTUAL,
3087                                   virtio_net_announce_timer, n);
3088         if (n->announce_timer.round) {
3089             timer_mod(n->announce_timer.tm,
3090                       qemu_clock_get_ms(n->announce_timer.type));
3091         } else {
3092             qemu_announce_timer_del(&n->announce_timer, false);
3093         }
3094     }
3095 
3096     virtio_net_commit_rss_config(n);
3097     return 0;
3098 }
3099 
3100 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3101 {
3102     VirtIONet *n = VIRTIO_NET(vdev);
3103     /*
3104      * The actual needed state is now in saved_guest_offloads,
3105      * see virtio_net_post_load_device for detail.
3106      * Restore it back and apply the desired offloads.
3107      */
3108     n->curr_guest_offloads = n->saved_guest_offloads;
3109     if (peer_has_vnet_hdr(n)) {
3110         virtio_net_apply_guest_offloads(n);
3111     }
3112 
3113     return 0;
3114 }
3115 
3116 /* tx_waiting field of a VirtIONetQueue */
3117 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3118     .name = "virtio-net-queue-tx_waiting",
3119     .fields = (const VMStateField[]) {
3120         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3121         VMSTATE_END_OF_LIST()
3122     },
3123 };
3124 
3125 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3126 {
3127     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3128 }
3129 
3130 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3131 {
3132     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3133                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3134 }
3135 
3136 static bool mac_table_fits(void *opaque, int version_id)
3137 {
3138     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3139 }
3140 
3141 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3142 {
3143     return !mac_table_fits(opaque, version_id);
3144 }
3145 
3146 /* This temporary type is shared by all the WITH_TMP methods
3147  * although only some fields are used by each.
3148  */
3149 struct VirtIONetMigTmp {
3150     VirtIONet      *parent;
3151     VirtIONetQueue *vqs_1;
3152     uint16_t        curr_queue_pairs_1;
3153     uint8_t         has_ufo;
3154     uint32_t        has_vnet_hdr;
3155 };
3156 
3157 /* The 2nd and subsequent tx_waiting flags are loaded later than
3158  * the 1st entry in the queue_pairs and only if there's more than one
3159  * entry.  We use the tmp mechanism to calculate a temporary
3160  * pointer and count and also validate the count.
3161  */
3162 
3163 static int virtio_net_tx_waiting_pre_save(void *opaque)
3164 {
3165     struct VirtIONetMigTmp *tmp = opaque;
3166 
3167     tmp->vqs_1 = tmp->parent->vqs + 1;
3168     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3169     if (tmp->parent->curr_queue_pairs == 0) {
3170         tmp->curr_queue_pairs_1 = 0;
3171     }
3172 
3173     return 0;
3174 }
3175 
3176 static int virtio_net_tx_waiting_pre_load(void *opaque)
3177 {
3178     struct VirtIONetMigTmp *tmp = opaque;
3179 
3180     /* Reuse the pointer setup from save */
3181     virtio_net_tx_waiting_pre_save(opaque);
3182 
3183     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3184         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3185             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3186 
3187         return -EINVAL;
3188     }
3189 
3190     return 0; /* all good */
3191 }
3192 
3193 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3194     .name      = "virtio-net-tx_waiting",
3195     .pre_load  = virtio_net_tx_waiting_pre_load,
3196     .pre_save  = virtio_net_tx_waiting_pre_save,
3197     .fields    = (const VMStateField[]) {
3198         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3199                                      curr_queue_pairs_1,
3200                                      vmstate_virtio_net_queue_tx_waiting,
3201                                      struct VirtIONetQueue),
3202         VMSTATE_END_OF_LIST()
3203     },
3204 };
3205 
3206 /* The 'has_ufo' flag is only tested: if the incoming stream has the
3207  * flag set, we need to check that our peer supports UFO too.
3208  */
3209 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3210 {
3211     struct VirtIONetMigTmp *tmp = opaque;
3212 
3213     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3214         error_report("virtio-net: saved image requires TUN_F_UFO support");
3215         return -EINVAL;
3216     }
3217 
3218     return 0;
3219 }
3220 
3221 static int virtio_net_ufo_pre_save(void *opaque)
3222 {
3223     struct VirtIONetMigTmp *tmp = opaque;
3224 
3225     tmp->has_ufo = tmp->parent->has_ufo;
3226 
3227     return 0;
3228 }
3229 
3230 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3231     .name      = "virtio-net-ufo",
3232     .post_load = virtio_net_ufo_post_load,
3233     .pre_save  = virtio_net_ufo_pre_save,
3234     .fields    = (const VMStateField[]) {
3235         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3236         VMSTATE_END_OF_LIST()
3237     },
3238 };
3239 
3240 /* The 'has_vnet_hdr' flag is only tested: if the incoming stream has the
3241  * flag set, we need to check that our peer supports the vnet header too.
3242  */
3243 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3244 {
3245     struct VirtIONetMigTmp *tmp = opaque;
3246 
3247     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3248         error_report("virtio-net: saved image requires vnet_hdr=on");
3249         return -EINVAL;
3250     }
3251 
3252     return 0;
3253 }
3254 
3255 static int virtio_net_vnet_pre_save(void *opaque)
3256 {
3257     struct VirtIONetMigTmp *tmp = opaque;
3258 
3259     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3260 
3261     return 0;
3262 }
3263 
3264 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3265     .name      = "virtio-net-vnet",
3266     .post_load = virtio_net_vnet_post_load,
3267     .pre_save  = virtio_net_vnet_pre_save,
3268     .fields    = (const VMStateField[]) {
3269         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3270         VMSTATE_END_OF_LIST()
3271     },
3272 };
3273 
3274 static bool virtio_net_rss_needed(void *opaque)
3275 {
3276     return VIRTIO_NET(opaque)->rss_data.enabled;
3277 }
3278 
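/* Subsection: only included in the stream when RSS is enabled. */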
3279 static const VMStateDescription vmstate_virtio_net_rss = {
3280     .name      = "virtio-net-device/rss",
3281     .version_id = 1,
3282     .minimum_version_id = 1,
3283     .needed = virtio_net_rss_needed,
3284     .fields = (const VMStateField[]) {
3285         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3286         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3287         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3288         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3289         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3290         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3291         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3292                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3293         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3294                                     rss_data.indirections_len, 0,
3295                                     vmstate_info_uint16, uint16_t),
3296         VMSTATE_END_OF_LIST()
3297     },
3298 };
3299 
3300 static const VMStateDescription vmstate_virtio_net_device = {
3301     .name = "virtio-net-device",
3302     .version_id = VIRTIO_NET_VM_VERSION,
3303     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3304     .post_load = virtio_net_post_load_device,
3305     .fields = (const VMStateField[]) {
3306         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3307         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3308                                vmstate_virtio_net_queue_tx_waiting,
3309                                VirtIONetQueue),
3310         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3311         VMSTATE_UINT16(status, VirtIONet),
3312         VMSTATE_UINT8(promisc, VirtIONet),
3313         VMSTATE_UINT8(allmulti, VirtIONet),
3314         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3315 
3316         /* Guarded pair: if it fits we load it, else we throw it away
3317          * - can happen if the source has a larger MAC table; post-load
3318          * sets flags in this case.
3319          */
3320         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3321                                  0, mac_table_fits, mac_table.in_use,
3322                                  ETH_ALEN),
3323         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3324                                      mac_table.in_use, ETH_ALEN),
3325 
3326         /* Note: This is an array of uint32's that's always been saved as a
3327          * buffer; hold onto your endiannesses; it's actually used as a bitmap
3328          * addressed in uint32 units.
3329          */
3330         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3331         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3332                          vmstate_virtio_net_has_vnet),
3333         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3334         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3335         VMSTATE_UINT8(alluni, VirtIONet),
3336         VMSTATE_UINT8(nomulti, VirtIONet),
3337         VMSTATE_UINT8(nouni, VirtIONet),
3338         VMSTATE_UINT8(nobcast, VirtIONet),
3339         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3340                          vmstate_virtio_net_has_ufo),
3341         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3342                             vmstate_info_uint16_equal, uint16_t),
3343         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3344         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3345                          vmstate_virtio_net_tx_waiting),
3346         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3347                             has_ctrl_guest_offloads),
3348         VMSTATE_END_OF_LIST()
3349     },
3350     .subsections = (const VMStateDescription * const []) {
3351         &vmstate_virtio_net_rss,
3352         NULL
3353     }
3354 };
3355 
3356 static NetClientInfo net_virtio_info = {
3357     .type = NET_CLIENT_DRIVER_NIC,
3358     .size = sizeof(NICState),
3359     .can_receive = virtio_net_can_receive,
3360     .receive = virtio_net_receive,
3361     .link_status_changed = virtio_net_set_link_status,
3362     .query_rx_filter = virtio_net_query_rxfilter,
3363     .announce = virtio_net_announce,
3364 };
3365 
3366 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3367 {
3368     VirtIONet *n = VIRTIO_NET(vdev);
3369     NetClientState *nc;
3370     assert(n->vhost_started);
3371     if (!n->multiqueue && idx == 2) {
3372         /* Must guard against an invalid feature set or a bogus queue
3373          * index being set by a malicious guest, or slipping in through
3374          * a buggy migration stream.
3375          */
3376         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3377             qemu_log_mask(LOG_GUEST_ERROR,
3378                           "%s: bogus vq index ignored\n", __func__);
3379             return false;
3380         }
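        /*
         * With multiqueue off, vq index 2 is the ctrl vq; its
         * NetClientState is the peer queue that follows the data
         * queue pairs.
         */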
3381         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3382     } else {
3383         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3384     }
3385     /*
3386      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is the
3387      * index used for the config interrupt.  If it is not supported,
3388      * this function returns false.
3389      */
3390 
3391     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3392         return vhost_net_config_pending(get_vhost_net(nc->peer));
3393     }
3394     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3395 }
3396 
3397 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3398                                            bool mask)
3399 {
3400     VirtIONet *n = VIRTIO_NET(vdev);
3401     NetClientState *nc;
3402     assert(n->vhost_started);
3403     if (!n->multiqueue && idx == 2) {
3404         /* Must guard against an invalid feature set or a bogus queue
3405          * index being set by a malicious guest, or slipping in through
3406          * a buggy migration stream.
3407          */
3408         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3409             qemu_log_mask(LOG_GUEST_ERROR,
3410                           "%s: bogus vq index ignored\n", __func__);
3411             return;
3412         }
3413         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3414     } else {
3415         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3416     }
3417     /*
3418      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is the
3419      * index used for the config interrupt.  If it is not supported,
3420      * this function returns without masking anything.
3421      */
3422 
3423     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3424         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3425         return;
3426     }
3427     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3428 }
3429 
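/*
 * host_features is passed by value here, so forcing VIRTIO_NET_F_MAC only
 * affects the size calculation: the config space is always large enough
 * to hold the mac field, whatever features were actually negotiated.
 */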
3430 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3431 {
3432     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3433 
3434     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3435 }
3436 
3437 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3438                                    const char *type)
3439 {
3440     /*
3441      * The name can be NULL; in that case the netclient name will be type.x.
3442      */
3443     assert(type != NULL);
3444 
3445     g_free(n->netclient_name);
3446     g_free(n->netclient_type);
3447     n->netclient_name = g_strdup(name);
3448     n->netclient_type = g_strdup(type);
3449 }
3450 
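/*
 * Request unplug of the primary device via its hotplug controller, marking
 * it partially hotplugged so failover_replug_primary() can plug it back in
 * if migration fails.
 */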
3451 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3452 {
3453     HotplugHandler *hotplug_ctrl;
3454     PCIDevice *pci_dev;
3455     Error *err = NULL;
3456 
3457     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3458     if (hotplug_ctrl) {
3459         pci_dev = PCI_DEVICE(dev);
3460         pci_dev->partially_hotplugged = true;
3461         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3462         if (err) {
3463             error_report_err(err);
3464             return false;
3465         }
3466     } else {
3467         return false;
3468     }
3469     return true;
3470 }
3471 
3472 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3473                                     Error **errp)
3474 {
3475     Error *err = NULL;
3476     HotplugHandler *hotplug_ctrl;
3477     PCIDevice *pdev = PCI_DEVICE(dev);
3478     BusState *primary_bus;
3479 
3480     if (!pdev->partially_hotplugged) {
3481         return true;
3482     }
3483     primary_bus = dev->parent_bus;
3484     if (!primary_bus) {
3485         error_setg(errp, "virtio_net: couldn't find primary bus");
3486         return false;
3487     }
3488     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3489     qatomic_set(&n->failover_primary_hidden, false);
3490     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3491     if (hotplug_ctrl) {
3492         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3493         if (err) {
3494             goto out;
3495         }
3496         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3497     }
3498     pdev->partially_hotplugged = false;
3499 
3500 out:
3501     error_propagate(errp, err);
3502     return !err;
3503 }
3504 
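/*
 * Unplug the failover primary when migration starts and plug it back if
 * migration fails.
 *
 * A minimal (hypothetical) failover pairing on the command line, assuming
 * a VF at host address 01:00.0:
 *   -device virtio-net-pci,netdev=hostnet0,failover=on,id=standby0
 *   -device vfio-pci,host=01:00.0,failover_pair_id=standby0
 */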
3505 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3506 {
3507     bool should_be_hidden;
3508     Error *err = NULL;
3509     DeviceState *dev = failover_find_primary_device(n);
3510 
3511     if (!dev) {
3512         return;
3513     }
3514 
3515     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3516 
3517     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3518         if (failover_unplug_primary(n, dev)) {
3519             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3520             qapi_event_send_unplug_primary(dev->id);
3521             qatomic_set(&n->failover_primary_hidden, true);
3522         } else {
3523             warn_report("couldn't unplug primary device");
3524         }
3525     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3526         /* We already unplugged the device; let's plug it back. */
3527         if (!failover_replug_primary(n, dev, &err)) {
3528             if (err) {
3529                 error_report_err(err);
3530             }
3531         }
3532     }
3533 }
3534 
3535 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3536                                                MigrationEvent *e, Error **errp)
3537 {
3538     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3539     virtio_net_handle_migration_primary(n, e);
3540     return 0;
3541 }
3542 
3543 static bool failover_hide_primary_device(DeviceListener *listener,
3544                                          const QDict *device_opts,
3545                                          bool from_json,
3546                                          Error **errp)
3547 {
3548     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3549     const char *standby_id;
3550 
3551     if (!device_opts) {
3552         return false;
3553     }
3554 
3555     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3556         return false;
3557     }
3558 
3559     if (!qdict_haskey(device_opts, "id")) {
3560         error_setg(errp, "Device with failover_pair_id needs to have id");
3561         return false;
3562     }
3563 
3564     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3565     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3566         return false;
3567     }
3568 
3569     /*
3570      * The hide helper can be called several times for a given device.
3571      * Check that there is only one primary per virtio-net device, but
3572      * don't clone the qdict again when it's called repeatedly for the
3573      * same device.
3574      */
3575     if (n->primary_opts) {
3576         const char *old, *new;
3577         /* devices with failover_pair_id always have an id */
3578         old = qdict_get_str(n->primary_opts, "id");
3579         new = qdict_get_str(device_opts, "id");
3580         if (strcmp(old, new) != 0) {
3581             error_setg(errp, "Cannot attach more than one primary device to "
3582                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3583             return false;
3584         }
3585     } else {
3586         n->primary_opts = qdict_clone_shallow(device_opts);
3587         n->primary_opts_from_json = from_json;
3588     }
3589 
3590     /* failover_primary_hidden is set during feature negotiation */
3591     return qatomic_read(&n->failover_primary_hidden);
3592 }
3593 
3594 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3595 {
3596     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3597     VirtIONet *n = VIRTIO_NET(dev);
3598     NetClientState *nc;
3599     int i;
3600 
3601     if (n->net_conf.mtu) {
3602         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3603     }
3604 
3605     if (n->net_conf.duplex_str) {
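        /* Comparing 5 bytes makes the terminating NUL part of the match,
         * so only the exact strings "half" and "full" are accepted. */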
3606         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3607             n->net_conf.duplex = DUPLEX_HALF;
3608         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3609             n->net_conf.duplex = DUPLEX_FULL;
3610         } else {
3611             error_setg(errp, "'duplex' must be 'half' or 'full'");
3612             return;
3613         }
3614         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3615     } else {
3616         n->net_conf.duplex = DUPLEX_UNKNOWN;
3617     }
3618 
3619     if (n->net_conf.speed < SPEED_UNKNOWN) {
3620         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3621         return;
3622     }
3623     if (n->net_conf.speed >= 0) {
3624         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3625     }
3626 
3627     if (n->failover) {
3628         n->primary_listener.hide_device = failover_hide_primary_device;
3629         qatomic_set(&n->failover_primary_hidden, true);
3630         device_listener_register(&n->primary_listener);
3631         migration_add_notifier(&n->migration_state,
3632                                virtio_net_migration_state_notifier);
3633         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3634     }
3635 
3636     virtio_net_set_config_size(n, n->host_features);
3637     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3638 
3639     /*
3640      * We keep the lower limit on the RX queue size at its historical
3641      * default.  Guests that want a smaller ring can always resize it
3642      * without help from us (using virtio 1 and up).
3643      */
3644     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3645         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3646         !is_power_of_2(n->net_conf.rx_queue_size)) {
3647         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3648                    "must be a power of 2 between %d and %d.",
3649                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3650                    VIRTQUEUE_MAX_SIZE);
3651         virtio_cleanup(vdev);
3652         return;
3653     }
3654 
3655     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3656         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3657         !is_power_of_2(n->net_conf.tx_queue_size)) {
3658         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3659                    "must be a power of 2 between %d and %d",
3660                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3661                    virtio_net_max_tx_queue_size(n));
3662         virtio_cleanup(vdev);
3663         return;
3664     }
3665 
3666     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3667 
3668     /*
3669      * Figure out the number of datapath queue pairs, since the backend
3670      * could provide a control queue via the peers as well.
3671      */
3672     if (n->nic_conf.peers.queues) {
3673         for (i = 0; i < n->max_ncs; i++) {
3674             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3675                 ++n->max_queue_pairs;
3676             }
3677         }
3678     }
3679     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3680 
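    /* Each queue pair needs an RX and a TX vq, plus the single ctrl vq. */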
3681     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3682         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3683                    "must be a positive integer less than %d.",
3684                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3685         virtio_cleanup(vdev);
3686         return;
3687     }
3688     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3689     n->curr_queue_pairs = 1;
3690     n->tx_timeout = n->net_conf.txtimer;
3691 
3692     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3693                        && strcmp(n->net_conf.tx, "bh")) {
3694         warn_report("virtio-net: "
3695                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3696                     n->net_conf.tx);
3697         error_printf("Defaulting to \"bh\"\n");
3698     }
3699 
3700     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3701                                     n->net_conf.tx_queue_size);
3702 
3703     virtio_net_add_queue(n, 0);
3704 
3705     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3706     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3707     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3708     n->status = VIRTIO_NET_S_LINK_UP;
3709     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3710                               QEMU_CLOCK_VIRTUAL,
3711                               virtio_net_announce_timer, n);
3712     n->announce_timer.round = 0;
3713 
3714     if (n->netclient_type) {
3715         /*
3716          * This happens when virtio_net_set_netclient_name() has been called.
3717          */
3718         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3719                               n->netclient_type, n->netclient_name,
3720                               &dev->mem_reentrancy_guard, n);
3721     } else {
3722         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3723                               object_get_typename(OBJECT(dev)), dev->id,
3724                               &dev->mem_reentrancy_guard, n);
3725     }
3726 
3727     for (i = 0; i < n->max_queue_pairs; i++) {
3728         n->nic->ncs[i].do_not_pad = true;
3729     }
3730 
3731     peer_test_vnet_hdr(n);
3732     if (peer_has_vnet_hdr(n)) {
3733         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3734     } else {
3735         n->host_hdr_len = 0;
3736     }
3737 
3738     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3739 
3740     n->vqs[0].tx_waiting = 0;
3741     n->tx_burst = n->net_conf.txburst;
3742     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3743     n->promisc = 1; /* for compatibility */
3744 
3745     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3746 
3747     n->vlans = g_malloc0(MAX_VLAN >> 3);
3748 
3749     nc = qemu_get_queue(n->nic);
3750     nc->rxfilter_notify_enabled = 1;
3751 
3752     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3753         struct virtio_net_config netcfg = {};
3754         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3755         vhost_net_set_config(get_vhost_net(nc->peer),
3756             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3757     }
3758     QTAILQ_INIT(&n->rsc_chains);
3759     n->qdev = dev;
3760 
3761     net_rx_pkt_init(&n->rx_pkt);
3762 
3763     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3764         virtio_net_load_ebpf(n);
3765     }
3766 }
3767 
3768 static void virtio_net_device_unrealize(DeviceState *dev)
3769 {
3770     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3771     VirtIONet *n = VIRTIO_NET(dev);
3772     int i, max_queue_pairs;
3773 
3774     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3775         virtio_net_unload_ebpf(n);
3776     }
3777 
3778     /* This will stop the vhost backend if appropriate. */
3779     virtio_net_set_status(vdev, 0);
3780 
3781     g_free(n->netclient_name);
3782     n->netclient_name = NULL;
3783     g_free(n->netclient_type);
3784     n->netclient_type = NULL;
3785 
3786     g_free(n->mac_table.macs);
3787     g_free(n->vlans);
3788 
3789     if (n->failover) {
3790         qobject_unref(n->primary_opts);
3791         device_listener_unregister(&n->primary_listener);
3792         migration_remove_notifier(&n->migration_state);
3793     } else {
3794         assert(n->primary_opts == NULL);
3795     }
3796 
3797     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3798     for (i = 0; i < max_queue_pairs; i++) {
3799         virtio_net_del_queue(n, i);
3800     }
3801     /* also delete the control vq */
3802     virtio_del_queue(vdev, max_queue_pairs * 2);
3803     qemu_announce_timer_del(&n->announce_timer, false);
3804     g_free(n->vqs);
3805     qemu_del_nic(n->nic);
3806     virtio_net_rsc_cleanup(n);
3807     g_free(n->rss_data.indirections_table);
3808     net_rx_pkt_uninit(n->rx_pkt);
3809     virtio_cleanup(vdev);
3810 }
3811 
3812 static void virtio_net_reset(VirtIODevice *vdev)
3813 {
3814     VirtIONet *n = VIRTIO_NET(vdev);
3815     int i;
3816 
3817     /* Reset back to compatibility mode */
3818     n->promisc = 1;
3819     n->allmulti = 0;
3820     n->alluni = 0;
3821     n->nomulti = 0;
3822     n->nouni = 0;
3823     n->nobcast = 0;
3824     /* multiqueue is disabled by default */
3825     n->curr_queue_pairs = 1;
3826     timer_del(n->announce_timer.tm);
3827     n->announce_timer.round = 0;
3828     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
3829 
3830     /* Flush any MAC and VLAN filter table state */
3831     n->mac_table.in_use = 0;
3832     n->mac_table.first_multi = 0;
3833     n->mac_table.multi_overflow = 0;
3834     n->mac_table.uni_overflow = 0;
3835     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
3836     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
3837     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
3838     memset(n->vlans, 0, MAX_VLAN >> 3);
3839 
3840     /* Flush any async TX */
3841     for (i = 0; i < n->max_queue_pairs; i++) {
3842         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
3843     }
3844 
3845     virtio_net_disable_rss(n);
3846 }
3847 
3848 static void virtio_net_instance_init(Object *obj)
3849 {
3850     VirtIONet *n = VIRTIO_NET(obj);
3851 
3852     /*
3853      * The default config_size is sizeof(struct virtio_net_config).
3854      * Can be overridden with virtio_net_set_config_size.
3855      */
3856     n->config_size = sizeof(struct virtio_net_config);
3857     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3858                                   "bootindex", "/ethernet-phy@0",
3859                                   DEVICE(n));
3860 
3861     ebpf_rss_init(&n->ebpf_rss);
3862 }
3863 
3864 static int virtio_net_pre_save(void *opaque)
3865 {
3866     VirtIONet *n = opaque;
3867 
3868     /* At this point, the backend must be stopped, otherwise
3869      * it might keep writing to guest memory. */
3870     assert(!n->vhost_started);
3871 
3872     return 0;
3873 }
3874 
3875 static bool primary_unplug_pending(void *opaque)
3876 {
3877     DeviceState *dev = opaque;
3878     DeviceState *primary;
3879     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3880     VirtIONet *n = VIRTIO_NET(vdev);
3881 
3882     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3883         return false;
3884     }
3885     primary = failover_find_primary_device(n);
3886     return primary ? primary->pending_deleted_event : false;
3887 }
3888 
3889 static bool dev_unplug_pending(void *opaque)
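/*
 * Wired up as vmsd->dev_unplug_pending below; migration uses it to wait
 * for the failover primary to finish unplugging.
 */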
3890 {
3891     DeviceState *dev = opaque;
3892     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3893 
3894     return vdc->primary_unplug_pending(dev);
3895 }
3896 
3897 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3898 {
3899     VirtIONet *n = VIRTIO_NET(vdev);
3900     NetClientState *nc;
3901     struct vhost_net *net;
3902 
3903     if (!n->nic) {
3904         return NULL;
3905     }
3906 
3907     nc = qemu_get_queue(n->nic);
3908     if (!nc) {
3909         return NULL;
3910     }
3911 
3912     net = get_vhost_net(nc->peer);
3913     if (!net) {
3914         return NULL;
3915     }
3916 
3917     return &net->dev;
3918 }
3919 
3920 static const VMStateDescription vmstate_virtio_net = {
3921     .name = "virtio-net",
3922     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3923     .version_id = VIRTIO_NET_VM_VERSION,
3924     .fields = (const VMStateField[]) {
3925         VMSTATE_VIRTIO_DEVICE,
3926         VMSTATE_END_OF_LIST()
3927     },
3928     .pre_save = virtio_net_pre_save,
3929     .dev_unplug_pending = dev_unplug_pending,
3930 };
3931 
3932 static Property virtio_net_properties[] = {
3933     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3934                     VIRTIO_NET_F_CSUM, true),
3935     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3936                     VIRTIO_NET_F_GUEST_CSUM, true),
3937     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3938     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3939                     VIRTIO_NET_F_GUEST_TSO4, true),
3940     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3941                     VIRTIO_NET_F_GUEST_TSO6, true),
3942     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3943                     VIRTIO_NET_F_GUEST_ECN, true),
3944     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3945                     VIRTIO_NET_F_GUEST_UFO, true),
3946     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3947                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3948     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3949                     VIRTIO_NET_F_HOST_TSO4, true),
3950     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3951                     VIRTIO_NET_F_HOST_TSO6, true),
3952     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3953                     VIRTIO_NET_F_HOST_ECN, true),
3954     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3955                     VIRTIO_NET_F_HOST_UFO, true),
3956     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3957                     VIRTIO_NET_F_MRG_RXBUF, true),
3958     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3959                     VIRTIO_NET_F_STATUS, true),
3960     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3961                     VIRTIO_NET_F_CTRL_VQ, true),
3962     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3963                     VIRTIO_NET_F_CTRL_RX, true),
3964     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3965                     VIRTIO_NET_F_CTRL_VLAN, true),
3966     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3967                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3968     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3969                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3970     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3971                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3972     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3973     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3974                     VIRTIO_NET_F_RSS, false),
3975     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3976                     VIRTIO_NET_F_HASH_REPORT, false),
3977     DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
3978                       ebpf_rss_fds, qdev_prop_string, char*),
3979     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3980                     VIRTIO_NET_F_RSC_EXT, false),
3981     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3982                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3983     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3984     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3985                        TX_TIMER_INTERVAL),
3986     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3987     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3988     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3989                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3990     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3991                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3992     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3993     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3994                      true),
3995     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3996     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3997     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3998     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
3999                       VIRTIO_NET_F_GUEST_USO4, true),
4000     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4001                       VIRTIO_NET_F_GUEST_USO6, true),
4002     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4003                       VIRTIO_NET_F_HOST_USO, true),
4004     DEFINE_PROP_END_OF_LIST(),
4005 };
4006 
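/*
 * A minimal (hypothetical) command line exercising some of the properties
 * above, assuming a tap backend named "net0":
 *
 *   -netdev tap,id=net0 \
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024,tx_queue_size=1024
 */
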
4007 static void virtio_net_class_init(ObjectClass *klass, void *data)
4008 {
4009     DeviceClass *dc = DEVICE_CLASS(klass);
4010     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
4011 
4012     device_class_set_props(dc, virtio_net_properties);
4013     dc->vmsd = &vmstate_virtio_net;
4014     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
4015     vdc->realize = virtio_net_device_realize;
4016     vdc->unrealize = virtio_net_device_unrealize;
4017     vdc->get_config = virtio_net_get_config;
4018     vdc->set_config = virtio_net_set_config;
4019     vdc->get_features = virtio_net_get_features;
4020     vdc->set_features = virtio_net_set_features;
4021     vdc->bad_features = virtio_net_bad_features;
4022     vdc->reset = virtio_net_reset;
4023     vdc->queue_reset = virtio_net_queue_reset;
4024     vdc->queue_enable = virtio_net_queue_enable;
4025     vdc->set_status = virtio_net_set_status;
4026     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
4027     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
4028     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
4029     vdc->post_load = virtio_net_post_load_virtio;
4030     vdc->vmsd = &vmstate_virtio_net_device;
4031     vdc->primary_unplug_pending = primary_unplug_pending;
4032     vdc->get_vhost = virtio_net_get_vhost;
4033     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4034 }
4035 
4036 static const TypeInfo virtio_net_info = {
4037     .name = TYPE_VIRTIO_NET,
4038     .parent = TYPE_VIRTIO_DEVICE,
4039     .instance_size = sizeof(VirtIONet),
4040     .instance_init = virtio_net_instance_init,
4041     .class_init = virtio_net_class_init,
4042 };
4043 
4044 static void virtio_register_types(void)
4045 {
4046     type_register_static(&virtio_net_info);
4047 }
4048 
4049 type_init(virtio_register_types)
4050