xref: /openbmc/qemu/hw/net/virtio-net.c (revision ba54a7e6b86884e43bed2d2f5a79c719059652a8)
1  /*
2   * Virtio Network Device
3   *
4   * Copyright IBM, Corp. 2007
5   *
6   * Authors:
7   *  Anthony Liguori   <aliguori@us.ibm.com>
8   *
9   * This work is licensed under the terms of the GNU GPL, version 2.  See
10   * the COPYING file in the top-level directory.
11   *
12   */
13  
14  #include "qemu/osdep.h"
15  #include "qemu/atomic.h"
16  #include "qemu/iov.h"
17  #include "qemu/log.h"
18  #include "qemu/main-loop.h"
19  #include "qemu/module.h"
20  #include "hw/virtio/virtio.h"
21  #include "net/net.h"
22  #include "net/checksum.h"
23  #include "net/tap.h"
24  #include "qemu/error-report.h"
25  #include "qemu/timer.h"
26  #include "qemu/option.h"
27  #include "qemu/option_int.h"
28  #include "qemu/config-file.h"
29  #include "qapi/qmp/qdict.h"
30  #include "hw/virtio/virtio-net.h"
31  #include "net/vhost_net.h"
32  #include "net/announce.h"
33  #include "hw/virtio/virtio-bus.h"
34  #include "qapi/error.h"
35  #include "qapi/qapi-events-net.h"
36  #include "hw/qdev-properties.h"
37  #include "qapi/qapi-types-migration.h"
38  #include "qapi/qapi-events-migration.h"
39  #include "hw/virtio/virtio-access.h"
40  #include "migration/misc.h"
41  #include "standard-headers/linux/ethtool.h"
42  #include "sysemu/sysemu.h"
43  #include "sysemu/replay.h"
44  #include "trace.h"
45  #include "monitor/qdev.h"
46  #include "monitor/monitor.h"
47  #include "hw/pci/pci_device.h"
48  #include "net_rx_pkt.h"
49  #include "hw/virtio/vhost.h"
50  #include "sysemu/qtest.h"
51  
#define VIRTIO_NET_VM_VERSION    11

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/*
 * For now, only allow queue sizes at least as large as the default; with
 * virtio-1, the guest can downsize.
 */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Set of RSS hash types advertised in the device configuration space. */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92  
/*
 * Table pairing feature bit(s) with the end offset of a field of
 * struct virtio_net_config.  It is referenced by cfg_size_params and is
 * terminated by an empty entry.
 * NOTE(review): presumably each entry gives the config-space size required
 * once the feature is offered -- confirm against the config-size helper.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    /* Either RSS or hash reporting extends the config up to this field. */
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
108  
/*
 * Config-size parameters: the minimum size ends right after the mac field,
 * the maximum is the whole struct virtio_net_config, and feature_sizes
 * provides the per-feature end offsets.
 */
static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
114  
virtio_net_get_subqueue(NetClientState * nc)115  static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
116  {
117      VirtIONet *n = qemu_get_nic_opaque(nc);
118  
119      return &n->vqs[nc->queue_index];
120  }
121  
/*
 * Convert a virtqueue index into the index of its queue pair: two
 * consecutive virtqueue indexes share one queue pair.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_pair = 2;

    return queue_index / vqs_per_pair;
}
126  
/*
 * Flush or purge the packets queued on @nc's peer, then check that no
 * asynchronous TX element is left pending on @nc's subqueue.
 * Does nothing when @nc has no peer.
 */
static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (nc->peer) {
        qemu_flush_or_purge_queued_packets(nc->peer, true);
        assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
    }
}
136  
137  /* TODO
138   * - we could suppress RX interrupt if we were so inclined.
139   */
140  
/*
 * Fill @config with the contents of the device configuration space.
 *
 * The configuration is first built from the VirtIONet state and copied out
 * (n->config_size bytes).  When the peer is a vhost-vdpa backend, the
 * configuration is then read from the backend and, unless that read returns
 * -1, copied over @config after two adjustments: an all-zero MAC is replaced
 * with n->mac, and the VIRTIO_NET_S_ANNOUNCE bit of n->status is merged into
 * the status field.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct virtio_net_config netcfg;
    uint16_t rss_table_len;
    int ret;

    /* Without RSS on the host side only a single-entry table is advertised. */
    if (virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        rss_table_len = VIRTIO_NET_RSS_MAX_TABLE_LEN;
    } else {
        rss_table_len = 1;
    }

    memset(&netcfg, 0, sizeof(netcfg));
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 rss_table_len);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Only a VDPA peer needs more work.  A missing peer cannot be VDPA,
     * since a VDPA peer cannot be disconnected/reconnected.
     */
    if (!nc->peer || nc->peer->info->type != NET_CLIENT_DRIVER_VHOST_VDPA) {
        return;
    }

    ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                               n->config_size);
    if (ret == -1) {
        return;
    }

    /*
     * Some NIC/kernel combinations report an all-zero MAC.  That is not a
     * legal address, so carry on with the one from the QEMU command line,
     * hoping it was configured correctly elsewhere and is merely not
     * reported by the device.
     */
    if (!memcmp(&netcfg.mac, &zero, sizeof(zero))) {
        info_report("Zero hardware mac address detected. Ignoring.");
        memcpy(netcfg.mac, n->mac, ETH_ALEN);
    }

    netcfg.status |= virtio_tswap16(vdev, n->status & VIRTIO_NET_S_ANNOUNCE);
    memcpy(config, &netcfg, n->config_size);
}
191  
/*
 * Handle a write of n->config_size bytes from @config to the device
 * configuration space.
 *
 * The MAC from @config replaces n->mac only when neither
 * VIRTIO_NET_F_CTRL_MAC_ADDR nor VIRTIO_F_VERSION_1 was negotiated and the
 * address actually differs.  When the peer is a vhost-vdpa backend, the
 * written configuration is also forwarded to it.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct virtio_net_config netcfg = {};
    bool mac_writable;

    memcpy(&netcfg, config, n->config_size);

    mac_writable =
        !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1);
    if (mac_writable && memcmp(netcfg.mac, n->mac, ETH_ALEN) != 0) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Only a VDPA peer gets the configuration forwarded.  A missing peer
     * cannot be VDPA, since a VDPA peer cannot be disconnected/reconnected.
     */
    if (!nc->peer || nc->peer->info->type != NET_CLIENT_DRIVER_VHOST_VDPA) {
        return;
    }

    vhost_net_set_config(get_vhost_net(nc->peer),
                         (uint8_t *)&netcfg, 0, n->config_size,
                         VHOST_SET_CONFIG_TYPE_FRONTEND);
}
217  
/*
 * Tell whether the device counts as started for the given virtio @status:
 * DRIVER_OK must be set in @status, the link must be up in n->status and
 * the VM must be running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }

    return vdev->vm_running;
}
224  
/*
 * Set the ANNOUNCE bit in the device status and send a configuration
 * change notification.
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *virtio_dev = VIRTIO_DEVICE(net);

    trace_virtio_net_announce_notify();
    net->status = net->status | VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(virtio_dev);
}
233  
virtio_net_announce_timer(void * opaque)234  static void virtio_net_announce_timer(void *opaque)
235  {
236      VirtIONet *n = opaque;
237      trace_virtio_net_announce_timer(n->announce_timer.round);
238  
239      n->announce_timer.round--;
240      virtio_net_announce_notify(n);
241  }
242  
/*
 * Ask the guest to announce itself, provided it negotiated both
 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    bool guest_can_announce;

    /*
     * If the virtio migration announcement timer still has rounds left,
     * let it trigger the announcement so that we do not cause confusion.
     */
    if (n->announce_timer.round != 0) {
        return;
    }

    guest_can_announce =
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ);
    if (!guest_can_announce) {
        return;
    }

    virtio_net_announce_notify(n);
}
262  
/*
 * Start or stop the vhost backend so that it follows the device state.
 *
 * vhost should run when the device is started for @status and the peer's
 * link is not down.  Nothing happens when there is no vhost backend or when
 * vhost is already in the wanted state.  A failed start leaves
 * n->vhost_started cleared, so userspace virtio keeps handling the device.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = 0;
    bool want_vhost;
    int idx;
    int r;

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        cvq = n->max_ncs - n->max_queue_pairs;
    }

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    want_vhost = virtio_net_started(n, status) && !nc->peer->link_down;
    if (want_vhost == !!n->vhost_started) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
        return;
    }

    if (n->needs_vnet_hdr_swap) {
        error_report("backend does not support %s vnet headers; "
                     "falling back on userspace virtio",
                     virtio_is_big_endian(vdev) ? "BE" : "LE");
        return;
    }

    /*
     * Purge any outstanding packets, in both the TX and RX directions, so
     * that the rings are not touched while vhost is running.
     */
    for (idx = 0; idx < queue_pairs; idx++) {
        NetClientState *subq = qemu_get_subqueue(n->nic, idx);

        qemu_net_queue_purge(subq->peer->incoming_queue, subq);
        qemu_net_queue_purge(subq->incoming_queue, subq->peer);
    }

    if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
        r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
        if (r < 0) {
            error_report("%uBytes MTU not supported by the backend",
                         n->net_conf.mtu);

            return;
        }
    }

    /* The flag is set before the start call and cleared again on failure. */
    n->vhost_started = 1;
    r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
    if (r < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -r);
        n->vhost_started = 0;
    }
}
322  
/*
 * Enable or disable, on @peer, the vnet header endianness that matches
 * the device: big endian if virtio_is_big_endian(@vdev), little endian
 * otherwise.  Returns the result of the backend call.
 */
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    return virtio_is_big_endian(vdev) ? qemu_set_vnet_be(peer, enable)
                                      : qemu_set_vnet_le(peer, enable);
}
333  
/*
 * Apply the vnet header endianness setting to the peers of the first
 * @queue_pairs entries of @ncs.
 *
 * Returns true when enabling failed on some peer; in that case the peers
 * already configured are switched back off.  Returns false otherwise;
 * failures while disabling are ignored.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int idx;

    for (idx = 0; idx < queue_pairs; idx++) {
        int rc = virtio_net_set_vnet_endian_one(vdev, ncs[idx].peer, enable);
        int undo;

        if (rc >= 0 || !enable) {
            continue;
        }

        /* Roll back, from the most recently configured peer to the first. */
        for (undo = idx - 1; undo >= 0; undo--) {
            virtio_net_set_vnet_endian_one(vdev, ncs[undo].peer, false);
        }

        return true;
    }

    return false;
}
352  
/*
 * Keep the backend's vnet header endianness in step with a status change
 * from vdev->status to @status.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        bool need_swap = false;

        /*
         * Before the device is used, tell the network backend which
         * endianness to use when parsing vnet headers.  If the backend
         * cannot do it, the headers get fixed up in the core virtio-net
         * code instead.
         */
        if (n->has_vnet_hdr) {
            need_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                   queue_pairs, true);
        }
        n->needs_vnet_hdr_swap = need_swap;
        return;
    }

    if (virtio_net_started(n, vdev->status)) {
        /*
         * After the device was used, reset the network backend to the
         * default (guest native endianness); otherwise the guest may lose
         * network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
376  
/*
 * Drop everything pending in @vq and notify the guest if anything was
 * actually dropped.
 */
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtqueue_drop_all(vq) != 0) {
        virtio_notify(vdev, vq);
    }
}
384  
virtio_net_set_status(struct VirtIODevice * vdev,uint8_t status)385  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
386  {
387      VirtIONet *n = VIRTIO_NET(vdev);
388      VirtIONetQueue *q;
389      int i;
390      uint8_t queue_status;
391  
392      virtio_net_vnet_endian_status(n, status);
393      virtio_net_vhost_status(n, status);
394  
395      for (i = 0; i < n->max_queue_pairs; i++) {
396          NetClientState *ncs = qemu_get_subqueue(n->nic, i);
397          bool queue_started;
398          q = &n->vqs[i];
399  
400          if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
401              queue_status = 0;
402          } else {
403              queue_status = status;
404          }
405          queue_started =
406              virtio_net_started(n, queue_status) && !n->vhost_started;
407  
408          if (queue_started) {
409              qemu_flush_queued_packets(ncs);
410          }
411  
412          if (!q->tx_waiting) {
413              continue;
414          }
415  
416          if (queue_started) {
417              if (q->tx_timer) {
418                  timer_mod(q->tx_timer,
419                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
420              } else {
421                  replay_bh_schedule_event(q->tx_bh);
422              }
423          } else {
424              if (q->tx_timer) {
425                  timer_del(q->tx_timer);
426              } else {
427                  qemu_bh_cancel(q->tx_bh);
428              }
429              if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
430                  (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
431                  vdev->vm_running) {
432                  /* if tx is waiting we are likely have some packets in tx queue
433                   * and disabled notification */
434                  q->tx_waiting = 0;
435                  virtio_queue_set_notification(q->tx_vq, 1);
436                  virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
437              }
438          }
439      }
440  }
441  
/*
 * Mirror @nc's link state into the LINK_UP bit of the device status,
 * notify the guest of a configuration change if the status changed, and
 * re-evaluate the device state with the current virtio status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t prev_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (prev_status != n->status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}
458  
/*
 * Emit a NIC_RX_FILTER_CHANGED event for @nc if notification is currently
 * enabled, then disable it again.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    char *qom_path;

    if (!nc->rxfilter_notify_enabled) {
        return;
    }

    qom_path = object_get_canonical_path(OBJECT(n->qdev));
    qapi_event_send_nic_rx_filter_changed(n->netclient_name, qom_path);
    g_free(qom_path);

    /* Turn notification off so that events do not flood. */
    nc->rxfilter_notify_enabled = 0;
}
472  
get_vlan_table(VirtIONet * n)473  static intList *get_vlan_table(VirtIONet *n)
474  {
475      intList *list;
476      int i, j;
477  
478      list = NULL;
479      for (i = 0; i < MAX_VLAN >> 5; i++) {
480          for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
481              if (n->vlans[i] & (1U << j)) {
482                  QAPI_LIST_PREPEND(list, (i << 5) + j);
483              }
484          }
485      }
486  
487      return list;
488  }
489  
virtio_net_query_rxfilter(NetClientState * nc)490  static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
491  {
492      VirtIONet *n = qemu_get_nic_opaque(nc);
493      VirtIODevice *vdev = VIRTIO_DEVICE(n);
494      RxFilterInfo *info;
495      strList *str_list;
496      int i;
497  
498      info = g_malloc0(sizeof(*info));
499      info->name = g_strdup(nc->name);
500      info->promiscuous = n->promisc;
501  
502      if (n->nouni) {
503          info->unicast = RX_STATE_NONE;
504      } else if (n->alluni) {
505          info->unicast = RX_STATE_ALL;
506      } else {
507          info->unicast = RX_STATE_NORMAL;
508      }
509  
510      if (n->nomulti) {
511          info->multicast = RX_STATE_NONE;
512      } else if (n->allmulti) {
513          info->multicast = RX_STATE_ALL;
514      } else {
515          info->multicast = RX_STATE_NORMAL;
516      }
517  
518      info->broadcast_allowed = n->nobcast;
519      info->multicast_overflow = n->mac_table.multi_overflow;
520      info->unicast_overflow = n->mac_table.uni_overflow;
521  
522      info->main_mac = qemu_mac_strdup_printf(n->mac);
523  
524      str_list = NULL;
525      for (i = 0; i < n->mac_table.first_multi; i++) {
526          QAPI_LIST_PREPEND(str_list,
527                        qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
528      }
529      info->unicast_table = str_list;
530  
531      str_list = NULL;
532      for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
533          QAPI_LIST_PREPEND(str_list,
534                        qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
535      }
536      info->multicast_table = str_list;
537      info->vlan_table = get_vlan_table(n);
538  
539      if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
540          info->vlan = RX_STATE_ALL;
541      } else if (!info->vlan_table) {
542          info->vlan = RX_STATE_NONE;
543      } else {
544          info->vlan = RX_STATE_NORMAL;
545      }
546  
547      /* enable event notification after query */
548      nc->rxfilter_notify_enabled = 1;
549  
550      return info;
551  }
552  
/*
 * Reset the data virtqueue @queue_index.
 *
 * Control-queue and out-of-range indexes are ignored, as are queues whose
 * NetClientState has no peer.  For a vhost-backed tap peer the vhost
 * virtqueue is reset too; queued packets are then flushed or purged.
 */
static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    bool vhost_tap;

    /* Validate queue_index; this also skips the cvq. */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
    if (!nc->peer) {
        return;
    }

    vhost_tap = get_vhost_net(nc->peer) &&
                nc->peer->info->type == NET_CLIENT_DRIVER_TAP;
    if (vhost_tap) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}
576  
/*
 * Enable the data virtqueue @queue_index.
 *
 * Control-queue and out-of-range indexes are ignored.  Nothing is done
 * when the queue has no peer or vhost is not started on the device.  For a
 * vhost-backed tap peer the vhost virtqueue is restarted; a failure is
 * reported but not propagated.
 */
static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    bool vhost_tap;
    int r;

    /* Validate queue_index; this also skips the cvq. */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    vhost_tap = get_vhost_net(nc->peer) &&
                nc->peer->info->type == NET_CLIENT_DRIVER_TAP;
    if (!vhost_tap) {
        return;
    }

    r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
    if (r < 0) {
        error_report("unable to restart vhost net virtqueue: %d, "
                        "when resetting the queue", queue_index);
    }
}
603  
/*
 * Record in n->has_vnet_hdr whether the peer of the first queue supports
 * vnet headers.  The field is left untouched when there is no peer.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (nc->peer) {
        n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
    }
}
613  
/* Return the cached vnet-header capability (n->has_vnet_hdr). */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    int cached = n->has_vnet_hdr;

    return cached;
}
618  
/*
 * Query the peer of the first queue for UFO support, cache the answer in
 * n->has_ufo and return it.  Returns 0 without querying when the peer has
 * no vnet header support.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
        return n->has_ufo;
    }

    return 0;
}
628  
/*
 * Return whether the peer of the first queue supports USO; always 0 when
 * the peer has no vnet header support.
 */
static int peer_has_uso(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        return qemu_has_uso(qemu_get_queue(n->nic)->peer);
    }

    return 0;
}
637  
/*
 * Record the mergeable-RX-buffers setting and derive the guest-visible
 * header length from it, @version_1 and @hash_report.
 *
 * For every queue pair whose peer accepts that header length, the peer is
 * configured with it and n->host_hdr_len is set to the same value.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int idx;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (!version_1) {
        /* Legacy layout: the header grows only with mergeable buffers. */
        if (n->mergeable_rx_bufs) {
            n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        } else {
            n->guest_hdr_len = sizeof(struct virtio_net_hdr);
        }
        n->rss_data.populate_hash = false;
    } else {
        if (hash_report) {
            n->guest_hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
        } else {
            n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        }
        n->rss_data.populate_hash = !!hash_report;
    }

    for (idx = 0; idx < n->max_queue_pairs; idx++) {
        NetClientState *subq = qemu_get_subqueue(n->nic, idx);

        if (!peer_has_vnet_hdr(n)) {
            continue;
        }
        if (!qemu_has_vnet_hdr_len(subq->peer, n->guest_hdr_len)) {
            continue;
        }

        qemu_set_vnet_hdr_len(subq->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}
668  
/*
 * Return the largest TX queue size allowed for the configured peer:
 * VIRTQUEUE_MAX_SIZE for vhost-user and vhost-vdpa, the default TX queue
 * size for any other backend or when there is no peer.
 */
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    /* Only vhost-user and vhost-vdpa backends support the max queue size. */
    if (peer->info->type == NET_CLIENT_DRIVER_VHOST_USER ||
        peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        return VIRTQUEUE_MAX_SIZE;
    }

    return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}
689  
/*
 * Attach the peer of queue pair @index.
 *
 * A vhost-user peer gets its vring enabled.  A tap peer is enabled through
 * tap_enable(), but only when the device has more than one queue pair.
 * Returns 0 when there is nothing to do, otherwise tap_enable()'s result.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    NetClientState *peer = nc->peer;

    if (!peer) {
        return 0;
    }

    if (peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(peer, 1);
    }

    if (peer->info->type != NET_CLIENT_DRIVER_TAP ||
        n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(peer);
}
712  
/*
 * Detach the peer of queue pair @index.
 *
 * A vhost-user peer gets its vring disabled; a tap peer is disabled
 * through tap_disable().  Returns 0 when there is nothing to do, otherwise
 * tap_disable()'s result.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    NetClientState *peer = nc->peer;

    if (!peer) {
        return 0;
    }

    if (peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(peer, 0);
    }

    if (peer->info->type == NET_CLIENT_DRIVER_TAP) {
        return tap_disable(peer);
    }

    return 0;
}
731  
/*
 * Attach the peers of the first n->curr_queue_pairs queue pairs and detach
 * the peers of the remaining ones.  Each attach/detach is asserted to
 * succeed.  Does nothing once the NIC's peer has been deleted.
 */
static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int idx;

    if (n->nic->peer_deleted) {
        return;
    }

    for (idx = 0; idx < n->max_queue_pairs; idx++) {
        int r = (idx < n->curr_queue_pairs) ? peer_attach(n, idx)
                                            : peer_detach(n, idx);

        assert(!r);
    }
}
751  
752  static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
753  
/*
 * Compute the feature bits offered to the guest.
 *
 * Starts from @features plus the device's host features and the MAC
 * feature, removes offloads the peer cannot provide and, when a vhost
 * backend is present, filters the result through it.  The backend-filtered
 * set is stored in vdev->backend_features.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    /* Features that cannot be offered when the peer lacks vnet headers. */
    static const unsigned int vnet_hdr_features[] = {
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
        VIRTIO_NET_F_GUEST_CSUM,
        VIRTIO_NET_F_GUEST_TSO4,
        VIRTIO_NET_F_GUEST_TSO6,
        VIRTIO_NET_F_GUEST_ECN,
        VIRTIO_NET_F_HOST_USO,
        VIRTIO_NET_F_GUEST_USO4,
        VIRTIO_NET_F_GUEST_USO6,
        VIRTIO_NET_F_HASH_REPORT,
    };
    static const unsigned int ufo_features[] = {
        VIRTIO_NET_F_GUEST_UFO,
        VIRTIO_NET_F_HOST_UFO,
    };
    static const unsigned int uso_features[] = {
        VIRTIO_NET_F_HOST_USO,
        VIRTIO_NET_F_GUEST_USO4,
        VIRTIO_NET_F_GUEST_USO6,
    };
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    size_t k;

    /* First, sync every feature virtio-net may support. */
    features |= n->host_features;
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        for (k = 0; k < sizeof(vnet_hdr_features) /
                        sizeof(vnet_hdr_features[0]); k++) {
            virtio_clear_feature(&features, vnet_hdr_features[k]);
        }
    }

    /* peer_has_ufo() is only consulted when the peer has vnet headers. */
    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        for (k = 0; k < sizeof(ufo_features) / sizeof(ufo_features[0]); k++) {
            virtio_clear_feature(&features, ufo_features[k]);
        }
    }

    if (!peer_has_uso(n)) {
        for (k = 0; k < sizeof(uso_features) / sizeof(uso_features[0]); k++) {
            virtio_clear_feature(&features, uso_features[k]);
        }
    }

    /* Without a vhost backend there is nothing more to filter. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * GUEST_ANNOUNCE is emulated, so its bit may be set even though the
     * backend offers no control virtqueue; this happens in the vDPA case.
     *
     * Keep the feature set coherent, as the driver could otherwise refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to notify the new location with vDPA devices that
     * do not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}
826  
/*
 * Return the fixed feature set understood by Linux kernel 2.6.25: MAC (as
 * everyone must) plus CSUM, HOST_TSO4, HOST_TSO6 and HOST_ECN.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    static const unsigned int legacy_bits[] = {
        VIRTIO_NET_F_MAC,
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
    };
    uint64_t features = 0;
    size_t k;

    for (k = 0; k < sizeof(legacy_bits) / sizeof(legacy_bits[0]); k++) {
        virtio_add_feature(&features, legacy_bits[k]);
    }

    return features;
}
841  
/*
 * Push the offloads currently enabled in n->curr_guest_offloads to the
 * peer of the first queue, one flag per offload.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    uint64_t offloads = n->curr_guest_offloads;
    bool csum = offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM);
    bool tso4 = offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4);
    bool tso6 = offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6);
    bool ecn = offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN);
    bool ufo = offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO);
    bool uso4 = offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4);
    bool uso6 = offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6);

    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     csum, tso4, tso6, ecn, ufo, uso4, uso6);
}
853  
virtio_net_guest_offloads_by_features(uint64_t features)854  static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
855  {
856      static const uint64_t guest_offloads_mask =
857          (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
858          (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
859          (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
860          (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
861          (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
862          (1ULL << VIRTIO_NET_F_GUEST_USO4) |
863          (1ULL << VIRTIO_NET_F_GUEST_USO6);
864  
865      return guest_offloads_mask & features;
866  }
867  
virtio_net_supported_guest_offloads(const VirtIONet * n)868  uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
869  {
870      VirtIODevice *vdev = VIRTIO_DEVICE(n);
871      return virtio_net_guest_offloads_by_features(vdev->guest_features);
872  }
873  
/*
 * Search context passed as the opaque argument to failover_set_primary():
 * the virtio-net device to match against and the slot for the result.
 */
typedef struct {
    VirtIONet *n;       /* device whose netclient_name is compared */
    DeviceState *dev;   /* written by failover_set_primary() on a match */
} FailoverDevice;
878  
879  /**
880   * Set the failover primary device
881   *
882   * @opaque: FailoverId to setup
883   * @opts: opts for device we are handling
884   * @errp: returns an error if this function fails
885   */
failover_set_primary(DeviceState * dev,void * opaque)886  static int failover_set_primary(DeviceState *dev, void *opaque)
887  {
888      FailoverDevice *fdev = opaque;
889      PCIDevice *pci_dev = (PCIDevice *)
890          object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
891  
892      if (!pci_dev) {
893          return 0;
894      }
895  
896      if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
897          fdev->dev = dev;
898          return 1;
899      }
900  
901      return 0;
902  }
903  
904  /**
905   * Find the primary device for this failover virtio-net
906   *
907   * @n: VirtIONet device
908   * @errp: returns an error if this function fails
909   */
failover_find_primary_device(VirtIONet * n)910  static DeviceState *failover_find_primary_device(VirtIONet *n)
911  {
912      FailoverDevice fdev = {
913          .n = n,
914      };
915  
916      qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
917                         NULL, NULL, &fdev);
918      return fdev.dev;
919  }
920  
failover_add_primary(VirtIONet * n,Error ** errp)921  static void failover_add_primary(VirtIONet *n, Error **errp)
922  {
923      Error *err = NULL;
924      DeviceState *dev = failover_find_primary_device(n);
925  
926      if (dev) {
927          return;
928      }
929  
930      if (!n->primary_opts) {
931          error_setg(errp, "Primary device not found");
932          error_append_hint(errp, "Virtio-net failover will not work. Make "
933                            "sure primary device has parameter"
934                            " failover_pair_id=%s\n", n->netclient_name);
935          return;
936      }
937  
938      dev = qdev_device_add_from_qdict(n->primary_opts,
939                                       n->primary_opts_from_json,
940                                       &err);
941      if (err) {
942          qobject_unref(n->primary_opts);
943          n->primary_opts = NULL;
944      } else {
945          object_unref(OBJECT(dev));
946      }
947      error_propagate(errp, err);
948  }
949  
/*
 * Apply the feature set acked by the driver to the device model.
 *
 * @vdev: the virtio-net device
 * @features: feature bits being set
 *
 * In order: drops MTU when the backend is bypassed and does not offer it,
 * configures multiqueue and the rx header layout, updates the RSC/RSS
 * flags, applies guest offloads (only with a vnet header), forwards the
 * features to every vhost backend, opens the VLAN filter when CTRL_VLAN is
 * absent, and finally handles failover when STANDBY is present.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    bool has_rss, has_rsc_ext;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    has_rss = virtio_has_feature(features, VIRTIO_NET_F_RSS);
    has_rsc_ext = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT);

    virtio_net_set_multiqueue(n,
                              has_rss ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = has_rsc_ext &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = has_rsc_ext &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = has_rss;

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (get_vhost_net(nc->peer)) {
            vhost_net_ack_features(get_vhost_net(nc->peer), features);

            /*
             * keep acked_features in NetVhostUserState up-to-date so it
             * can't miss any features configured by guest virtio driver.
             */
            vhost_net_save_acked_features(nc->peer);
        }
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        /* Set every bit of the VLAN bitmap. */
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (!virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        return;
    }

    qapi_event_send_failover_negotiated(n->netclient_name);
    qatomic_set(&n->failover_primary_hidden, false);
    failover_add_primary(n, &err);
    if (err) {
        /* Under qtest the error is discarded instead of being reported. */
        if (qtest_enabled()) {
            error_free(err);
        } else {
            warn_report_err(err);
        }
    }
}
1017  
/*
 * Handle a VIRTIO_NET_CTRL_RX class command.
 *
 * @n: VirtIONet device
 * @cmd: one of the VIRTIO_NET_CTRL_RX_* commands
 * @iov, @iov_cnt: command payload; its first byte is the on/off value
 *
 * Stores the byte in the receive-mode field selected by @cmd and sends an
 * rx-filter notification.  Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR when
 * the payload is shorter than one byte or @cmd is unknown.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    uint8_t on;

    if (iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)) != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
1050  
/*
 * Handle a VIRTIO_NET_CTRL_GUEST_OFFLOADS class command.
 *
 * @n: VirtIONet device
 * @cmd: only VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET is accepted
 * @iov, @iov_cnt: command payload, a 64-bit offload bitmap
 *
 * Returns VIRTIO_NET_ERR when CTRL_GUEST_OFFLOADS was not negotiated, the
 * payload is short, @cmd is unknown, there is no vnet header, or the
 * bitmap asks for an offload outside
 * virtio_net_supported_guest_offloads().  Otherwise the bitmap becomes the
 * current offload set, is applied to the backend, and VIRTIO_NET_OK is
 * returned.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    uint64_t supported_offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        return VIRTIO_NET_ERR;
    }

    offloads = virtio_ldq_p(vdev, &offloads);

    if (!n->has_vnet_hdr) {
        return VIRTIO_NET_ERR;
    }

    /*
     * The RSC flags are derived from the request first; RSC_EXT is then
     * removed so it does not take part in the supported-offloads check.
     */
    n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
    virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

    supported_offloads = virtio_net_supported_guest_offloads(n);
    if ((offloads & ~supported_offloads) != 0) {
        return VIRTIO_NET_ERR;
    }

    n->curr_guest_offloads = offloads;
    virtio_net_apply_guest_offloads(n);

    return VIRTIO_NET_OK;
}
1095  
/*
 * Handle a VIRTIO_NET_CTRL_MAC class command.
 *
 * @n: VirtIONet device
 * @cmd: VIRTIO_NET_CTRL_MAC_ADDR_SET or VIRTIO_NET_CTRL_MAC_TABLE_SET
 * @iov, @iov_cnt: command payload (advanced locally while parsing)
 *
 * ADDR_SET: the payload must be exactly sizeof(n->mac) bytes and replaces
 * the device MAC address.
 *
 * TABLE_SET: the payload is two consecutive lists, each a 32-bit entry
 * count followed by that many ETH_ALEN-byte addresses.  Entries of the
 * first list are stored from index 0; the second list starts at
 * first_multi.  A list that does not fit into the MAC_TABLE_ENTRIES slots
 * is not stored and sets the corresponding overflow flag instead.  The
 * device table is only updated once the whole payload has been validated.
 *
 * Returns VIRTIO_NET_OK on success and VIRTIO_NET_ERR on an unknown command
 * or malformed payload.  An rx-filter notification is sent on success.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    /* Scratch table; committed to n->mac_table only after full validation. */
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
    /*
     * Byte length of the list being parsed.  Computed in 64 bits so that a
     * huge guest-supplied entry count cannot wrap and slip past the size
     * checks below.
     */
    uint64_t list_bytes;

    /* First list. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    /* Convert only once the count is known to have been read completely. */
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    list_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (list_bytes > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs, list_bytes);
        if (s != list_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    /* list_bytes is bounded by the iov size here, so it fits a size_t. */
    iov_discard_front(&iov, &iov_cnt, list_bytes);

    first_multi = in_use;

    /* Second list: must consume exactly the rest of the payload. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);

    iov_discard_front(&iov, &iov_cnt, s);

    list_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (list_bytes != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       list_bytes);
        if (s != list_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1191  
/*
 * Handle a VIRTIO_NET_CTRL_VLAN class command.
 *
 * @n: VirtIONet device
 * @cmd: VIRTIO_NET_CTRL_VLAN_ADD or VIRTIO_NET_CTRL_VLAN_DEL
 * @iov, @iov_cnt: command payload, a 16-bit VLAN id
 *
 * Sets (ADD) or clears (DEL) the bit for the VLAN id in n->vlans, a bitmap
 * addressed as 32-bit words, then sends an rx-filter notification.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR when the payload is short, the
 * id is not below MAX_VLAN, or @cmd is unknown.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Convert only once the id is known to have been read completely. */
    vid = virtio_lduw_p(vdev, &vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
1220  
/*
 * Handle a VIRTIO_NET_CTRL_ANNOUNCE class command.
 *
 * @n: VirtIONet device
 * @cmd: only VIRTIO_NET_CTRL_ANNOUNCE_ACK is accepted
 * @iov, @iov_cnt: unused
 *
 * An ACK is valid only while VIRTIO_NET_S_ANNOUNCE is set in n->status.
 * It clears that bit and, if announce rounds remain, steps the announce
 * timer.  Returns VIRTIO_NET_OK for a valid ACK, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);

    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
        return VIRTIO_NET_ERR;
    }

    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
    if (n->announce_timer.round) {
        qemu_announce_timer_step(&n->announce_timer);
    }

    return VIRTIO_NET_OK;
}
1236  
/*
 * Hand an eBPF steering program fd to the peer of @nic's first queue.
 *
 * @nic: NIC whose peer receives the program
 * @prog_fd: program file descriptor; callers in this file pass -1 to detach
 *
 * Returns false when there is no peer or the peer has no
 * set_steering_ebpf hook; otherwise returns the hook's result.
 */
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);

    if (!nc || !nc->info->set_steering_ebpf) {
        return false;
    }

    trace_virtio_net_rss_attach_ebpf(nic, prog_fd);
    return nc->info->set_steering_ebpf(nc, prog_fd);
}
1247  
rss_data_to_rss_config(struct VirtioNetRssData * data,struct EBPFRSSConfig * config)1248  static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1249                                     struct EBPFRSSConfig *config)
1250  {
1251      config->redirect = data->redirect;
1252      config->populate_hash = data->populate_hash;
1253      config->hash_types = data->hash_types;
1254      config->indirections_len = data->indirections_len;
1255      config->default_queue = data->default_queue;
1256  }
1257  
/*
 * Program the loaded eBPF RSS object with the current RSS state and attach
 * it to the backend.
 *
 * Returns true only if the eBPF object is loaded, accepts the
 * configuration, and the backend accepts the program fd.  The steps run in
 * that order and stop at the first failure.
 */
static bool virtio_net_attach_ebpf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    return ebpf_rss_set_all(&n->ebpf_rss, &config,
                            n->rss_data.indirections_table, n->rss_data.key,
                            NULL) &&
           virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd);
}
1280  
/*
 * Detach any eBPF steering program from the backend by passing -1 as the
 * program fd.  The result of the attach call is deliberately ignored.
 */
static void virtio_net_detach_ebpf_rss(VirtIONet *n)
{
    (void)virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1285  
/*
 * Make the backend follow n->rss_data.
 *
 * RSS disabled: the eBPF program is detached.
 *
 * RSS enabled: software RSS is used when hashes must be populated, and the
 * eBPF program is detached.  Otherwise the eBPF program is attached; if
 * that fails, software RSS is enabled as a fallback unless the peer is a
 * vhost backend, in which case only a warning is issued.
 */
static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        virtio_net_detach_ebpf_rss(n);
        trace_virtio_net_rss_disable(n);
        return;
    }

    n->rss_data.enabled_software_rss = n->rss_data.populate_hash;

    if (n->rss_data.populate_hash) {
        virtio_net_detach_ebpf_rss(n);
    } else if (!virtio_net_attach_ebpf_rss(n)) {
        if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
            warn_report("Can't load eBPF RSS for vhost");
        } else {
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    }

    trace_virtio_net_rss_enable(n,
                                n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                sizeof(n->rss_data.key));
}
1310  
/*
 * Turn RSS off and commit the change.  Does nothing when RSS is already
 * disabled.
 */
static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled = false;
        virtio_net_commit_rss_config(n);
    }
}
1320  
virtio_net_load_ebpf_fds(VirtIONet * n,Error ** errp)1321  static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
1322  {
1323      int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
1324      int ret = true;
1325      int i = 0;
1326  
1327      if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
1328          error_setg(errp, "Expected %d file descriptors but got %d",
1329                     EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
1330          return false;
1331      }
1332  
1333      for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
1334          fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
1335          if (fds[i] < 0) {
1336              ret = false;
1337              goto exit;
1338          }
1339      }
1340  
1341      ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp);
1342  
1343  exit:
1344      if (!ret) {
1345          for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
1346              close(fds[i]);
1347          }
1348      }
1349  
1350      return ret;
1351  }
1352  
virtio_net_load_ebpf(VirtIONet * n,Error ** errp)1353  static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
1354  {
1355      bool ret = false;
1356  
1357      if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1358          trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds);
1359          if (n->ebpf_rss_fds) {
1360              ret = virtio_net_load_ebpf_fds(n, errp);
1361          } else {
1362              ret = ebpf_rss_load(&n->ebpf_rss, errp);
1363          }
1364      }
1365  
1366      return ret;
1367  }
1368  
/*
 * Detach the steering program from the backend (fd -1), then unload the
 * eBPF RSS object.  The result of the detach call is ignored.
 */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    (void)virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1374  
/*
 * Parse an RSS / hash-report configuration command and apply it.
 *
 * @n: VirtIONet device
 * @iov, @iov_cnt: command payload
 * @do_rss: true for a full RSS configuration (requires VIRTIO_NET_F_RSS);
 *          false for a hash-only configuration (requires
 *          VIRTIO_NET_F_HASH_REPORT), in which case the indirection table
 *          length is forced to 1, the default queue to 0, and the current
 *          number of queue pairs is kept
 *
 * The payload is read in four steps: the fixed header up to the
 * indirection table, the indirection table itself, a 16-bit queue-pair
 * count followed by a one-byte key length, and finally the key.
 *
 * n->rss_data is updated while parsing, before the payload is fully
 * validated.  On any error the problem is traced and
 * virtio_net_disable_rss() is called.
 *
 * Returns the number of queue pairs to use, or 0 on error.  A request with
 * no key and no hash types disables RSS and still returns the queue-pair
 * count.
 */
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    /* Fixed fields that follow the variable-length indirection table. */
    struct {
        uint16_t us;    /* queue-pair count (used only when do_rss) */
        uint8_t b;      /* key length in bytes */
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }

    /* Step 1: fixed header preceding the indirection table. */
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    if (!do_rss) {
        n->rss_data.indirections_len = 0;
    }
    /* At this point indirections_len still holds the mask (length - 1). */
    if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.indirections_len++;
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }

    /* Step 2: indirection table, converted in place to host byte order. */
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    /* g_malloc() aborts on allocation failure, so no NULL check is needed. */
    n->rss_data.indirections_table = g_malloc(size_get);
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }

    /* Step 3: queue-pair count and key length. */
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        /* No key and no hash types: a request to switch RSS off. */
        virtio_net_disable_rss(n);
        return queue_pairs;
    }

    /* Step 4: the key itself. */
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    virtio_net_commit_rss_config(n);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(n, err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}
1494  
/*
 * Handle a VIRTIO_NET_CTRL_MQ class command.
 *
 * @n: VirtIONet device
 * @cmd: VIRTIO_NET_CTRL_MQ_HASH_CONFIG, VIRTIO_NET_CTRL_MQ_RSS_CONFIG or
 *       VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
 * @iov, @iov_cnt: command payload
 *
 * RSS is disabled first, whatever the command.  HASH_CONFIG only updates
 * the hash configuration and returns.  RSS_CONFIG and VQ_PAIRS_SET yield a
 * queue-pair count, which is range-checked and then becomes
 * n->curr_queue_pairs.  Except for a vhost-vdpa peer, the backend is then
 * reconfigured for the new count.
 *
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    uint16_t queue_pairs;

    virtio_net_disable_rss(n);

    switch (cmd) {
    case VIRTIO_NET_CTRL_MQ_HASH_CONFIG:
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;

    case VIRTIO_NET_CTRL_MQ_RSS_CONFIG:
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
        break;

    case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: {
        struct virtio_net_ctrl_mq mq;
        size_t s;

        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
        break;
    }

    default:
        return VIRTIO_NET_ERR;
    }

    if (!n->multiqueue ||
        queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;

    /*
     * Avoid updating the backend for a vdpa device: We're only interested
     * in updating the device model queues.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        return VIRTIO_NET_OK;
    }

    /*
     * stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue
     */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1547  
/*
 * Execute one control-queue command given as scatter/gather lists.
 *
 * @vdev: the virtio-net device
 * @in_sg, @in_num: device-writable buffers; receive the one-byte ack status
 * @out_sg, @out_num: device-readable buffers; control header plus payload
 *
 * The command is dispatched on the header's class field to the matching
 * handler.  An unknown class leaves the status at VIRTIO_NET_ERR.
 *
 * Returns the number of bytes written to @in_sg (sizeof(status)), or 0
 * after flagging a device error when either list is too small to hold its
 * header.
 */
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    struct iovec *iov, *iov2;
    size_t s;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    /*
     * Work on a private copy of the out list, because the header is
     * discarded from its front; iov2 keeps the address to free.
     */
    iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    iov2 = iov;

    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));

    if (s == sizeof(ctrl)) {
        switch (ctrl.class) {
        case VIRTIO_NET_CTRL_RX:
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
            break;
        case VIRTIO_NET_CTRL_MAC:
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
            break;
        case VIRTIO_NET_CTRL_VLAN:
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
            break;
        case VIRTIO_NET_CTRL_ANNOUNCE:
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
            break;
        case VIRTIO_NET_CTRL_MQ:
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
            break;
        case VIRTIO_NET_CTRL_GUEST_OFFLOADS:
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
            break;
        default:
            break;
        }
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
1590  
/*
 * Control virtqueue handler: pop and execute commands until the queue is
 * empty.
 *
 * A processed element is pushed back with the number of bytes written and
 * the guest is notified.  If virtio_net_handle_ctrl_iov() writes nothing,
 * the element is detached from the queue and processing stops.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement))) != NULL) {
        size_t written = virtio_net_handle_ctrl_iov(vdev,
                                                    elem->in_sg, elem->in_num,
                                                    elem->out_sg,
                                                    elem->out_num);

        if (written == 0) {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        virtqueue_push(vq, elem, written);
        virtio_notify(vdev, vq);
        g_free(elem);
    }
}
1615  
1616  /* RX */
1617  
/*
 * RX virtqueue handler: flush the packets queued for the subqueue that
 * corresponds to @vq.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    qemu_flush_queued_packets(
        qemu_get_subqueue(n->nic, vq2q(virtio_get_queue_index(vq))));
}
1625  
/*
 * Tell the net layer whether this queue can take a packet right now.
 *
 * Returns true only if the VM is running, the queue index is below the
 * current number of queue pairs, the rx virtqueue is ready, and the driver
 * has set DRIVER_OK.
 */
static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    return vdev->vm_running &&
           nc->queue_index < n->curr_queue_pairs &&
           virtio_queue_ready(q->rx_vq) &&
           (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
}
1647  
/*
 * Check whether the rx virtqueue offers at least @bufsize bytes.
 *
 * The available-bytes check runs only while the queue is empty or
 * mergeable rx buffers are in use.  When there is not enough space,
 * notifications are re-enabled and the check is repeated if the guest has
 * added buffers in the meantime.
 *
 * Returns 1 with notifications disabled when a packet can be delivered,
 * and 0 when the caller has to wait for the guest.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    unsigned int in_bytes;
    int opaque;

    while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) {
        opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL,
                                           bufsize, 0);
        /* Buffer is enough, disable notification */
        if (bufsize <= in_bytes) {
            break;
        }

        if (!virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) {
            return 0;
        }
        /* Guest has added some buffers, try again */
    }

    virtio_queue_set_notification(q->rx_vq, 0);

    return 1;
}
1674  
/*
 * Pass each 16-bit field of a virtio-net header through virtio_tswap16s(),
 * in place.  The four fields are independent of one another.
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    /* Segmentation fields. */
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);

    /* Checksum fields. */
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1682  
1683  /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1684   * it never finds out that the packets don't have valid checksums.  This
1685   * causes dhclient to get upset.  Fedora's carried a patch for ages to
1686   * fix this with Xen but it hasn't appeared in an upstream release of
1687   * dhclient yet.
1688   *
1689   * To avoid breaking existing guests, we catch udp packets and add
1690   * checksums.  This is terrible but it's better than hacking the guest
1691   * kernels.
1692   *
1693   * N.B. if we introduce a zero-copy API, this operation is no longer free so
1694   * we should provide a mechanism to disable it to avoid polluting the host
1695   * cache.
1696   */
work_around_broken_dhclient(struct virtio_net_hdr * hdr,size_t * hdr_len,const uint8_t * buf,size_t buf_size,size_t * buf_offset)1697  static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1698                                          size_t *hdr_len, const uint8_t *buf,
1699                                          size_t buf_size, size_t *buf_offset)
1700  {
1701      size_t csum_size = ETH_HLEN + sizeof(struct ip_header) +
1702                         sizeof(struct udp_header);
1703  
1704      buf += *buf_offset;
1705      buf_size -= *buf_offset;
1706  
1707      if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1708          (buf_size >= csum_size && buf_size < 1500) && /* normal sized MTU */
1709          (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1710          (buf[23] == 17) && /* ip.protocol == UDP */
1711          (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1712          memcpy((uint8_t *)hdr + *hdr_len, buf, csum_size);
1713          net_checksum_calculate((uint8_t *)hdr + *hdr_len, csum_size, CSUM_UDP);
1714          hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1715          *hdr_len += csum_size;
1716          *buf_offset += csum_size;
1717      }
1718  }
1719  
/*
 * Build the header that is handed to the guest for a packet in @buf.
 *
 * The leading struct virtio_net_hdr of @buf is copied into @hdr, the
 * dhclient checksum workaround is applied and, if the device needs it, the
 * header fields are byte-swapped.
 *
 * *@buf_offset is set to the position in @buf from which packet data still
 * has to be copied.  Returns the number of bytes of @hdr to give to the
 * guest.
 */
static size_t receive_header(VirtIONet *n, struct virtio_net_hdr *hdr,
                             const void *buf, size_t buf_size,
                             size_t *buf_offset)
{
    size_t guest_len = n->guest_hdr_len;

    *buf_offset = n->host_hdr_len;
    memcpy(hdr, buf, sizeof(*hdr));

    /* May extend guest_len and advance *buf_offset */
    work_around_broken_dhclient(hdr, &guest_len, buf, buf_size, buf_offset);

    if (n->needs_vnet_hdr_swap) {
        virtio_net_hdr_swap(VIRTIO_DEVICE(n), hdr);
    }

    return guest_len;
}
1737  
/*
 * Decide whether a received frame should be passed to the guest.
 *
 * @buf points at the received data; the Ethernet frame starts
 * n->host_hdr_len bytes in.  @size is not used.
 *
 * Returns 1 to accept the frame and 0 to filter it out.  The checks are, in
 * order: promiscuous mode, VLAN membership for 802.1Q tagged frames, then
 * the broadcast/multicast or unicast rules and the MAC table.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *frame;
    int i;

    if (n->promisc) {
        return 1;
    }

    frame = buf + n->host_hdr_len;

    /* Tagged frame: the VLAN id must be set in the vlans bitmap */
    if (memcmp(frame + 12, vlan, sizeof(vlan)) == 0) {
        int vid = lduw_be_p(frame + 14) & 0xfff;

        if ((n->vlans[vid >> 5] & (1U << (vid & 0x1f))) == 0) {
            return 0;
        }
    }

    if (frame[0] & 1) {
        /* multicast, broadcast being a special case of it */
        if (memcmp(frame, bcast, sizeof(bcast)) == 0) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* multicast entries occupy the tail of the MAC table */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN],
                       ETH_ALEN) == 0) {
                return 1;
            }
        }

        return 0;
    }

    /* unicast */
    if (n->nouni) {
        return 0;
    }
    if (n->alluni || n->mac_table.uni_overflow) {
        return 1;
    }
    if (memcmp(frame, n->mac, ETH_ALEN) == 0) {
        return 1;
    }

    /* unicast entries come first in the MAC table */
    for (i = 0; i < n->mac_table.first_multi; i++) {
        if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}
1788  
/*
 * Pick the RSS hash type to use for a packet.
 *
 * @hasip4, @hasip6: which IP version was found in the packet; IPv4 is
 *                   checked first
 * @l4hdr_proto:     L4 protocol found in the packet
 * @types:           bitmask of VIRTIO_NET_RSS_HASH_TYPE_* values that may
 *                   be used
 *
 * The most specific enabled type wins: TCP/UDP before plain IP and, for
 * IPv6, the "_EX" variant before the plain one.  Returns a NetPktRss*
 * value, or 0xff when no enabled type applies.
 */
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    const bool is_tcp = l4hdr_proto == ETH_L4_HDR_PROTO_TCP;
    const bool is_udp = l4hdr_proto == ETH_L4_HDR_PROTO_UDP;

    if (hasip4) {
        if (is_tcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (is_udp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
        return 0xff;
    }

    if (!hasip6) {
        return 0xff;
    }

    if (is_tcp) {
        if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
            return NetPktRssIpV6TcpEx;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
            return NetPktRssIpV6Tcp;
        }
    } else if (is_udp) {
        if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
            return NetPktRssIpV6UdpEx;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
            return NetPktRssIpV6Udp;
        }
    }

    /* No usable L4 type, fall back to hashing on the IPv6 header only */
    if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
        return NetPktRssIpV6Ex;
    }
    if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
        return NetPktRssIpV6;
    }

    return 0xff;
}
1848  
/*
 * Software RSS: hash the packet and work out which queue it belongs to.
 *
 * @nc:        queue the packet arrived on
 * @buf/@size: received data, the frame starts n->host_hdr_len bytes in
 * @hdr:       header whose hash_value/hash_report fields are filled in when
 *             n->rss_data.populate_hash is set
 *
 * Returns the index of the queue the packet should be moved to, or -1 when
 * it stays on the queue it arrived on.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size,
                                  struct virtio_net_hdr_v1_hash *hdr)
{
    /* Hash report value for each NetPktRss* hash type, indexed by type */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    VirtIONet *n = qemu_get_nic_opaque(nc);
    struct NetRxPkt *pkt = n->rx_pkt;
    const unsigned int cur_queue = nc->queue_index;
    unsigned int dst_queue;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    uint8_t net_hash_type;
    uint32_t hash;

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);

    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No hash type applies to this packet */
        if (n->rss_data.populate_hash) {
            hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
            hdr->hash_report = 0;
        }
        if (!n->rss_data.redirect) {
            return -1;
        }
        return n->rss_data.default_queue;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        hdr->hash_value = hash;
        hdr->hash_report = reports[net_hash_type];
    }

    if (!n->rss_data.redirect) {
        return -1;
    }

    /* The masked hash selects an entry of the indirection table */
    dst_queue = n->rss_data.indirections_table[
                    hash & (n->rss_data.indirections_len - 1)];

    return (dst_queue == cur_queue) ? -1 : (int)dst_queue;
}
1902  
/*
 * Staging buffer for the header that virtio_net_receive_rcu() copies into
 * the first guest buffer of a packet.
 */
typedef struct Header {
    /* virtio-net header; zeroed by the receive path before each packet */
    struct virtio_net_hdr_v1_hash virtio_net;
    /*
     * NOTE(review): the members below presumably reserve room for the
     * Ethernet/IPv4/UDP headers that work_around_broken_dhclient() appends
     * after the guest header -- confirm.
     */
    struct eth_header eth;
    struct ip_header ip;
    struct udp_header udp;
} Header;
1909  
/*
 * Deliver one packet to the guest through the receive virtqueue.
 *
 * @nc:        queue the packet arrived on; with software RSS enabled the
 *             packet may be moved to another subqueue of the same NIC
 * @buf/@size: received data, including n->host_hdr_len bytes of host header
 *
 * Returns:
 *   @size  the packet was consumed: delivered, rejected by receive_filter(),
 *          or dropped because it did not fit (buffer chain too long, or a
 *          single buffer too small when buffers cannot be merged)
 *   0      the guest has not provided enough buffers
 *   -1     the queue cannot receive right now, or a virtqueue element could
 *          not be obtained or had no in buffers
 *
 * On every failure after elements were popped, those elements are detached
 * from the virtqueue again so that nothing is reported to the guest.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    Header hdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    memset(&hdr.virtio_net, 0, sizeof(hdr.virtio_net));

    /* Software RSS may fill in the hash fields and pick another queue */
    if (n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size, &hdr.virtio_net);
        if (index >= 0) {
            nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs);
        }
    }

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    q = virtio_net_get_subqueue(nc);

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* A filtered packet counts as consumed */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Pop one element per iteration until the whole packet is stored */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry after the first element is a device error */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            /* The first element also carries the header for the guest */
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in guest memory; its
                 * value is only known once the whole packet is stored.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(hdr),
                                             virtio_net.hdr.num_buffers),
                                    sizeof(hdr.virtio_net.hdr.num_buffers));
            } else {
                /*
                 * Without mergeable buffers the packet always uses exactly
                 * one buffer, and the virtio specification requires the
                 * device to report that as num_buffers == 1 rather than 0.
                 * The field only reaches the guest when the guest header
                 * is long enough to include it.
                 */
                virtio_stw_p(vdev, &hdr.virtio_net.hdr.num_buffers, 1);
            }

            /* receive_header() also sets offset to the start of the data */
            guest_offset = n->has_vnet_hdr ?
                           receive_header(n, (struct virtio_net_hdr *)&hdr,
                                          buf, size, &offset) :
                           n->guest_hdr_len;

            iov_from_buf(sg, elem->in_num, 0, &hdr, guest_offset);
            total += guest_offset;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Patch the final buffer count into the header already copied */
        virtio_stw_p(vdev, &hdr.virtio_net.hdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &hdr.virtio_net.hdr.num_buffers,
                     sizeof hdr.virtio_net.hdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Give back every element that was popped for this packet */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
2055  
/*
 * Hand a packet to virtio_net_receive_rcu() with the RCU read lock guard
 * in place for the duration of the call, and pass its result through.
 */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    ssize_t result;

    RCU_READ_LOCK_GUARD();
    result = virtio_net_receive_rcu(nc, buf, size);

    return result;
}
2063  
2064  /*
2065   * Accessors to read and write the IP packet data length field. This
2066   * is a potentially unaligned network-byte-order 16 bit unsigned integer
2067   * pointed to by unit->ip_len.
2068   */
read_unit_ip_len(VirtioNetRscUnit * unit)2069  static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit)
2070  {
2071      return lduw_be_p(unit->ip_plen);
2072  }
2073  
/*
 * Store @l as a big-endian 16 bit value into the IP length field that
 * unit->ip_plen points to.
 */
static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l)
{
    void *len_field = unit->ip_plen;

    stw_be_p(len_field, l);
}
2078  
/*
 * Fill @unit with pointers into the IPv4/TCP packet stored in @buf.
 *
 * The IP header is taken to follow the guest virtio header and the
 * Ethernet header.  unit->payload is the IP total length minus the IP and
 * TCP header lengths.  No bounds are checked here.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip_header *ip4 = (struct ip_header *)(buf
                                                 + chain->n->guest_hdr_len
                                                 + sizeof(struct eth_header));
    /* The low nibble counts 32-bit words */
    const uint16_t ip_hdrlen = (ip4->ip_ver_len & 0xF) << 2;
    uint16_t off_flags;

    unit->ip = ip4;
    unit->ip_plen = &ip4->ip_len;
    unit->tcp = (struct tcp_header *)((uint8_t *)ip4 + ip_hdrlen);

    /* Top four bits hold the TCP header length in 32-bit words */
    off_flags = htons(unit->tcp->th_offset_flags);
    unit->tcp_hdrlen = (off_flags & 0xF000) >> 10;

    unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen;
}
2095  
/*
 * Fill @unit with pointers into the IPv6/TCP packet stored in @buf.
 *
 * The IPv6 header is taken to follow the guest virtio header and the
 * Ethernet header, with the TCP header directly behind the fixed IPv6
 * header.  No bounds are checked here.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *hdr6 = (struct ip6_header *)(buf
                                                    + chain->n->guest_hdr_len
                                                    + sizeof(struct eth_header));
    uint16_t off_flags;

    unit->ip = hdr6;
    unit->ip_plen = &(hdr6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)((uint8_t *)hdr6
                                      + sizeof(struct ip6_header));

    /* Top four bits hold the TCP header length in 32-bit words */
    off_flags = htons(unit->tcp->th_offset_flags);
    unit->tcp_hdrlen = (off_flags & 0xF000) >> 10;

    /*
     * Unlike IPv4, the IPv6 length field does not cover the IP header, so
     * only the TCP header has to be subtracted.
     */
    unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen;
}
2114  
/*
 * Send a cached segment to the guest and release it.
 *
 * The virtio header at the start of seg->buf is finalized first: coalesced
 * segments carry the RSC information and a TCPv4/TCPv6 GSO type, all others
 * get cleared flags and no GSO type.  The segment is removed from
 * chain->buffers and freed whether or not delivery worked.
 *
 * Returns the result of virtio_net_do_receive(); callers treat 0 as a
 * failed send.
 */
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    struct virtio_net_hdr_v1 *vhdr = (struct virtio_net_hdr_v1 *)seg->buf;
    int sent;

    if (seg->is_coalesced) {
        vhdr->rsc.segments = seg->packets;
        vhdr->rsc.dup_acks = seg->dup_ack;
        vhdr->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        vhdr->gso_type = (chain->proto == ETH_P_IP)
                         ? VIRTIO_NET_HDR_GSO_TCPV4
                         : VIRTIO_NET_HDR_GSO_TCPV6;
    } else {
        vhdr->flags = 0;
        vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
    }

    sent = virtio_net_do_receive(seg->nc, seg->buf, seg->size);

    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return sent;
}
2143  
virtio_net_rsc_purge(void * opq)2144  static void virtio_net_rsc_purge(void *opq)
2145  {
2146      VirtioNetRscSeg *seg, *rn;
2147      VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2148  
2149      QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2150          if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2151              chain->stat.purge_failed++;
2152              continue;
2153          }
2154      }
2155  
2156      chain->stat.timer++;
2157      if (!QTAILQ_EMPTY(&chain->buffers)) {
2158          timer_mod(chain->drain_timer,
2159                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
2160      }
2161  }
2162  
/*
 * Release every RSC chain of @n: the cached segments are freed without
 * being delivered, then the chain's drain timer and the chain itself.
 */
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *cur_chain, *following_chain;
    VirtioNetRscSeg *cur_seg, *following_seg;

    QTAILQ_FOREACH_SAFE(cur_chain, &n->rsc_chains, next, following_chain) {
        /* Throw away whatever is still cached on this chain */
        QTAILQ_FOREACH_SAFE(cur_seg, &cur_chain->buffers, next,
                            following_seg) {
            QTAILQ_REMOVE(&cur_chain->buffers, cur_seg, next);
            g_free(cur_seg->buf);
            g_free(cur_seg);
        }

        timer_free(cur_chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, cur_chain, next);
        g_free(cur_chain);
    }
}
2180  
/*
 * Copy a packet into a new segment and append it to chain->buffers.
 *
 * The segment buffer is sized for the guest header, an Ethernet header, an
 * IPv6 header and VIRTIO_NET_MAX_TCP_PAYLOAD bytes, so that later segments
 * can be coalesced onto it.  seg->unit is set up to point into the copy.
 * chain->proto must be ETH_P_IP or ETH_P_IPV6.
 */
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    const uint16_t hdr_len = chain->n->guest_hdr_len;
    VirtioNetRscSeg *seg = g_new(VirtioNetRscSeg, 1);

    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header)
                        + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);

    seg->nc = nc;
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    /* Point the unit at the headers inside the private copy */
    if (chain->proto == ETH_P_IP) {
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
    } else if (chain->proto == ETH_P_IPV6) {
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
    } else {
        g_assert_not_reached();
    }
}
2213  
/*
 * Classify a segment whose sequence number equals that of the cached one,
 * by comparing the ack numbers and windows of the new (@n_tcp) and cached
 * (@o_tcp) TCP headers.
 *
 * Only a pure window update is merged (RSC_COALESCE), by copying the new
 * window into the cached header.  An ack outside the window, a duplicated
 * ack and a pure ack all return RSC_FINAL.  @seg and @buf are not used.
 */
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    const uint32_t nack = htonl(n_tcp->th_ack);
    const uint32_t oack = htonl(o_tcp->th_ack);
    const uint16_t nwin = htons(n_tcp->th_win);
    const uint16_t owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    }

    if (nack != oack) {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }

    /* Same ack number: duplicated ack or window probe */
    if (nwin == owin) {
        /* duplicated ack, add dup ack count due to whql test up to 1 */
        chain->stat.dup_ack++;
        return RSC_FINAL;
    }

    /* Coalesce window update */
    o_tcp->th_win = n_tcp->th_win;
    chain->stat.win_update++;
    return RSC_COALESCE;
}
2249  
/*
 * Try to append the TCP payload described by @n_unit to the cached segment
 * @seg, which belongs to the same flow.
 *
 * Returns RSC_COALESCE when the data (or a window update) was merged into
 * @seg.  Returns RSC_FINAL when the new segment is out of window, out of
 * order, would exceed chain->max_payload, or is an ack that
 * virtio_net_rsc_handle_ack() declines to merge.
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    VirtioNetRscUnit *o_unit = &seg->unit;
    const uint16_t o_ip_len = read_unit_ip_len(o_unit);
    const uint32_t nseq = htonl(n_unit->tcp->th_seq);
    const uint32_t oseq = htonl(o_unit->tcp->th_seq);
    void *data;

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    if (nseq == oseq) {
        if ((o_unit->payload != 0) || !n_unit->payload) {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
        /* From no payload to payload, normal case, not a dup ack or etc */
        chain->stat.data_after_pure_ack++;
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    }

    /* The new data directly follows what is cached: coalesce it */
    if ((o_ip_len + n_unit->payload) > chain->max_payload) {
        chain->stat.over_size++;
        return RSC_FINAL;
    }

    /* Here comes the right data, the payload length in v4/v6 is different,
       so use the field value to update and record the new data len */
    o_unit->payload += n_unit->payload; /* update new data len */

    /* update field in ip header */
    write_unit_ip_len(o_unit, o_ip_len + n_unit->payload);

    /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
       for windows guest, while this may change the behavior for linux
       guest (only if it uses RSC feature). */
    o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

    o_unit->tcp->th_ack = n_unit->tcp->th_ack;
    o_unit->tcp->th_win = n_unit->tcp->th_win;

    /* Append the new payload behind the data already cached */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    memmove(seg->buf + seg->size, data, n_unit->payload);
    seg->size += n_unit->payload;
    seg->packets++;
    chain->stat.coalesced++;

    return RSC_COALESCE;
}
2314  
/*
 * IPv4 flavour of the coalescing attempt.
 *
 * Returns RSC_NO_MATCH unless @unit and the cached segment @seg have the
 * same source/destination addresses and TCP ports.  For a matching flow,
 * returns whatever virtio_net_rsc_coalesce_data() decides.
 */
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *new_ip = (struct ip_header *)(unit->ip);
    struct ip_header *old_ip = (struct ip_header *)(seg->unit.ip);

    if (new_ip->ip_src != old_ip->ip_src
        || new_ip->ip_dst != old_ip->ip_dst
        || unit->tcp->th_sport != seg->unit.tcp->th_sport
        || unit->tcp->th_dport != seg->unit.tcp->th_dport) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
2333  
/*
 * IPv6 flavour of the coalescing attempt.
 *
 * Returns RSC_NO_MATCH unless @unit and the cached segment @seg have the
 * same source/destination addresses and TCP ports.  For a matching flow,
 * returns whatever virtio_net_rsc_coalesce_data() decides.
 */
static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *new_ip = (struct ip6_header *)(unit->ip);
    struct ip6_header *old_ip = (struct ip6_header *)(seg->unit.ip);

    if (memcmp(&new_ip->ip6_src, &old_ip->ip6_src,
               sizeof(struct in6_address)) != 0
        || memcmp(&new_ip->ip6_dst, &old_ip->ip6_dst,
                  sizeof(struct in6_address)) != 0
        || unit->tcp->th_sport != seg->unit.tcp->th_sport
        || unit->tcp->th_dport != seg->unit.tcp->th_dport) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
2353  
2354  /* Packets with 'SYN' should bypass, other flag should be sent after drain
2355   * to prevent out of order */
virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain * chain,struct tcp_header * tcp)2356  static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2357                                           struct tcp_header *tcp)
2358  {
2359      uint16_t tcp_hdr;
2360      uint16_t tcp_flag;
2361  
2362      tcp_flag = htons(tcp->th_offset_flags);
2363      tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2364      tcp_flag &= VIRTIO_NET_TCP_FLAG;
2365      if (tcp_flag & TH_SYN) {
2366          chain->stat.tcp_syn++;
2367          return RSC_BYPASS;
2368      }
2369  
2370      if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2371          chain->stat.tcp_ctrl_drain++;
2372          return RSC_FINAL;
2373      }
2374  
2375      if (tcp_hdr > sizeof(struct tcp_header)) {
2376          chain->stat.tcp_all_opt++;
2377          return RSC_FINAL;
2378      }
2379  
2380      return RSC_CANDIDATE;
2381  }
2382  
/*
 * Run a candidate packet against the segments cached on @chain.
 *
 * - Empty cache: the packet is cached and the drain timer armed.
 * - A cached segment of the same flow accepts it: the packet is merged.
 * - A cached segment of the same flow refuses it (RSC_FINAL): that segment
 *   is sent first, then the packet itself.
 * - No cached segment matches: the packet is cached as a new segment.
 *
 * Returns @size when the packet was cached or merged, 0 when sending the
 * cached segment failed, and otherwise the result of sending the packet.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    VirtioNetRscSeg *cur, *following;
    int verdict;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(cur, &chain->buffers, next, following) {
        verdict = (chain->proto == ETH_P_IP)
                  ? virtio_net_rsc_coalesce4(chain, cur, buf, size, unit)
                  : virtio_net_rsc_coalesce6(chain, cur, buf, size, unit);

        if (verdict == RSC_NO_MATCH) {
            continue;
        }

        if (verdict != RSC_FINAL) {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            cur->is_coalesced = 1;
            return size;
        }

        /* Flush the cached segment before the packet that ends it */
        if (virtio_net_rsc_drain_seg(chain, cur) == 0) {
            /* Send failed */
            chain->stat.final_failed++;
            return 0;
        }

        /* Send current packet */
        return virtio_net_do_receive(nc, buf, size);
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2428  
2429  /* Drain a connection data, this is to avoid out of order segments */
virtio_net_rsc_drain_flow(VirtioNetRscChain * chain,NetClientState * nc,const uint8_t * buf,size_t size,uint16_t ip_start,uint16_t ip_size,uint16_t tcp_port)2430  static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2431                                          NetClientState *nc,
2432                                          const uint8_t *buf, size_t size,
2433                                          uint16_t ip_start, uint16_t ip_size,
2434                                          uint16_t tcp_port)
2435  {
2436      VirtioNetRscSeg *seg, *nseg;
2437      uint32_t ppair1, ppair2;
2438  
2439      ppair1 = *(uint32_t *)(buf + tcp_port);
2440      QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2441          ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2442          if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2443              || (ppair1 != ppair2)) {
2444              continue;
2445          }
2446          if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2447              chain->stat.drain_failed++;
2448          }
2449  
2450          break;
2451      }
2452  
2453      return virtio_net_do_receive(nc, buf, size);
2454  }
2455  
/*
 * Decide whether an IPv4 packet may take part in coalescing.
 *
 * Returns RSC_BYPASS, after bumping the matching statistics counter, when
 * the packet is not IPv4, carries IP options, is not TCP, does not have the
 * DF flag set, has ECN bits set, or has an IP total length that is too
 * short for IP + TCP headers or larger than the data received.  Returns
 * RSC_CANDIDATE otherwise.  @buf is not used.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet, or one with ip options: both are not handled */
    if ((((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4)
        || ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH)) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if ((htons(ip->ip_off) & IP_DF) == 0) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* The length claimed by the IP header must fit the received data */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2501  
/*
 * Receive path for IPv4 frames when coalescing is enabled.
 *
 * Frames that are too short, fail the sanity check or are classified as
 * RSC_BYPASS by the TCP control check go straight to
 * virtio_net_do_receive().  RSC_FINAL drains the matching flow first.
 * Everything else is handed to virtio_net_rsc_do_coalesce().
 */
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    const uint16_t hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
    /* Offsets of the IP and TCP headers inside the frame */
    const size_t ip_off = hdr_len + sizeof(struct eth_header);
    const size_t tcp_off = ip_off + sizeof(struct ip_header);
    VirtioNetRscUnit unit;
    int32_t verdict;

    if (size < tcp_off + sizeof(struct tcp_header)) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    verdict = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (verdict == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    }
    if (verdict == RSC_FINAL) {
        /* The flow is matched on the bytes starting 12 into the IP header */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                                         ip_off + 12,
                                         VIRTIO_NET_IP4_ADDR_SIZE,
                                         tcp_off);
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
2536  
/*
 * Decide whether an IPv6 frame may be considered for coalescing.
 *
 * Returns RSC_CANDIDATE when every check passes, RSC_BYPASS otherwise.
 * Apart from the version check, each rejection bumps a chain->stat counter.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    const uint32_t version =
        (ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4;
    uint16_t payload_len;

    if (version != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /*
     * Requiring TCP as the next header covers both the protocol and the
     * extension header (option) check.
     */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /*
     * The payload must hold at least a TCP header and must fit in what
     * follows the guest, Ethernet and IPv6 headers in the frame.
     */
    payload_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (payload_len < sizeof(struct tcp_header)) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }
    if (payload_len > (size - chain->n->guest_hdr_len
                       - sizeof(struct eth_header)
                       - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Frames with an ECN marking are left alone */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2570  
/*
 * Receive path for IPv6 frames when coalescing is enabled; @opq is the
 * VirtioNetRscChain for the flow's protocol.
 *
 * Frames that are too short, fail the sanity check or are classified as
 * RSC_BYPASS by the TCP control check go straight to
 * virtio_net_do_receive().  RSC_FINAL drains the matching flow first.
 * Everything else is handed to virtio_net_rsc_do_coalesce().
 */
static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    VirtioNetRscChain *chain = opq;
    const uint16_t hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
    /* Offsets of the IPv6 and TCP headers inside the frame */
    const size_t ip6_off = hdr_len + sizeof(struct eth_header);
    const size_t tcp_off = ip6_off + sizeof(struct ip6_header);
    VirtioNetRscUnit unit;
    int32_t verdict;

    if (size < tcp_off + sizeof(tcp_header)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    verdict = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (verdict == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    }
    if (verdict == RSC_FINAL) {
        /* The flow is matched on the bytes starting 8 into the IPv6 header */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                                         ip6_off + 8,
                                         VIRTIO_NET_IP6_ADDR_SIZE,
                                         tcp_off);
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
2606  
virtio_net_rsc_lookup_chain(VirtIONet * n,NetClientState * nc,uint16_t proto)2607  static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2608                                                        NetClientState *nc,
2609                                                        uint16_t proto)
2610  {
2611      VirtioNetRscChain *chain;
2612  
2613      if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2614          return NULL;
2615      }
2616  
2617      QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2618          if (chain->proto == proto) {
2619              return chain;
2620          }
2621      }
2622  
2623      chain = g_malloc(sizeof(*chain));
2624      chain->n = n;
2625      chain->proto = proto;
2626      if (proto == (uint16_t)ETH_P_IP) {
2627          chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2628          chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2629      } else {
2630          chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2631          chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2632      }
2633      chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2634                                        virtio_net_rsc_purge, chain);
2635      memset(&chain->stat, 0, sizeof(chain->stat));
2636  
2637      QTAILQ_INIT(&chain->buffers);
2638      QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2639  
2640      return chain;
2641  }
2642  
/*
 * Entry point of the coalescing receive path: look at the ethertype and
 * dispatch to the IPv4 or IPv6 handler when coalescing is enabled for that
 * protocol.  Every other frame is delivered via virtio_net_do_receive().
 *
 * Returns the result of whichever receive helper handled the frame.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /*
     * The Ethernet header is read at buf + guest_hdr_len below, so the
     * bounds check has to use that same offset.  Checking against
     * host_hdr_len (which virtio_net_flush_tx() asserts to be
     * <= guest_hdr_len) could let a short frame through and read past
     * the end of @buf.
     */
    if (size < (n->guest_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    /* NULL for anything that is neither IPv4 nor IPv6 */
    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
2671  
/*
 * Receive one frame from the peer: go through the coalescing path when
 * it is enabled for IPv4 or IPv6, otherwise deliver the frame directly.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (!n->rsc4_enabled && !n->rsc6_enabled) {
        return virtio_net_do_receive(nc, buf, size);
    }

    return virtio_net_rsc_receive(nc, buf, size);
}
2682  
2683  static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2684  
virtio_net_tx_complete(NetClientState * nc,ssize_t len)2685  static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2686  {
2687      VirtIONet *n = qemu_get_nic_opaque(nc);
2688      VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2689      VirtIODevice *vdev = VIRTIO_DEVICE(n);
2690      int ret;
2691  
2692      virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2693      virtio_notify(vdev, q->tx_vq);
2694  
2695      g_free(q->async_tx.elem);
2696      q->async_tx.elem = NULL;
2697  
2698      virtio_queue_set_notification(q->tx_vq, 1);
2699      ret = virtio_net_flush_tx(q);
2700      if (ret >= n->tx_burst) {
2701          /*
2702           * the flush has been stopped by tx_burst
2703           * we will not receive notification for the
2704           * remainining part, so re-schedule
2705           */
2706          virtio_queue_set_notification(q->tx_vq, 0);
2707          if (q->tx_bh) {
2708              replay_bh_schedule_event(q->tx_bh);
2709          } else {
2710              timer_mod(q->tx_timer,
2711                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2712          }
2713          q->tx_waiting = 1;
2714      }
2715  }
2716  
/*
 * TX
 *
 * Pop elements from the TX virtqueue of @q and send them to the peer.
 *
 * Returns:
 *   >= 0     number of elements completed by this call; the loop stops
 *            when the queue is empty or n->tx_burst elements were handled.
 *            0 is also returned when the driver is not ready or an
 *            asynchronous send is still in flight.
 *   -EBUSY   the send returned 0; the element is kept in q->async_tx.elem
 *            and completed later by virtio_net_tx_complete().
 *   -EINVAL  a malformed element was found; virtio_error() was raised and
 *            the element detached.
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous packet is still in flight: keep notifications off */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr vhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            goto detach;
        }

        /*
         * When the header needs swapping, work on a local copy: sg2[0]
         * points at the swapped header and the remaining entries at the
         * guest data that follows it.
         */
        if (n->needs_vnet_hdr_swap) {
            if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
                sizeof(vhdr)) {
                virtio_error(vdev, "virtio-net header incorrect");
                goto detach;
            }
            virtio_net_hdr_swap(vdev, &vhdr);
            sg2[0].iov_base = &vhdr;
            sg2[0].iov_len = sizeof(vhdr);
            out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
                               sizeof(vhdr), -1);
            /* Completed without being sent; still counted in the burst */
            if (out_num == VIRTQUEUE_MAX_SIZE) {
                goto drop;
            }
            out_num += 1;
            out_sg = sg2;
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            if (iov_size(out_sg, out_num) < n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header is invalid");
                goto detach;
            }
            /* First host_hdr_len bytes, then everything after the guest header */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;

            if (out_num < 1) {
                virtio_error(vdev, "virtio-net nothing to send");
                goto detach;
            }
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        /* 0: park the element until virtio_net_tx_complete() is called */
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

        /* Also reached by fall-through after a send that returned non-zero */
drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;

    /* Error exit: undo the pop and free the element */
detach:
    virtqueue_detach_element(q->tx_vq, elem, 0);
    g_free(elem);
    return -EINVAL;
}
2819  
2820  static void virtio_net_tx_timer(void *opaque);
2821  
virtio_net_handle_tx_timer(VirtIODevice * vdev,VirtQueue * vq)2822  static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2823  {
2824      VirtIONet *n = VIRTIO_NET(vdev);
2825      VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2826  
2827      if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2828          virtio_net_drop_tx_queue_data(vdev, vq);
2829          return;
2830      }
2831  
2832      /* This happens when device was stopped but VCPU wasn't. */
2833      if (!vdev->vm_running) {
2834          q->tx_waiting = 1;
2835          return;
2836      }
2837  
2838      if (q->tx_waiting) {
2839          /* We already have queued packets, immediately flush */
2840          timer_del(q->tx_timer);
2841          virtio_net_tx_timer(q);
2842      } else {
2843          /* re-arm timer to flush it (and more) on next tick */
2844          timer_mod(q->tx_timer,
2845                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2846          q->tx_waiting = 1;
2847          virtio_queue_set_notification(vq, 0);
2848      }
2849  }
2850  
/*
 * TX virtqueue handler used when the queue is flushed from a bottom half.
 *
 * Marks the queue as waiting and schedules the bottom half, unless vhost
 * is running, the link is down or a flush is already pending.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(n->vhost_started)) {
        return;
    }

    /* Link is down: discard whatever the guest queued */
    if (unlikely(!(n->status & VIRTIO_NET_S_LINK_UP))) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A flush is already pending */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;

    /*
     * If the device was stopped but a VCPU still kicked us, only the
     * waiting flag is recorded.
     */
    if (vdev->vm_running) {
        virtio_queue_set_notification(vq, 0);
        replay_bh_schedule_event(q->tx_bh);
    }
}
2876  
virtio_net_tx_timer(void * opaque)2877  static void virtio_net_tx_timer(void *opaque)
2878  {
2879      VirtIONetQueue *q = opaque;
2880      VirtIONet *n = q->n;
2881      VirtIODevice *vdev = VIRTIO_DEVICE(n);
2882      int ret;
2883  
2884      /* This happens when device was stopped but BH wasn't. */
2885      if (!vdev->vm_running) {
2886          /* Make sure tx waiting is set, so we'll run when restarted. */
2887          assert(q->tx_waiting);
2888          return;
2889      }
2890  
2891      q->tx_waiting = 0;
2892  
2893      /* Just in case the driver is not ready on more */
2894      if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2895          return;
2896      }
2897  
2898      ret = virtio_net_flush_tx(q);
2899      if (ret == -EBUSY || ret == -EINVAL) {
2900          return;
2901      }
2902      /*
2903       * If we flush a full burst of packets, assume there are
2904       * more coming and immediately rearm
2905       */
2906      if (ret >= n->tx_burst) {
2907          q->tx_waiting = 1;
2908          timer_mod(q->tx_timer,
2909                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2910          return;
2911      }
2912      /*
2913       * If less than a full burst, re-enable notification and flush
2914       * anything that may have come in while we weren't looking.  If
2915       * we find something, assume the guest is still active and rearm
2916       */
2917      virtio_queue_set_notification(q->tx_vq, 1);
2918      ret = virtio_net_flush_tx(q);
2919      if (ret > 0) {
2920          virtio_queue_set_notification(q->tx_vq, 0);
2921          q->tx_waiting = 1;
2922          timer_mod(q->tx_timer,
2923                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2924      }
2925  }
2926  
virtio_net_tx_bh(void * opaque)2927  static void virtio_net_tx_bh(void *opaque)
2928  {
2929      VirtIONetQueue *q = opaque;
2930      VirtIONet *n = q->n;
2931      VirtIODevice *vdev = VIRTIO_DEVICE(n);
2932      int32_t ret;
2933  
2934      /* This happens when device was stopped but BH wasn't. */
2935      if (!vdev->vm_running) {
2936          /* Make sure tx waiting is set, so we'll run when restarted. */
2937          assert(q->tx_waiting);
2938          return;
2939      }
2940  
2941      q->tx_waiting = 0;
2942  
2943      /* Just in case the driver is not ready on more */
2944      if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2945          return;
2946      }
2947  
2948      ret = virtio_net_flush_tx(q);
2949      if (ret == -EBUSY || ret == -EINVAL) {
2950          return; /* Notification re-enable handled by tx_complete or device
2951                   * broken */
2952      }
2953  
2954      /* If we flush a full burst of packets, assume there are
2955       * more coming and immediately reschedule */
2956      if (ret >= n->tx_burst) {
2957          replay_bh_schedule_event(q->tx_bh);
2958          q->tx_waiting = 1;
2959          return;
2960      }
2961  
2962      /* If less than a full burst, re-enable notification and flush
2963       * anything that may have come in while we weren't looking.  If
2964       * we find something, assume the guest is still active and reschedule */
2965      virtio_queue_set_notification(q->tx_vq, 1);
2966      ret = virtio_net_flush_tx(q);
2967      if (ret == -EINVAL) {
2968          return;
2969      } else if (ret > 0) {
2970          virtio_queue_set_notification(q->tx_vq, 0);
2971          replay_bh_schedule_event(q->tx_bh);
2972          q->tx_waiting = 1;
2973      }
2974  }
2975  
/*
 * Create the RX/TX virtqueue pair number @index of @n.
 *
 * The TX side is driven by a timer when net_conf.tx is "timer" and by a
 * bottom half otherwise.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    const bool use_timer = n->net_conf.tx && !strcmp(n->net_conf.tx, "timer");

    /* The RX queue is added first, followed by the TX queue */
    q->rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                virtio_net_handle_rx);

    if (use_timer) {
        q->tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                    virtio_net_handle_tx_timer);
        q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   virtio_net_tx_timer,
                                   q);
    } else {
        q->tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                    virtio_net_handle_tx_bh);
        q->tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, q,
                                       &DEVICE(vdev)->mem_reentrancy_guard);
    }

    q->tx_waiting = 0;
    q->n = n;
}
3001  
/*
 * Tear down the RX/TX virtqueue pair number @index of @n, together with
 * the timer or bottom half that drives its TX side.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    /* Virtqueue numbers used for this pair */
    const int first_vq = index * 2;
    const int second_vq = index * 2 + 1;

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, first_vq);

    if (!q->tx_timer) {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    } else {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    }
    q->tx_waiting = 0;

    virtio_del_queue(vdev, second_vq);
}
3021  
/*
 * Resize the device to @new_max_queue_pairs RX/TX pairs plus the control
 * virtqueue, which always comes last.
 */
static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int old_pairs;
    int pair;

    /* At least one pair plus the control queue, so the count is odd */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    old_pairs = (old_num_queues - 1) / 2;

    /*
     * The control queue must stay last, so it is removed before the pairs
     * change and added back afterwards.  At most one of the two loops
     * below does any work.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    /* Shrinking: delete the pairs that are no longer wanted */
    for (pair = new_max_queue_pairs; pair < old_pairs; pair++) {
        virtio_net_del_queue(n, pair);
    }

    /* Growing: create the missing pairs */
    for (pair = old_pairs; pair < new_max_queue_pairs; pair++) {
        virtio_net_add_queue(n, pair);
    }

    /* The control queue goes back in last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
3056  
/*
 * Switch multiqueue mode on or off: size the device to max_queue_pairs
 * pairs when @multiqueue is set, to a single pair otherwise, then apply
 * the queue pair configuration.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int wanted_pairs;

    if (multiqueue) {
        wanted_pairs = n->max_queue_pairs;
    } else {
        wanted_pairs = 1;
    }

    n->multiqueue = multiqueue;
    virtio_net_change_num_queue_pairs(n, wanted_pairs);

    virtio_net_set_queue_pairs(n);
}
3066  
/*
 * Set up multiqueue mode from the guest features: it is enabled when the
 * guest negotiated either VIRTIO_NET_F_RSS or VIRTIO_NET_F_MQ.
 *
 * Always returns 0.
 */
static int virtio_net_pre_load_queues(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    const bool want_mq =
        virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) ||
        virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ);

    virtio_net_set_multiqueue(n, want_mq);

    return 0;
}
3075  
virtio_net_post_load_device(void * opaque,int version_id)3076  static int virtio_net_post_load_device(void *opaque, int version_id)
3077  {
3078      VirtIONet *n = opaque;
3079      VirtIODevice *vdev = VIRTIO_DEVICE(n);
3080      int i, link_down;
3081  
3082      trace_virtio_net_post_load_device();
3083      virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3084                                 virtio_vdev_has_feature(vdev,
3085                                                         VIRTIO_F_VERSION_1),
3086                                 virtio_vdev_has_feature(vdev,
3087                                                         VIRTIO_NET_F_HASH_REPORT));
3088  
3089      /* MAC_TABLE_ENTRIES may be different from the saved image */
3090      if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3091          n->mac_table.in_use = 0;
3092      }
3093  
3094      if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3095          n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3096      }
3097  
3098      /*
3099       * curr_guest_offloads will be later overwritten by the
3100       * virtio_set_features_nocheck call done from the virtio_load.
3101       * Here we make sure it is preserved and restored accordingly
3102       * in the virtio_net_post_load_virtio callback.
3103       */
3104      n->saved_guest_offloads = n->curr_guest_offloads;
3105  
3106      virtio_net_set_queue_pairs(n);
3107  
3108      /* Find the first multicast entry in the saved MAC filter */
3109      for (i = 0; i < n->mac_table.in_use; i++) {
3110          if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3111              break;
3112          }
3113      }
3114      n->mac_table.first_multi = i;
3115  
3116      /* nc.link_down can't be migrated, so infer link_down according
3117       * to link status bit in n->status */
3118      link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3119      for (i = 0; i < n->max_queue_pairs; i++) {
3120          qemu_get_subqueue(n->nic, i)->link_down = link_down;
3121      }
3122  
3123      if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3124          virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3125          qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3126                                    QEMU_CLOCK_VIRTUAL,
3127                                    virtio_net_announce_timer, n);
3128          if (n->announce_timer.round) {
3129              timer_mod(n->announce_timer.tm,
3130                        qemu_clock_get_ms(n->announce_timer.type));
3131          } else {
3132              qemu_announce_timer_del(&n->announce_timer, false);
3133          }
3134      }
3135  
3136      virtio_net_commit_rss_config(n);
3137      return 0;
3138  }
3139  
/*
 * Hook run after the generic virtio state has been loaded.
 *
 * Always returns 0.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /*
     * virtio_net_post_load_device stashed the wanted offloads in
     * saved_guest_offloads; put them back and, if the peer has a vnet
     * header, apply them.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    virtio_net_apply_guest_offloads(n);
    return 0;
}
3155  
/*
 * Migration description of a single VirtIONetQueue: only its tx_waiting
 * field, as a 32-bit value.  Used as the element description by
 * vmstate_virtio_net_tx_waiting.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
3164  
max_queue_pairs_gt_1(void * opaque,int version_id)3165  static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3166  {
3167      return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3168  }
3169  
has_ctrl_guest_offloads(void * opaque,int version_id)3170  static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3171  {
3172      return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3173                                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3174  }
3175  
mac_table_fits(void * opaque,int version_id)3176  static bool mac_table_fits(void *opaque, int version_id)
3177  {
3178      return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3179  }
3180  
/* Exact negation of mac_table_fits() */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }

    return true;
}
3185  
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    /* Device whose state is being saved or loaded */
    VirtIONet      *parent;
    /* parent->vqs + 1, set by virtio_net_tx_waiting_pre_save() */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queue_pairs - 1, or 0 when there are no queue pairs */
    uint16_t        curr_queue_pairs_1;
    /* Copy of parent->has_ufo on save; checked against the peer on load */
    uint8_t         has_ufo;
    /* Copy of parent->has_vnet_hdr on save; checked against the peer on load */
    uint32_t        has_vnet_hdr;
};
3196  
3197  /* The 2nd and subsequent tx_waiting flags are loaded later than
3198   * the 1st entry in the queue_pairs and only if there's more than one
3199   * entry.  We use the tmp mechanism to calculate a temporary
3200   * pointer and count and also validate the count.
3201   */
3202  
virtio_net_tx_waiting_pre_save(void * opaque)3203  static int virtio_net_tx_waiting_pre_save(void *opaque)
3204  {
3205      struct VirtIONetMigTmp *tmp = opaque;
3206  
3207      tmp->vqs_1 = tmp->parent->vqs + 1;
3208      tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3209      if (tmp->parent->curr_queue_pairs == 0) {
3210          tmp->curr_queue_pairs_1 = 0;
3211      }
3212  
3213      return 0;
3214  }
3215  
virtio_net_tx_waiting_pre_load(void * opaque)3216  static int virtio_net_tx_waiting_pre_load(void *opaque)
3217  {
3218      struct VirtIONetMigTmp *tmp = opaque;
3219  
3220      /* Reuse the pointer setup from save */
3221      virtio_net_tx_waiting_pre_save(opaque);
3222  
3223      if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3224          error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3225              tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3226  
3227          return -EINVAL;
3228      }
3229  
3230      return 0; /* all good */
3231  }
3232  
/*
 * Migration description for the tx_waiting flags of the queues after the
 * first one.  It operates on a struct VirtIONetMigTmp whose vqs_1 pointer
 * and curr_queue_pairs_1 count are filled in by the pre_save/pre_load
 * hooks; each element is described by vmstate_virtio_net_queue_tx_waiting.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3245  
3246  /* the 'has_ufo' flag is just tested; if the incoming stream has the
3247   * flag set we need to check that we have it
3248   */
virtio_net_ufo_post_load(void * opaque,int version_id)3249  static int virtio_net_ufo_post_load(void *opaque, int version_id)
3250  {
3251      struct VirtIONetMigTmp *tmp = opaque;
3252  
3253      if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3254          error_report("virtio-net: saved image requires TUN_F_UFO support");
3255          return -EINVAL;
3256      }
3257  
3258      return 0;
3259  }
3260  
virtio_net_ufo_pre_save(void * opaque)3261  static int virtio_net_ufo_pre_save(void *opaque)
3262  {
3263      struct VirtIONetMigTmp *tmp = opaque;
3264  
3265      tmp->has_ufo = tmp->parent->has_ufo;
3266  
3267      return 0;
3268  }
3269  
/*
 * Migration description for the has_ufo flag, carried as one byte in a
 * struct VirtIONetMigTmp.  The flag is filled in by the pre_save hook and
 * checked against the peer by the post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (const VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3279  
3280  /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3281   * flag set we need to check that we have it
3282   */
virtio_net_vnet_post_load(void * opaque,int version_id)3283  static int virtio_net_vnet_post_load(void *opaque, int version_id)
3284  {
3285      struct VirtIONetMigTmp *tmp = opaque;
3286  
3287      if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3288          error_report("virtio-net: saved image requires vnet_hdr=on");
3289          return -EINVAL;
3290      }
3291  
3292      return 0;
3293  }
3294  
virtio_net_vnet_pre_save(void * opaque)3295  static int virtio_net_vnet_pre_save(void *opaque)
3296  {
3297      struct VirtIONetMigTmp *tmp = opaque;
3298  
3299      tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3300  
3301      return 0;
3302  }
3303  
3304  static const VMStateDescription vmstate_virtio_net_has_vnet = {
3305      .name      = "virtio-net-vnet",
3306      .post_load = virtio_net_vnet_post_load,
3307      .pre_save  = virtio_net_vnet_pre_save,
3308      .fields    = (const VMStateField[]) {
3309          VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3310          VMSTATE_END_OF_LIST()
3311      },
3312  };
3313  
virtio_net_rss_needed(void * opaque)3314  static bool virtio_net_rss_needed(void *opaque)
3315  {
3316      return VIRTIO_NET(opaque)->rss_data.enabled;
3317  }
3318  
3319  static const VMStateDescription vmstate_virtio_net_rss = {
3320      .name      = "virtio-net-device/rss",
3321      .version_id = 1,
3322      .minimum_version_id = 1,
3323      .needed = virtio_net_rss_needed,
3324      .fields = (const VMStateField[]) {
3325          VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3326          VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3327          VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3328          VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3329          VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3330          VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3331          VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3332                              VIRTIO_NET_RSS_MAX_KEY_SIZE),
3333          VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3334                                      rss_data.indirections_len, 0,
3335                                      vmstate_info_uint16, uint16_t),
3336          VMSTATE_END_OF_LIST()
3337      },
3338  };
3339  
3340  static const VMStateDescription vmstate_virtio_net_device = {
3341      .name = "virtio-net-device",
3342      .version_id = VIRTIO_NET_VM_VERSION,
3343      .minimum_version_id = VIRTIO_NET_VM_VERSION,
3344      .post_load = virtio_net_post_load_device,
3345      .fields = (const VMStateField[]) {
3346          VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3347          VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3348                                 vmstate_virtio_net_queue_tx_waiting,
3349                                 VirtIONetQueue),
3350          VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3351          VMSTATE_UINT16(status, VirtIONet),
3352          VMSTATE_UINT8(promisc, VirtIONet),
3353          VMSTATE_UINT8(allmulti, VirtIONet),
3354          VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3355  
3356          /* Guarded pair: If it fits we load it, else we throw it away
3357           * - can happen if source has a larger MAC table.; post-load
3358           *  sets flags in this case.
3359           */
3360          VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3361                                  0, mac_table_fits, mac_table.in_use,
3362                                   ETH_ALEN),
3363          VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3364                                       mac_table.in_use, ETH_ALEN),
3365  
3366          /* Note: This is an array of uint32's that's always been saved as a
3367           * buffer; hold onto your endiannesses; it's actually used as a bitmap
3368           * but based on the uint.
3369           */
3370          VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3371          VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3372                           vmstate_virtio_net_has_vnet),
3373          VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3374          VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3375          VMSTATE_UINT8(alluni, VirtIONet),
3376          VMSTATE_UINT8(nomulti, VirtIONet),
3377          VMSTATE_UINT8(nouni, VirtIONet),
3378          VMSTATE_UINT8(nobcast, VirtIONet),
3379          VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3380                           vmstate_virtio_net_has_ufo),
3381          VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3382                              vmstate_info_uint16_equal, uint16_t),
3383          VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3384          VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3385                           vmstate_virtio_net_tx_waiting),
3386          VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3387                              has_ctrl_guest_offloads),
3388          VMSTATE_END_OF_LIST()
3389      },
3390      .subsections = (const VMStateDescription * const []) {
3391          &vmstate_virtio_net_rss,
3392          NULL
3393      }
3394  };
3395  
3396  static NetClientInfo net_virtio_info = {
3397      .type = NET_CLIENT_DRIVER_NIC,
3398      .size = sizeof(NICState),
3399      .can_receive = virtio_net_can_receive,
3400      .receive = virtio_net_receive,
3401      .link_status_changed = virtio_net_set_link_status,
3402      .query_rx_filter = virtio_net_query_rxfilter,
3403      .announce = virtio_net_announce,
3404  };
3405  
virtio_net_guest_notifier_pending(VirtIODevice * vdev,int idx)3406  static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3407  {
3408      VirtIONet *n = VIRTIO_NET(vdev);
3409      NetClientState *nc;
3410      assert(n->vhost_started);
3411      if (!n->multiqueue && idx == 2) {
3412          /* Must guard against invalid features and bogus queue index
3413           * from being set by malicious guest, or penetrated through
3414           * buggy migration stream.
3415           */
3416          if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3417              qemu_log_mask(LOG_GUEST_ERROR,
3418                            "%s: bogus vq index ignored\n", __func__);
3419              return false;
3420          }
3421          nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3422      } else {
3423          nc = qemu_get_subqueue(n->nic, vq2q(idx));
3424      }
3425      /*
3426       * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3427       * as the macro of configure interrupt's IDX, If this driver does not
3428       * support, the function will return false
3429       */
3430  
3431      if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3432          return vhost_net_config_pending(get_vhost_net(nc->peer));
3433      }
3434      return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3435  }
3436  
/*
 * Mask or unmask the guest notifier of virtqueue @idx in the vhost
 * backend, or the config interrupt when @idx is VIRTIO_CONFIG_IRQ_IDX.
 * Asserts that vhost has been started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int subqueue;

    assert(n->vhost_started);

    if (n->multiqueue || idx != 2) {
        subqueue = vq2q(idx);
    } else {
        /*
         * Must guard against invalid features and a bogus queue index
         * being set by a malicious guest, or slipping in through a buggy
         * migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        subqueue = n->max_queue_pairs;
    }
    nc = qemu_get_subqueue(n->nic, subqueue);

    /*
     * VIRTIO_CONFIG_IRQ_IDX (-1) stands for the config interrupt rather
     * than a virtqueue, so it is routed to the config mask helper.
     */
    if (idx != VIRTIO_CONFIG_IRQ_IDX) {
        vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
    } else {
        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
    }
}
3469  
/*
 * Derive n->config_size from @host_features.  VIRTIO_NET_F_MAC is always
 * added to the feature set used for the computation; the caller's value
 * is not modified.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    uint64_t sizing_features = host_features;

    virtio_add_feature(&sizing_features, VIRTIO_NET_F_MAC);
    n->config_size = virtio_get_config_size(&cfg_size_params,
                                            sizing_features);
}
3476  
/*
 * Replace the stored net client name and type with copies of @name and
 * @type.  @type must not be NULL; @name may be NULL, in which case the
 * net client name will be type.x.
 */
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    assert(type != NULL);

    /* Swap the name first ... */
    g_free(n->netclient_name);
    n->netclient_name = g_strdup(name);

    /* ... then the type. */
    g_free(n->netclient_type);
    n->netclient_type = g_strdup(type);
}
3490  
failover_unplug_primary(VirtIONet * n,DeviceState * dev)3491  static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3492  {
3493      HotplugHandler *hotplug_ctrl;
3494      PCIDevice *pci_dev;
3495      Error *err = NULL;
3496  
3497      hotplug_ctrl = qdev_get_hotplug_handler(dev);
3498      if (hotplug_ctrl) {
3499          pci_dev = PCI_DEVICE(dev);
3500          pci_dev->partially_hotplugged = true;
3501          hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3502          if (err) {
3503              error_report_err(err);
3504              return false;
3505          }
3506      } else {
3507          return false;
3508      }
3509      return true;
3510  }
3511  
failover_replug_primary(VirtIONet * n,DeviceState * dev,Error ** errp)3512  static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3513                                      Error **errp)
3514  {
3515      Error *err = NULL;
3516      HotplugHandler *hotplug_ctrl;
3517      PCIDevice *pdev = PCI_DEVICE(dev);
3518      BusState *primary_bus;
3519  
3520      if (!pdev->partially_hotplugged) {
3521          return true;
3522      }
3523      primary_bus = dev->parent_bus;
3524      if (!primary_bus) {
3525          error_setg(errp, "virtio_net: couldn't find primary bus");
3526          return false;
3527      }
3528      qdev_set_parent_bus(dev, primary_bus, &error_abort);
3529      qatomic_set(&n->failover_primary_hidden, false);
3530      hotplug_ctrl = qdev_get_hotplug_handler(dev);
3531      if (hotplug_ctrl) {
3532          hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3533          if (err) {
3534              goto out;
3535          }
3536          hotplug_handler_plug(hotplug_ctrl, dev, &err);
3537      }
3538      pdev->partially_hotplugged = false;
3539  
3540  out:
3541      error_propagate(errp, err);
3542      return !err;
3543  }
3544  
/*
 * React to migration event @e on behalf of the failover primary device:
 * unplug it when precopy is set up (unless it is already hidden) and plug
 * it back when precopy failed.  Does nothing if no primary is found.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
{
    DeviceState *primary = failover_find_primary_device(n);
    Error *local_err = NULL;
    bool hidden;

    if (!primary) {
        return;
    }

    hidden = qatomic_read(&n->failover_primary_hidden);

    switch (e->type) {
    case MIG_EVENT_PRECOPY_SETUP:
        if (hidden) {
            break;
        }
        if (!failover_unplug_primary(n, primary)) {
            warn_report("couldn't unplug primary device");
            break;
        }
        vmstate_unregister(VMSTATE_IF(primary), qdev_get_vmsd(primary),
                           primary);
        qapi_event_send_unplug_primary(primary->id);
        qatomic_set(&n->failover_primary_hidden, true);
        break;
    case MIG_EVENT_PRECOPY_FAILED:
        /* We already unplugged the device, so plug it back. */
        if (!failover_replug_primary(n, primary, &local_err) && local_err) {
            error_report_err(local_err);
        }
        break;
    default:
        break;
    }
}
3574  
/*
 * Migration notifier callback: forwards the event to the failover
 * handling of the VirtIONet that embeds @notifier.  Always returns 0.
 */
static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
                                               MigrationEvent *e, Error **errp)
{
    virtio_net_handle_migration_primary(
        container_of(notifier, VirtIONet, migration_state), e);

    return 0;
}
3582  
/*
 * Device listener hook deciding whether the device described by
 * @device_opts must be hidden as the failover primary of this virtio-net
 * device.
 *
 * Returns false for devices that are not our primary, or when @errp has
 * been set; otherwise returns the current failover_primary_hidden value.
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *pair_id;

    /* Only devices that carry a failover_pair_id are considered. */
    if (!device_opts || !qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    pair_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(pair_id, n->netclient_name)) {
        return false;
    }

    /*
     * This helper may run several times for one device.  Remember the
     * options on the first call only, and on later calls just verify that
     * no second primary is being attached to this virtio-net device.
     */
    if (!n->primary_opts) {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    } else {
        /* devices with failover_pair_id always have an id */
        const char *known_id = qdict_get_str(n->primary_opts, "id");
        const char *new_id = qdict_get_str(device_opts, "id");

        if (strcmp(known_id, new_id) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name,
                       known_id, new_id);
            return false;
        }
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3633  
virtio_net_device_realize(DeviceState * dev,Error ** errp)3634  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3635  {
3636      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3637      VirtIONet *n = VIRTIO_NET(dev);
3638      NetClientState *nc;
3639      int i;
3640  
3641      if (n->net_conf.mtu) {
3642          n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3643      }
3644  
3645      if (n->net_conf.duplex_str) {
3646          if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3647              n->net_conf.duplex = DUPLEX_HALF;
3648          } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3649              n->net_conf.duplex = DUPLEX_FULL;
3650          } else {
3651              error_setg(errp, "'duplex' must be 'half' or 'full'");
3652              return;
3653          }
3654          n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3655      } else {
3656          n->net_conf.duplex = DUPLEX_UNKNOWN;
3657      }
3658  
3659      if (n->net_conf.speed < SPEED_UNKNOWN) {
3660          error_setg(errp, "'speed' must be between 0 and INT_MAX");
3661          return;
3662      }
3663      if (n->net_conf.speed >= 0) {
3664          n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3665      }
3666  
3667      if (n->failover) {
3668          n->primary_listener.hide_device = failover_hide_primary_device;
3669          qatomic_set(&n->failover_primary_hidden, true);
3670          device_listener_register(&n->primary_listener);
3671          migration_add_notifier(&n->migration_state,
3672                                 virtio_net_migration_state_notifier);
3673          n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3674      }
3675  
3676      virtio_net_set_config_size(n, n->host_features);
3677      virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3678  
3679      /*
3680       * We set a lower limit on RX queue size to what it always was.
3681       * Guests that want a smaller ring can always resize it without
3682       * help from us (using virtio 1 and up).
3683       */
3684      if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3685          n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3686          !is_power_of_2(n->net_conf.rx_queue_size)) {
3687          error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3688                     "must be a power of 2 between %d and %d.",
3689                     n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3690                     VIRTQUEUE_MAX_SIZE);
3691          virtio_cleanup(vdev);
3692          return;
3693      }
3694  
3695      if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3696          n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3697          !is_power_of_2(n->net_conf.tx_queue_size)) {
3698          error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3699                     "must be a power of 2 between %d and %d",
3700                     n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3701                     virtio_net_max_tx_queue_size(n));
3702          virtio_cleanup(vdev);
3703          return;
3704      }
3705  
3706      n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3707  
3708      /*
3709       * Figure out the datapath queue pairs since the backend could
3710       * provide control queue via peers as well.
3711       */
3712      if (n->nic_conf.peers.queues) {
3713          for (i = 0; i < n->max_ncs; i++) {
3714              if (n->nic_conf.peers.ncs[i]->is_datapath) {
3715                  ++n->max_queue_pairs;
3716              }
3717          }
3718      }
3719      n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3720  
3721      if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3722          error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3723                     "must be a positive integer less than %d.",
3724                     n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3725          virtio_cleanup(vdev);
3726          return;
3727      }
3728      n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3729      n->curr_queue_pairs = 1;
3730      n->tx_timeout = n->net_conf.txtimer;
3731  
3732      if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3733                         && strcmp(n->net_conf.tx, "bh")) {
3734          warn_report("virtio-net: "
3735                      "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3736                      n->net_conf.tx);
3737          error_printf("Defaulting to \"bh\"");
3738      }
3739  
3740      n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3741                                      n->net_conf.tx_queue_size);
3742  
3743      virtio_net_add_queue(n, 0);
3744  
3745      n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3746      qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3747      memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3748      n->status = VIRTIO_NET_S_LINK_UP;
3749      qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3750                                QEMU_CLOCK_VIRTUAL,
3751                                virtio_net_announce_timer, n);
3752      n->announce_timer.round = 0;
3753  
3754      if (n->netclient_type) {
3755          /*
3756           * Happen when virtio_net_set_netclient_name has been called.
3757           */
3758          n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3759                                n->netclient_type, n->netclient_name,
3760                                &dev->mem_reentrancy_guard, n);
3761      } else {
3762          n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3763                                object_get_typename(OBJECT(dev)), dev->id,
3764                                &dev->mem_reentrancy_guard, n);
3765      }
3766  
3767      for (i = 0; i < n->max_queue_pairs; i++) {
3768          n->nic->ncs[i].do_not_pad = true;
3769      }
3770  
3771      peer_test_vnet_hdr(n);
3772      if (peer_has_vnet_hdr(n)) {
3773          n->host_hdr_len = sizeof(struct virtio_net_hdr);
3774      } else {
3775          n->host_hdr_len = 0;
3776      }
3777  
3778      qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3779  
3780      n->vqs[0].tx_waiting = 0;
3781      n->tx_burst = n->net_conf.txburst;
3782      virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3783      n->promisc = 1; /* for compatibility */
3784  
3785      n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3786  
3787      n->vlans = g_malloc0(MAX_VLAN >> 3);
3788  
3789      nc = qemu_get_queue(n->nic);
3790      nc->rxfilter_notify_enabled = 1;
3791  
3792     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3793          struct virtio_net_config netcfg = {};
3794          memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3795          vhost_net_set_config(get_vhost_net(nc->peer),
3796              (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3797      }
3798      QTAILQ_INIT(&n->rsc_chains);
3799      n->qdev = dev;
3800  
3801      net_rx_pkt_init(&n->rx_pkt);
3802  
3803      if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3804          Error *err = NULL;
3805          if (!virtio_net_load_ebpf(n, &err)) {
3806              /*
3807               * If user explicitly gave QEMU RSS FDs to use, then
3808               * failing to use them must be considered a fatal
3809               * error. If no RSS FDs were provided, QEMU is trying
3810               * eBPF on a "best effort" basis only, so report a
3811               * warning and allow fallback to software RSS.
3812               */
3813              if (n->ebpf_rss_fds) {
3814                  error_propagate(errp, err);
3815              } else {
3816                  warn_report("unable to load eBPF RSS: %s",
3817                              error_get_pretty(err));
3818                  error_free(err);
3819              }
3820          }
3821      }
3822  }
3823  
/*
 * Unrealize callback: stops the device and releases what the device
 * holds (names, filter tables, failover hooks, queues, NIC, RSC/RSS data).
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int pair;
    int nr_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    /* Net client naming */
    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    /* MAC and VLAN filter storage */
    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (!n->failover) {
        assert(n->primary_opts == NULL);
    } else {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        migration_remove_notifier(&n->migration_state);
    }

    /* Without multiqueue only the first queue pair is deleted. */
    nr_pairs = 1;
    if (n->multiqueue) {
        nr_pairs = n->max_queue_pairs;
    }
    for (pair = 0; pair < nr_pairs; pair++) {
        virtio_net_del_queue(n, pair);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, nr_pairs * 2);

    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3867  
/*
 * Device reset: restore the receive-mode flags, queue-pair count,
 * announce state, MAC/VLAN filters and MAC address to their initial
 * values, flush or purge queued packets and disable RSS.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int q;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;

    /* Stop any announcement in progress */
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (q = 0; q < n->max_queue_pairs; q++) {
        NetClientState *subqueue = qemu_get_subqueue(n->nic, q);

        flush_or_purge_queued_packets(subqueue);
    }

    virtio_net_disable_rss(n);
}
3903  
/*
 * Instance initialiser: sets the default config size, adds the
 * "bootindex" property and initialises the eBPF RSS context.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);
    DeviceState *dev = DEVICE(n);

    /*
     * sizeof(struct virtio_net_config) is only the default;
     * virtio_net_set_config_size can override it.
     */
    n->config_size = sizeof(struct virtio_net_config);

    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0", dev);

    ebpf_rss_init(&n->ebpf_rss);
}
3919  
virtio_net_pre_save(void * opaque)3920  static int virtio_net_pre_save(void *opaque)
3921  {
3922      VirtIONet *n = opaque;
3923  
3924      /* At this point, backend must be stopped, otherwise
3925       * it might keep writing to memory. */
3926      assert(!n->vhost_started);
3927  
3928      return 0;
3929  }
3930  
primary_unplug_pending(void * opaque)3931  static bool primary_unplug_pending(void *opaque)
3932  {
3933      DeviceState *dev = opaque;
3934      DeviceState *primary;
3935      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3936      VirtIONet *n = VIRTIO_NET(vdev);
3937  
3938      if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3939          return false;
3940      }
3941      primary = failover_find_primary_device(n);
3942      return primary ? primary->pending_deleted_event : false;
3943  }
3944  
dev_unplug_pending(void * opaque)3945  static bool dev_unplug_pending(void *opaque)
3946  {
3947      DeviceState *dev = opaque;
3948      VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3949  
3950      return vdc->primary_unplug_pending(dev);
3951  }
3952  
/*
 * Return the vhost_dev behind the peer of the device's first queue, or
 * NULL if there is no NIC, no queue, or no vhost net for that peer.
 */
static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    struct vhost_net *net;

    /* Each step is only taken when the previous one yielded an object. */
    nc = n->nic ? qemu_get_queue(n->nic) : NULL;
    net = nc ? get_vhost_net(nc->peer) : NULL;

    return net ? &net->dev : NULL;
}
3975  
3976  static const VMStateDescription vmstate_virtio_net = {
3977      .name = "virtio-net",
3978      .minimum_version_id = VIRTIO_NET_VM_VERSION,
3979      .version_id = VIRTIO_NET_VM_VERSION,
3980      .fields = (const VMStateField[]) {
3981          VMSTATE_VIRTIO_DEVICE,
3982          VMSTATE_END_OF_LIST()
3983      },
3984      .pre_save = virtio_net_pre_save,
3985      .dev_unplug_pending = dev_unplug_pending,
3986  };
3987  
3988  static Property virtio_net_properties[] = {
3989      DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3990                      VIRTIO_NET_F_CSUM, true),
3991      DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3992                      VIRTIO_NET_F_GUEST_CSUM, true),
3993      DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3994      DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3995                      VIRTIO_NET_F_GUEST_TSO4, true),
3996      DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3997                      VIRTIO_NET_F_GUEST_TSO6, true),
3998      DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3999                      VIRTIO_NET_F_GUEST_ECN, true),
4000      DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
4001                      VIRTIO_NET_F_GUEST_UFO, true),
4002      DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
4003                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
4004      DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
4005                      VIRTIO_NET_F_HOST_TSO4, true),
4006      DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
4007                      VIRTIO_NET_F_HOST_TSO6, true),
4008      DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
4009                      VIRTIO_NET_F_HOST_ECN, true),
4010      DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
4011                      VIRTIO_NET_F_HOST_UFO, true),
4012      DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
4013                      VIRTIO_NET_F_MRG_RXBUF, true),
4014      DEFINE_PROP_BIT64("status", VirtIONet, host_features,
4015                      VIRTIO_NET_F_STATUS, true),
4016      DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
4017                      VIRTIO_NET_F_CTRL_VQ, true),
4018      DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
4019                      VIRTIO_NET_F_CTRL_RX, true),
4020      DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
4021                      VIRTIO_NET_F_CTRL_VLAN, true),
4022      DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
4023                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
4024      DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
4025                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
4026      DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
4027                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
4028      DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
4029      DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
4030                      VIRTIO_NET_F_RSS, false),
4031      DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
4032                      VIRTIO_NET_F_HASH_REPORT, false),
4033      DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
4034                        ebpf_rss_fds, qdev_prop_string, char*),
4035      DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
4036                      VIRTIO_NET_F_RSC_EXT, false),
4037      DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
4038                         VIRTIO_NET_RSC_DEFAULT_INTERVAL),
4039      DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
4040      DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
4041                         TX_TIMER_INTERVAL),
4042      DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
4043      DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
4044      DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
4045                         VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
4046      DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
4047                         VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
4048      DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
4049      DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
4050                       true),
4051      DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
4052      DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
4053      DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
4054      DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
4055                        VIRTIO_NET_F_GUEST_USO4, true),
4056      DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
4057                        VIRTIO_NET_F_GUEST_USO6, true),
4058      DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
4059                        VIRTIO_NET_F_HOST_USO, true),
4060      DEFINE_PROP_END_OF_LIST(),
4061  };
4062  
/*
 * Class initializer for the virtio-net device type (referenced from
 * virtio_net_info.class_init).
 *
 * @klass: class object being set up; accessed through both its DeviceClass
 *         and its VirtioDeviceClass views.
 * @data:  not used by this function.
 *
 * Every statement below is an independent store into the class structures,
 * so they are grouped by purpose rather than by history.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dev_class = DEVICE_CLASS(klass);
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);

    /* Generic device-level setup: properties, vmstate, category. */
    device_class_set_props(dev_class, virtio_net_properties);
    dev_class->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dev_class->categories);

    /* Realize / unrealize, reset and status callbacks. */
    virtio_class->realize = virtio_net_device_realize;
    virtio_class->unrealize = virtio_net_device_unrealize;
    virtio_class->reset = virtio_net_reset;
    virtio_class->set_status = virtio_net_set_status;

    /* Config space accessors. */
    virtio_class->get_config = virtio_net_get_config;
    virtio_class->set_config = virtio_net_set_config;

    /* Feature negotiation; the GSO bit is added to the legacy feature mask. */
    virtio_class->get_features = virtio_net_get_features;
    virtio_class->set_features = virtio_net_set_features;
    virtio_class->bad_features = virtio_net_bad_features;
    virtio_class->legacy_features |= 0x1 << VIRTIO_NET_F_GSO;

    /* Per-queue reset / enable callbacks. */
    virtio_class->queue_reset = virtio_net_queue_reset;
    virtio_class->queue_enable = virtio_net_queue_enable;

    /* Guest notifier callbacks. */
    virtio_class->guest_notifier_mask = virtio_net_guest_notifier_mask;
    virtio_class->guest_notifier_pending = virtio_net_guest_notifier_pending;

    /* Load hooks and the virtio-level vmstate description. */
    virtio_class->pre_load_queues = virtio_net_pre_load_queues;
    virtio_class->post_load = virtio_net_post_load_virtio;
    virtio_class->vmsd = &vmstate_virtio_net_device;

    /* Remaining hooks: primary-unplug query, vhost lookup, IOTLB toggle. */
    virtio_class->primary_unplug_pending = primary_unplug_pending;
    virtio_class->get_vhost = virtio_net_get_vhost;
    virtio_class->toggle_device_iotlb = vhost_toggle_device_iotlb;
}
4092  
4093  static const TypeInfo virtio_net_info = {
4094      .name = TYPE_VIRTIO_NET,
4095      .parent = TYPE_VIRTIO_DEVICE,
4096      .instance_size = sizeof(VirtIONet),
4097      .instance_init = virtio_net_instance_init,
4098      .class_init = virtio_net_class_init,
4099  };
4100  
virtio_register_types(void)4101  static void virtio_register_types(void)
4102  {
4103      type_register_static(&virtio_net_info);
4104  }
4105  
4106  type_init(virtio_register_types)
4107