xref: /openbmc/qemu/net/vhost-vdpa.c (revision a9bc470ec208bd27a82100abc9dccf1b69f41b45)
1  /*
2   * vhost-vdpa.c
3   *
4   * Copyright(c) 2017-2018 Intel Corporation.
5   * Copyright(c) 2020 Red Hat, Inc.
6   *
7   * This work is licensed under the terms of the GNU GPL, version 2 or later.
8   * See the COPYING file in the top-level directory.
9   *
10   */
11  
12  #include "qemu/osdep.h"
13  #include "clients.h"
14  #include "hw/virtio/virtio-net.h"
15  #include "net/vhost_net.h"
16  #include "net/vhost-vdpa.h"
17  #include "hw/virtio/vhost-vdpa.h"
18  #include "qemu/config-file.h"
19  #include "qemu/error-report.h"
20  #include "qemu/log.h"
21  #include "qemu/memalign.h"
22  #include "qemu/option.h"
23  #include "qapi/error.h"
24  #include <linux/vhost.h>
25  #include <sys/ioctl.h>
26  #include <err.h>
27  #include "standard-headers/linux/virtio_net.h"
28  #include "monitor/monitor.h"
29  #include "migration/migration.h"
30  #include "migration/misc.h"
31  #include "hw/virtio/vhost.h"
32  
33  /* TODO: add multiqueue support here */
34  typedef struct VhostVDPAState {
35      NetClientState nc;
36      struct vhost_vdpa vhost_vdpa;
37      Notifier migration_state;
38      VHostNetState *vhost_net;
39  
40      /* Control commands shadow buffers */
41      void *cvq_cmd_out_buffer;
42      virtio_net_ctrl_ack *status;
43  
44      /* The device always has SVQ enabled */
45      bool always_svq;
46  
47      /* The device can isolate CVQ in its own ASID */
48      bool cvq_isolated;
49  
50      bool started;
51  } VhostVDPAState;
52  
53  /*
54   * The array is sorted alphabetically in ascending order,
55   * with the exception of VHOST_INVALID_FEATURE_BIT,
56   * which should always be the last entry.
57   */
58  const int vdpa_feature_bits[] = {
59      VIRTIO_F_ANY_LAYOUT,
60      VIRTIO_F_IOMMU_PLATFORM,
61      VIRTIO_F_NOTIFY_ON_EMPTY,
62      VIRTIO_F_RING_PACKED,
63      VIRTIO_F_RING_RESET,
64      VIRTIO_F_VERSION_1,
65      VIRTIO_NET_F_CSUM,
66      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
67      VIRTIO_NET_F_CTRL_MAC_ADDR,
68      VIRTIO_NET_F_CTRL_RX,
69      VIRTIO_NET_F_CTRL_RX_EXTRA,
70      VIRTIO_NET_F_CTRL_VLAN,
71      VIRTIO_NET_F_CTRL_VQ,
72      VIRTIO_NET_F_GSO,
73      VIRTIO_NET_F_GUEST_CSUM,
74      VIRTIO_NET_F_GUEST_ECN,
75      VIRTIO_NET_F_GUEST_TSO4,
76      VIRTIO_NET_F_GUEST_TSO6,
77      VIRTIO_NET_F_GUEST_UFO,
78      VIRTIO_NET_F_GUEST_USO4,
79      VIRTIO_NET_F_GUEST_USO6,
80      VIRTIO_NET_F_HASH_REPORT,
81      VIRTIO_NET_F_HOST_ECN,
82      VIRTIO_NET_F_HOST_TSO4,
83      VIRTIO_NET_F_HOST_TSO6,
84      VIRTIO_NET_F_HOST_UFO,
85      VIRTIO_NET_F_HOST_USO,
86      VIRTIO_NET_F_MQ,
87      VIRTIO_NET_F_MRG_RXBUF,
88      VIRTIO_NET_F_MTU,
89      VIRTIO_NET_F_RSS,
90      VIRTIO_NET_F_STATUS,
91      VIRTIO_RING_F_EVENT_IDX,
92      VIRTIO_RING_F_INDIRECT_DESC,
93  
94      /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
95      VHOST_INVALID_FEATURE_BIT
96  };
97  
98  /** Supported device-specific feature bits with SVQ */
99  static const uint64_t vdpa_svq_device_features =
100      BIT_ULL(VIRTIO_NET_F_CSUM) |
101      BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
102      BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
103      BIT_ULL(VIRTIO_NET_F_MTU) |
104      BIT_ULL(VIRTIO_NET_F_MAC) |
105      BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
106      BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
107      BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
108      BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
109      BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
110      BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
111      BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
112      BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
113      BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
114      BIT_ULL(VIRTIO_NET_F_STATUS) |
115      BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
116      BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
117      BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |
118      BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) |
119      BIT_ULL(VIRTIO_NET_F_MQ) |
120      BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
121      BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
122      /* VHOST_F_LOG_ALL is exposed by SVQ */
123      BIT_ULL(VHOST_F_LOG_ALL) |
124      BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |
125      BIT_ULL(VIRTIO_NET_F_RSS) |
126      BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
127      BIT_ULL(VIRTIO_NET_F_STANDBY) |
128      BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
129  
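/*
 * Address space id used for the shadow CVQ buffers when the device can
 * isolate CVQ in its own group; data vqs stay in VHOST_VDPA_GUEST_PA_ASID.
 */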
130  #define VHOST_VDPA_NET_CVQ_ASID 1
131  
132  VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
133  {
134      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
135      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
136      return s->vhost_net;
137  }
138  
139  static size_t vhost_vdpa_net_cvq_cmd_len(void)
140  {
141      /*
142       * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
143       * The in buffer is always 1 byte, so it should fit here.
144       */
145      return sizeof(struct virtio_net_ctrl_hdr) +
146             2 * sizeof(struct virtio_net_ctrl_mac) +
147             MAC_TABLE_ENTRIES * ETH_ALEN;
148  }
149  
150  static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
151  {
152      return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
153  }
154  
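/*
 * Check whether the device features can be used with SVQ: any device-specific
 * bit outside vdpa_svq_device_features is rejected, then the transport bits
 * are validated by vhost_svq_valid_features().
 */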
155  static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
156  {
157      uint64_t invalid_dev_features =
158          features & ~vdpa_svq_device_features &
159          /* Transport features are all accepted at this point */
160          ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
161                           VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
162  
163      if (invalid_dev_features) {
164          error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
165                     invalid_dev_features);
166          return false;
167      }
168  
169      return vhost_svq_valid_features(features, errp);
170  }
171  
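/* Verify that the backing vdpa device is actually a network device */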
172  static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
173  {
174      uint32_t device_id;
175      int ret;
176      struct vhost_dev *hdev;
177  
178      hdev = (struct vhost_dev *)&net->dev;
179      ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
180      if (device_id != VIRTIO_ID_NET) {
181          return -ENOTSUP;
182      }
183      return ret;
184  }
185  
186  static int vhost_vdpa_add(NetClientState *ncs, void *be,
187                            int queue_pair_index, int nvqs)
188  {
189      VhostNetOptions options;
190      struct vhost_net *net = NULL;
191      VhostVDPAState *s;
192      int ret;
193  
194      options.backend_type = VHOST_BACKEND_TYPE_VDPA;
195      assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
196      s = DO_UPCAST(VhostVDPAState, nc, ncs);
197      options.net_backend = ncs;
198      options.opaque      = be;
199      options.busyloop_timeout = 0;
200      options.nvqs = nvqs;
201  
202      net = vhost_net_init(&options);
203      if (!net) {
204          error_report("failed to init vhost_net for queue");
205          goto err_init;
206      }
207      s->vhost_net = net;
208      ret = vhost_vdpa_net_check_device_id(net);
209      if (ret) {
210          goto err_check;
211      }
212      return 0;
213  err_check:
214      vhost_net_cleanup(net);
215      g_free(net);
216  err_init:
217      return -1;
218  }
219  
220  static void vhost_vdpa_cleanup(NetClientState *nc)
221  {
222      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
223  
224      /*
225       * If a peer NIC is attached, do not clean up anything.
226       * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
227       * when the guest is shutting down.
228       */
229      if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
230          return;
231      }
232      munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
233      munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
234      if (s->vhost_net) {
235          vhost_net_cleanup(s->vhost_net);
236          g_free(s->vhost_net);
237          s->vhost_net = NULL;
238      }
239      if (s->vhost_vdpa.device_fd >= 0) {
240          qemu_close(s->vhost_vdpa.device_fd);
241          s->vhost_vdpa.device_fd = -1;
242      }
243  }
244  
245  /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */
246  static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd)
247  {
248      return true;
249  }
250  
251  static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
252  {
253      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
254  
255      return true;
256  }
257  
258  static bool vhost_vdpa_has_ufo(NetClientState *nc)
259  {
260      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
261      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
262      uint64_t features = 0;
263      features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
264      features = vhost_net_get_features(s->vhost_net, features);
265      return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
266  
267  }
268  
269  static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
270                                         Error **errp)
271  {
272      const char *driver = object_class_get_name(oc);
273  
274      if (!g_str_has_prefix(driver, "virtio-net-")) {
275          error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
276          return false;
277      }
278  
279      return true;
280  }
281  
282  /** Dummy receive in case qemu falls back to userland tap networking */
283  static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
284                                    size_t size)
285  {
286      return size;
287  }
288  
289  /** From any vdpa net client, get the netclient of the first queue pair */
290  static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
291  {
292      NICState *nic = qemu_get_nic(s->nc.peer);
293      NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
294  
295      return DO_UPCAST(VhostVDPAState, nc, nc0);
296  }
297  
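/*
 * Restart vhost_net so that SVQ (and with it dirty page logging via
 * VHOST_F_LOG_ALL) can be toggled for the data vqs; called from the
 * migration state notifier.
 */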
298  static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
299  {
300      struct vhost_vdpa *v = &s->vhost_vdpa;
301      VirtIONet *n;
302      VirtIODevice *vdev;
303      int data_queue_pairs, cvq, r;
304  
305      /* We are only called for the first data vq and only if x-svq is not set */
306      if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
307          return;
308      }
309  
310      vdev = v->dev->vdev;
311      n = VIRTIO_NET(vdev);
312      if (!n->vhost_started) {
313          return;
314      }
315  
316      data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
317      cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
318                                    n->max_ncs - n->max_queue_pairs : 0;
319      /*
320       * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
321       * in the future and resume the device if read-only operations between
322       * suspend and reset go wrong.
323       */
324      vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
325  
326      /* Start will check migration setup_or_active to decide whether to configure SVQ */
327      r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
328      if (unlikely(r < 0)) {
329          error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
330      }
331  }
332  
333  static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
334  {
335      MigrationState *migration = data;
336      VhostVDPAState *s = container_of(notifier, VhostVDPAState,
337                                       migration_state);
338  
339      if (migration_in_setup(migration)) {
340          vhost_vdpa_net_log_global_enable(s, true);
341      } else if (migration_has_failed(migration)) {
342          vhost_vdpa_net_log_global_enable(s, false);
343      }
344  }
345  
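/*
 * Setup only performed by the first data vq: register the migration state
 * notifier and, when SVQ is enabled, create the IOVA tree shared by all vqs.
 */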
346  static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
347  {
348      struct vhost_vdpa *v = &s->vhost_vdpa;
349  
350      migration_add_notifier(&s->migration_state,
351                             vdpa_net_migration_state_notifier);
352      if (v->shadow_vqs_enabled) {
353          v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
354                                             v->iova_range.last);
355      }
356  }
357  
358  static int vhost_vdpa_net_data_start(NetClientState *nc)
359  {
360      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
361      struct vhost_vdpa *v = &s->vhost_vdpa;
362  
363      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
364  
365      if (s->always_svq ||
366          migration_is_setup_or_active(migrate_get_current()->state)) {
367          v->shadow_vqs_enabled = true;
368          v->shadow_data = true;
369      } else {
370          v->shadow_vqs_enabled = false;
371          v->shadow_data = false;
372      }
373  
374      if (v->index == 0) {
375          vhost_vdpa_net_data_start_first(s);
376          return 0;
377      }
378  
379      if (v->shadow_vqs_enabled) {
380          VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
381          v->iova_tree = s0->vhost_vdpa.iova_tree;
382      }
383  
384      return 0;
385  }
386  
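/*
 * Enable the data vrings at load time.  When the device also has a CVQ, the
 * rings are enabled later from vhost_vdpa_net_cvq_load() instead, once the
 * control virtqueue state has been restored.
 */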
387  static int vhost_vdpa_net_data_load(NetClientState *nc)
388  {
389      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
390      struct vhost_vdpa *v = &s->vhost_vdpa;
391      bool has_cvq = v->dev->vq_index_end % 2;
392  
393      if (has_cvq) {
394          return 0;
395      }
396  
397      for (int i = 0; i < v->dev->nvqs; ++i) {
398          vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
399      }
400      return 0;
401  }
402  
403  static void vhost_vdpa_net_client_stop(NetClientState *nc)
404  {
405      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
406      struct vhost_dev *dev;
407  
408      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
409  
410      if (s->vhost_vdpa.index == 0) {
411          migration_remove_notifier(&s->migration_state);
412      }
413  
414      dev = s->vhost_vdpa.dev;
415      if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
416          g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
417      } else {
418          s->vhost_vdpa.iova_tree = NULL;
419      }
420  }
421  
422  static NetClientInfo net_vhost_vdpa_info = {
423          .type = NET_CLIENT_DRIVER_VHOST_VDPA,
424          .size = sizeof(VhostVDPAState),
425          .receive = vhost_vdpa_receive,
426          .start = vhost_vdpa_net_data_start,
427          .load = vhost_vdpa_net_data_load,
428          .stop = vhost_vdpa_net_client_stop,
429          .cleanup = vhost_vdpa_cleanup,
430          .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
431          .has_ufo = vhost_vdpa_has_ufo,
432          .check_peer_type = vhost_vdpa_check_peer_type,
433          .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
434  };
435  
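/* Ask the device which group the given virtqueue index belongs to */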
436  static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
437                                            Error **errp)
438  {
439      struct vhost_vring_state state = {
440          .index = vq_index,
441      };
442      int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
443  
444      if (unlikely(r < 0)) {
445          r = -errno;
446          error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
447          return r;
448      }
449  
450      return state.num;
451  }
452  
453  static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
454                                             unsigned vq_group,
455                                             unsigned asid_num)
456  {
457      struct vhost_vring_state asid = {
458          .index = vq_group,
459          .num = asid_num,
460      };
461      int r;
462  
463      r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
464      if (unlikely(r < 0)) {
465          error_report("Can't set vq group %u asid %u, errno=%d (%s)",
466                       asid.index, asid.num, errno, g_strerror(errno));
467      }
468      return r;
469  }
470  
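/* Unmap a shadow CVQ buffer from the device and remove it from the IOVA tree */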
471  static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
472  {
473      VhostIOVATree *tree = v->iova_tree;
474      DMAMap needle = {
475          /*
476           * No need to specify size or to look for more translations since
477           * this contiguous chunk was allocated by us.
478           */
479          .translated_addr = (hwaddr)(uintptr_t)addr,
480      };
481      const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
482      int r;
483  
484      if (unlikely(!map)) {
485          error_report("Cannot locate expected map");
486          return;
487      }
488  
489      r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
490      if (unlikely(r != 0)) {
491          error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
492      }
493  
494      vhost_iova_tree_remove(tree, *map);
495  }
496  
497  /** Map CVQ buffer. */
498  static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
499                                    bool write)
500  {
501      DMAMap map = {};
502      int r;
503  
504      map.translated_addr = (hwaddr)(uintptr_t)buf;
505      map.size = size - 1;
506      map.perm = write ? IOMMU_RW : IOMMU_RO;
507      r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
508      if (unlikely(r != IOVA_OK)) {
509          error_report("Cannot map injected element");
510          return r;
511      }
512  
513      r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
514                             vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
515      if (unlikely(r < 0)) {
516          goto dma_map_err;
517      }
518  
519      return 0;
520  
521  dma_map_err:
522      vhost_iova_tree_remove(v->iova_tree, map);
523      return r;
524  }
525  
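/*
 * NetClientInfo .start callback for the CVQ client: decide whether CVQ must
 * be shadowed, move it to its own address space id if the device supports
 * isolation, and map the shadow control buffers into the device.
 */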
526  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
527  {
528      VhostVDPAState *s, *s0;
529      struct vhost_vdpa *v;
530      int64_t cvq_group;
531      int r;
532      Error *err = NULL;
533  
534      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
535  
536      s = DO_UPCAST(VhostVDPAState, nc, nc);
537      v = &s->vhost_vdpa;
538  
539      s0 = vhost_vdpa_net_first_nc_vdpa(s);
540      v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
541      v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled;
542      s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
543  
544      if (s->vhost_vdpa.shadow_data) {
545          /* SVQ is already configured for all virtqueues */
546          goto out;
547      }
548  
549      /*
550       * If we return early in these cases, SVQ will not be enabled. Migration
551       * will be blocked as long as the vhost-vdpa backend does not offer _F_LOG.
552       */
553      if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
554          return 0;
555      }
556  
557      if (!s->cvq_isolated) {
558          return 0;
559      }
560  
561      cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
562                                             v->dev->vq_index_end - 1,
563                                             &err);
564      if (unlikely(cvq_group < 0)) {
565          error_report_err(err);
566          return cvq_group;
567      }
568  
569      r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
570      if (unlikely(r < 0)) {
571          return r;
572      }
573  
574      v->shadow_vqs_enabled = true;
575      s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
576  
577  out:
578      if (!s->vhost_vdpa.shadow_vqs_enabled) {
579          return 0;
580      }
581  
582      if (s0->vhost_vdpa.iova_tree) {
583          /*
584           * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
585           * simplicity, whether CVQ shares ASID with guest or not, because:
586           * - The memory listener needs access to guest's memory addresses allocated
587           *   in the IOVA tree.
588           * - There should be plenty of IOVA address space for both ASIDs not to
589           *   worry about collisions between them.  Guest's translations are
590           *   still validated with virtio virtqueue_pop so there is no risk for
591           *   the guest to access memory that it shouldn't.
592           *
593           * Allocating an IOVA tree per ASID is doable, but it complicates the
594           * code and is not worth it for the moment.
595           */
596          v->iova_tree = s0->vhost_vdpa.iova_tree;
597      } else {
598          v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
599                                             v->iova_range.last);
600      }
601  
602      r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
603                                 vhost_vdpa_net_cvq_cmd_page_len(), false);
604      if (unlikely(r < 0)) {
605          return r;
606      }
607  
608      r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
609                                 vhost_vdpa_net_cvq_cmd_page_len(), true);
610      if (unlikely(r < 0)) {
611          vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
612      }
613  
614      return r;
615  }
616  
617  static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
618  {
619      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
620  
621      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
622  
623      if (s->vhost_vdpa.shadow_vqs_enabled) {
624          vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
625          vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
626      }
627  
628      vhost_vdpa_net_client_stop(nc);
629  }
630  
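/*
 * Add a control command to the shadow CVQ.  The caller is expected to poll
 * for completion with vhost_vdpa_net_svq_poll() or the flush helper.
 */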
631  static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
632                                      const struct iovec *out_sg, size_t out_num,
633                                      const struct iovec *in_sg, size_t in_num)
634  {
635      VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
636      int r;
637  
638      r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL);
639      if (unlikely(r != 0)) {
640          if (unlikely(r == -ENOSPC)) {
641              qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
642                            __func__);
643          }
644      }
645  
646      return r;
647  }
648  
649  /*
650   * Convenience wrapper to poll SVQ for multiple control commands.
651   *
652   * The caller should hold the BQL when invoking this function, and should take
653   * the answer before SVQ polls by itself once the BQL is released.
654   */
655  static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight)
656  {
657      VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
658      return vhost_svq_poll(svq, cmds_in_flight);
659  }
660  
661  static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s,
662                                               struct iovec *out_cursor,
663                                               struct iovec *in_cursor)
664  {
665      /* reset the cursor of the output buffer for the device */
666      out_cursor->iov_base = s->cvq_cmd_out_buffer;
667      out_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
668  
669      /* reset the cursor of the in buffer for the device */
670      in_cursor->iov_base = s->status;
671      in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
672  }
673  
674  /*
675   * Poll SVQ for multiple pending control commands and check the device's ack.
676   *
677   * Caller should hold the BQL when invoking this function.
678   *
679   * @s: The VhostVDPAState
680   * @len: The length of the pending status shadow buffer
681   */
682  static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len)
683  {
684      /* device uses a one-byte length ack for each control command */
685      ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len);
686      if (unlikely(dev_written != len)) {
687          return -EIO;
688      }
689  
690      /* check the device's ack */
691      for (int i = 0; i < len; ++i) {
692          if (s->status[i] != VIRTIO_NET_OK) {
693              return -EIO;
694          }
695      }
696      return 0;
697  }
698  
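/*
 * Compose a control command at the current cursors and queue it on the shadow
 * CVQ, flushing the pending commands first when either SVQ or the shadow
 * buffers run out of room.
 */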
699  static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
700                                         struct iovec *out_cursor,
701                                         struct iovec *in_cursor, uint8_t class,
702                                         uint8_t cmd, const struct iovec *data_sg,
703                                         size_t data_num)
704  {
705      const struct virtio_net_ctrl_hdr ctrl = {
706          .class = class,
707          .cmd = cmd,
708      };
709      size_t data_size = iov_size(data_sg, data_num), cmd_size;
710      struct iovec out, in;
711      ssize_t r;
712      unsigned dummy_cursor_iov_cnt;
713      VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
714  
715      assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
716      cmd_size = sizeof(ctrl) + data_size;
717      if (vhost_svq_available_slots(svq) < 2 ||
718          iov_size(out_cursor, 1) < cmd_size) {
719          /*
720           * It is time to flush all pending control commands if SVQ is full
721           * or the control command shadow buffers are full.
722           *
723           * We can poll here since we've had BQL from the time
724           * we sent the descriptor.
725           */
726          r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base -
727                                       (void *)s->status);
728          if (unlikely(r < 0)) {
729              return r;
730          }
731  
732          vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor);
733      }
734  
735      /* pack the CVQ command header */
736      iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl));
737      /* pack the CVQ command's command-specific data */
738      iov_to_buf(data_sg, data_num, 0,
739                 out_cursor->iov_base + sizeof(ctrl), data_size);
740  
741      /* extract the required buffer from the cursor for output */
742      iov_copy(&out, 1, out_cursor, 1, 0, cmd_size);
743      /* extract the required buffer from the cursor for input */
744      iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status));
745  
746      r = vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1);
747      if (unlikely(r < 0)) {
748          return r;
749      }
750  
751      /* iterate the cursors */
752      dummy_cursor_iov_cnt = 1;
753      iov_discard_front(&out_cursor, &dummy_cursor_iov_cnt, cmd_size);
754      dummy_cursor_iov_cnt = 1;
755      iov_discard_front(&in_cursor, &dummy_cursor_iov_cnt, sizeof(*s->status));
756  
757      return 0;
758  }
759  
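/* Restore the MAC address and the MAC filter table state through CVQ */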
760  static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
761                                     struct iovec *out_cursor,
762                                     struct iovec *in_cursor)
763  {
764      if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
765          const struct iovec data = {
766              .iov_base = (void *)n->mac,
767              .iov_len = sizeof(n->mac),
768          };
769          ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
770                                              VIRTIO_NET_CTRL_MAC,
771                                              VIRTIO_NET_CTRL_MAC_ADDR_SET,
772                                              &data, 1);
773          if (unlikely(r < 0)) {
774              return r;
775          }
776      }
777  
778      /*
779       * According to VirtIO standard, "The device MUST have an
780       * empty MAC filtering table on reset.".
781       *
782       * Therefore, there is no need to send this CVQ command if the
783       * driver also sets an empty MAC filter table, which aligns with
784       * the device's defaults.
785       *
786       * Note that the device's defaults can mismatch the driver's
787       * configuration only at live migration.
788       */
789      if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) ||
790          n->mac_table.in_use == 0) {
791          return 0;
792      }
793  
794      uint32_t uni_entries = n->mac_table.first_multi,
795               uni_macs_size = uni_entries * ETH_ALEN,
796               mul_entries = n->mac_table.in_use - uni_entries,
797               mul_macs_size = mul_entries * ETH_ALEN;
798      struct virtio_net_ctrl_mac uni = {
799          .entries = cpu_to_le32(uni_entries),
800      };
801      struct virtio_net_ctrl_mac mul = {
802          .entries = cpu_to_le32(mul_entries),
803      };
804      const struct iovec data[] = {
805          {
806              .iov_base = &uni,
807              .iov_len = sizeof(uni),
808          }, {
809              .iov_base = n->mac_table.macs,
810              .iov_len = uni_macs_size,
811          }, {
812              .iov_base = &mul,
813              .iov_len = sizeof(mul),
814          }, {
815              .iov_base = &n->mac_table.macs[uni_macs_size],
816              .iov_len = mul_macs_size,
817          },
818      };
819      ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
820                                          VIRTIO_NET_CTRL_MAC,
821                                          VIRTIO_NET_CTRL_MAC_TABLE_SET,
822                                          data, ARRAY_SIZE(data));
823      if (unlikely(r < 0)) {
824          return r;
825      }
826  
827      return 0;
828  }
829  
830  static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n,
831                                     struct iovec *out_cursor,
832                                     struct iovec *in_cursor, bool do_rss)
833  {
834      struct virtio_net_rss_config cfg = {};
835      ssize_t r;
836      g_autofree uint16_t *table = NULL;
837  
838      /*
839       * According to VirtIO standard, "Initially the device has all hash
840       * types disabled and reports only VIRTIO_NET_HASH_REPORT_NONE.".
841       *
842       * Therefore, there is no need to send this CVQ command if the
843       * driver disables all hash types, which aligns with
844       * the device's defaults.
845       *
846       * Note that the device's defaults can mismatch the driver's
847       * configuration only at live migration.
848       */
849      if (!n->rss_data.enabled ||
850          n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) {
851          return 0;
852      }
853  
854      table = g_malloc_n(n->rss_data.indirections_len,
855                         sizeof(n->rss_data.indirections_table[0]));
856      cfg.hash_types = cpu_to_le32(n->rss_data.hash_types);
857  
858      if (do_rss) {
859          /*
860           * According to VirtIO standard, "Number of entries in indirection_table
861           * is (indirection_table_mask + 1)".
862           */
863          cfg.indirection_table_mask = cpu_to_le16(n->rss_data.indirections_len -
864                                                   1);
865          cfg.unclassified_queue = cpu_to_le16(n->rss_data.default_queue);
866          for (int i = 0; i < n->rss_data.indirections_len; ++i) {
867              table[i] = cpu_to_le16(n->rss_data.indirections_table[i]);
868          }
869          cfg.max_tx_vq = cpu_to_le16(n->curr_queue_pairs);
870      } else {
871          /*
872           * According to VirtIO standard, "Field reserved MUST contain zeroes.
873           * It is defined to make the structure to match the layout of
874           * virtio_net_rss_config structure, defined in 5.1.6.5.7.".
875           *
876           * Therefore, we need to zero the fields in
877           * struct virtio_net_rss_config, which correspond to the
878           * `reserved` field in struct virtio_net_hash_config.
879           *
880           * Note that all other fields are zeroed at their definitions,
881           * except for the `indirection_table` field, where the actual data
882           * is stored in the `table` variable to ensure compatibility
883           * with RSS case. Therefore, we need to zero the `table` variable here.
884           */
885          table[0] = 0;
886      }
887  
888      /*
889       * Considering that virtio_net_handle_rss() currently does not restore
890       * the hash key length parsed from the CVQ command sent from the guest
891       * into n->rss_data and uses the maximum key length in other code,
892       * we also employ the maximum key length here.
893       */
894      cfg.hash_key_length = sizeof(n->rss_data.key);
895  
896      const struct iovec data[] = {
897          {
898              .iov_base = &cfg,
899              .iov_len = offsetof(struct virtio_net_rss_config,
900                                  indirection_table),
901          }, {
902              .iov_base = table,
903              .iov_len = n->rss_data.indirections_len *
904                         sizeof(n->rss_data.indirections_table[0]),
905          }, {
906              .iov_base = &cfg.max_tx_vq,
907              .iov_len = offsetof(struct virtio_net_rss_config, hash_key_data) -
908                         offsetof(struct virtio_net_rss_config, max_tx_vq),
909          }, {
910              .iov_base = (void *)n->rss_data.key,
911              .iov_len = sizeof(n->rss_data.key),
912          }
913      };
914  
915      r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
916                                  VIRTIO_NET_CTRL_MQ,
917                                  do_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG :
918                                  VIRTIO_NET_CTRL_MQ_HASH_CONFIG,
919                                  data, ARRAY_SIZE(data));
920      if (unlikely(r < 0)) {
921          return r;
922      }
923  
924      return 0;
925  }
926  
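/*
 * Restore the number of enabled queue pairs, plus the RSS or hash-report
 * configuration when the corresponding feature has been negotiated.
 */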
927  static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
928                                    const VirtIONet *n,
929                                    struct iovec *out_cursor,
930                                    struct iovec *in_cursor)
931  {
932      struct virtio_net_ctrl_mq mq;
933      ssize_t r;
934  
935      if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
936          return 0;
937      }
938  
939      mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
940      const struct iovec data = {
941          .iov_base = &mq,
942          .iov_len = sizeof(mq),
943      };
944      r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
945                                  VIRTIO_NET_CTRL_MQ,
946                                  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
947                                  &data, 1);
948      if (unlikely(r < 0)) {
949          return r;
950      }
951  
952      if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_RSS)) {
953          /* load the receive-side scaling state */
954          r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, true);
955          if (unlikely(r < 0)) {
956              return r;
957          }
958      } else if (virtio_vdev_has_feature(&n->parent_obj,
959                                         VIRTIO_NET_F_HASH_REPORT)) {
960          /* load the hash calculation state */
961          r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, false);
962          if (unlikely(r < 0)) {
963              return r;
964          }
965      }
966  
967      return 0;
968  }
969  
970  static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
971                                          const VirtIONet *n,
972                                          struct iovec *out_cursor,
973                                          struct iovec *in_cursor)
974  {
975      uint64_t offloads;
976      ssize_t r;
977  
978      if (!virtio_vdev_has_feature(&n->parent_obj,
979                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
980          return 0;
981      }
982  
983      if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
984          /*
985           * According to VirtIO standard, "Upon feature negotiation
986           * corresponding offload gets enabled to preserve
987           * backward compatibility.".
988           *
989           * Therefore, there is no need to send this CVQ command if the
990           * driver also enables all supported offloads, which aligns with
991           * the device's defaults.
992           *
993           * Note that the device's defaults can mismatch the driver's
994           * configuration only at live migration.
995           */
996          return 0;
997      }
998  
999      offloads = cpu_to_le64(n->curr_guest_offloads);
1000      const struct iovec data = {
1001          .iov_base = &offloads,
1002          .iov_len = sizeof(offloads),
1003      };
1004      r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
1005                                  VIRTIO_NET_CTRL_GUEST_OFFLOADS,
1006                                  VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
1007                                  &data, 1);
1008      if (unlikely(r < 0)) {
1009          return r;
1010      }
1011  
1012      return 0;
1013  }
1014  
1015  static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s,
1016                                         struct iovec *out_cursor,
1017                                         struct iovec *in_cursor,
1018                                         uint8_t cmd,
1019                                         uint8_t on)
1020  {
1021      const struct iovec data = {
1022          .iov_base = &on,
1023          .iov_len = sizeof(on),
1024      };
1025      ssize_t r;
1026  
1027      r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
1028                                  VIRTIO_NET_CTRL_RX, cmd, &data, 1);
1029      if (unlikely(r < 0)) {
1030          return r;
1031      }
1032  
1033      return 0;
1034  }
1035  
1036  static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
1037                                    const VirtIONet *n,
1038                                    struct iovec *out_cursor,
1039                                    struct iovec *in_cursor)
1040  {
1041      ssize_t r;
1042  
1043      if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) {
1044          return 0;
1045      }
1046  
1047      /*
1048       * According to virtio_net_reset(), device turns promiscuous mode
1049       * on by default.
1050       *
1051       * Additionally, according to VirtIO standard, "Since there are
1052       * no guarantees, it can use a hash filter or silently switch to
1053       * allmulti or promiscuous mode if it is given too many addresses.".
1054       * QEMU marks `n->mac_table.uni_overflow` if guest sets too many
1055       * non-multicast MAC addresses, indicating that promiscuous mode
1056       * should be enabled.
1057       *
1058       * Therefore, QEMU should only send this CVQ command if the
1059       * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off,
1060       * which turns promiscuous mode off, differing from the device's default.
1061       *
1062       * Note that the device's defaults can mismatch the driver's
1063       * configuration only at live migration.
1064       */
1065      if (!n->mac_table.uni_overflow && !n->promisc) {
1066          r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1067                                          VIRTIO_NET_CTRL_RX_PROMISC, 0);
1068          if (unlikely(r < 0)) {
1069              return r;
1070          }
1071      }
1072  
1073      /*
1074       * According to virtio_net_reset(), device turns all-multicast mode
1075       * off by default.
1076       *
1077       * According to VirtIO standard, "Since there are no guarantees,
1078       * it can use a hash filter or silently switch to allmulti or
1079       * promiscuous mode if it is given too many addresses.". QEMU marks
1080       * `n->mac_table.multi_overflow` if guest sets too many
1081       * non-multicast MAC addresses.
1082       *
1083       * Therefore, QEMU should only send this CVQ command if the
1084       * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on,
1085       * which sets all-multicast mode on, different from the device's defaults.
1086       *
1087       * Note that the device's defaults can mismatch the driver's
1088       * configuration only at live migration.
1089       */
1090      if (n->mac_table.multi_overflow || n->allmulti) {
1091          r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1092                                          VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
1093          if (unlikely(r < 0)) {
1094              return r;
1095          }
1096      }
1097  
1098      if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) {
1099          return 0;
1100      }
1101  
1102      /*
1103       * According to virtio_net_reset(), device turns all-unicast mode
1104       * off by default.
1105       *
1106       * Therefore, QEMU should only send this CVQ command if the driver
1107       * sets all-unicast mode on, different from the device's defaults.
1108       *
1109       * Note that the device's defaults can mismatch the driver's
1110       * configuration only at live migration.
1111       */
1112      if (n->alluni) {
1113          r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1114                                          VIRTIO_NET_CTRL_RX_ALLUNI, 1);
1115          if (r < 0) {
1116              return r;
1117          }
1118      }
1119  
1120      /*
1121       * According to virtio_net_reset(), device turns non-multicast mode
1122       * off by default.
1123       *
1124       * Therefore, QEMU should only send this CVQ command if the driver
1125       * sets non-multicast mode on, different from the device's defaults.
1126       *
1127       * Note that the device's defaults can mismatch the driver's
1128       * configuration only at live migration.
1129       */
1130      if (n->nomulti) {
1131          r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1132                                          VIRTIO_NET_CTRL_RX_NOMULTI, 1);
1133          if (r < 0) {
1134              return r;
1135          }
1136      }
1137  
1138      /*
1139       * According to virtio_net_reset(), device turns non-unicast mode
1140       * off by default.
1141       *
1142       * Therefore, QEMU should only send this CVQ command if the driver
1143       * sets non-unicast mode on, different from the device's defaults.
1144       *
1145       * Note that the device's defaults can mismatch the driver's
1146       * configuration only at live migration.
1147       */
1148      if (n->nouni) {
1149          r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1150                                          VIRTIO_NET_CTRL_RX_NOUNI, 1);
1151          if (r < 0) {
1152              return r;
1153          }
1154      }
1155  
1156      /*
1157       * According to virtio_net_reset(), device turns non-broadcast mode
1158       * off by default.
1159       *
1160       * Therefore, QEMU should only send this CVQ command if the driver
1161       * sets non-broadcast mode on, different from the device's defaults.
1162       *
1163       * Note that the device's defaults can mismatch the driver's
1164       * configuration only at live migration.
1165       */
1166      if (n->nobcast) {
1167          r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1168                                          VIRTIO_NET_CTRL_RX_NOBCAST, 1);
1169          if (r < 0) {
1170              return r;
1171          }
1172      }
1173  
1174      return 0;
1175  }
1176  
1177  static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s,
1178                                             const VirtIONet *n,
1179                                             struct iovec *out_cursor,
1180                                             struct iovec *in_cursor,
1181                                             uint16_t vid)
1182  {
1183      const struct iovec data = {
1184          .iov_base = &vid,
1185          .iov_len = sizeof(vid),
1186      };
1187      ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
1188                                          VIRTIO_NET_CTRL_VLAN,
1189                                          VIRTIO_NET_CTRL_VLAN_ADD,
1190                                          &data, 1);
1191      if (unlikely(r < 0)) {
1192          return r;
1193      }
1194  
1195      return 0;
1196  }
1197  
1198  static int vhost_vdpa_net_load_vlan(VhostVDPAState *s,
1199                                      const VirtIONet *n,
1200                                      struct iovec *out_cursor,
1201                                      struct iovec *in_cursor)
1202  {
1203      int r;
1204  
1205      if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) {
1206          return 0;
1207      }
1208  
1209      for (int i = 0; i < MAX_VLAN >> 5; i++) {
1210          for (int j = 0; n->vlans[i] && j <= 0x1f; j++) {
1211              if (n->vlans[i] & (1U << j)) {
1212                  r = vhost_vdpa_net_load_single_vlan(s, n, out_cursor,
1213                                                      in_cursor, (i << 5) + j);
1214                  if (unlikely(r != 0)) {
1215                      return r;
1216                  }
1217              }
1218          }
1219      }
1220  
1221      return 0;
1222  }
1223  
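/*
 * NetClientInfo .load callback for the CVQ client: enable CVQ, replay the
 * virtio-net control state through SVQ when it is in use, flush the acks,
 * and finally enable the data vqs.
 */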
1224  static int vhost_vdpa_net_cvq_load(NetClientState *nc)
1225  {
1226      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1227      struct vhost_vdpa *v = &s->vhost_vdpa;
1228      const VirtIONet *n;
1229      int r;
1230      struct iovec out_cursor, in_cursor;
1231  
1232      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1233  
1234      vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
1235  
1236      if (v->shadow_vqs_enabled) {
1237          n = VIRTIO_NET(v->dev->vdev);
1238          vhost_vdpa_net_load_cursor_reset(s, &out_cursor, &in_cursor);
1239          r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
1240          if (unlikely(r < 0)) {
1241              return r;
1242          }
1243          r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
1244          if (unlikely(r)) {
1245              return r;
1246          }
1247          r = vhost_vdpa_net_load_offloads(s, n, &out_cursor, &in_cursor);
1248          if (unlikely(r)) {
1249              return r;
1250          }
1251          r = vhost_vdpa_net_load_rx(s, n, &out_cursor, &in_cursor);
1252          if (unlikely(r)) {
1253              return r;
1254          }
1255          r = vhost_vdpa_net_load_vlan(s, n, &out_cursor, &in_cursor);
1256          if (unlikely(r)) {
1257              return r;
1258          }
1259  
1260          /*
1261           * We need to poll and check all pending device's used buffers.
1262           *
1263           * We can poll here since we've had BQL from the time
1264           * we sent the descriptor.
1265           */
1266          r = vhost_vdpa_net_svq_flush(s, in_cursor.iov_base - (void *)s->status);
1267          if (unlikely(r)) {
1268              return r;
1269          }
1270      }
1271  
1272      for (int i = 0; i < v->dev->vq_index; ++i) {
1273          vhost_vdpa_set_vring_ready(v, i);
1274      }
1275  
1276      return 0;
1277  }
1278  
1279  static NetClientInfo net_vhost_vdpa_cvq_info = {
1280      .type = NET_CLIENT_DRIVER_VHOST_VDPA,
1281      .size = sizeof(VhostVDPAState),
1282      .receive = vhost_vdpa_receive,
1283      .start = vhost_vdpa_net_cvq_start,
1284      .load = vhost_vdpa_net_cvq_load,
1285      .stop = vhost_vdpa_net_cvq_stop,
1286      .cleanup = vhost_vdpa_cleanup,
1287      .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
1288      .has_ufo = vhost_vdpa_has_ufo,
1289      .check_peer_type = vhost_vdpa_check_peer_type,
1290      .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
1291  };
1292  
1293  /*
1294   * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to
1295   * vdpa device.
1296   *
1297   * Considering that QEMU cannot send the entire filter table to the
1298   * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ
1299   * command to enable promiscuous mode to receive all packets,
1300   * according to VirtIO standard, "Since there are no guarantees,
1301   * it can use a hash filter or silently switch to allmulti or
1302   * promiscuous mode if it is given too many addresses.".
1303   *
1304   * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and
1305   * marks `n->mac_table.x_overflow` accordingly, it should have
1306   * the same effect on the device model to receive
1307   * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses.
1308   * The same applies to multicast MAC addresses.
1309   *
1310   * Therefore, QEMU can provide the device model with a fake
1311   * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1)
1312   * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast
1313   * MAC addresses. This ensures that the device model marks
1314   * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`,
1315   * allowing all packets to be received, which aligns with the
1316   * state of the vdpa device.
1317   */
1318  static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
1319                                                         VirtQueueElement *elem,
1320                                                         struct iovec *out,
1321                                                         const struct iovec *in)
1322  {
1323      struct virtio_net_ctrl_mac mac_data, *mac_ptr;
1324      struct virtio_net_ctrl_hdr *hdr_ptr;
1325      uint32_t cursor;
1326      ssize_t r;
1327      uint8_t on = 1;
1328  
1329      /* parse the non-multicast MAC address entries from CVQ command */
1330      cursor = sizeof(*hdr_ptr);
1331      r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
1332                     &mac_data, sizeof(mac_data));
1333      if (unlikely(r != sizeof(mac_data))) {
1334          /*
1335           * If the CVQ command is invalid, we should simulate the vdpa device
1336           * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1337           */
1338          *s->status = VIRTIO_NET_ERR;
1339          return sizeof(*s->status);
1340      }
1341      cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
1342  
1343      /* parse the multicast MAC address entries from CVQ command */
1344      r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
1345                     &mac_data, sizeof(mac_data));
1346      if (r != sizeof(mac_data)) {
1347          /*
1348           * If the CVQ command is invalid, we should simulate the vdpa device
1349           * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1350           */
1351          *s->status = VIRTIO_NET_ERR;
1352          return sizeof(*s->status);
1353      }
1354      cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
1355  
1356      /* validate the CVQ command */
1357      if (iov_size(elem->out_sg, elem->out_num) != cursor) {
1358          /*
1359           * If the CVQ command is invalid, we should simulate the vdpa device
1360           * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1361           */
1362          *s->status = VIRTIO_NET_ERR;
1363          return sizeof(*s->status);
1364      }
1365  
1366      /*
1367       * According to VirtIO standard, "Since there are no guarantees,
1368       * it can use a hash filter or silently switch to allmulti or
1369       * promiscuous mode if it is given too many addresses.".
1370       *
1371       * Therefore, considering that QEMU is unable to send the entire
1372       * filter table to the vdpa device, it should send the
1373       * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode
1374       */
1375      hdr_ptr = out->iov_base;
1376      out->iov_len = sizeof(*hdr_ptr) + sizeof(on);
1377  
1378      hdr_ptr->class = VIRTIO_NET_CTRL_RX;
1379      hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC;
1380      iov_from_buf(out, 1, sizeof(*hdr_ptr), &on, sizeof(on));
1381      r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1);
1382      if (unlikely(r < 0)) {
1383          return r;
1384      }
1385  
1386      /*
1387       * We can poll here since we've had BQL from the time
1388       * we sent the descriptor.
1389       */
1390      r = vhost_vdpa_net_svq_poll(s, 1);
1391      if (unlikely(r < sizeof(*s->status))) {
1392          return r;
1393      }
1394      if (*s->status != VIRTIO_NET_OK) {
1395          return sizeof(*s->status);
1396      }
1397  
1398      /*
1399       * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ
1400       * command to the device model, including (`MAC_TABLE_ENTRIES` + 1)
1401       * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1)
1402       * multicast MAC addresses.
1403       *
1404       * By doing so, the device model can mark `n->mac_table.uni_overflow`
1405       * and `n->mac_table.multi_overflow`, enabling all packets to be
1406       * received, which aligns with the state of the vdpa device.
1407       */
1408      cursor = 0;
1409      uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1,
1410               fake_mul_entries = MAC_TABLE_ENTRIES + 1,
1411               fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) +
1412                               sizeof(mac_data) + fake_uni_entries * ETH_ALEN +
1413                               sizeof(mac_data) + fake_mul_entries * ETH_ALEN;
1414  
1415      assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len());
1416      out->iov_len = fake_cvq_size;
1417  
1418      /* pack the header for fake CVQ command */
1419      hdr_ptr = out->iov_base + cursor;
1420      hdr_ptr->class = VIRTIO_NET_CTRL_MAC;
1421      hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1422      cursor += sizeof(*hdr_ptr);
1423  
1424      /*
1425       * Pack the non-multicast MAC addresses part for fake CVQ command.
1426       *
1427       * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
1428       * addresses provided in the CVQ command. Therefore, only the entries
1429       * field needs to be prepared in the CVQ command.
1430       */
1431      mac_ptr = out->iov_base + cursor;
1432      mac_ptr->entries = cpu_to_le32(fake_uni_entries);
1433      cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN;
1434  
1435      /*
1436       * Pack the multicast MAC addresses part for fake CVQ command.
1437       *
1438       * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
1439       * addresses provided in the CVQ command. Therefore, only the entries
1440       * field needs to be prepared in the CVQ command.
1441       */
1442      mac_ptr = out->iov_base + cursor;
1443      mac_ptr->entries = cpu_to_le32(fake_mul_entries);
1444  
1445      /*
1446       * Simulate QEMU polling a used buffer from the vdpa device
1447       * for the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
1448       */
1449      return sizeof(*s->status);
1450  }
1451  
1452  /**
1453   * Validate and copy control virtqueue commands.
1454   *
1455   * Following QEMU guidelines, we offer a copy of the buffers to the device to
1456   * prevent TOCTOU bugs.
1457   */
1458  static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
1459                                              VirtQueueElement *elem,
1460                                              void *opaque)
1461  {
1462      VhostVDPAState *s = opaque;
1463      size_t in_len;
1464      const struct virtio_net_ctrl_hdr *ctrl;
1465      virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1466      /* Out buffer sent to both the vdpa device and the device model */
1467      struct iovec out = {
1468          .iov_base = s->cvq_cmd_out_buffer,
1469      };
1470      /* in buffer used for device model */
1471      const struct iovec model_in = {
1472          .iov_base = &status,
1473          .iov_len = sizeof(status),
1474      };
1475      /* in buffer used for vdpa device */
1476      const struct iovec vdpa_in = {
1477          .iov_base = s->status,
1478          .iov_len = sizeof(*s->status),
1479      };
1480      ssize_t dev_written = -EINVAL;
1481  
1482      out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
1483                               s->cvq_cmd_out_buffer,
1484                               vhost_vdpa_net_cvq_cmd_page_len());
1485  
1486      ctrl = s->cvq_cmd_out_buffer;
1487      if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) {
1488          /*
1489           * Guest announce capability is emulated by qemu, so don't forward to
1490           * the device.
1491           */
1492          dev_written = sizeof(status);
1493          *s->status = VIRTIO_NET_OK;
1494      } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC &&
1495                          ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET &&
1496                          iov_size(elem->out_sg, elem->out_num) > out.iov_len)) {
1497          /*
1498           * Due to the size limitation of the out buffer sent to the vdpa device,
1499           * which is determined by vhost_vdpa_net_cvq_cmd_page_len(), a driver
1500           * that sets more MAC filter entries than fit in that buffer causes
1501           * the CVQ command to be truncated in QEMU. As a result, the vdpa
1502           * device would reject the truncated command.
1503           *
1504           * Therefore, QEMU must handle this situation instead of sending
1505           * the CVQ command directly.
1506           */
1507          dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
1508                                                              &out, &vdpa_in);
1509          if (unlikely(dev_written < 0)) {
1510              goto out;
1511          }
1512      } else {
1513          ssize_t r;
1514          r = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1);
1515          if (unlikely(r < 0)) {
1516              dev_written = r;
1517              goto out;
1518          }
1519  
1520          /*
1521       * We can poll here since we've held the BQL from the time
1522           * we sent the descriptor.
1523           */
1524          dev_written = vhost_vdpa_net_svq_poll(s, 1);
1525      }
1526  
1527      if (unlikely(dev_written < sizeof(status))) {
1528          error_report("Insufficient written data (%zd)", dev_written);
1529          goto out;
1530      }
1531  
1532      if (*s->status != VIRTIO_NET_OK) {
1533          goto out;
1534      }
1535  
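          /*
           * The command has been acknowledged (by the vdpa device, or by the
           * simulated path for the oversized MAC filter case), so replay it
           * into QEMU's device model to keep the model in sync. The model's
           * ack lands in `status` through model_in.
           */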
1536      status = VIRTIO_NET_ERR;
1537      virtio_net_handle_ctrl_iov(svq->vdev, &model_in, 1, &out, 1);
1538      if (status != VIRTIO_NET_OK) {
1539          error_report("Bad CVQ processing in model");
1540      }
1541  
1542  out:
1543      in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
1544                            sizeof(status));
1545      if (unlikely(in_len < sizeof(status))) {
1546          error_report("Bad device CVQ written length");
1547      }
1548      vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
1549      /*
1550       * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when
1551       * the function successfully forwards the CVQ command, indicated
1552       * by a non-negative value of `dev_written`. Otherwise, it still
1553       * belongs to SVQ.
1554       * This function should only free `elem` when it owns it.
1555       */
1556      if (dev_written >= 0) {
1557          g_free(elem);
1558      }
1559      return dev_written < 0 ? dev_written : 0;
1560  }
1561  
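      /*
       * Shadow virtqueue ops installed on the CVQ client (see
       * net_vhost_vdpa_init() below) so that every control command the guest
       * queues is intercepted by vhost_vdpa_net_handle_ctrl_avail().
       */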
1562  static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
1563      .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
1564  };
1565  
1566  /**
1567   * Probe if CVQ is isolated
1568   *
1569   * @device_fd         The vdpa device fd
1570   * @features          Features offered by the device.
1571   * @cvq_index         The control vq pair index
1572   *
1573   * Returns <0 on failure, 0 if the CVQ is not isolated and 1 if it is.
1574   */
1575  static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
1576                                            int cvq_index, Error **errp)
1577  {
1578      uint64_t backend_features;
1579      int64_t cvq_group;
1580      uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
1581                       VIRTIO_CONFIG_S_DRIVER;
1582      int r;
1583  
1584      ERRP_GUARD();
1585  
1586      r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
1587      if (unlikely(r < 0)) {
1588          error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
1589          return r;
1590      }
1591  
1592      if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
1593          return 0;
1594      }
1595  
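          /*
           * Drive the device through ACKNOWLEDGE, DRIVER and FEATURES_OK with
           * the offered features before asking for vring groups; the status is
           * cleared again at the "out:" label once probing is finished.
           */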
1596      r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1597      if (unlikely(r)) {
1598          error_setg_errno(errp, -r, "Cannot set device status");
1599          goto out;
1600      }
1601  
1602      r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
1603      if (unlikely(r)) {
1604          error_setg_errno(errp, -r, "Cannot set features");
1605          goto out;
1606      }
1607  
1608      status |= VIRTIO_CONFIG_S_FEATURES_OK;
1609      r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1610      if (unlikely(r)) {
1611          error_setg_errno(errp, -r, "Cannot set device status");
1612          goto out;
1613      }
1614  
1615      cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
1616      if (unlikely(cvq_group < 0)) {
1617          if (cvq_group != -ENOTSUP) {
1618              r = cvq_group;
1619              goto out;
1620          }
1621  
1622          /*
1623           * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
1624           * supports ASID even if the parent driver does not.  The CVQ cannot be
1625           * isolated in this case.
1626           */
1627          error_free(*errp);
1628          *errp = NULL;
1629          r = 0;
1630          goto out;
1631      }
1632  
1633      for (int i = 0; i < cvq_index; ++i) {
1634          int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
1635          if (unlikely(group < 0)) {
1636              r = group;
1637              goto out;
1638          }
1639  
1640          if (group == (int64_t)cvq_group) {
1641              r = 0;
1642              goto out;
1643          }
1644      }
1645  
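          /* No data vq shares a group with the CVQ, so the CVQ can be isolated */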
1646      r = 1;
1647  
1648  out:
1649      status = 0;
1650      ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1651      return r;
1652  }
1653  
1654  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
1655                                         const char *device,
1656                                         const char *name,
1657                                         int vdpa_device_fd,
1658                                         int queue_pair_index,
1659                                         int nvqs,
1660                                         bool is_datapath,
1661                                         bool svq,
1662                                         struct vhost_vdpa_iova_range iova_range,
1663                                         uint64_t features,
1664                                         Error **errp)
1665  {
1666      NetClientState *nc = NULL;
1667      VhostVDPAState *s;
1668      int ret = 0;
1669      assert(name);
1670      int cvq_isolated = 0;
1671  
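          /*
           * Data queue pairs get a regular net client. The control virtqueue
           * gets a control-only client, and whether the CVQ can live in its
           * own ASID is probed before that client is created.
           */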
1672      if (is_datapath) {
1673          nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
1674                                   name);
1675      } else {
1676          cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
1677                                                        queue_pair_index * 2,
1678                                                        errp);
1679          if (unlikely(cvq_isolated < 0)) {
1680              return NULL;
1681          }
1682  
1683          nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
1684                                           device, name);
1685      }
1686      qemu_set_info_str(nc, TYPE_VHOST_VDPA);
1687      s = DO_UPCAST(VhostVDPAState, nc, nc);
1688  
1689      s->vhost_vdpa.device_fd = vdpa_device_fd;
1690      s->vhost_vdpa.index = queue_pair_index;
1691      s->always_svq = svq;
1692      s->migration_state.notify = NULL;
1693      s->vhost_vdpa.shadow_vqs_enabled = svq;
1694      s->vhost_vdpa.iova_range = iova_range;
1695      s->vhost_vdpa.shadow_data = svq;
1696      if (queue_pair_index == 0) {
1697          vhost_vdpa_net_valid_svq_features(features,
1698                                            &s->vhost_vdpa.migration_blocker);
1699      } else if (!is_datapath) {
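              /*
               * Bounce buffers for shadowed CVQ commands: cvq_cmd_out_buffer
               * carries the copy of the guest command and status receives the
               * device's ack. Both are mmap()ed with
               * vhost_vdpa_net_cvq_cmd_page_len() bytes, presumably so they
               * can be DMA-mapped into the device as whole pages.
               */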
1700          s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
1701                                       PROT_READ | PROT_WRITE,
1702                                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1703          s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
1704                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
1705                           -1, 0);
1706  
1707          s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
1708          s->vhost_vdpa.shadow_vq_ops_opaque = s;
1709          s->cvq_isolated = cvq_isolated;
1710      }
1711      ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
1712      if (ret) {
1713          qemu_del_net_client(nc);
1714          return NULL;
1715      }
1716      return nc;
1717  }
1718  
1719  static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
1720  {
1721      int ret = ioctl(fd, VHOST_GET_FEATURES, features);
1722      if (unlikely(ret < 0)) {
1723          error_setg_errno(errp, errno,
1724                           "Failed to query features from vhost-vDPA device");
1725      }
1726      return ret;
1727  }
1728  
1729  static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
1730                                            int *has_cvq, Error **errp)
1731  {
1732      unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
1733      g_autofree struct vhost_vdpa_config *config = NULL;
1734      __virtio16 *max_queue_pairs;
1735      int ret;
1736  
1737      if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
1738          *has_cvq = 1;
1739      } else {
1740          *has_cvq = 0;
1741      }
1742  
1743      if (features & (1 << VIRTIO_NET_F_MQ)) {
1744          config = g_malloc0(config_size + sizeof(*max_queue_pairs));
1745          config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
1746          config->len = sizeof(*max_queue_pairs);
1747  
1748          ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
1749          if (ret) {
1750              error_setg(errp, "Failed to get config from vhost-vDPA device");
1751              return -ret;
1752          }
1753  
1754          max_queue_pairs = (__virtio16 *)&config->buf;
1755  
1756          return lduw_le_p(max_queue_pairs);
1757      }
1758  
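          /* Without VIRTIO_NET_F_MQ the device has exactly one queue pair */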
1759      return 1;
1760  }
1761  
1762  int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
1763                          NetClientState *peer, Error **errp)
1764  {
1765      const NetdevVhostVDPAOptions *opts;
1766      uint64_t features;
1767      int vdpa_device_fd;
1768      g_autofree NetClientState **ncs = NULL;
1769      struct vhost_vdpa_iova_range iova_range;
1770      NetClientState *nc;
1771      int queue_pairs, r, i = 0, has_cvq = 0;
1772  
1773      assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1774      opts = &netdev->u.vhost_vdpa;
1775      if (!opts->vhostdev && !opts->vhostfd) {
1776          error_setg(errp,
1777                     "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
1778          return -1;
1779      }
1780  
1781      if (opts->vhostdev && opts->vhostfd) {
1782          error_setg(errp,
1783                     "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
1784          return -1;
1785      }
1786  
1787      if (opts->vhostdev) {
1788          vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
1789          if (vdpa_device_fd == -1) {
1790              return -errno;
1791          }
1792      } else {
1793          /* has_vhostfd */
1794          vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
1795          if (vdpa_device_fd == -1) {
1796              error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
1797              return -1;
1798          }
1799      }
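          /*
           * At this point vdpa_device_fd refers to an open vhost-vdpa device,
           * obtained either from the vhostdev= path or from the vhostfd=
           * monitor file descriptor.
           */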
1800  
1801      r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
1802      if (unlikely(r < 0)) {
1803          goto err;
1804      }
1805  
1806      queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
1807                                                   &has_cvq, errp);
1808      if (queue_pairs < 0) {
1809          qemu_close(vdpa_device_fd);
1810          return queue_pairs;
1811      }
1812  
1813      r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
1814      if (unlikely(r < 0)) {
1815          error_setg(errp, "vhost-vdpa: get iova range failed: %s",
1816                     strerror(-r));
1817          goto err;
1818      }
1819  
1820      if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
1821          goto err;
1822      }
1823  
1824      ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
1825  
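          /*
           * One client per data queue pair (two vqs each), plus one extra
           * control-only client below when the device offers a control vq.
           */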
1826      for (i = 0; i < queue_pairs; i++) {
1827          ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
1828                                       vdpa_device_fd, i, 2, true, opts->x_svq,
1829                                       iova_range, features, errp);
1830          if (!ncs[i]) {
1831              goto err;
              }
1832      }
1833  
1834      if (has_cvq) {
1835          nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
1836                                   vdpa_device_fd, i, 1, false,
1837                                   opts->x_svq, iova_range, features, errp);
1838          if (!nc) {
1839              goto err;
              }
1840      }
1841  
1842      return 0;
1843  
1844  err:
1845      if (i) {
1846          for (i--; i >= 0; i--) {
1847              qemu_del_net_client(ncs[i]);
1848          }
1849      }
1850  
1851      qemu_close(vdpa_device_fd);
1852  
1853      return -1;
1854  }
1855