/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "hw/virtio/vhost.h"

/* TODO: add multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    bool started;
} VhostVDPAState;

const int vdpa_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_ANNOUNCE,
    VIRTIO_NET_F_STATUS,
    VHOST_INVALID_FEATURE_BIT
};
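
/*
 * Note: this VHOST_INVALID_FEATURE_BIT-terminated array lists the feature
 * *bit numbers* that vhost-net may negotiate for a vDPA backend. A minimal
 * sketch of how such a list is consumed, assuming the helpers in
 * hw/net/vhost_net.c (names per that file):
 *
 *     uint64_t vhost_net_get_features(struct vhost_net *net,
 *                                     uint64_t features)
 *     {
 *         // keep only the listed bits that both sides support
 *         return vhost_get_features(&net->dev, vdpa_feature_bits, features);
 *     }
 */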

/** Supported device-specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY);
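
/*
 * Unlike vdpa_feature_bits above (an array of bit numbers),
 * vdpa_svq_device_features is a 64-bit mask: each BIT_ULL() sets one feature
 * bit, so membership is tested with a plain bitwise AND, e.g.:
 *
 *     if (features & BIT_ULL(VIRTIO_NET_F_MQ)) { ... }
 */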

VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}

static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
{
    uint64_t invalid_dev_features =
        features & ~vdpa_svq_device_features &
        /* Transport features are all accepted at this point */
        ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                         VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

    if (invalid_dev_features) {
        error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                   invalid_dev_features);
    }

    return !invalid_dev_features;
}
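
/*
 * Illustrative breakdown of the check above: a feature bit is rejected only
 * if it is (a) offered by the device, (b) not in vdpa_svq_device_features,
 * and (c) not a transport feature. MAKE_64BIT_MASK(start, len) builds a mask
 * of `len` consecutive bits from `start`, so the transport range
 * [VIRTIO_TRANSPORT_F_START, VIRTIO_TRANSPORT_F_END) is masked out before
 * the comparison.
 */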

static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (ret) {
        /* Do not read device_id if the ioctl failed */
        return ret;
    }
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return 0;
}

static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque      = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    vhost_net_cleanup(net);
    g_free(net);
    /* Do not leave a dangling pointer for vhost_vdpa_cleanup() */
    s->vhost_net = NULL;
err_init:
    return -1;
}

static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    qemu_vfree(s->cvq_cmd_out_buffer);
    qemu_vfree(s->status);
    if (s->vhost_net) {
        struct vhost_dev *dev = &s->vhost_net->dev;

        if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
            g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
        }
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}
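
/*
 * Note on ownership: s->vhost_vdpa.iova_tree is shared by every queue pair of
 * the same device, and net_init_vhost_vdpa() hands ownership to the last
 * NetClientState it creates. The vq_index check above makes only that last
 * client delete the tree; the other clients treat it as a borrowed pointer.
 */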

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case QEMU falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

static NetClientInfo net_vhost_vdpa_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
     * The in buffer is always 1 byte (the ack), so it fits here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}
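
/*
 * For reference, with the definitions in hw/virtio/virtio-net.h and the
 * standard virtio headers (MAC_TABLE_ENTRIES == 64, ETH_ALEN == 6, a 2-byte
 * ctrl header and a 4-byte virtio_net_ctrl_mac prefix per MAC table), the
 * worst case is 2 + 2 * 4 + 64 * 6 = 394 bytes, comfortably below one host
 * page.
 */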

static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    /* DMAMap sizes are inclusive, hence the -1 */
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
                           !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->iova_tree, map);
    return r;
}
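
/*
 * The mapping above is a two-step dance: vhost_iova_tree_map_alloc() picks a
 * free IOVA range and records the translation, then vhost_vdpa_dma_map()
 * pins it into the device (a VHOST_IOTLB_UPDATE on the backend side). If the
 * second step fails, the tree entry is rolled back so both views stay
 * consistent. A read-only mapping (write == false) is used for the out
 * buffer and a writable one for the status byte, matching how the device is
 * expected to access them.
 */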

static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }
}

static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
                                      size_t in_len)
{
    /* Buffers for the device */
    const struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = out_len,
    };
    const struct iovec in = {
        .iov_base = s->status,
        .iov_len = sizeof(virtio_net_ctrl_ack),
    };
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
        return r;
    }

    /*
     * We can poll here since we've held the BQL from the time we sent the
     * descriptor. Also, we need to take the answer before SVQ pulls it by
     * itself, when the BQL is released.
     */
    return vhost_svq_poll(svq);
}
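
/*
 * A sketch of the synchronous round trip implemented above, assuming the SVQ
 * helpers behave as their names suggest: vhost_svq_add() exposes one out
 * descriptor (the command) and one in descriptor (the 1-byte ack) to the
 * device, and vhost_svq_poll() busy-waits for the device to use them,
 * returning the number of bytes the device wrote. A successful control
 * command therefore yields sizeof(virtio_net_ctrl_ack) == 1.
 */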

static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
                                       uint8_t cmd, const void *data,
                                       size_t data_size)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));

    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);

    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
                                  sizeof(virtio_net_ctrl_ack));
}
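
/*
 * Resulting layout of cvq_cmd_out_buffer for a load command (illustrative):
 *
 *   +------------------------------+--------------------------+
 *   | struct virtio_net_ctrl_hdr   | data (data_size bytes)   |
 *   |   .class (1B)  |  .cmd (1B)  |                          |
 *   +------------------------------+--------------------------+
 *
 * The device answers through the separately mapped s->status byte.
 */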

static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
{
    uint64_t features = n->parent_obj.guest_features;
    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                  n->mac, sizeof(n->mac));
        if (unlikely(dev_written < 0)) {
            return dev_written;
        }

        return *s->status != VIRTIO_NET_OK;
    }

    return 0;
}

static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n)
{
    struct virtio_net_ctrl_mq mq;
    uint64_t features = n->parent_obj.guest_features;
    ssize_t dev_written;

    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
                                          sizeof(mq));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}

static int vhost_vdpa_net_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    n = VIRTIO_NET(v->dev->vdev);
    r = vhost_vdpa_net_load_mac(s, n);
    if (unlikely(r)) {
        return r;
    }
    r = vhost_vdpa_net_load_mq(s, n);
    if (unlikely(r)) {
        return r;
    }

    return 0;
}
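
/*
 * Note on the load helpers above: a negative return value is an errno from
 * the transport, while a positive one means the device answered with
 * something other than VIRTIO_NET_OK. The .load callback is expected to run
 * right after the device is started, replaying state (MAC address, number of
 * queue pairs) that the device lost across reset and that SVQ intercepted
 * from the guest.
 */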

static NetClientInfo net_vhost_vdpa_cvq_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_cvq_start,
    .load = vhost_vdpa_net_load,
    .stop = vhost_vdpa_net_cvq_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* In buffer used for the device model */
    const struct iovec in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_len());
    dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
    if (unlikely(dev_written < 0)) {
        goto out;
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zd)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        /* Propagate the device's error; status is still VIRTIO_NET_ERR */
        goto out;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    g_free(elem);
    return dev_written < 0 ? dev_written : 0;
}
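
/*
 * The flow above is worth spelling out: the guest's command is first copied
 * into the shadow out buffer and sent to the real vdpa device; only if the
 * device ACKs it is it replayed into QEMU's virtio-net model through
 * virtio_net_handle_ctrl_iov(), keeping the model's view (MAC filters, queue
 * pairs, ...) in sync with the hardware. Whatever happens, the element is
 * completed back to the guest with a 1-byte ack, and ownership of elem ends
 * here (vhost_svq_push_elem() plus g_free()).
 */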

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};

static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           VhostIOVATree *iova_tree)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    assert(name);
    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.device_fd = vdpa_device_fd;
    s->vhost_vdpa.index = queue_pair_index;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    s->vhost_vdpa.iova_tree = iova_tree;
    if (!is_datapath) {
        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                            vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
        s->status = qemu_memalign(qemu_real_host_page_size(),
                                  vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
    }
    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }
    return nc;
}

static int vhost_vdpa_get_iova_range(int fd,
                                     struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Failed to query features from vhost-vDPA device");
    }
    return ret;
}

static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            error_setg(errp, "Failed to get config from vhost-vDPA device");
            /* ioctl() returns -1 on failure, so report the real errno */
            return -errno;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}
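
/*
 * The config read above uses the variable-length vhost_vdpa_config layout:
 * the off/len header is followed by a flexible buf[], so the allocation is
 * offsetof(..., buf) plus room for one __virtio16, off points at
 * max_virtqueue_pairs inside struct virtio_net_config, and the device fills
 * buf with that little-endian field, hence the lduw_le_p() on the way out.
 */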

int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    const NetdevVhostVDPAOptions *opts;
    uint64_t features;
    int vdpa_device_fd;
    g_autofree NetClientState **ncs = NULL;
    g_autoptr(VhostIOVATree) iova_tree = NULL;
    NetClientState *nc;
    int queue_pairs, r, i = 0, has_cvq = 0;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    opts = &netdev->u.vhost_vdpa;
    if (!opts->vhostdev && !opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
        return -1;
    }

    if (opts->vhostdev && opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
        return -1;
    }

    if (opts->vhostdev) {
        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    } else {
        /* has_vhostfd */
        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
        if (vdpa_device_fd == -1) {
            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
            return -1;
        }
    }

    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        goto err;
    }

    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                 &has_cvq, errp);
    if (queue_pairs < 0) {
        qemu_close(vdpa_device_fd);
        return queue_pairs;
    }

    if (opts->x_svq) {
        struct vhost_vdpa_iova_range iova_range;

        if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
            goto err_svq;
        }

        r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
        if (unlikely(r < 0)) {
            /* Do not build the IOVA tree from an uninitialized range */
            error_setg_errno(errp, -r, "vhost-vdpa: get iova range failed");
            goto err_svq;
        }
        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
    }

    ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

    for (i = 0; i < queue_pairs; i++) {
        ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                     vdpa_device_fd, i, 2, true, opts->x_svq,
                                     iova_tree);
        if (!ncs[i]) {
            goto err;
        }
    }

    if (has_cvq) {
        nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                 vdpa_device_fd, i, 1, false,
                                 opts->x_svq, iova_tree);
        if (!nc) {
            goto err;
        }
    }

    /* iova_tree ownership belongs to last NetClientState */
    g_steal_pointer(&iova_tree);
    return 0;

err:
    if (i) {
        for (i--; i >= 0; i--) {
            qemu_del_net_client(ncs[i]);
        }
    }

err_svq:
    qemu_close(vdpa_device_fd);

    return -1;
}
737