xref: /openbmc/qemu/net/vhost-vdpa.c (revision 3f9a3eeb)
11e0a84eaSCindy Lu /*
21e0a84eaSCindy Lu  * vhost-vdpa.c
31e0a84eaSCindy Lu  *
41e0a84eaSCindy Lu  * Copyright(c) 2017-2018 Intel Corporation.
51e0a84eaSCindy Lu  * Copyright(c) 2020 Red Hat, Inc.
61e0a84eaSCindy Lu  *
71e0a84eaSCindy Lu  * This work is licensed under the terms of the GNU GPL, version 2 or later.
81e0a84eaSCindy Lu  * See the COPYING file in the top-level directory.
91e0a84eaSCindy Lu  *
101e0a84eaSCindy Lu  */
111e0a84eaSCindy Lu 
121e0a84eaSCindy Lu #include "qemu/osdep.h"
131e0a84eaSCindy Lu #include "clients.h"
14bd907ae4SEugenio Pérez #include "hw/virtio/virtio-net.h"
151e0a84eaSCindy Lu #include "net/vhost_net.h"
161e0a84eaSCindy Lu #include "net/vhost-vdpa.h"
171e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h"
181e0a84eaSCindy Lu #include "qemu/config-file.h"
191e0a84eaSCindy Lu #include "qemu/error-report.h"
20bd907ae4SEugenio Pérez #include "qemu/log.h"
21bd907ae4SEugenio Pérez #include "qemu/memalign.h"
221e0a84eaSCindy Lu #include "qemu/option.h"
231e0a84eaSCindy Lu #include "qapi/error.h"
2440237840SJason Wang #include <linux/vhost.h>
251e0a84eaSCindy Lu #include <sys/ioctl.h>
261e0a84eaSCindy Lu #include <err.h>
271e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h"
281e0a84eaSCindy Lu #include "monitor/monitor.h"
291e0a84eaSCindy Lu #include "hw/virtio/vhost.h"
301e0a84eaSCindy Lu 
311e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */
321e0a84eaSCindy Lu typedef struct VhostVDPAState {
331e0a84eaSCindy Lu     NetClientState nc;
341e0a84eaSCindy Lu     struct vhost_vdpa vhost_vdpa;
351e0a84eaSCindy Lu     VHostNetState *vhost_net;
362df4dd31SEugenio Pérez 
372df4dd31SEugenio Pérez     /* Control commands shadow buffers */
3817fb889fSEugenio Pérez     void *cvq_cmd_out_buffer;
3917fb889fSEugenio Pérez     virtio_net_ctrl_ack *status;
4017fb889fSEugenio Pérez 
417f211a28SEugenio Pérez     /* The device always have SVQ enabled */
427f211a28SEugenio Pérez     bool always_svq;
431e0a84eaSCindy Lu     bool started;
441e0a84eaSCindy Lu } VhostVDPAState;
451e0a84eaSCindy Lu 
461e0a84eaSCindy Lu const int vdpa_feature_bits[] = {
471e0a84eaSCindy Lu     VIRTIO_F_NOTIFY_ON_EMPTY,
481e0a84eaSCindy Lu     VIRTIO_RING_F_INDIRECT_DESC,
491e0a84eaSCindy Lu     VIRTIO_RING_F_EVENT_IDX,
501e0a84eaSCindy Lu     VIRTIO_F_ANY_LAYOUT,
511e0a84eaSCindy Lu     VIRTIO_F_VERSION_1,
521e0a84eaSCindy Lu     VIRTIO_NET_F_CSUM,
531e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_CSUM,
541e0a84eaSCindy Lu     VIRTIO_NET_F_GSO,
551e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO4,
561e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO6,
571e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ECN,
581e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_UFO,
591e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO4,
601e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO6,
611e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_ECN,
621e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_UFO,
631e0a84eaSCindy Lu     VIRTIO_NET_F_MRG_RXBUF,
641e0a84eaSCindy Lu     VIRTIO_NET_F_MTU,
6540237840SJason Wang     VIRTIO_NET_F_CTRL_RX,
6640237840SJason Wang     VIRTIO_NET_F_CTRL_RX_EXTRA,
6740237840SJason Wang     VIRTIO_NET_F_CTRL_VLAN,
6840237840SJason Wang     VIRTIO_NET_F_CTRL_MAC_ADDR,
6940237840SJason Wang     VIRTIO_NET_F_RSS,
7040237840SJason Wang     VIRTIO_NET_F_MQ,
7140237840SJason Wang     VIRTIO_NET_F_CTRL_VQ,
721e0a84eaSCindy Lu     VIRTIO_F_IOMMU_PLATFORM,
731e0a84eaSCindy Lu     VIRTIO_F_RING_PACKED,
74562a7d23SStefano Garzarella     VIRTIO_F_RING_RESET,
750145c393SAndrew Melnychenko     VIRTIO_NET_F_RSS,
760145c393SAndrew Melnychenko     VIRTIO_NET_F_HASH_REPORT,
771e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ANNOUNCE,
789aa47eddSSi-Wei Liu     VIRTIO_NET_F_STATUS,
791e0a84eaSCindy Lu     VHOST_INVALID_FEATURE_BIT
801e0a84eaSCindy Lu };
811e0a84eaSCindy Lu 
821576dbb5SEugenio Pérez /** Supported device specific feature bits with SVQ */
831576dbb5SEugenio Pérez static const uint64_t vdpa_svq_device_features =
841576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CSUM) |
851576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
861576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MTU) |
871576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MAC) |
881576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
891576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
901576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
911576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
921576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
931576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
941576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
951576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
961576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
971576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STATUS) |
981576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
9972b99a87SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MQ) |
1001576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
1011576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
1021576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
1031576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STANDBY);
1041576dbb5SEugenio Pérez 
/*
 * Address space id assigned to the control virtqueue group when only the CVQ
 * is shadowed (see vhost_vdpa_net_cvq_start()).
 */
#define VHOST_VDPA_NET_CVQ_ASID 1
106c1a10086SEugenio Pérez 
1071e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
1081e0a84eaSCindy Lu {
1091e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1101e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1111e0a84eaSCindy Lu     return s->vhost_net;
1121e0a84eaSCindy Lu }
1131e0a84eaSCindy Lu 
11436e46472SEugenio Pérez static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
11536e46472SEugenio Pérez {
11636e46472SEugenio Pérez     uint64_t invalid_dev_features =
11736e46472SEugenio Pérez         features & ~vdpa_svq_device_features &
11836e46472SEugenio Pérez         /* Transport are all accepted at this point */
11936e46472SEugenio Pérez         ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
12036e46472SEugenio Pérez                          VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
12136e46472SEugenio Pérez 
12236e46472SEugenio Pérez     if (invalid_dev_features) {
12336e46472SEugenio Pérez         error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
12436e46472SEugenio Pérez                    invalid_dev_features);
125258a0394SEugenio Pérez         return false;
12636e46472SEugenio Pérez     }
12736e46472SEugenio Pérez 
128258a0394SEugenio Pérez     return vhost_svq_valid_features(features, errp);
12936e46472SEugenio Pérez }
13036e46472SEugenio Pérez 
1311e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
1321e0a84eaSCindy Lu {
1331e0a84eaSCindy Lu     uint32_t device_id;
1341e0a84eaSCindy Lu     int ret;
1351e0a84eaSCindy Lu     struct vhost_dev *hdev;
1361e0a84eaSCindy Lu 
1371e0a84eaSCindy Lu     hdev = (struct vhost_dev *)&net->dev;
1381e0a84eaSCindy Lu     ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
1391e0a84eaSCindy Lu     if (device_id != VIRTIO_ID_NET) {
1401e0a84eaSCindy Lu         return -ENOTSUP;
1411e0a84eaSCindy Lu     }
1421e0a84eaSCindy Lu     return ret;
1431e0a84eaSCindy Lu }
1441e0a84eaSCindy Lu 
14540237840SJason Wang static int vhost_vdpa_add(NetClientState *ncs, void *be,
14640237840SJason Wang                           int queue_pair_index, int nvqs)
1471e0a84eaSCindy Lu {
1481e0a84eaSCindy Lu     VhostNetOptions options;
1491e0a84eaSCindy Lu     struct vhost_net *net = NULL;
1501e0a84eaSCindy Lu     VhostVDPAState *s;
1511e0a84eaSCindy Lu     int ret;
1521e0a84eaSCindy Lu 
1531e0a84eaSCindy Lu     options.backend_type = VHOST_BACKEND_TYPE_VDPA;
1541e0a84eaSCindy Lu     assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1551e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, ncs);
1561e0a84eaSCindy Lu     options.net_backend = ncs;
1571e0a84eaSCindy Lu     options.opaque      = be;
1581e0a84eaSCindy Lu     options.busyloop_timeout = 0;
15940237840SJason Wang     options.nvqs = nvqs;
1601e0a84eaSCindy Lu 
1611e0a84eaSCindy Lu     net = vhost_net_init(&options);
1621e0a84eaSCindy Lu     if (!net) {
1631e0a84eaSCindy Lu         error_report("failed to init vhost_net for queue");
164a97ef87aSJason Wang         goto err_init;
1651e0a84eaSCindy Lu     }
1661e0a84eaSCindy Lu     s->vhost_net = net;
1671e0a84eaSCindy Lu     ret = vhost_vdpa_net_check_device_id(net);
1681e0a84eaSCindy Lu     if (ret) {
169a97ef87aSJason Wang         goto err_check;
1701e0a84eaSCindy Lu     }
1711e0a84eaSCindy Lu     return 0;
172a97ef87aSJason Wang err_check:
1731e0a84eaSCindy Lu     vhost_net_cleanup(net);
174ab36edcfSJason Wang     g_free(net);
175a97ef87aSJason Wang err_init:
1761e0a84eaSCindy Lu     return -1;
1771e0a84eaSCindy Lu }
1781e0a84eaSCindy Lu 
1791e0a84eaSCindy Lu static void vhost_vdpa_cleanup(NetClientState *nc)
1801e0a84eaSCindy Lu {
1811e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1821576dbb5SEugenio Pérez     struct vhost_dev *dev = &s->vhost_net->dev;
1831e0a84eaSCindy Lu 
1842df4dd31SEugenio Pérez     qemu_vfree(s->cvq_cmd_out_buffer);
18517fb889fSEugenio Pérez     qemu_vfree(s->status);
1861576dbb5SEugenio Pérez     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
1871576dbb5SEugenio Pérez         g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
1881576dbb5SEugenio Pérez     }
1891e0a84eaSCindy Lu     if (s->vhost_net) {
1901e0a84eaSCindy Lu         vhost_net_cleanup(s->vhost_net);
1911e0a84eaSCindy Lu         g_free(s->vhost_net);
1921e0a84eaSCindy Lu         s->vhost_net = NULL;
1931e0a84eaSCindy Lu     }
19457b3a7d8SCindy Lu      if (s->vhost_vdpa.device_fd >= 0) {
19557b3a7d8SCindy Lu         qemu_close(s->vhost_vdpa.device_fd);
19657b3a7d8SCindy Lu         s->vhost_vdpa.device_fd = -1;
19757b3a7d8SCindy Lu     }
1981e0a84eaSCindy Lu }
1991e0a84eaSCindy Lu 
2001e0a84eaSCindy Lu static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
2011e0a84eaSCindy Lu {
2021e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
2031e0a84eaSCindy Lu 
2041e0a84eaSCindy Lu     return true;
2051e0a84eaSCindy Lu }
2061e0a84eaSCindy Lu 
2071e0a84eaSCindy Lu static bool vhost_vdpa_has_ufo(NetClientState *nc)
2081e0a84eaSCindy Lu {
2091e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
2101e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
2111e0a84eaSCindy Lu     uint64_t features = 0;
2121e0a84eaSCindy Lu     features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
2131e0a84eaSCindy Lu     features = vhost_net_get_features(s->vhost_net, features);
2141e0a84eaSCindy Lu     return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
2151e0a84eaSCindy Lu 
2161e0a84eaSCindy Lu }
2171e0a84eaSCindy Lu 
218ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
219ee8a1c63SKevin Wolf                                        Error **errp)
220ee8a1c63SKevin Wolf {
221ee8a1c63SKevin Wolf     const char *driver = object_class_get_name(oc);
222ee8a1c63SKevin Wolf 
223ee8a1c63SKevin Wolf     if (!g_str_has_prefix(driver, "virtio-net-")) {
224ee8a1c63SKevin Wolf         error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
225ee8a1c63SKevin Wolf         return false;
226ee8a1c63SKevin Wolf     }
227ee8a1c63SKevin Wolf 
228ee8a1c63SKevin Wolf     return true;
229ee8a1c63SKevin Wolf }
230ee8a1c63SKevin Wolf 
231846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */
232846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
233846a1e85SEugenio Pérez                                   size_t size)
234846a1e85SEugenio Pérez {
235bc5add1dSSi-Wei Liu     return size;
236846a1e85SEugenio Pérez }
237846a1e85SEugenio Pérez 
2381e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = {
2391e0a84eaSCindy Lu         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
2401e0a84eaSCindy Lu         .size = sizeof(VhostVDPAState),
241846a1e85SEugenio Pérez         .receive = vhost_vdpa_receive,
2421e0a84eaSCindy Lu         .cleanup = vhost_vdpa_cleanup,
2431e0a84eaSCindy Lu         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
2441e0a84eaSCindy Lu         .has_ufo = vhost_vdpa_has_ufo,
245ee8a1c63SKevin Wolf         .check_peer_type = vhost_vdpa_check_peer_type,
2461e0a84eaSCindy Lu };
2471e0a84eaSCindy Lu 
248c1a10086SEugenio Pérez static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
249c1a10086SEugenio Pérez {
250c1a10086SEugenio Pérez     struct vhost_vring_state state = {
251c1a10086SEugenio Pérez         .index = vq_index,
252c1a10086SEugenio Pérez     };
253c1a10086SEugenio Pérez     int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
254c1a10086SEugenio Pérez 
255c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
256c1a10086SEugenio Pérez         error_report("Cannot get VQ %u group: %s", vq_index,
257c1a10086SEugenio Pérez                      g_strerror(errno));
258c1a10086SEugenio Pérez         return r;
259c1a10086SEugenio Pérez     }
260c1a10086SEugenio Pérez 
261c1a10086SEugenio Pérez     return state.num;
262c1a10086SEugenio Pérez }
263c1a10086SEugenio Pérez 
264c1a10086SEugenio Pérez static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
265c1a10086SEugenio Pérez                                            unsigned vq_group,
266c1a10086SEugenio Pérez                                            unsigned asid_num)
267c1a10086SEugenio Pérez {
268c1a10086SEugenio Pérez     struct vhost_vring_state asid = {
269c1a10086SEugenio Pérez         .index = vq_group,
270c1a10086SEugenio Pérez         .num = asid_num,
271c1a10086SEugenio Pérez     };
272c1a10086SEugenio Pérez     int r;
273c1a10086SEugenio Pérez 
274c1a10086SEugenio Pérez     r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
275c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
276c1a10086SEugenio Pérez         error_report("Can't set vq group %u asid %u, errno=%d (%s)",
277c1a10086SEugenio Pérez                      asid.index, asid.num, errno, g_strerror(errno));
278c1a10086SEugenio Pérez     }
279c1a10086SEugenio Pérez     return r;
280c1a10086SEugenio Pérez }
281c1a10086SEugenio Pérez 
2822df4dd31SEugenio Pérez static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
2832df4dd31SEugenio Pérez {
2842df4dd31SEugenio Pérez     VhostIOVATree *tree = v->iova_tree;
2852df4dd31SEugenio Pérez     DMAMap needle = {
2862df4dd31SEugenio Pérez         /*
2872df4dd31SEugenio Pérez          * No need to specify size or to look for more translations since
2882df4dd31SEugenio Pérez          * this contiguous chunk was allocated by us.
2892df4dd31SEugenio Pérez          */
2902df4dd31SEugenio Pérez         .translated_addr = (hwaddr)(uintptr_t)addr,
2912df4dd31SEugenio Pérez     };
2922df4dd31SEugenio Pérez     const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
2932df4dd31SEugenio Pérez     int r;
2942df4dd31SEugenio Pérez 
2952df4dd31SEugenio Pérez     if (unlikely(!map)) {
2962df4dd31SEugenio Pérez         error_report("Cannot locate expected map");
2972df4dd31SEugenio Pérez         return;
2982df4dd31SEugenio Pérez     }
2992df4dd31SEugenio Pérez 
300cd831ed5SEugenio Pérez     r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
3012df4dd31SEugenio Pérez     if (unlikely(r != 0)) {
3022df4dd31SEugenio Pérez         error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
3032df4dd31SEugenio Pérez     }
3042df4dd31SEugenio Pérez 
30569292a8eSEugenio Pérez     vhost_iova_tree_remove(tree, *map);
3062df4dd31SEugenio Pérez }
3072df4dd31SEugenio Pérez 
3082df4dd31SEugenio Pérez static size_t vhost_vdpa_net_cvq_cmd_len(void)
3092df4dd31SEugenio Pérez {
3102df4dd31SEugenio Pérez     /*
3112df4dd31SEugenio Pérez      * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
3122df4dd31SEugenio Pérez      * In buffer is always 1 byte, so it should fit here
3132df4dd31SEugenio Pérez      */
3142df4dd31SEugenio Pérez     return sizeof(struct virtio_net_ctrl_hdr) +
3152df4dd31SEugenio Pérez            2 * sizeof(struct virtio_net_ctrl_mac) +
3162df4dd31SEugenio Pérez            MAC_TABLE_ENTRIES * ETH_ALEN;
3172df4dd31SEugenio Pérez }
3182df4dd31SEugenio Pérez 
/**
 * Size of a CVQ shadow buffer: the maximum command length rounded up to a
 * multiple of the host page size.
 */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
3232df4dd31SEugenio Pérez 
3247a7f87e9SEugenio Pérez /** Map CVQ buffer. */
3257a7f87e9SEugenio Pérez static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
3267a7f87e9SEugenio Pérez                                   bool write)
3272df4dd31SEugenio Pérez {
3282df4dd31SEugenio Pérez     DMAMap map = {};
3292df4dd31SEugenio Pérez     int r;
3302df4dd31SEugenio Pérez 
3312df4dd31SEugenio Pérez     map.translated_addr = (hwaddr)(uintptr_t)buf;
3327a7f87e9SEugenio Pérez     map.size = size - 1;
3332df4dd31SEugenio Pérez     map.perm = write ? IOMMU_RW : IOMMU_RO,
3342df4dd31SEugenio Pérez     r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
3352df4dd31SEugenio Pérez     if (unlikely(r != IOVA_OK)) {
3362df4dd31SEugenio Pérez         error_report("Cannot map injected element");
3377a7f87e9SEugenio Pérez         return r;
3382df4dd31SEugenio Pérez     }
3392df4dd31SEugenio Pérez 
340cd831ed5SEugenio Pérez     r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
341cd831ed5SEugenio Pérez                            vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
3422df4dd31SEugenio Pérez     if (unlikely(r < 0)) {
3432df4dd31SEugenio Pérez         goto dma_map_err;
3442df4dd31SEugenio Pérez     }
3452df4dd31SEugenio Pérez 
3467a7f87e9SEugenio Pérez     return 0;
3472df4dd31SEugenio Pérez 
3482df4dd31SEugenio Pérez dma_map_err:
34969292a8eSEugenio Pérez     vhost_iova_tree_remove(v->iova_tree, map);
3507a7f87e9SEugenio Pérez     return r;
3512df4dd31SEugenio Pérez }
3522df4dd31SEugenio Pérez 
3537a7f87e9SEugenio Pérez static int vhost_vdpa_net_cvq_start(NetClientState *nc)
3542df4dd31SEugenio Pérez {
3557a7f87e9SEugenio Pérez     VhostVDPAState *s;
356c1a10086SEugenio Pérez     struct vhost_vdpa *v;
357c1a10086SEugenio Pérez     uint64_t backend_features;
358c1a10086SEugenio Pérez     int64_t cvq_group;
359c1a10086SEugenio Pérez     int cvq_index, r;
3602df4dd31SEugenio Pérez 
3617a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
3627a7f87e9SEugenio Pérez 
3637a7f87e9SEugenio Pérez     s = DO_UPCAST(VhostVDPAState, nc, nc);
364c1a10086SEugenio Pérez     v = &s->vhost_vdpa;
365c1a10086SEugenio Pérez 
366c1a10086SEugenio Pérez     v->shadow_data = s->always_svq;
367c1a10086SEugenio Pérez     v->shadow_vqs_enabled = s->always_svq;
368c1a10086SEugenio Pérez     s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
369c1a10086SEugenio Pérez 
370c1a10086SEugenio Pérez     if (s->always_svq) {
371c1a10086SEugenio Pérez         /* SVQ is already configured for all virtqueues */
372c1a10086SEugenio Pérez         goto out;
373c1a10086SEugenio Pérez     }
374c1a10086SEugenio Pérez 
375c1a10086SEugenio Pérez     /*
376c1a10086SEugenio Pérez      * If we early return in these cases SVQ will not be enabled. The migration
377c1a10086SEugenio Pérez      * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
378c1a10086SEugenio Pérez      *
379c1a10086SEugenio Pérez      * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
380c1a10086SEugenio Pérez      * yet.
381c1a10086SEugenio Pérez      */
382c1a10086SEugenio Pérez     r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
383c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
384c1a10086SEugenio Pérez         error_report("Cannot get vdpa backend_features: %s(%d)",
385c1a10086SEugenio Pérez             g_strerror(errno), errno);
386c1a10086SEugenio Pérez         return -1;
387c1a10086SEugenio Pérez     }
388c1a10086SEugenio Pérez     if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
389c1a10086SEugenio Pérez         !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
390c1a10086SEugenio Pérez         return 0;
391c1a10086SEugenio Pérez     }
392c1a10086SEugenio Pérez 
393c1a10086SEugenio Pérez     /*
394c1a10086SEugenio Pérez      * Check if all the virtqueues of the virtio device are in a different vq
395c1a10086SEugenio Pérez      * than the last vq. VQ group of last group passed in cvq_group.
396c1a10086SEugenio Pérez      */
397c1a10086SEugenio Pérez     cvq_index = v->dev->vq_index_end - 1;
398c1a10086SEugenio Pérez     cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
399c1a10086SEugenio Pérez     if (unlikely(cvq_group < 0)) {
400c1a10086SEugenio Pérez         return cvq_group;
401c1a10086SEugenio Pérez     }
402c1a10086SEugenio Pérez     for (int i = 0; i < cvq_index; ++i) {
403c1a10086SEugenio Pérez         int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
404c1a10086SEugenio Pérez 
405c1a10086SEugenio Pérez         if (unlikely(group < 0)) {
406c1a10086SEugenio Pérez             return group;
407c1a10086SEugenio Pérez         }
408c1a10086SEugenio Pérez 
409c1a10086SEugenio Pérez         if (group == cvq_group) {
410c1a10086SEugenio Pérez             return 0;
411c1a10086SEugenio Pérez         }
412c1a10086SEugenio Pérez     }
413c1a10086SEugenio Pérez 
414c1a10086SEugenio Pérez     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
415c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
416c1a10086SEugenio Pérez         return r;
417c1a10086SEugenio Pérez     }
418c1a10086SEugenio Pérez 
419c1a10086SEugenio Pérez     v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
420c1a10086SEugenio Pérez                                        v->iova_range.last);
421c1a10086SEugenio Pérez     v->shadow_vqs_enabled = true;
422c1a10086SEugenio Pérez     s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
423c1a10086SEugenio Pérez 
424c1a10086SEugenio Pérez out:
4257a7f87e9SEugenio Pérez     if (!s->vhost_vdpa.shadow_vqs_enabled) {
4267a7f87e9SEugenio Pérez         return 0;
4272df4dd31SEugenio Pérez     }
4282df4dd31SEugenio Pérez 
4297a7f87e9SEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
4307a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), false);
4317a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
4327a7f87e9SEugenio Pérez         return r;
4337a7f87e9SEugenio Pérez     }
4347a7f87e9SEugenio Pérez 
43517fb889fSEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
4367a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), true);
4377a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
4382df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
4392df4dd31SEugenio Pérez     }
4402df4dd31SEugenio Pérez 
4417a7f87e9SEugenio Pérez     return r;
4427a7f87e9SEugenio Pérez }
4437a7f87e9SEugenio Pérez 
4447a7f87e9SEugenio Pérez static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
4457a7f87e9SEugenio Pérez {
4467a7f87e9SEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
4477a7f87e9SEugenio Pérez 
4487a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
4497a7f87e9SEugenio Pérez 
4507a7f87e9SEugenio Pérez     if (s->vhost_vdpa.shadow_vqs_enabled) {
4517a7f87e9SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
45217fb889fSEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
453c1a10086SEugenio Pérez         if (!s->always_svq) {
454c1a10086SEugenio Pérez             /*
455c1a10086SEugenio Pérez              * If only the CVQ is shadowed we can delete this safely.
456c1a10086SEugenio Pérez              * If all the VQs are shadows this will be needed by the time the
457c1a10086SEugenio Pérez              * device is started again to register SVQ vrings and similar.
458c1a10086SEugenio Pérez              */
459c1a10086SEugenio Pérez             g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
460c1a10086SEugenio Pérez         }
4617a7f87e9SEugenio Pérez     }
4622df4dd31SEugenio Pérez }
4632df4dd31SEugenio Pérez 
464be4278b6SEugenio Pérez static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
465be4278b6SEugenio Pérez                                       size_t in_len)
466be4278b6SEugenio Pérez {
467be4278b6SEugenio Pérez     /* Buffers for the device */
468be4278b6SEugenio Pérez     const struct iovec out = {
469be4278b6SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
470be4278b6SEugenio Pérez         .iov_len = out_len,
471be4278b6SEugenio Pérez     };
472be4278b6SEugenio Pérez     const struct iovec in = {
47317fb889fSEugenio Pérez         .iov_base = s->status,
474be4278b6SEugenio Pérez         .iov_len = sizeof(virtio_net_ctrl_ack),
475be4278b6SEugenio Pérez     };
476be4278b6SEugenio Pérez     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
477be4278b6SEugenio Pérez     int r;
478be4278b6SEugenio Pérez 
479be4278b6SEugenio Pérez     r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
480be4278b6SEugenio Pérez     if (unlikely(r != 0)) {
481be4278b6SEugenio Pérez         if (unlikely(r == -ENOSPC)) {
482be4278b6SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
483be4278b6SEugenio Pérez                           __func__);
484be4278b6SEugenio Pérez         }
485be4278b6SEugenio Pérez         return r;
486be4278b6SEugenio Pérez     }
487be4278b6SEugenio Pérez 
488be4278b6SEugenio Pérez     /*
489be4278b6SEugenio Pérez      * We can poll here since we've had BQL from the time we sent the
490be4278b6SEugenio Pérez      * descriptor. Also, we need to take the answer before SVQ pulls by itself,
491be4278b6SEugenio Pérez      * when BQL is released
492be4278b6SEugenio Pérez      */
493be4278b6SEugenio Pérez     return vhost_svq_poll(svq);
494be4278b6SEugenio Pérez }
495be4278b6SEugenio Pérez 
496f73c0c43SEugenio Pérez static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
497f73c0c43SEugenio Pérez                                        uint8_t cmd, const void *data,
498f73c0c43SEugenio Pérez                                        size_t data_size)
499f73c0c43SEugenio Pérez {
500f73c0c43SEugenio Pérez     const struct virtio_net_ctrl_hdr ctrl = {
501f73c0c43SEugenio Pérez         .class = class,
502f73c0c43SEugenio Pérez         .cmd = cmd,
503f73c0c43SEugenio Pérez     };
504f73c0c43SEugenio Pérez 
505f73c0c43SEugenio Pérez     assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
506f73c0c43SEugenio Pérez 
507f73c0c43SEugenio Pérez     memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
508f73c0c43SEugenio Pérez     memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
509f73c0c43SEugenio Pérez 
510f73c0c43SEugenio Pérez     return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
511f73c0c43SEugenio Pérez                                   sizeof(virtio_net_ctrl_ack));
512f73c0c43SEugenio Pérez }
513f73c0c43SEugenio Pérez 
514f73c0c43SEugenio Pérez static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
515f73c0c43SEugenio Pérez {
516f73c0c43SEugenio Pérez     uint64_t features = n->parent_obj.guest_features;
517f73c0c43SEugenio Pérez     if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
518f73c0c43SEugenio Pérez         ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
519f73c0c43SEugenio Pérez                                                   VIRTIO_NET_CTRL_MAC_ADDR_SET,
520f73c0c43SEugenio Pérez                                                   n->mac, sizeof(n->mac));
521f73c0c43SEugenio Pérez         if (unlikely(dev_written < 0)) {
522f73c0c43SEugenio Pérez             return dev_written;
523f73c0c43SEugenio Pérez         }
524f73c0c43SEugenio Pérez 
525f73c0c43SEugenio Pérez         return *s->status != VIRTIO_NET_OK;
526f73c0c43SEugenio Pérez     }
527f73c0c43SEugenio Pérez 
528f73c0c43SEugenio Pérez     return 0;
529f73c0c43SEugenio Pérez }
530f73c0c43SEugenio Pérez 
531f64c7cdaSEugenio Pérez static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
532f64c7cdaSEugenio Pérez                                   const VirtIONet *n)
533f64c7cdaSEugenio Pérez {
534f64c7cdaSEugenio Pérez     struct virtio_net_ctrl_mq mq;
535f64c7cdaSEugenio Pérez     uint64_t features = n->parent_obj.guest_features;
536f64c7cdaSEugenio Pérez     ssize_t dev_written;
537f64c7cdaSEugenio Pérez 
538f64c7cdaSEugenio Pérez     if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
539f64c7cdaSEugenio Pérez         return 0;
540f64c7cdaSEugenio Pérez     }
541f64c7cdaSEugenio Pérez 
542f64c7cdaSEugenio Pérez     mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
543f64c7cdaSEugenio Pérez     dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
544f64c7cdaSEugenio Pérez                                           VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
545f64c7cdaSEugenio Pérez                                           sizeof(mq));
546f64c7cdaSEugenio Pérez     if (unlikely(dev_written < 0)) {
547f64c7cdaSEugenio Pérez         return dev_written;
548f64c7cdaSEugenio Pérez     }
549f64c7cdaSEugenio Pérez 
550f64c7cdaSEugenio Pérez     return *s->status != VIRTIO_NET_OK;
551f64c7cdaSEugenio Pérez }
552f64c7cdaSEugenio Pérez 
553dd036d8dSEugenio Pérez static int vhost_vdpa_net_load(NetClientState *nc)
554dd036d8dSEugenio Pérez {
555dd036d8dSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
556f73c0c43SEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
557dd036d8dSEugenio Pérez     const VirtIONet *n;
558f73c0c43SEugenio Pérez     int r;
559dd036d8dSEugenio Pérez 
560dd036d8dSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
561dd036d8dSEugenio Pérez 
562dd036d8dSEugenio Pérez     if (!v->shadow_vqs_enabled) {
563dd036d8dSEugenio Pérez         return 0;
564dd036d8dSEugenio Pérez     }
565dd036d8dSEugenio Pérez 
566dd036d8dSEugenio Pérez     n = VIRTIO_NET(v->dev->vdev);
567f73c0c43SEugenio Pérez     r = vhost_vdpa_net_load_mac(s, n);
568f73c0c43SEugenio Pérez     if (unlikely(r < 0)) {
569f73c0c43SEugenio Pérez         return r;
570dd036d8dSEugenio Pérez     }
571f64c7cdaSEugenio Pérez     r = vhost_vdpa_net_load_mq(s, n);
572f64c7cdaSEugenio Pérez     if (unlikely(r)) {
573f64c7cdaSEugenio Pérez         return r;
574f64c7cdaSEugenio Pérez     }
575dd036d8dSEugenio Pérez 
576dd036d8dSEugenio Pérez     return 0;
577dd036d8dSEugenio Pérez }
578dd036d8dSEugenio Pérez 
579f8972b56SEugenio Pérez static NetClientInfo net_vhost_vdpa_cvq_info = {
580f8972b56SEugenio Pérez     .type = NET_CLIENT_DRIVER_VHOST_VDPA,
581f8972b56SEugenio Pérez     .size = sizeof(VhostVDPAState),
582f8972b56SEugenio Pérez     .receive = vhost_vdpa_receive,
5837a7f87e9SEugenio Pérez     .start = vhost_vdpa_net_cvq_start,
584dd036d8dSEugenio Pérez     .load = vhost_vdpa_net_load,
5857a7f87e9SEugenio Pérez     .stop = vhost_vdpa_net_cvq_stop,
586f8972b56SEugenio Pérez     .cleanup = vhost_vdpa_cleanup,
587f8972b56SEugenio Pérez     .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
588f8972b56SEugenio Pérez     .has_ufo = vhost_vdpa_has_ufo,
589f8972b56SEugenio Pérez     .check_peer_type = vhost_vdpa_check_peer_type,
590f8972b56SEugenio Pérez };
591f8972b56SEugenio Pérez 
5922df4dd31SEugenio Pérez /**
5932df4dd31SEugenio Pérez  * Validate and copy control virtqueue commands.
5942df4dd31SEugenio Pérez  *
5952df4dd31SEugenio Pérez  * Following QEMU guidelines, we offer a copy of the buffers to the device to
5962df4dd31SEugenio Pérez  * prevent TOCTOU bugs.
597bd907ae4SEugenio Pérez  */
598bd907ae4SEugenio Pérez static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
599bd907ae4SEugenio Pérez                                             VirtQueueElement *elem,
600bd907ae4SEugenio Pérez                                             void *opaque)
601bd907ae4SEugenio Pérez {
6022df4dd31SEugenio Pérez     VhostVDPAState *s = opaque;
603be4278b6SEugenio Pérez     size_t in_len;
604bd907ae4SEugenio Pérez     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
6057a7f87e9SEugenio Pérez     /* Out buffer sent to both the vdpa device and the device model */
6067a7f87e9SEugenio Pérez     struct iovec out = {
6077a7f87e9SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
6087a7f87e9SEugenio Pérez     };
6092df4dd31SEugenio Pérez     /* in buffer used for device model */
6102df4dd31SEugenio Pérez     const struct iovec in = {
6112df4dd31SEugenio Pérez         .iov_base = &status,
6122df4dd31SEugenio Pérez         .iov_len = sizeof(status),
6132df4dd31SEugenio Pérez     };
614be4278b6SEugenio Pérez     ssize_t dev_written = -EINVAL;
615bd907ae4SEugenio Pérez 
6167a7f87e9SEugenio Pérez     out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
6177a7f87e9SEugenio Pérez                              s->cvq_cmd_out_buffer,
6187a7f87e9SEugenio Pérez                              vhost_vdpa_net_cvq_cmd_len());
619*3f9a3eebSEugenio Pérez     if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
620*3f9a3eebSEugenio Pérez         /*
621*3f9a3eebSEugenio Pérez          * Guest announce capability is emulated by qemu, so don't forward to
622*3f9a3eebSEugenio Pérez          * the device.
623*3f9a3eebSEugenio Pérez          */
624*3f9a3eebSEugenio Pérez         dev_written = sizeof(status);
625*3f9a3eebSEugenio Pérez         *s->status = VIRTIO_NET_OK;
626*3f9a3eebSEugenio Pérez     } else {
627be4278b6SEugenio Pérez         dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
628be4278b6SEugenio Pérez         if (unlikely(dev_written < 0)) {
629bd907ae4SEugenio Pérez             goto out;
630bd907ae4SEugenio Pérez         }
631*3f9a3eebSEugenio Pérez     }
632bd907ae4SEugenio Pérez 
633bd907ae4SEugenio Pérez     if (unlikely(dev_written < sizeof(status))) {
634bd907ae4SEugenio Pérez         error_report("Insufficient written data (%zu)", dev_written);
6352df4dd31SEugenio Pérez         goto out;
6362df4dd31SEugenio Pérez     }
6372df4dd31SEugenio Pérez 
63817fb889fSEugenio Pérez     if (*s->status != VIRTIO_NET_OK) {
639be4278b6SEugenio Pérez         return VIRTIO_NET_ERR;
6402df4dd31SEugenio Pérez     }
6412df4dd31SEugenio Pérez 
6422df4dd31SEugenio Pérez     status = VIRTIO_NET_ERR;
6437a7f87e9SEugenio Pérez     virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
6442df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
6452df4dd31SEugenio Pérez         error_report("Bad CVQ processing in model");
646bd907ae4SEugenio Pérez     }
647bd907ae4SEugenio Pérez 
648bd907ae4SEugenio Pérez out:
649bd907ae4SEugenio Pérez     in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
650bd907ae4SEugenio Pérez                           sizeof(status));
651bd907ae4SEugenio Pérez     if (unlikely(in_len < sizeof(status))) {
652bd907ae4SEugenio Pérez         error_report("Bad device CVQ written length");
653bd907ae4SEugenio Pérez     }
654bd907ae4SEugenio Pérez     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
655bd907ae4SEugenio Pérez     g_free(elem);
656be4278b6SEugenio Pérez     return dev_written < 0 ? dev_written : 0;
657bd907ae4SEugenio Pérez }
658bd907ae4SEugenio Pérez 
659bd907ae4SEugenio Pérez static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
660bd907ae4SEugenio Pérez     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
661bd907ae4SEugenio Pérez };
662bd907ae4SEugenio Pérez 
663654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
664654790b6SJason Wang                                        const char *device,
665654790b6SJason Wang                                        const char *name,
66640237840SJason Wang                                        int vdpa_device_fd,
66740237840SJason Wang                                        int queue_pair_index,
66840237840SJason Wang                                        int nvqs,
6691576dbb5SEugenio Pérez                                        bool is_datapath,
6701576dbb5SEugenio Pérez                                        bool svq,
671a585fad2SEugenio Pérez                                        struct vhost_vdpa_iova_range iova_range,
6721576dbb5SEugenio Pérez                                        VhostIOVATree *iova_tree)
6731e0a84eaSCindy Lu {
6741e0a84eaSCindy Lu     NetClientState *nc = NULL;
6751e0a84eaSCindy Lu     VhostVDPAState *s;
6761e0a84eaSCindy Lu     int ret = 0;
6771e0a84eaSCindy Lu     assert(name);
67840237840SJason Wang     if (is_datapath) {
67940237840SJason Wang         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
68040237840SJason Wang                                  name);
68140237840SJason Wang     } else {
682f8972b56SEugenio Pérez         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
68340237840SJason Wang                                          device, name);
68440237840SJason Wang     }
68553b85d95SLaurent Vivier     qemu_set_info_str(nc, TYPE_VHOST_VDPA);
6861e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, nc);
6877327813dSJason Wang 
6881e0a84eaSCindy Lu     s->vhost_vdpa.device_fd = vdpa_device_fd;
68940237840SJason Wang     s->vhost_vdpa.index = queue_pair_index;
6907f211a28SEugenio Pérez     s->always_svq = svq;
6911576dbb5SEugenio Pérez     s->vhost_vdpa.shadow_vqs_enabled = svq;
692a585fad2SEugenio Pérez     s->vhost_vdpa.iova_range = iova_range;
6936188d78aSEugenio Pérez     s->vhost_vdpa.shadow_data = svq;
6941576dbb5SEugenio Pérez     s->vhost_vdpa.iova_tree = iova_tree;
695bd907ae4SEugenio Pérez     if (!is_datapath) {
6962df4dd31SEugenio Pérez         s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
6972df4dd31SEugenio Pérez                                             vhost_vdpa_net_cvq_cmd_page_len());
6982df4dd31SEugenio Pérez         memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
69917fb889fSEugenio Pérez         s->status = qemu_memalign(qemu_real_host_page_size(),
7002df4dd31SEugenio Pérez                                   vhost_vdpa_net_cvq_cmd_page_len());
70117fb889fSEugenio Pérez         memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());
7022df4dd31SEugenio Pérez 
703bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
704bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops_opaque = s;
705bd907ae4SEugenio Pérez     }
70640237840SJason Wang     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
70774af5eecSJason Wang     if (ret) {
70874af5eecSJason Wang         qemu_del_net_client(nc);
709654790b6SJason Wang         return NULL;
71074af5eecSJason Wang     }
711654790b6SJason Wang     return nc;
7121e0a84eaSCindy Lu }
7131e0a84eaSCindy Lu 
/*
 * Query the usable IOVA range of the vdpa device behind @fd through the
 * VHOST_VDPA_GET_IOVA_RANGE ioctl, storing it in @iova_range.
 *
 * Returns 0 on success, -errno if the ioctl fails.
 */
static int vhost_vdpa_get_iova_range(int fd,
                                     struct vhost_vdpa_iova_range *iova_range)
{
    if (ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range) < 0) {
        return -errno;
    }

    return 0;
}
7211576dbb5SEugenio Pérez 
7228170ab3fSEugenio Pérez static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
7238170ab3fSEugenio Pérez {
7248170ab3fSEugenio Pérez     int ret = ioctl(fd, VHOST_GET_FEATURES, features);
7258170ab3fSEugenio Pérez     if (unlikely(ret < 0)) {
7268170ab3fSEugenio Pérez         error_setg_errno(errp, errno,
7278170ab3fSEugenio Pérez                          "Fail to query features from vhost-vDPA device");
7288170ab3fSEugenio Pérez     }
7298170ab3fSEugenio Pérez     return ret;
7308170ab3fSEugenio Pérez }
7318170ab3fSEugenio Pérez 
7328170ab3fSEugenio Pérez static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
7338170ab3fSEugenio Pérez                                           int *has_cvq, Error **errp)
73440237840SJason Wang {
73540237840SJason Wang     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
736cd523a41SStefano Garzarella     g_autofree struct vhost_vdpa_config *config = NULL;
73740237840SJason Wang     __virtio16 *max_queue_pairs;
73840237840SJason Wang     int ret;
73940237840SJason Wang 
74040237840SJason Wang     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
74140237840SJason Wang         *has_cvq = 1;
74240237840SJason Wang     } else {
74340237840SJason Wang         *has_cvq = 0;
74440237840SJason Wang     }
74540237840SJason Wang 
74640237840SJason Wang     if (features & (1 << VIRTIO_NET_F_MQ)) {
74740237840SJason Wang         config = g_malloc0(config_size + sizeof(*max_queue_pairs));
74840237840SJason Wang         config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
74940237840SJason Wang         config->len = sizeof(*max_queue_pairs);
75040237840SJason Wang 
75140237840SJason Wang         ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
75240237840SJason Wang         if (ret) {
75340237840SJason Wang             error_setg(errp, "Fail to get config from vhost-vDPA device");
75440237840SJason Wang             return -ret;
75540237840SJason Wang         }
75640237840SJason Wang 
75740237840SJason Wang         max_queue_pairs = (__virtio16 *)&config->buf;
75840237840SJason Wang 
75940237840SJason Wang         return lduw_le_p(max_queue_pairs);
76040237840SJason Wang     }
76140237840SJason Wang 
76240237840SJason Wang     return 1;
76340237840SJason Wang }
76440237840SJason Wang 
7651e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
7661e0a84eaSCindy Lu                         NetClientState *peer, Error **errp)
7671e0a84eaSCindy Lu {
7681e0a84eaSCindy Lu     const NetdevVhostVDPAOptions *opts;
7698170ab3fSEugenio Pérez     uint64_t features;
770654790b6SJason Wang     int vdpa_device_fd;
771eb3cb751SEugenio Pérez     g_autofree NetClientState **ncs = NULL;
7721576dbb5SEugenio Pérez     g_autoptr(VhostIOVATree) iova_tree = NULL;
773a585fad2SEugenio Pérez     struct vhost_vdpa_iova_range iova_range;
774eb3cb751SEugenio Pérez     NetClientState *nc;
775aed5da45SEugenio Pérez     int queue_pairs, r, i = 0, has_cvq = 0;
7761e0a84eaSCindy Lu 
7771e0a84eaSCindy Lu     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
7781e0a84eaSCindy Lu     opts = &netdev->u.vhost_vdpa;
7797480874aSMarkus Armbruster     if (!opts->vhostdev && !opts->vhostfd) {
7808801ccd0SSi-Wei Liu         error_setg(errp,
7818801ccd0SSi-Wei Liu                    "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
782c8295404SEugenio Pérez         return -1;
783c8295404SEugenio Pérez     }
7847327813dSJason Wang 
7857480874aSMarkus Armbruster     if (opts->vhostdev && opts->vhostfd) {
7868801ccd0SSi-Wei Liu         error_setg(errp,
7878801ccd0SSi-Wei Liu                    "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
7888801ccd0SSi-Wei Liu         return -1;
7898801ccd0SSi-Wei Liu     }
7908801ccd0SSi-Wei Liu 
7917480874aSMarkus Armbruster     if (opts->vhostdev) {
7920351152bSEugenio Pérez         vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
7937327813dSJason Wang         if (vdpa_device_fd == -1) {
7947327813dSJason Wang             return -errno;
7957327813dSJason Wang         }
7965107fd3eSPeter Maydell     } else {
7975107fd3eSPeter Maydell         /* has_vhostfd */
7988801ccd0SSi-Wei Liu         vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
7998801ccd0SSi-Wei Liu         if (vdpa_device_fd == -1) {
8008801ccd0SSi-Wei Liu             error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
8018801ccd0SSi-Wei Liu             return -1;
8028801ccd0SSi-Wei Liu         }
8038801ccd0SSi-Wei Liu     }
8047327813dSJason Wang 
8058170ab3fSEugenio Pérez     r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
8068170ab3fSEugenio Pérez     if (unlikely(r < 0)) {
807aed5da45SEugenio Pérez         goto err;
8088170ab3fSEugenio Pérez     }
8098170ab3fSEugenio Pérez 
8108170ab3fSEugenio Pérez     queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
81140237840SJason Wang                                                  &has_cvq, errp);
81240237840SJason Wang     if (queue_pairs < 0) {
8137327813dSJason Wang         qemu_close(vdpa_device_fd);
81440237840SJason Wang         return queue_pairs;
8157327813dSJason Wang     }
8167327813dSJason Wang 
817a585fad2SEugenio Pérez     vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
8181576dbb5SEugenio Pérez     if (opts->x_svq) {
81936e46472SEugenio Pérez         if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
8201576dbb5SEugenio Pérez             goto err_svq;
8211576dbb5SEugenio Pérez         }
8221576dbb5SEugenio Pérez 
8231576dbb5SEugenio Pérez         iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
8241576dbb5SEugenio Pérez     }
8251576dbb5SEugenio Pérez 
82640237840SJason Wang     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
82740237840SJason Wang 
82840237840SJason Wang     for (i = 0; i < queue_pairs; i++) {
82940237840SJason Wang         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
8301576dbb5SEugenio Pérez                                      vdpa_device_fd, i, 2, true, opts->x_svq,
831a585fad2SEugenio Pérez                                      iova_range, iova_tree);
83240237840SJason Wang         if (!ncs[i])
83340237840SJason Wang             goto err;
83440237840SJason Wang     }
83540237840SJason Wang 
83640237840SJason Wang     if (has_cvq) {
83740237840SJason Wang         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
8381576dbb5SEugenio Pérez                                  vdpa_device_fd, i, 1, false,
839a585fad2SEugenio Pérez                                  opts->x_svq, iova_range, iova_tree);
84040237840SJason Wang         if (!nc)
84140237840SJason Wang             goto err;
84240237840SJason Wang     }
84340237840SJason Wang 
8441576dbb5SEugenio Pérez     /* iova_tree ownership belongs to last NetClientState */
8451576dbb5SEugenio Pérez     g_steal_pointer(&iova_tree);
846654790b6SJason Wang     return 0;
84740237840SJason Wang 
84840237840SJason Wang err:
84940237840SJason Wang     if (i) {
8509bd05507SSi-Wei Liu         for (i--; i >= 0; i--) {
8519bd05507SSi-Wei Liu             qemu_del_net_client(ncs[i]);
8529bd05507SSi-Wei Liu         }
85340237840SJason Wang     }
8541576dbb5SEugenio Pérez 
8551576dbb5SEugenio Pérez err_svq:
85640237840SJason Wang     qemu_close(vdpa_device_fd);
85740237840SJason Wang 
85840237840SJason Wang     return -1;
8591e0a84eaSCindy Lu }
860