xref: /openbmc/qemu/net/vhost-vdpa.c (revision be4278b6)
11e0a84eaSCindy Lu /*
21e0a84eaSCindy Lu  * vhost-vdpa.c
31e0a84eaSCindy Lu  *
41e0a84eaSCindy Lu  * Copyright(c) 2017-2018 Intel Corporation.
51e0a84eaSCindy Lu  * Copyright(c) 2020 Red Hat, Inc.
61e0a84eaSCindy Lu  *
71e0a84eaSCindy Lu  * This work is licensed under the terms of the GNU GPL, version 2 or later.
81e0a84eaSCindy Lu  * See the COPYING file in the top-level directory.
91e0a84eaSCindy Lu  *
101e0a84eaSCindy Lu  */
111e0a84eaSCindy Lu 
121e0a84eaSCindy Lu #include "qemu/osdep.h"
131e0a84eaSCindy Lu #include "clients.h"
14bd907ae4SEugenio Pérez #include "hw/virtio/virtio-net.h"
151e0a84eaSCindy Lu #include "net/vhost_net.h"
161e0a84eaSCindy Lu #include "net/vhost-vdpa.h"
171e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h"
181e0a84eaSCindy Lu #include "qemu/config-file.h"
191e0a84eaSCindy Lu #include "qemu/error-report.h"
20bd907ae4SEugenio Pérez #include "qemu/log.h"
21bd907ae4SEugenio Pérez #include "qemu/memalign.h"
221e0a84eaSCindy Lu #include "qemu/option.h"
231e0a84eaSCindy Lu #include "qapi/error.h"
2440237840SJason Wang #include <linux/vhost.h>
251e0a84eaSCindy Lu #include <sys/ioctl.h>
261e0a84eaSCindy Lu #include <err.h>
271e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h"
281e0a84eaSCindy Lu #include "monitor/monitor.h"
291e0a84eaSCindy Lu #include "hw/virtio/vhost.h"
301e0a84eaSCindy Lu 
311e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */
321e0a84eaSCindy Lu typedef struct VhostVDPAState {
331e0a84eaSCindy Lu     NetClientState nc;
341e0a84eaSCindy Lu     struct vhost_vdpa vhost_vdpa;
351e0a84eaSCindy Lu     VHostNetState *vhost_net;
362df4dd31SEugenio Pérez 
372df4dd31SEugenio Pérez     /* Control commands shadow buffers */
382df4dd31SEugenio Pérez     void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
391e0a84eaSCindy Lu     bool started;
401e0a84eaSCindy Lu } VhostVDPAState;
411e0a84eaSCindy Lu 
421e0a84eaSCindy Lu const int vdpa_feature_bits[] = {
431e0a84eaSCindy Lu     VIRTIO_F_NOTIFY_ON_EMPTY,
441e0a84eaSCindy Lu     VIRTIO_RING_F_INDIRECT_DESC,
451e0a84eaSCindy Lu     VIRTIO_RING_F_EVENT_IDX,
461e0a84eaSCindy Lu     VIRTIO_F_ANY_LAYOUT,
471e0a84eaSCindy Lu     VIRTIO_F_VERSION_1,
481e0a84eaSCindy Lu     VIRTIO_NET_F_CSUM,
491e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_CSUM,
501e0a84eaSCindy Lu     VIRTIO_NET_F_GSO,
511e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO4,
521e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO6,
531e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ECN,
541e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_UFO,
551e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO4,
561e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO6,
571e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_ECN,
581e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_UFO,
591e0a84eaSCindy Lu     VIRTIO_NET_F_MRG_RXBUF,
601e0a84eaSCindy Lu     VIRTIO_NET_F_MTU,
6140237840SJason Wang     VIRTIO_NET_F_CTRL_RX,
6240237840SJason Wang     VIRTIO_NET_F_CTRL_RX_EXTRA,
6340237840SJason Wang     VIRTIO_NET_F_CTRL_VLAN,
6440237840SJason Wang     VIRTIO_NET_F_GUEST_ANNOUNCE,
6540237840SJason Wang     VIRTIO_NET_F_CTRL_MAC_ADDR,
6640237840SJason Wang     VIRTIO_NET_F_RSS,
6740237840SJason Wang     VIRTIO_NET_F_MQ,
6840237840SJason Wang     VIRTIO_NET_F_CTRL_VQ,
691e0a84eaSCindy Lu     VIRTIO_F_IOMMU_PLATFORM,
701e0a84eaSCindy Lu     VIRTIO_F_RING_PACKED,
710145c393SAndrew Melnychenko     VIRTIO_NET_F_RSS,
720145c393SAndrew Melnychenko     VIRTIO_NET_F_HASH_REPORT,
731e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ANNOUNCE,
749aa47eddSSi-Wei Liu     VIRTIO_NET_F_STATUS,
751e0a84eaSCindy Lu     VHOST_INVALID_FEATURE_BIT
761e0a84eaSCindy Lu };
771e0a84eaSCindy Lu 
781576dbb5SEugenio Pérez /** Supported device specific feature bits with SVQ */
791576dbb5SEugenio Pérez static const uint64_t vdpa_svq_device_features =
801576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CSUM) |
811576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
821576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MTU) |
831576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MAC) |
841576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
851576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
861576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
871576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
881576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
891576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
901576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
911576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
921576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
931576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STATUS) |
941576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
951576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
961576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
971576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
981576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STANDBY);
991576dbb5SEugenio Pérez 
1001e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
1011e0a84eaSCindy Lu {
1021e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1031e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1041e0a84eaSCindy Lu     return s->vhost_net;
1051e0a84eaSCindy Lu }
1061e0a84eaSCindy Lu 
1071e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
1081e0a84eaSCindy Lu {
1091e0a84eaSCindy Lu     uint32_t device_id;
1101e0a84eaSCindy Lu     int ret;
1111e0a84eaSCindy Lu     struct vhost_dev *hdev;
1121e0a84eaSCindy Lu 
1131e0a84eaSCindy Lu     hdev = (struct vhost_dev *)&net->dev;
1141e0a84eaSCindy Lu     ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
1151e0a84eaSCindy Lu     if (device_id != VIRTIO_ID_NET) {
1161e0a84eaSCindy Lu         return -ENOTSUP;
1171e0a84eaSCindy Lu     }
1181e0a84eaSCindy Lu     return ret;
1191e0a84eaSCindy Lu }
1201e0a84eaSCindy Lu 
12140237840SJason Wang static int vhost_vdpa_add(NetClientState *ncs, void *be,
12240237840SJason Wang                           int queue_pair_index, int nvqs)
1231e0a84eaSCindy Lu {
1241e0a84eaSCindy Lu     VhostNetOptions options;
1251e0a84eaSCindy Lu     struct vhost_net *net = NULL;
1261e0a84eaSCindy Lu     VhostVDPAState *s;
1271e0a84eaSCindy Lu     int ret;
1281e0a84eaSCindy Lu 
1291e0a84eaSCindy Lu     options.backend_type = VHOST_BACKEND_TYPE_VDPA;
1301e0a84eaSCindy Lu     assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1311e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, ncs);
1321e0a84eaSCindy Lu     options.net_backend = ncs;
1331e0a84eaSCindy Lu     options.opaque      = be;
1341e0a84eaSCindy Lu     options.busyloop_timeout = 0;
13540237840SJason Wang     options.nvqs = nvqs;
1361e0a84eaSCindy Lu 
1371e0a84eaSCindy Lu     net = vhost_net_init(&options);
1381e0a84eaSCindy Lu     if (!net) {
1391e0a84eaSCindy Lu         error_report("failed to init vhost_net for queue");
140a97ef87aSJason Wang         goto err_init;
1411e0a84eaSCindy Lu     }
1421e0a84eaSCindy Lu     s->vhost_net = net;
1431e0a84eaSCindy Lu     ret = vhost_vdpa_net_check_device_id(net);
1441e0a84eaSCindy Lu     if (ret) {
145a97ef87aSJason Wang         goto err_check;
1461e0a84eaSCindy Lu     }
1471e0a84eaSCindy Lu     return 0;
148a97ef87aSJason Wang err_check:
1491e0a84eaSCindy Lu     vhost_net_cleanup(net);
150ab36edcfSJason Wang     g_free(net);
151a97ef87aSJason Wang err_init:
1521e0a84eaSCindy Lu     return -1;
1531e0a84eaSCindy Lu }
1541e0a84eaSCindy Lu 
1551e0a84eaSCindy Lu static void vhost_vdpa_cleanup(NetClientState *nc)
1561e0a84eaSCindy Lu {
1571e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1581576dbb5SEugenio Pérez     struct vhost_dev *dev = &s->vhost_net->dev;
1591e0a84eaSCindy Lu 
1602df4dd31SEugenio Pérez     qemu_vfree(s->cvq_cmd_out_buffer);
1612df4dd31SEugenio Pérez     qemu_vfree(s->cvq_cmd_in_buffer);
1621576dbb5SEugenio Pérez     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
1631576dbb5SEugenio Pérez         g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
1641576dbb5SEugenio Pérez     }
1651e0a84eaSCindy Lu     if (s->vhost_net) {
1661e0a84eaSCindy Lu         vhost_net_cleanup(s->vhost_net);
1671e0a84eaSCindy Lu         g_free(s->vhost_net);
1681e0a84eaSCindy Lu         s->vhost_net = NULL;
1691e0a84eaSCindy Lu     }
17057b3a7d8SCindy Lu      if (s->vhost_vdpa.device_fd >= 0) {
17157b3a7d8SCindy Lu         qemu_close(s->vhost_vdpa.device_fd);
17257b3a7d8SCindy Lu         s->vhost_vdpa.device_fd = -1;
17357b3a7d8SCindy Lu     }
1741e0a84eaSCindy Lu }
1751e0a84eaSCindy Lu 
1761e0a84eaSCindy Lu static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
1771e0a84eaSCindy Lu {
1781e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1791e0a84eaSCindy Lu 
1801e0a84eaSCindy Lu     return true;
1811e0a84eaSCindy Lu }
1821e0a84eaSCindy Lu 
1831e0a84eaSCindy Lu static bool vhost_vdpa_has_ufo(NetClientState *nc)
1841e0a84eaSCindy Lu {
1851e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1861e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1871e0a84eaSCindy Lu     uint64_t features = 0;
1881e0a84eaSCindy Lu     features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
1891e0a84eaSCindy Lu     features = vhost_net_get_features(s->vhost_net, features);
1901e0a84eaSCindy Lu     return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
1911e0a84eaSCindy Lu 
1921e0a84eaSCindy Lu }
1931e0a84eaSCindy Lu 
194ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
195ee8a1c63SKevin Wolf                                        Error **errp)
196ee8a1c63SKevin Wolf {
197ee8a1c63SKevin Wolf     const char *driver = object_class_get_name(oc);
198ee8a1c63SKevin Wolf 
199ee8a1c63SKevin Wolf     if (!g_str_has_prefix(driver, "virtio-net-")) {
200ee8a1c63SKevin Wolf         error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
201ee8a1c63SKevin Wolf         return false;
202ee8a1c63SKevin Wolf     }
203ee8a1c63SKevin Wolf 
204ee8a1c63SKevin Wolf     return true;
205ee8a1c63SKevin Wolf }
206ee8a1c63SKevin Wolf 
207846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */
208846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
209846a1e85SEugenio Pérez                                   size_t size)
210846a1e85SEugenio Pérez {
211846a1e85SEugenio Pérez     return 0;
212846a1e85SEugenio Pérez }
213846a1e85SEugenio Pérez 
2141e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = {
2151e0a84eaSCindy Lu         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
2161e0a84eaSCindy Lu         .size = sizeof(VhostVDPAState),
217846a1e85SEugenio Pérez         .receive = vhost_vdpa_receive,
2181e0a84eaSCindy Lu         .cleanup = vhost_vdpa_cleanup,
2191e0a84eaSCindy Lu         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
2201e0a84eaSCindy Lu         .has_ufo = vhost_vdpa_has_ufo,
221ee8a1c63SKevin Wolf         .check_peer_type = vhost_vdpa_check_peer_type,
2221e0a84eaSCindy Lu };
2231e0a84eaSCindy Lu 
2242df4dd31SEugenio Pérez static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
2252df4dd31SEugenio Pérez {
2262df4dd31SEugenio Pérez     VhostIOVATree *tree = v->iova_tree;
2272df4dd31SEugenio Pérez     DMAMap needle = {
2282df4dd31SEugenio Pérez         /*
2292df4dd31SEugenio Pérez          * No need to specify size or to look for more translations since
2302df4dd31SEugenio Pérez          * this contiguous chunk was allocated by us.
2312df4dd31SEugenio Pérez          */
2322df4dd31SEugenio Pérez         .translated_addr = (hwaddr)(uintptr_t)addr,
2332df4dd31SEugenio Pérez     };
2342df4dd31SEugenio Pérez     const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
2352df4dd31SEugenio Pérez     int r;
2362df4dd31SEugenio Pérez 
2372df4dd31SEugenio Pérez     if (unlikely(!map)) {
2382df4dd31SEugenio Pérez         error_report("Cannot locate expected map");
2392df4dd31SEugenio Pérez         return;
2402df4dd31SEugenio Pérez     }
2412df4dd31SEugenio Pérez 
2422df4dd31SEugenio Pérez     r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
2432df4dd31SEugenio Pérez     if (unlikely(r != 0)) {
2442df4dd31SEugenio Pérez         error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
2452df4dd31SEugenio Pérez     }
2462df4dd31SEugenio Pérez 
24769292a8eSEugenio Pérez     vhost_iova_tree_remove(tree, *map);
2482df4dd31SEugenio Pérez }
2492df4dd31SEugenio Pérez 
2502df4dd31SEugenio Pérez static size_t vhost_vdpa_net_cvq_cmd_len(void)
2512df4dd31SEugenio Pérez {
2522df4dd31SEugenio Pérez     /*
2532df4dd31SEugenio Pérez      * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
2542df4dd31SEugenio Pérez      * In buffer is always 1 byte, so it should fit here
2552df4dd31SEugenio Pérez      */
2562df4dd31SEugenio Pérez     return sizeof(struct virtio_net_ctrl_hdr) +
2572df4dd31SEugenio Pérez            2 * sizeof(struct virtio_net_ctrl_mac) +
2582df4dd31SEugenio Pérez            MAC_TABLE_ENTRIES * ETH_ALEN;
2592df4dd31SEugenio Pérez }
2602df4dd31SEugenio Pérez 
/*
 * vhost_vdpa_net_cvq_cmd_len() rounded up to a multiple of the real host
 * page size.
 */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
2652df4dd31SEugenio Pérez 
2667a7f87e9SEugenio Pérez /** Map CVQ buffer. */
2677a7f87e9SEugenio Pérez static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
2687a7f87e9SEugenio Pérez                                   bool write)
2692df4dd31SEugenio Pérez {
2702df4dd31SEugenio Pérez     DMAMap map = {};
2712df4dd31SEugenio Pérez     int r;
2722df4dd31SEugenio Pérez 
2732df4dd31SEugenio Pérez     map.translated_addr = (hwaddr)(uintptr_t)buf;
2747a7f87e9SEugenio Pérez     map.size = size - 1;
2752df4dd31SEugenio Pérez     map.perm = write ? IOMMU_RW : IOMMU_RO,
2762df4dd31SEugenio Pérez     r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
2772df4dd31SEugenio Pérez     if (unlikely(r != IOVA_OK)) {
2782df4dd31SEugenio Pérez         error_report("Cannot map injected element");
2797a7f87e9SEugenio Pérez         return r;
2802df4dd31SEugenio Pérez     }
2812df4dd31SEugenio Pérez 
2822df4dd31SEugenio Pérez     r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
2832df4dd31SEugenio Pérez                            !write);
2842df4dd31SEugenio Pérez     if (unlikely(r < 0)) {
2852df4dd31SEugenio Pérez         goto dma_map_err;
2862df4dd31SEugenio Pérez     }
2872df4dd31SEugenio Pérez 
2887a7f87e9SEugenio Pérez     return 0;
2892df4dd31SEugenio Pérez 
2902df4dd31SEugenio Pérez dma_map_err:
29169292a8eSEugenio Pérez     vhost_iova_tree_remove(v->iova_tree, map);
2927a7f87e9SEugenio Pérez     return r;
2932df4dd31SEugenio Pérez }
2942df4dd31SEugenio Pérez 
2957a7f87e9SEugenio Pérez static int vhost_vdpa_net_cvq_start(NetClientState *nc)
2962df4dd31SEugenio Pérez {
2977a7f87e9SEugenio Pérez     VhostVDPAState *s;
2987a7f87e9SEugenio Pérez     int r;
2992df4dd31SEugenio Pérez 
3007a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
3017a7f87e9SEugenio Pérez 
3027a7f87e9SEugenio Pérez     s = DO_UPCAST(VhostVDPAState, nc, nc);
3037a7f87e9SEugenio Pérez     if (!s->vhost_vdpa.shadow_vqs_enabled) {
3047a7f87e9SEugenio Pérez         return 0;
3052df4dd31SEugenio Pérez     }
3062df4dd31SEugenio Pérez 
3077a7f87e9SEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
3087a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), false);
3097a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
3107a7f87e9SEugenio Pérez         return r;
3117a7f87e9SEugenio Pérez     }
3127a7f87e9SEugenio Pérez 
3137a7f87e9SEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer,
3147a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), true);
3157a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
3162df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
3172df4dd31SEugenio Pérez     }
3182df4dd31SEugenio Pérez 
3197a7f87e9SEugenio Pérez     return r;
3207a7f87e9SEugenio Pérez }
3217a7f87e9SEugenio Pérez 
3227a7f87e9SEugenio Pérez static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
3237a7f87e9SEugenio Pérez {
3247a7f87e9SEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
3257a7f87e9SEugenio Pérez 
3267a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
3277a7f87e9SEugenio Pérez 
3287a7f87e9SEugenio Pérez     if (s->vhost_vdpa.shadow_vqs_enabled) {
3297a7f87e9SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
3307a7f87e9SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer);
3317a7f87e9SEugenio Pérez     }
3322df4dd31SEugenio Pérez }
3332df4dd31SEugenio Pérez 
334*be4278b6SEugenio Pérez static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
335*be4278b6SEugenio Pérez                                       size_t in_len)
336*be4278b6SEugenio Pérez {
337*be4278b6SEugenio Pérez     /* Buffers for the device */
338*be4278b6SEugenio Pérez     const struct iovec out = {
339*be4278b6SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
340*be4278b6SEugenio Pérez         .iov_len = out_len,
341*be4278b6SEugenio Pérez     };
342*be4278b6SEugenio Pérez     const struct iovec in = {
343*be4278b6SEugenio Pérez         .iov_base = s->cvq_cmd_in_buffer,
344*be4278b6SEugenio Pérez         .iov_len = sizeof(virtio_net_ctrl_ack),
345*be4278b6SEugenio Pérez     };
346*be4278b6SEugenio Pérez     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
347*be4278b6SEugenio Pérez     int r;
348*be4278b6SEugenio Pérez 
349*be4278b6SEugenio Pérez     r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
350*be4278b6SEugenio Pérez     if (unlikely(r != 0)) {
351*be4278b6SEugenio Pérez         if (unlikely(r == -ENOSPC)) {
352*be4278b6SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
353*be4278b6SEugenio Pérez                           __func__);
354*be4278b6SEugenio Pérez         }
355*be4278b6SEugenio Pérez         return r;
356*be4278b6SEugenio Pérez     }
357*be4278b6SEugenio Pérez 
358*be4278b6SEugenio Pérez     /*
359*be4278b6SEugenio Pérez      * We can poll here since we've had BQL from the time we sent the
360*be4278b6SEugenio Pérez      * descriptor. Also, we need to take the answer before SVQ pulls by itself,
361*be4278b6SEugenio Pérez      * when BQL is released
362*be4278b6SEugenio Pérez      */
363*be4278b6SEugenio Pérez     return vhost_svq_poll(svq);
364*be4278b6SEugenio Pérez }
365*be4278b6SEugenio Pérez 
366f8972b56SEugenio Pérez static NetClientInfo net_vhost_vdpa_cvq_info = {
367f8972b56SEugenio Pérez     .type = NET_CLIENT_DRIVER_VHOST_VDPA,
368f8972b56SEugenio Pérez     .size = sizeof(VhostVDPAState),
369f8972b56SEugenio Pérez     .receive = vhost_vdpa_receive,
3707a7f87e9SEugenio Pérez     .start = vhost_vdpa_net_cvq_start,
3717a7f87e9SEugenio Pérez     .stop = vhost_vdpa_net_cvq_stop,
372f8972b56SEugenio Pérez     .cleanup = vhost_vdpa_cleanup,
373f8972b56SEugenio Pérez     .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
374f8972b56SEugenio Pérez     .has_ufo = vhost_vdpa_has_ufo,
375f8972b56SEugenio Pérez     .check_peer_type = vhost_vdpa_check_peer_type,
376f8972b56SEugenio Pérez };
377f8972b56SEugenio Pérez 
3782df4dd31SEugenio Pérez /**
3792df4dd31SEugenio Pérez  * Do not forward commands not supported by SVQ. Otherwise, the device could
3802df4dd31SEugenio Pérez  * accept it and qemu would not know how to update the device model.
3812df4dd31SEugenio Pérez  */
3827a7f87e9SEugenio Pérez static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len)
3832df4dd31SEugenio Pérez {
3842df4dd31SEugenio Pérez     struct virtio_net_ctrl_hdr ctrl;
3852df4dd31SEugenio Pérez 
3867a7f87e9SEugenio Pérez     if (unlikely(len < sizeof(ctrl))) {
3872df4dd31SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR,
3887a7f87e9SEugenio Pérez                       "%s: invalid legnth of out buffer %zu\n", __func__, len);
3892df4dd31SEugenio Pérez         return false;
3902df4dd31SEugenio Pérez     }
3912df4dd31SEugenio Pérez 
3927a7f87e9SEugenio Pérez     memcpy(&ctrl, out_buf, sizeof(ctrl));
3932df4dd31SEugenio Pérez     switch (ctrl.class) {
3942df4dd31SEugenio Pérez     case VIRTIO_NET_CTRL_MAC:
3952df4dd31SEugenio Pérez         switch (ctrl.cmd) {
3962df4dd31SEugenio Pérez         case VIRTIO_NET_CTRL_MAC_ADDR_SET:
3972df4dd31SEugenio Pérez             return true;
3982df4dd31SEugenio Pérez         default:
3992df4dd31SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
4002df4dd31SEugenio Pérez                           __func__, ctrl.cmd);
4012df4dd31SEugenio Pérez         };
4022df4dd31SEugenio Pérez         break;
4032df4dd31SEugenio Pérez     default:
4042df4dd31SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
4052df4dd31SEugenio Pérez                       __func__, ctrl.class);
4062df4dd31SEugenio Pérez     };
4072df4dd31SEugenio Pérez 
4082df4dd31SEugenio Pérez     return false;
4092df4dd31SEugenio Pérez }
4102df4dd31SEugenio Pérez 
4112df4dd31SEugenio Pérez /**
4122df4dd31SEugenio Pérez  * Validate and copy control virtqueue commands.
4132df4dd31SEugenio Pérez  *
4142df4dd31SEugenio Pérez  * Following QEMU guidelines, we offer a copy of the buffers to the device to
4152df4dd31SEugenio Pérez  * prevent TOCTOU bugs.
416bd907ae4SEugenio Pérez  */
417bd907ae4SEugenio Pérez static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
418bd907ae4SEugenio Pérez                                             VirtQueueElement *elem,
419bd907ae4SEugenio Pérez                                             void *opaque)
420bd907ae4SEugenio Pérez {
4212df4dd31SEugenio Pérez     VhostVDPAState *s = opaque;
422*be4278b6SEugenio Pérez     size_t in_len;
423bd907ae4SEugenio Pérez     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
4247a7f87e9SEugenio Pérez     /* Out buffer sent to both the vdpa device and the device model */
4257a7f87e9SEugenio Pérez     struct iovec out = {
4267a7f87e9SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
4277a7f87e9SEugenio Pérez     };
4282df4dd31SEugenio Pérez     /* in buffer used for device model */
4292df4dd31SEugenio Pérez     const struct iovec in = {
4302df4dd31SEugenio Pérez         .iov_base = &status,
4312df4dd31SEugenio Pérez         .iov_len = sizeof(status),
4322df4dd31SEugenio Pérez     };
433*be4278b6SEugenio Pérez     ssize_t dev_written = -EINVAL;
4342df4dd31SEugenio Pérez     bool ok;
435bd907ae4SEugenio Pérez 
4367a7f87e9SEugenio Pérez     out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
4377a7f87e9SEugenio Pérez                              s->cvq_cmd_out_buffer,
4387a7f87e9SEugenio Pérez                              vhost_vdpa_net_cvq_cmd_len());
4397a7f87e9SEugenio Pérez     ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len);
4402df4dd31SEugenio Pérez     if (unlikely(!ok)) {
4412df4dd31SEugenio Pérez         goto out;
4422df4dd31SEugenio Pérez     }
443bd907ae4SEugenio Pérez 
444*be4278b6SEugenio Pérez     dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
445*be4278b6SEugenio Pérez     if (unlikely(dev_written < 0)) {
446bd907ae4SEugenio Pérez         goto out;
447bd907ae4SEugenio Pérez     }
448bd907ae4SEugenio Pérez 
449bd907ae4SEugenio Pérez     if (unlikely(dev_written < sizeof(status))) {
450bd907ae4SEugenio Pérez         error_report("Insufficient written data (%zu)", dev_written);
4512df4dd31SEugenio Pérez         goto out;
4522df4dd31SEugenio Pérez     }
4532df4dd31SEugenio Pérez 
4547a7f87e9SEugenio Pérez     memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status));
4552df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
456*be4278b6SEugenio Pérez         return VIRTIO_NET_ERR;
4572df4dd31SEugenio Pérez     }
4582df4dd31SEugenio Pérez 
4592df4dd31SEugenio Pérez     status = VIRTIO_NET_ERR;
4607a7f87e9SEugenio Pérez     virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
4612df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
4622df4dd31SEugenio Pérez         error_report("Bad CVQ processing in model");
463bd907ae4SEugenio Pérez     }
464bd907ae4SEugenio Pérez 
465bd907ae4SEugenio Pérez out:
466bd907ae4SEugenio Pérez     in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
467bd907ae4SEugenio Pérez                           sizeof(status));
468bd907ae4SEugenio Pérez     if (unlikely(in_len < sizeof(status))) {
469bd907ae4SEugenio Pérez         error_report("Bad device CVQ written length");
470bd907ae4SEugenio Pérez     }
471bd907ae4SEugenio Pérez     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
472bd907ae4SEugenio Pérez     g_free(elem);
473*be4278b6SEugenio Pérez     return dev_written < 0 ? dev_written : 0;
474bd907ae4SEugenio Pérez }
475bd907ae4SEugenio Pérez 
476bd907ae4SEugenio Pérez static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
477bd907ae4SEugenio Pérez     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
478bd907ae4SEugenio Pérez };
479bd907ae4SEugenio Pérez 
480654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
481654790b6SJason Wang                                            const char *device,
482654790b6SJason Wang                                            const char *name,
48340237840SJason Wang                                            int vdpa_device_fd,
48440237840SJason Wang                                            int queue_pair_index,
48540237840SJason Wang                                            int nvqs,
4861576dbb5SEugenio Pérez                                            bool is_datapath,
4871576dbb5SEugenio Pérez                                            bool svq,
4881576dbb5SEugenio Pérez                                            VhostIOVATree *iova_tree)
4891e0a84eaSCindy Lu {
4901e0a84eaSCindy Lu     NetClientState *nc = NULL;
4911e0a84eaSCindy Lu     VhostVDPAState *s;
4921e0a84eaSCindy Lu     int ret = 0;
4931e0a84eaSCindy Lu     assert(name);
49440237840SJason Wang     if (is_datapath) {
49540237840SJason Wang         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
49640237840SJason Wang                                  name);
49740237840SJason Wang     } else {
498f8972b56SEugenio Pérez         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
49940237840SJason Wang                                          device, name);
50040237840SJason Wang     }
50156e6f594SJason Wang     snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA);
5021e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, nc);
5037327813dSJason Wang 
5041e0a84eaSCindy Lu     s->vhost_vdpa.device_fd = vdpa_device_fd;
50540237840SJason Wang     s->vhost_vdpa.index = queue_pair_index;
5061576dbb5SEugenio Pérez     s->vhost_vdpa.shadow_vqs_enabled = svq;
5071576dbb5SEugenio Pérez     s->vhost_vdpa.iova_tree = iova_tree;
508bd907ae4SEugenio Pérez     if (!is_datapath) {
5092df4dd31SEugenio Pérez         s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
5102df4dd31SEugenio Pérez                                             vhost_vdpa_net_cvq_cmd_page_len());
5112df4dd31SEugenio Pérez         memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
5122df4dd31SEugenio Pérez         s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
5132df4dd31SEugenio Pérez                                             vhost_vdpa_net_cvq_cmd_page_len());
5142df4dd31SEugenio Pérez         memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
5152df4dd31SEugenio Pérez 
516bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
517bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops_opaque = s;
5181576dbb5SEugenio Pérez         error_setg(&s->vhost_vdpa.migration_blocker,
5191576dbb5SEugenio Pérez                    "Migration disabled: vhost-vdpa uses CVQ.");
520bd907ae4SEugenio Pérez     }
52140237840SJason Wang     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
52274af5eecSJason Wang     if (ret) {
52374af5eecSJason Wang         qemu_del_net_client(nc);
524654790b6SJason Wang         return NULL;
52574af5eecSJason Wang     }
526654790b6SJason Wang     return nc;
5271e0a84eaSCindy Lu }
5281e0a84eaSCindy Lu 
5291576dbb5SEugenio Pérez static int vhost_vdpa_get_iova_range(int fd,
5301576dbb5SEugenio Pérez                                      struct vhost_vdpa_iova_range *iova_range)
5311576dbb5SEugenio Pérez {
5321576dbb5SEugenio Pérez     int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
5331576dbb5SEugenio Pérez 
5341576dbb5SEugenio Pérez     return ret < 0 ? -errno : 0;
5351576dbb5SEugenio Pérez }
5361576dbb5SEugenio Pérez 
5378170ab3fSEugenio Pérez static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
5388170ab3fSEugenio Pérez {
5398170ab3fSEugenio Pérez     int ret = ioctl(fd, VHOST_GET_FEATURES, features);
5408170ab3fSEugenio Pérez     if (unlikely(ret < 0)) {
5418170ab3fSEugenio Pérez         error_setg_errno(errp, errno,
5428170ab3fSEugenio Pérez                          "Fail to query features from vhost-vDPA device");
5438170ab3fSEugenio Pérez     }
5448170ab3fSEugenio Pérez     return ret;
5458170ab3fSEugenio Pérez }
5468170ab3fSEugenio Pérez 
5478170ab3fSEugenio Pérez static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
5488170ab3fSEugenio Pérez                                           int *has_cvq, Error **errp)
54940237840SJason Wang {
55040237840SJason Wang     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
551cd523a41SStefano Garzarella     g_autofree struct vhost_vdpa_config *config = NULL;
55240237840SJason Wang     __virtio16 *max_queue_pairs;
55340237840SJason Wang     int ret;
55440237840SJason Wang 
55540237840SJason Wang     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
55640237840SJason Wang         *has_cvq = 1;
55740237840SJason Wang     } else {
55840237840SJason Wang         *has_cvq = 0;
55940237840SJason Wang     }
56040237840SJason Wang 
56140237840SJason Wang     if (features & (1 << VIRTIO_NET_F_MQ)) {
56240237840SJason Wang         config = g_malloc0(config_size + sizeof(*max_queue_pairs));
56340237840SJason Wang         config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
56440237840SJason Wang         config->len = sizeof(*max_queue_pairs);
56540237840SJason Wang 
56640237840SJason Wang         ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
56740237840SJason Wang         if (ret) {
56840237840SJason Wang             error_setg(errp, "Fail to get config from vhost-vDPA device");
56940237840SJason Wang             return -ret;
57040237840SJason Wang         }
57140237840SJason Wang 
57240237840SJason Wang         max_queue_pairs = (__virtio16 *)&config->buf;
57340237840SJason Wang 
57440237840SJason Wang         return lduw_le_p(max_queue_pairs);
57540237840SJason Wang     }
57640237840SJason Wang 
57740237840SJason Wang     return 1;
57840237840SJason Wang }
57940237840SJason Wang 
5801e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
5811e0a84eaSCindy Lu                         NetClientState *peer, Error **errp)
5821e0a84eaSCindy Lu {
5831e0a84eaSCindy Lu     const NetdevVhostVDPAOptions *opts;
5848170ab3fSEugenio Pérez     uint64_t features;
585654790b6SJason Wang     int vdpa_device_fd;
586eb3cb751SEugenio Pérez     g_autofree NetClientState **ncs = NULL;
5871576dbb5SEugenio Pérez     g_autoptr(VhostIOVATree) iova_tree = NULL;
588eb3cb751SEugenio Pérez     NetClientState *nc;
589aed5da45SEugenio Pérez     int queue_pairs, r, i = 0, has_cvq = 0;
5901e0a84eaSCindy Lu 
5911e0a84eaSCindy Lu     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
5921e0a84eaSCindy Lu     opts = &netdev->u.vhost_vdpa;
593c8295404SEugenio Pérez     if (!opts->vhostdev) {
594c8295404SEugenio Pérez         error_setg(errp, "vdpa character device not specified with vhostdev");
595c8295404SEugenio Pérez         return -1;
596c8295404SEugenio Pérez     }
5977327813dSJason Wang 
5980351152bSEugenio Pérez     vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
5997327813dSJason Wang     if (vdpa_device_fd == -1) {
6007327813dSJason Wang         return -errno;
6017327813dSJason Wang     }
6027327813dSJason Wang 
6038170ab3fSEugenio Pérez     r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
6048170ab3fSEugenio Pérez     if (unlikely(r < 0)) {
605aed5da45SEugenio Pérez         goto err;
6068170ab3fSEugenio Pérez     }
6078170ab3fSEugenio Pérez 
6088170ab3fSEugenio Pérez     queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
60940237840SJason Wang                                                  &has_cvq, errp);
61040237840SJason Wang     if (queue_pairs < 0) {
6117327813dSJason Wang         qemu_close(vdpa_device_fd);
61240237840SJason Wang         return queue_pairs;
6137327813dSJason Wang     }
6147327813dSJason Wang 
6151576dbb5SEugenio Pérez     if (opts->x_svq) {
6161576dbb5SEugenio Pérez         struct vhost_vdpa_iova_range iova_range;
6171576dbb5SEugenio Pérez 
6181576dbb5SEugenio Pérez         uint64_t invalid_dev_features =
6191576dbb5SEugenio Pérez             features & ~vdpa_svq_device_features &
6201576dbb5SEugenio Pérez             /* Transport are all accepted at this point */
6211576dbb5SEugenio Pérez             ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
6221576dbb5SEugenio Pérez                              VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
6231576dbb5SEugenio Pérez 
6241576dbb5SEugenio Pérez         if (invalid_dev_features) {
6251576dbb5SEugenio Pérez             error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
6261576dbb5SEugenio Pérez                        invalid_dev_features);
6271576dbb5SEugenio Pérez             goto err_svq;
6281576dbb5SEugenio Pérez         }
6291576dbb5SEugenio Pérez 
6301576dbb5SEugenio Pérez         vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
6311576dbb5SEugenio Pérez         iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
6321576dbb5SEugenio Pérez     }
6331576dbb5SEugenio Pérez 
63440237840SJason Wang     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
63540237840SJason Wang 
63640237840SJason Wang     for (i = 0; i < queue_pairs; i++) {
63740237840SJason Wang         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
6381576dbb5SEugenio Pérez                                      vdpa_device_fd, i, 2, true, opts->x_svq,
6391576dbb5SEugenio Pérez                                      iova_tree);
64040237840SJason Wang         if (!ncs[i])
64140237840SJason Wang             goto err;
64240237840SJason Wang     }
64340237840SJason Wang 
64440237840SJason Wang     if (has_cvq) {
64540237840SJason Wang         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
6461576dbb5SEugenio Pérez                                  vdpa_device_fd, i, 1, false,
6471576dbb5SEugenio Pérez                                  opts->x_svq, iova_tree);
64840237840SJason Wang         if (!nc)
64940237840SJason Wang             goto err;
65040237840SJason Wang     }
65140237840SJason Wang 
6521576dbb5SEugenio Pérez     /* iova_tree ownership belongs to last NetClientState */
6531576dbb5SEugenio Pérez     g_steal_pointer(&iova_tree);
654654790b6SJason Wang     return 0;
65540237840SJason Wang 
65640237840SJason Wang err:
65740237840SJason Wang     if (i) {
6589bd05507SSi-Wei Liu         for (i--; i >= 0; i--) {
6599bd05507SSi-Wei Liu             qemu_del_net_client(ncs[i]);
6609bd05507SSi-Wei Liu         }
66140237840SJason Wang     }
6621576dbb5SEugenio Pérez 
6631576dbb5SEugenio Pérez err_svq:
66440237840SJason Wang     qemu_close(vdpa_device_fd);
66540237840SJason Wang 
66640237840SJason Wang     return -1;
6671e0a84eaSCindy Lu }
668