xref: /openbmc/qemu/net/vhost-vdpa.c (revision 6f348071)
11e0a84eaSCindy Lu /*
21e0a84eaSCindy Lu  * vhost-vdpa.c
31e0a84eaSCindy Lu  *
41e0a84eaSCindy Lu  * Copyright(c) 2017-2018 Intel Corporation.
51e0a84eaSCindy Lu  * Copyright(c) 2020 Red Hat, Inc.
61e0a84eaSCindy Lu  *
71e0a84eaSCindy Lu  * This work is licensed under the terms of the GNU GPL, version 2 or later.
81e0a84eaSCindy Lu  * See the COPYING file in the top-level directory.
91e0a84eaSCindy Lu  *
101e0a84eaSCindy Lu  */
111e0a84eaSCindy Lu 
121e0a84eaSCindy Lu #include "qemu/osdep.h"
131e0a84eaSCindy Lu #include "clients.h"
14bd907ae4SEugenio Pérez #include "hw/virtio/virtio-net.h"
151e0a84eaSCindy Lu #include "net/vhost_net.h"
161e0a84eaSCindy Lu #include "net/vhost-vdpa.h"
171e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h"
181e0a84eaSCindy Lu #include "qemu/config-file.h"
191e0a84eaSCindy Lu #include "qemu/error-report.h"
20bd907ae4SEugenio Pérez #include "qemu/log.h"
21bd907ae4SEugenio Pérez #include "qemu/memalign.h"
221e0a84eaSCindy Lu #include "qemu/option.h"
231e0a84eaSCindy Lu #include "qapi/error.h"
2440237840SJason Wang #include <linux/vhost.h>
251e0a84eaSCindy Lu #include <sys/ioctl.h>
261e0a84eaSCindy Lu #include <err.h>
271e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h"
281e0a84eaSCindy Lu #include "monitor/monitor.h"
2969498430SEugenio Pérez #include "migration/migration.h"
3069498430SEugenio Pérez #include "migration/misc.h"
311e0a84eaSCindy Lu #include "hw/virtio/vhost.h"
321e0a84eaSCindy Lu 
331e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */
341e0a84eaSCindy Lu typedef struct VhostVDPAState {
351e0a84eaSCindy Lu     NetClientState nc;
361e0a84eaSCindy Lu     struct vhost_vdpa vhost_vdpa;
3769498430SEugenio Pérez     Notifier migration_state;
381e0a84eaSCindy Lu     VHostNetState *vhost_net;
392df4dd31SEugenio Pérez 
402df4dd31SEugenio Pérez     /* Control commands shadow buffers */
4117fb889fSEugenio Pérez     void *cvq_cmd_out_buffer;
4217fb889fSEugenio Pérez     virtio_net_ctrl_ack *status;
4317fb889fSEugenio Pérez 
447f211a28SEugenio Pérez     /* The device always have SVQ enabled */
457f211a28SEugenio Pérez     bool always_svq;
46152128d6SEugenio Pérez 
47152128d6SEugenio Pérez     /* The device can isolate CVQ in its own ASID */
48152128d6SEugenio Pérez     bool cvq_isolated;
49152128d6SEugenio Pérez 
501e0a84eaSCindy Lu     bool started;
511e0a84eaSCindy Lu } VhostVDPAState;
521e0a84eaSCindy Lu 
532875a0caSHawkins Jiawei /*
542875a0caSHawkins Jiawei  * The array is sorted alphabetically in ascending order,
552875a0caSHawkins Jiawei  * with the exception of VHOST_INVALID_FEATURE_BIT,
562875a0caSHawkins Jiawei  * which should always be the last entry.
572875a0caSHawkins Jiawei  */
581e0a84eaSCindy Lu const int vdpa_feature_bits[] = {
591e0a84eaSCindy Lu     VIRTIO_F_ANY_LAYOUT,
602875a0caSHawkins Jiawei     VIRTIO_F_IOMMU_PLATFORM,
612875a0caSHawkins Jiawei     VIRTIO_F_NOTIFY_ON_EMPTY,
622875a0caSHawkins Jiawei     VIRTIO_F_RING_PACKED,
632875a0caSHawkins Jiawei     VIRTIO_F_RING_RESET,
641e0a84eaSCindy Lu     VIRTIO_F_VERSION_1,
651e0a84eaSCindy Lu     VIRTIO_NET_F_CSUM,
6651e84244SEugenio Pérez     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
672875a0caSHawkins Jiawei     VIRTIO_NET_F_CTRL_MAC_ADDR,
6840237840SJason Wang     VIRTIO_NET_F_CTRL_RX,
6940237840SJason Wang     VIRTIO_NET_F_CTRL_RX_EXTRA,
7040237840SJason Wang     VIRTIO_NET_F_CTRL_VLAN,
7140237840SJason Wang     VIRTIO_NET_F_CTRL_VQ,
722875a0caSHawkins Jiawei     VIRTIO_NET_F_GSO,
732875a0caSHawkins Jiawei     VIRTIO_NET_F_GUEST_CSUM,
742875a0caSHawkins Jiawei     VIRTIO_NET_F_GUEST_ECN,
752875a0caSHawkins Jiawei     VIRTIO_NET_F_GUEST_TSO4,
762875a0caSHawkins Jiawei     VIRTIO_NET_F_GUEST_TSO6,
772875a0caSHawkins Jiawei     VIRTIO_NET_F_GUEST_UFO,
780145c393SAndrew Melnychenko     VIRTIO_NET_F_HASH_REPORT,
792875a0caSHawkins Jiawei     VIRTIO_NET_F_HOST_ECN,
802875a0caSHawkins Jiawei     VIRTIO_NET_F_HOST_TSO4,
812875a0caSHawkins Jiawei     VIRTIO_NET_F_HOST_TSO6,
822875a0caSHawkins Jiawei     VIRTIO_NET_F_HOST_UFO,
832875a0caSHawkins Jiawei     VIRTIO_NET_F_MQ,
842875a0caSHawkins Jiawei     VIRTIO_NET_F_MRG_RXBUF,
852875a0caSHawkins Jiawei     VIRTIO_NET_F_MTU,
862875a0caSHawkins Jiawei     VIRTIO_NET_F_RSS,
879aa47eddSSi-Wei Liu     VIRTIO_NET_F_STATUS,
882875a0caSHawkins Jiawei     VIRTIO_RING_F_EVENT_IDX,
892875a0caSHawkins Jiawei     VIRTIO_RING_F_INDIRECT_DESC,
902875a0caSHawkins Jiawei 
912875a0caSHawkins Jiawei     /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
921e0a84eaSCindy Lu     VHOST_INVALID_FEATURE_BIT
931e0a84eaSCindy Lu };
941e0a84eaSCindy Lu 
951576dbb5SEugenio Pérez /** Supported device specific feature bits with SVQ */
961576dbb5SEugenio Pérez static const uint64_t vdpa_svq_device_features =
971576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CSUM) |
981576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
994b4a1378SHawkins Jiawei     BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
1001576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MTU) |
1011576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MAC) |
1021576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
1031576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
1041576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
1051576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
1061576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
1071576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
1081576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
1091576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
1101576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
1111576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STATUS) |
1121576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
11372b99a87SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MQ) |
1141576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
1151576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
116609ab4c3SEugenio Pérez     /* VHOST_F_LOG_ALL is exposed by SVQ */
117609ab4c3SEugenio Pérez     BIT_ULL(VHOST_F_LOG_ALL) |
1181576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
1190d74e2b7SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STANDBY) |
1200d74e2b7SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
1211576dbb5SEugenio Pérez 
122c1a10086SEugenio Pérez #define VHOST_VDPA_NET_CVQ_ASID 1
123c1a10086SEugenio Pérez 
1241e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
1251e0a84eaSCindy Lu {
1261e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1271e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1281e0a84eaSCindy Lu     return s->vhost_net;
1291e0a84eaSCindy Lu }
1301e0a84eaSCindy Lu 
131915bf6ccSEugenio Pérez static size_t vhost_vdpa_net_cvq_cmd_len(void)
132915bf6ccSEugenio Pérez {
133915bf6ccSEugenio Pérez     /*
134915bf6ccSEugenio Pérez      * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
135915bf6ccSEugenio Pérez      * In buffer is always 1 byte, so it should fit here
136915bf6ccSEugenio Pérez      */
137915bf6ccSEugenio Pérez     return sizeof(struct virtio_net_ctrl_hdr) +
138915bf6ccSEugenio Pérez            2 * sizeof(struct virtio_net_ctrl_mac) +
139915bf6ccSEugenio Pérez            MAC_TABLE_ENTRIES * ETH_ALEN;
140915bf6ccSEugenio Pérez }
141915bf6ccSEugenio Pérez 
/** Control command buffer length rounded up to the host page size */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
146915bf6ccSEugenio Pérez 
14736e46472SEugenio Pérez static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
14836e46472SEugenio Pérez {
14936e46472SEugenio Pérez     uint64_t invalid_dev_features =
15036e46472SEugenio Pérez         features & ~vdpa_svq_device_features &
15136e46472SEugenio Pérez         /* Transport are all accepted at this point */
15236e46472SEugenio Pérez         ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
15336e46472SEugenio Pérez                          VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
15436e46472SEugenio Pérez 
15536e46472SEugenio Pérez     if (invalid_dev_features) {
15636e46472SEugenio Pérez         error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
15736e46472SEugenio Pérez                    invalid_dev_features);
158258a0394SEugenio Pérez         return false;
15936e46472SEugenio Pérez     }
16036e46472SEugenio Pérez 
161258a0394SEugenio Pérez     return vhost_svq_valid_features(features, errp);
16236e46472SEugenio Pérez }
16336e46472SEugenio Pérez 
1641e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
1651e0a84eaSCindy Lu {
1661e0a84eaSCindy Lu     uint32_t device_id;
1671e0a84eaSCindy Lu     int ret;
1681e0a84eaSCindy Lu     struct vhost_dev *hdev;
1691e0a84eaSCindy Lu 
1701e0a84eaSCindy Lu     hdev = (struct vhost_dev *)&net->dev;
1711e0a84eaSCindy Lu     ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
1721e0a84eaSCindy Lu     if (device_id != VIRTIO_ID_NET) {
1731e0a84eaSCindy Lu         return -ENOTSUP;
1741e0a84eaSCindy Lu     }
1751e0a84eaSCindy Lu     return ret;
1761e0a84eaSCindy Lu }
1771e0a84eaSCindy Lu 
17840237840SJason Wang static int vhost_vdpa_add(NetClientState *ncs, void *be,
17940237840SJason Wang                           int queue_pair_index, int nvqs)
1801e0a84eaSCindy Lu {
1811e0a84eaSCindy Lu     VhostNetOptions options;
1821e0a84eaSCindy Lu     struct vhost_net *net = NULL;
1831e0a84eaSCindy Lu     VhostVDPAState *s;
1841e0a84eaSCindy Lu     int ret;
1851e0a84eaSCindy Lu 
1861e0a84eaSCindy Lu     options.backend_type = VHOST_BACKEND_TYPE_VDPA;
1871e0a84eaSCindy Lu     assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1881e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, ncs);
1891e0a84eaSCindy Lu     options.net_backend = ncs;
1901e0a84eaSCindy Lu     options.opaque      = be;
1911e0a84eaSCindy Lu     options.busyloop_timeout = 0;
19240237840SJason Wang     options.nvqs = nvqs;
1931e0a84eaSCindy Lu 
1941e0a84eaSCindy Lu     net = vhost_net_init(&options);
1951e0a84eaSCindy Lu     if (!net) {
1961e0a84eaSCindy Lu         error_report("failed to init vhost_net for queue");
197a97ef87aSJason Wang         goto err_init;
1981e0a84eaSCindy Lu     }
1991e0a84eaSCindy Lu     s->vhost_net = net;
2001e0a84eaSCindy Lu     ret = vhost_vdpa_net_check_device_id(net);
2011e0a84eaSCindy Lu     if (ret) {
202a97ef87aSJason Wang         goto err_check;
2031e0a84eaSCindy Lu     }
2041e0a84eaSCindy Lu     return 0;
205a97ef87aSJason Wang err_check:
2061e0a84eaSCindy Lu     vhost_net_cleanup(net);
207ab36edcfSJason Wang     g_free(net);
208a97ef87aSJason Wang err_init:
2091e0a84eaSCindy Lu     return -1;
2101e0a84eaSCindy Lu }
2111e0a84eaSCindy Lu 
2121e0a84eaSCindy Lu static void vhost_vdpa_cleanup(NetClientState *nc)
2131e0a84eaSCindy Lu {
2141e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
2151e0a84eaSCindy Lu 
216a0d7215eSAni Sinha     /*
217a0d7215eSAni Sinha      * If a peer NIC is attached, do not cleanup anything.
218a0d7215eSAni Sinha      * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
219a0d7215eSAni Sinha      * when the guest is shutting down.
220a0d7215eSAni Sinha      */
221a0d7215eSAni Sinha     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
222a0d7215eSAni Sinha         return;
223a0d7215eSAni Sinha     }
224babf8b87SEugenio Pérez     munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
225babf8b87SEugenio Pérez     munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
2261e0a84eaSCindy Lu     if (s->vhost_net) {
2271e0a84eaSCindy Lu         vhost_net_cleanup(s->vhost_net);
2281e0a84eaSCindy Lu         g_free(s->vhost_net);
2291e0a84eaSCindy Lu         s->vhost_net = NULL;
2301e0a84eaSCindy Lu     }
23157b3a7d8SCindy Lu      if (s->vhost_vdpa.device_fd >= 0) {
23257b3a7d8SCindy Lu         qemu_close(s->vhost_vdpa.device_fd);
23357b3a7d8SCindy Lu         s->vhost_vdpa.device_fd = -1;
23457b3a7d8SCindy Lu     }
2351e0a84eaSCindy Lu }
2361e0a84eaSCindy Lu 
2371e0a84eaSCindy Lu static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
2381e0a84eaSCindy Lu {
2391e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
2401e0a84eaSCindy Lu 
2411e0a84eaSCindy Lu     return true;
2421e0a84eaSCindy Lu }
2431e0a84eaSCindy Lu 
2441e0a84eaSCindy Lu static bool vhost_vdpa_has_ufo(NetClientState *nc)
2451e0a84eaSCindy Lu {
2461e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
2471e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
2481e0a84eaSCindy Lu     uint64_t features = 0;
2491e0a84eaSCindy Lu     features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
2501e0a84eaSCindy Lu     features = vhost_net_get_features(s->vhost_net, features);
2511e0a84eaSCindy Lu     return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
2521e0a84eaSCindy Lu 
2531e0a84eaSCindy Lu }
2541e0a84eaSCindy Lu 
255ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
256ee8a1c63SKevin Wolf                                        Error **errp)
257ee8a1c63SKevin Wolf {
258ee8a1c63SKevin Wolf     const char *driver = object_class_get_name(oc);
259ee8a1c63SKevin Wolf 
260ee8a1c63SKevin Wolf     if (!g_str_has_prefix(driver, "virtio-net-")) {
261ee8a1c63SKevin Wolf         error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
262ee8a1c63SKevin Wolf         return false;
263ee8a1c63SKevin Wolf     }
264ee8a1c63SKevin Wolf 
265ee8a1c63SKevin Wolf     return true;
266ee8a1c63SKevin Wolf }
267ee8a1c63SKevin Wolf 
268846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */
269846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
270846a1e85SEugenio Pérez                                   size_t size)
271846a1e85SEugenio Pérez {
272bc5add1dSSi-Wei Liu     return size;
273846a1e85SEugenio Pérez }
274846a1e85SEugenio Pérez 
27500ef422eSEugenio Pérez /** From any vdpa net client, get the netclient of the first queue pair */
27600ef422eSEugenio Pérez static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
27700ef422eSEugenio Pérez {
27800ef422eSEugenio Pérez     NICState *nic = qemu_get_nic(s->nc.peer);
27900ef422eSEugenio Pérez     NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
28000ef422eSEugenio Pérez 
28100ef422eSEugenio Pérez     return DO_UPCAST(VhostVDPAState, nc, nc0);
28200ef422eSEugenio Pérez }
28300ef422eSEugenio Pérez 
28469498430SEugenio Pérez static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
28569498430SEugenio Pérez {
28669498430SEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
28769498430SEugenio Pérez     VirtIONet *n;
28869498430SEugenio Pérez     VirtIODevice *vdev;
28969498430SEugenio Pérez     int data_queue_pairs, cvq, r;
29069498430SEugenio Pérez 
29169498430SEugenio Pérez     /* We are only called on the first data vqs and only if x-svq is not set */
29269498430SEugenio Pérez     if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
29369498430SEugenio Pérez         return;
29469498430SEugenio Pérez     }
29569498430SEugenio Pérez 
29669498430SEugenio Pérez     vdev = v->dev->vdev;
29769498430SEugenio Pérez     n = VIRTIO_NET(vdev);
29869498430SEugenio Pérez     if (!n->vhost_started) {
29969498430SEugenio Pérez         return;
30069498430SEugenio Pérez     }
30169498430SEugenio Pérez 
30269498430SEugenio Pérez     data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
30369498430SEugenio Pérez     cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
30469498430SEugenio Pérez                                   n->max_ncs - n->max_queue_pairs : 0;
30569498430SEugenio Pérez     /*
30669498430SEugenio Pérez      * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
30769498430SEugenio Pérez      * in the future and resume the device if read-only operations between
30869498430SEugenio Pérez      * suspend and reset goes wrong.
30969498430SEugenio Pérez      */
31069498430SEugenio Pérez     vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
31169498430SEugenio Pérez 
31269498430SEugenio Pérez     /* Start will check migration setup_or_active to configure or not SVQ */
31369498430SEugenio Pérez     r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
31469498430SEugenio Pérez     if (unlikely(r < 0)) {
31569498430SEugenio Pérez         error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
31669498430SEugenio Pérez     }
31769498430SEugenio Pérez }
31869498430SEugenio Pérez 
31969498430SEugenio Pérez static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
32069498430SEugenio Pérez {
32169498430SEugenio Pérez     MigrationState *migration = data;
32269498430SEugenio Pérez     VhostVDPAState *s = container_of(notifier, VhostVDPAState,
32369498430SEugenio Pérez                                      migration_state);
32469498430SEugenio Pérez 
32569498430SEugenio Pérez     if (migration_in_setup(migration)) {
32669498430SEugenio Pérez         vhost_vdpa_net_log_global_enable(s, true);
32769498430SEugenio Pérez     } else if (migration_has_failed(migration)) {
32869498430SEugenio Pérez         vhost_vdpa_net_log_global_enable(s, false);
32969498430SEugenio Pérez     }
33069498430SEugenio Pérez }
33169498430SEugenio Pérez 
33200ef422eSEugenio Pérez static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
33300ef422eSEugenio Pérez {
33400ef422eSEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
33500ef422eSEugenio Pérez 
33669498430SEugenio Pérez     add_migration_state_change_notifier(&s->migration_state);
33700ef422eSEugenio Pérez     if (v->shadow_vqs_enabled) {
33800ef422eSEugenio Pérez         v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
33900ef422eSEugenio Pérez                                            v->iova_range.last);
34000ef422eSEugenio Pérez     }
34100ef422eSEugenio Pérez }
34200ef422eSEugenio Pérez 
34300ef422eSEugenio Pérez static int vhost_vdpa_net_data_start(NetClientState *nc)
34400ef422eSEugenio Pérez {
34500ef422eSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
34600ef422eSEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
34700ef422eSEugenio Pérez 
34800ef422eSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
34900ef422eSEugenio Pérez 
35069498430SEugenio Pérez     if (s->always_svq ||
35169498430SEugenio Pérez         migration_is_setup_or_active(migrate_get_current()->state)) {
35269498430SEugenio Pérez         v->shadow_vqs_enabled = true;
35369498430SEugenio Pérez         v->shadow_data = true;
35469498430SEugenio Pérez     } else {
35569498430SEugenio Pérez         v->shadow_vqs_enabled = false;
35669498430SEugenio Pérez         v->shadow_data = false;
35769498430SEugenio Pérez     }
35869498430SEugenio Pérez 
35900ef422eSEugenio Pérez     if (v->index == 0) {
36000ef422eSEugenio Pérez         vhost_vdpa_net_data_start_first(s);
36100ef422eSEugenio Pérez         return 0;
36200ef422eSEugenio Pérez     }
36300ef422eSEugenio Pérez 
36400ef422eSEugenio Pérez     if (v->shadow_vqs_enabled) {
36500ef422eSEugenio Pérez         VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
36600ef422eSEugenio Pérez         v->iova_tree = s0->vhost_vdpa.iova_tree;
36700ef422eSEugenio Pérez     }
36800ef422eSEugenio Pérez 
36900ef422eSEugenio Pérez     return 0;
37000ef422eSEugenio Pérez }
37100ef422eSEugenio Pérez 
37200ef422eSEugenio Pérez static void vhost_vdpa_net_client_stop(NetClientState *nc)
37300ef422eSEugenio Pérez {
37400ef422eSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
37500ef422eSEugenio Pérez     struct vhost_dev *dev;
37600ef422eSEugenio Pérez 
37700ef422eSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
37800ef422eSEugenio Pérez 
37969498430SEugenio Pérez     if (s->vhost_vdpa.index == 0) {
38069498430SEugenio Pérez         remove_migration_state_change_notifier(&s->migration_state);
38169498430SEugenio Pérez     }
38269498430SEugenio Pérez 
38300ef422eSEugenio Pérez     dev = s->vhost_vdpa.dev;
38400ef422eSEugenio Pérez     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
38500ef422eSEugenio Pérez         g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
38600ef422eSEugenio Pérez     }
38700ef422eSEugenio Pérez }
38800ef422eSEugenio Pérez 
3891e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = {
3901e0a84eaSCindy Lu         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
3911e0a84eaSCindy Lu         .size = sizeof(VhostVDPAState),
392846a1e85SEugenio Pérez         .receive = vhost_vdpa_receive,
39300ef422eSEugenio Pérez         .start = vhost_vdpa_net_data_start,
39400ef422eSEugenio Pérez         .stop = vhost_vdpa_net_client_stop,
3951e0a84eaSCindy Lu         .cleanup = vhost_vdpa_cleanup,
3961e0a84eaSCindy Lu         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
3971e0a84eaSCindy Lu         .has_ufo = vhost_vdpa_has_ufo,
398ee8a1c63SKevin Wolf         .check_peer_type = vhost_vdpa_check_peer_type,
3991e0a84eaSCindy Lu };
4001e0a84eaSCindy Lu 
401152128d6SEugenio Pérez static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
402152128d6SEugenio Pérez                                           Error **errp)
403c1a10086SEugenio Pérez {
404c1a10086SEugenio Pérez     struct vhost_vring_state state = {
405c1a10086SEugenio Pérez         .index = vq_index,
406c1a10086SEugenio Pérez     };
407c1a10086SEugenio Pérez     int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
408c1a10086SEugenio Pérez 
409c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
4100f2bb0bfSEugenio Pérez         r = -errno;
411152128d6SEugenio Pérez         error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
412c1a10086SEugenio Pérez         return r;
413c1a10086SEugenio Pérez     }
414c1a10086SEugenio Pérez 
415c1a10086SEugenio Pérez     return state.num;
416c1a10086SEugenio Pérez }
417c1a10086SEugenio Pérez 
418c1a10086SEugenio Pérez static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
419c1a10086SEugenio Pérez                                            unsigned vq_group,
420c1a10086SEugenio Pérez                                            unsigned asid_num)
421c1a10086SEugenio Pérez {
422c1a10086SEugenio Pérez     struct vhost_vring_state asid = {
423c1a10086SEugenio Pérez         .index = vq_group,
424c1a10086SEugenio Pérez         .num = asid_num,
425c1a10086SEugenio Pérez     };
426c1a10086SEugenio Pérez     int r;
427c1a10086SEugenio Pérez 
428c1a10086SEugenio Pérez     r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
429c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
430c1a10086SEugenio Pérez         error_report("Can't set vq group %u asid %u, errno=%d (%s)",
431c1a10086SEugenio Pérez                      asid.index, asid.num, errno, g_strerror(errno));
432c1a10086SEugenio Pérez     }
433c1a10086SEugenio Pérez     return r;
434c1a10086SEugenio Pérez }
435c1a10086SEugenio Pérez 
4362df4dd31SEugenio Pérez static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
4372df4dd31SEugenio Pérez {
4382df4dd31SEugenio Pérez     VhostIOVATree *tree = v->iova_tree;
4392df4dd31SEugenio Pérez     DMAMap needle = {
4402df4dd31SEugenio Pérez         /*
4412df4dd31SEugenio Pérez          * No need to specify size or to look for more translations since
4422df4dd31SEugenio Pérez          * this contiguous chunk was allocated by us.
4432df4dd31SEugenio Pérez          */
4442df4dd31SEugenio Pérez         .translated_addr = (hwaddr)(uintptr_t)addr,
4452df4dd31SEugenio Pérez     };
4462df4dd31SEugenio Pérez     const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
4472df4dd31SEugenio Pérez     int r;
4482df4dd31SEugenio Pérez 
4492df4dd31SEugenio Pérez     if (unlikely(!map)) {
4502df4dd31SEugenio Pérez         error_report("Cannot locate expected map");
4512df4dd31SEugenio Pérez         return;
4522df4dd31SEugenio Pérez     }
4532df4dd31SEugenio Pérez 
454cd831ed5SEugenio Pérez     r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
4552df4dd31SEugenio Pérez     if (unlikely(r != 0)) {
4562df4dd31SEugenio Pérez         error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
4572df4dd31SEugenio Pérez     }
4582df4dd31SEugenio Pérez 
45969292a8eSEugenio Pérez     vhost_iova_tree_remove(tree, *map);
4602df4dd31SEugenio Pérez }
4612df4dd31SEugenio Pérez 
4627a7f87e9SEugenio Pérez /** Map CVQ buffer. */
4637a7f87e9SEugenio Pérez static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
4647a7f87e9SEugenio Pérez                                   bool write)
4652df4dd31SEugenio Pérez {
4662df4dd31SEugenio Pérez     DMAMap map = {};
4672df4dd31SEugenio Pérez     int r;
4682df4dd31SEugenio Pérez 
4692df4dd31SEugenio Pérez     map.translated_addr = (hwaddr)(uintptr_t)buf;
4707a7f87e9SEugenio Pérez     map.size = size - 1;
4712df4dd31SEugenio Pérez     map.perm = write ? IOMMU_RW : IOMMU_RO,
4722df4dd31SEugenio Pérez     r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
4732df4dd31SEugenio Pérez     if (unlikely(r != IOVA_OK)) {
4742df4dd31SEugenio Pérez         error_report("Cannot map injected element");
4757a7f87e9SEugenio Pérez         return r;
4762df4dd31SEugenio Pérez     }
4772df4dd31SEugenio Pérez 
478cd831ed5SEugenio Pérez     r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
479cd831ed5SEugenio Pérez                            vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
4802df4dd31SEugenio Pérez     if (unlikely(r < 0)) {
4812df4dd31SEugenio Pérez         goto dma_map_err;
4822df4dd31SEugenio Pérez     }
4832df4dd31SEugenio Pérez 
4847a7f87e9SEugenio Pérez     return 0;
4852df4dd31SEugenio Pérez 
4862df4dd31SEugenio Pérez dma_map_err:
48769292a8eSEugenio Pérez     vhost_iova_tree_remove(v->iova_tree, map);
4887a7f87e9SEugenio Pérez     return r;
4892df4dd31SEugenio Pérez }
4902df4dd31SEugenio Pérez 
4917a7f87e9SEugenio Pérez static int vhost_vdpa_net_cvq_start(NetClientState *nc)
4922df4dd31SEugenio Pérez {
49300ef422eSEugenio Pérez     VhostVDPAState *s, *s0;
494c1a10086SEugenio Pérez     struct vhost_vdpa *v;
495c1a10086SEugenio Pérez     int64_t cvq_group;
496152128d6SEugenio Pérez     int r;
497152128d6SEugenio Pérez     Error *err = NULL;
4982df4dd31SEugenio Pérez 
4997a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
5007a7f87e9SEugenio Pérez 
5017a7f87e9SEugenio Pérez     s = DO_UPCAST(VhostVDPAState, nc, nc);
502c1a10086SEugenio Pérez     v = &s->vhost_vdpa;
503c1a10086SEugenio Pérez 
50469498430SEugenio Pérez     s0 = vhost_vdpa_net_first_nc_vdpa(s);
50569498430SEugenio Pérez     v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
506c1a10086SEugenio Pérez     v->shadow_vqs_enabled = s->always_svq;
507c1a10086SEugenio Pérez     s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
508c1a10086SEugenio Pérez 
50969498430SEugenio Pérez     if (s->vhost_vdpa.shadow_data) {
510c1a10086SEugenio Pérez         /* SVQ is already configured for all virtqueues */
511c1a10086SEugenio Pérez         goto out;
512c1a10086SEugenio Pérez     }
513c1a10086SEugenio Pérez 
514c1a10086SEugenio Pérez     /*
515c1a10086SEugenio Pérez      * If we early return in these cases SVQ will not be enabled. The migration
516c1a10086SEugenio Pérez      * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
517c1a10086SEugenio Pérez      */
518152128d6SEugenio Pérez     if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
519c1a10086SEugenio Pérez         return 0;
520c1a10086SEugenio Pérez     }
521c1a10086SEugenio Pérez 
522152128d6SEugenio Pérez     if (!s->cvq_isolated) {
523152128d6SEugenio Pérez         return 0;
524152128d6SEugenio Pérez     }
525152128d6SEugenio Pérez 
526152128d6SEugenio Pérez     cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
527152128d6SEugenio Pérez                                            v->dev->vq_index_end - 1,
528152128d6SEugenio Pérez                                            &err);
529c1a10086SEugenio Pérez     if (unlikely(cvq_group < 0)) {
530152128d6SEugenio Pérez         error_report_err(err);
531c1a10086SEugenio Pérez         return cvq_group;
532c1a10086SEugenio Pérez     }
533c1a10086SEugenio Pérez 
534c1a10086SEugenio Pérez     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
535c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
536c1a10086SEugenio Pérez         return r;
537c1a10086SEugenio Pérez     }
538c1a10086SEugenio Pérez 
539c1a10086SEugenio Pérez     v->shadow_vqs_enabled = true;
540c1a10086SEugenio Pérez     s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
541c1a10086SEugenio Pérez 
542c1a10086SEugenio Pérez out:
5437a7f87e9SEugenio Pérez     if (!s->vhost_vdpa.shadow_vqs_enabled) {
5447a7f87e9SEugenio Pérez         return 0;
5452df4dd31SEugenio Pérez     }
5462df4dd31SEugenio Pérez 
54700ef422eSEugenio Pérez     if (s0->vhost_vdpa.iova_tree) {
54800ef422eSEugenio Pérez         /*
54900ef422eSEugenio Pérez          * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
55000ef422eSEugenio Pérez          * simplicity, whether CVQ shares ASID with guest or not, because:
55100ef422eSEugenio Pérez          * - Memory listener need access to guest's memory addresses allocated
55200ef422eSEugenio Pérez          *   in the IOVA tree.
55300ef422eSEugenio Pérez          * - There should be plenty of IOVA address space for both ASID not to
55400ef422eSEugenio Pérez          *   worry about collisions between them.  Guest's translations are
55500ef422eSEugenio Pérez          *   still validated with virtio virtqueue_pop so there is no risk for
55600ef422eSEugenio Pérez          *   the guest to access memory that it shouldn't.
55700ef422eSEugenio Pérez          *
55800ef422eSEugenio Pérez          * To allocate a iova tree per ASID is doable but it complicates the
55900ef422eSEugenio Pérez          * code and it is not worth it for the moment.
56000ef422eSEugenio Pérez          */
56100ef422eSEugenio Pérez         v->iova_tree = s0->vhost_vdpa.iova_tree;
56200ef422eSEugenio Pérez     } else {
56300ef422eSEugenio Pérez         v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
56400ef422eSEugenio Pérez                                            v->iova_range.last);
56500ef422eSEugenio Pérez     }
56600ef422eSEugenio Pérez 
5677a7f87e9SEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
5687a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), false);
5697a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
5707a7f87e9SEugenio Pérez         return r;
5717a7f87e9SEugenio Pérez     }
5727a7f87e9SEugenio Pérez 
57317fb889fSEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
5747a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), true);
5757a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
5762df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
5772df4dd31SEugenio Pérez     }
5782df4dd31SEugenio Pérez 
5797a7f87e9SEugenio Pérez     return r;
5807a7f87e9SEugenio Pérez }
5817a7f87e9SEugenio Pérez 
5827a7f87e9SEugenio Pérez static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
5837a7f87e9SEugenio Pérez {
5847a7f87e9SEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
5857a7f87e9SEugenio Pérez 
5867a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
5877a7f87e9SEugenio Pérez 
5887a7f87e9SEugenio Pérez     if (s->vhost_vdpa.shadow_vqs_enabled) {
5897a7f87e9SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
59017fb889fSEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
591c1a10086SEugenio Pérez     }
59200ef422eSEugenio Pérez 
59300ef422eSEugenio Pérez     vhost_vdpa_net_client_stop(nc);
5942df4dd31SEugenio Pérez }
5952df4dd31SEugenio Pérez 
596be4278b6SEugenio Pérez static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
597be4278b6SEugenio Pérez                                       size_t in_len)
598be4278b6SEugenio Pérez {
599be4278b6SEugenio Pérez     /* Buffers for the device */
600be4278b6SEugenio Pérez     const struct iovec out = {
601be4278b6SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
602be4278b6SEugenio Pérez         .iov_len = out_len,
603be4278b6SEugenio Pérez     };
604be4278b6SEugenio Pérez     const struct iovec in = {
60517fb889fSEugenio Pérez         .iov_base = s->status,
606be4278b6SEugenio Pérez         .iov_len = sizeof(virtio_net_ctrl_ack),
607be4278b6SEugenio Pérez     };
608be4278b6SEugenio Pérez     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
609be4278b6SEugenio Pérez     int r;
610be4278b6SEugenio Pérez 
611be4278b6SEugenio Pérez     r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
612be4278b6SEugenio Pérez     if (unlikely(r != 0)) {
613be4278b6SEugenio Pérez         if (unlikely(r == -ENOSPC)) {
614be4278b6SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
615be4278b6SEugenio Pérez                           __func__);
616be4278b6SEugenio Pérez         }
617be4278b6SEugenio Pérez         return r;
618be4278b6SEugenio Pérez     }
619be4278b6SEugenio Pérez 
620be4278b6SEugenio Pérez     /*
621be4278b6SEugenio Pérez      * We can poll here since we've had BQL from the time we sent the
622be4278b6SEugenio Pérez      * descriptor. Also, we need to take the answer before SVQ pulls by itself,
623be4278b6SEugenio Pérez      * when BQL is released
624be4278b6SEugenio Pérez      */
625be4278b6SEugenio Pérez     return vhost_svq_poll(svq);
626be4278b6SEugenio Pérez }
627be4278b6SEugenio Pérez 
628f73c0c43SEugenio Pérez static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
629f73c0c43SEugenio Pérez                                        uint8_t cmd, const void *data,
630f73c0c43SEugenio Pérez                                        size_t data_size)
631f73c0c43SEugenio Pérez {
632f73c0c43SEugenio Pérez     const struct virtio_net_ctrl_hdr ctrl = {
633f73c0c43SEugenio Pérez         .class = class,
634f73c0c43SEugenio Pérez         .cmd = cmd,
635f73c0c43SEugenio Pérez     };
636f73c0c43SEugenio Pérez 
637f73c0c43SEugenio Pérez     assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
638f73c0c43SEugenio Pérez 
639f73c0c43SEugenio Pérez     memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
640f73c0c43SEugenio Pérez     memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
641f73c0c43SEugenio Pérez 
642f73c0c43SEugenio Pérez     return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
643f73c0c43SEugenio Pérez                                   sizeof(virtio_net_ctrl_ack));
644f73c0c43SEugenio Pérez }
645f73c0c43SEugenio Pérez 
646f73c0c43SEugenio Pérez static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
647f73c0c43SEugenio Pérez {
64802d3bf09SHawkins Jiawei     if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
649f73c0c43SEugenio Pérez         ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
650f73c0c43SEugenio Pérez                                                   VIRTIO_NET_CTRL_MAC_ADDR_SET,
651f73c0c43SEugenio Pérez                                                   n->mac, sizeof(n->mac));
652f73c0c43SEugenio Pérez         if (unlikely(dev_written < 0)) {
653f73c0c43SEugenio Pérez             return dev_written;
654f73c0c43SEugenio Pérez         }
655b479bc3cSHawkins Jiawei         if (*s->status != VIRTIO_NET_OK) {
656b479bc3cSHawkins Jiawei             return -EIO;
657b479bc3cSHawkins Jiawei         }
658f73c0c43SEugenio Pérez     }
659f73c0c43SEugenio Pérez 
660f73c0c43SEugenio Pérez     return 0;
661f73c0c43SEugenio Pérez }
662f73c0c43SEugenio Pérez 
663f64c7cdaSEugenio Pérez static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
664f64c7cdaSEugenio Pérez                                   const VirtIONet *n)
665f64c7cdaSEugenio Pérez {
666f64c7cdaSEugenio Pérez     struct virtio_net_ctrl_mq mq;
667f64c7cdaSEugenio Pérez     ssize_t dev_written;
668f64c7cdaSEugenio Pérez 
66902d3bf09SHawkins Jiawei     if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
670f64c7cdaSEugenio Pérez         return 0;
671f64c7cdaSEugenio Pérez     }
672f64c7cdaSEugenio Pérez 
673f64c7cdaSEugenio Pérez     mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
674f64c7cdaSEugenio Pérez     dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
675f64c7cdaSEugenio Pérez                                           VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
676f64c7cdaSEugenio Pérez                                           sizeof(mq));
677f64c7cdaSEugenio Pérez     if (unlikely(dev_written < 0)) {
678f64c7cdaSEugenio Pérez         return dev_written;
679f64c7cdaSEugenio Pérez     }
680f45fd95eSHawkins Jiawei     if (*s->status != VIRTIO_NET_OK) {
681f45fd95eSHawkins Jiawei         return -EIO;
682f45fd95eSHawkins Jiawei     }
683f64c7cdaSEugenio Pérez 
684f45fd95eSHawkins Jiawei     return 0;
685f64c7cdaSEugenio Pérez }
686f64c7cdaSEugenio Pérez 
6870b58d368SHawkins Jiawei static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
6880b58d368SHawkins Jiawei                                         const VirtIONet *n)
6890b58d368SHawkins Jiawei {
6900b58d368SHawkins Jiawei     uint64_t offloads;
6910b58d368SHawkins Jiawei     ssize_t dev_written;
6920b58d368SHawkins Jiawei 
6930b58d368SHawkins Jiawei     if (!virtio_vdev_has_feature(&n->parent_obj,
6940b58d368SHawkins Jiawei                                  VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
6950b58d368SHawkins Jiawei         return 0;
6960b58d368SHawkins Jiawei     }
6970b58d368SHawkins Jiawei 
6980b58d368SHawkins Jiawei     if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
6990b58d368SHawkins Jiawei         /*
7000b58d368SHawkins Jiawei          * According to VirtIO standard, "Upon feature negotiation
7010b58d368SHawkins Jiawei          * corresponding offload gets enabled to preserve
7020b58d368SHawkins Jiawei          * backward compatibility.".
7030b58d368SHawkins Jiawei          *
7040b58d368SHawkins Jiawei          * Therefore, there is no need to send this CVQ command if the
7050b58d368SHawkins Jiawei          * driver also enables all supported offloads, which aligns with
7060b58d368SHawkins Jiawei          * the device's defaults.
7070b58d368SHawkins Jiawei          *
7080b58d368SHawkins Jiawei          * Note that the device's defaults can mismatch the driver's
7090b58d368SHawkins Jiawei          * configuration only at live migration.
7100b58d368SHawkins Jiawei          */
7110b58d368SHawkins Jiawei         return 0;
7120b58d368SHawkins Jiawei     }
7130b58d368SHawkins Jiawei 
7140b58d368SHawkins Jiawei     offloads = cpu_to_le64(n->curr_guest_offloads);
7150b58d368SHawkins Jiawei     dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
7160b58d368SHawkins Jiawei                                           VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
7170b58d368SHawkins Jiawei                                           &offloads, sizeof(offloads));
7180b58d368SHawkins Jiawei     if (unlikely(dev_written < 0)) {
7190b58d368SHawkins Jiawei         return dev_written;
7200b58d368SHawkins Jiawei     }
721*6f348071SHawkins Jiawei     if (*s->status != VIRTIO_NET_OK) {
722*6f348071SHawkins Jiawei         return -EIO;
723*6f348071SHawkins Jiawei     }
7240b58d368SHawkins Jiawei 
725*6f348071SHawkins Jiawei     return 0;
7260b58d368SHawkins Jiawei }
7270b58d368SHawkins Jiawei 
728dd036d8dSEugenio Pérez static int vhost_vdpa_net_load(NetClientState *nc)
729dd036d8dSEugenio Pérez {
730dd036d8dSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
731f73c0c43SEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
732dd036d8dSEugenio Pérez     const VirtIONet *n;
733f73c0c43SEugenio Pérez     int r;
734dd036d8dSEugenio Pérez 
735dd036d8dSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
736dd036d8dSEugenio Pérez 
737dd036d8dSEugenio Pérez     if (!v->shadow_vqs_enabled) {
738dd036d8dSEugenio Pérez         return 0;
739dd036d8dSEugenio Pérez     }
740dd036d8dSEugenio Pérez 
741dd036d8dSEugenio Pérez     n = VIRTIO_NET(v->dev->vdev);
742f73c0c43SEugenio Pérez     r = vhost_vdpa_net_load_mac(s, n);
743f73c0c43SEugenio Pérez     if (unlikely(r < 0)) {
744f73c0c43SEugenio Pérez         return r;
745dd036d8dSEugenio Pérez     }
746f64c7cdaSEugenio Pérez     r = vhost_vdpa_net_load_mq(s, n);
747f64c7cdaSEugenio Pérez     if (unlikely(r)) {
748f64c7cdaSEugenio Pérez         return r;
749f64c7cdaSEugenio Pérez     }
7500b58d368SHawkins Jiawei     r = vhost_vdpa_net_load_offloads(s, n);
7510b58d368SHawkins Jiawei     if (unlikely(r)) {
7520b58d368SHawkins Jiawei         return r;
7530b58d368SHawkins Jiawei     }
754dd036d8dSEugenio Pérez 
755dd036d8dSEugenio Pérez     return 0;
756dd036d8dSEugenio Pérez }
757dd036d8dSEugenio Pérez 
758f8972b56SEugenio Pérez static NetClientInfo net_vhost_vdpa_cvq_info = {
759f8972b56SEugenio Pérez     .type = NET_CLIENT_DRIVER_VHOST_VDPA,
760f8972b56SEugenio Pérez     .size = sizeof(VhostVDPAState),
761f8972b56SEugenio Pérez     .receive = vhost_vdpa_receive,
7627a7f87e9SEugenio Pérez     .start = vhost_vdpa_net_cvq_start,
763dd036d8dSEugenio Pérez     .load = vhost_vdpa_net_load,
7647a7f87e9SEugenio Pérez     .stop = vhost_vdpa_net_cvq_stop,
765f8972b56SEugenio Pérez     .cleanup = vhost_vdpa_cleanup,
766f8972b56SEugenio Pérez     .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
767f8972b56SEugenio Pérez     .has_ufo = vhost_vdpa_has_ufo,
768f8972b56SEugenio Pérez     .check_peer_type = vhost_vdpa_check_peer_type,
769f8972b56SEugenio Pérez };
770f8972b56SEugenio Pérez 
7712df4dd31SEugenio Pérez /**
7722df4dd31SEugenio Pérez  * Validate and copy control virtqueue commands.
7732df4dd31SEugenio Pérez  *
7742df4dd31SEugenio Pérez  * Following QEMU guidelines, we offer a copy of the buffers to the device to
7752df4dd31SEugenio Pérez  * prevent TOCTOU bugs.
776bd907ae4SEugenio Pérez  */
777bd907ae4SEugenio Pérez static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
778bd907ae4SEugenio Pérez                                             VirtQueueElement *elem,
779bd907ae4SEugenio Pérez                                             void *opaque)
780bd907ae4SEugenio Pérez {
7812df4dd31SEugenio Pérez     VhostVDPAState *s = opaque;
782be4278b6SEugenio Pérez     size_t in_len;
783bd907ae4SEugenio Pérez     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
7847a7f87e9SEugenio Pérez     /* Out buffer sent to both the vdpa device and the device model */
7857a7f87e9SEugenio Pérez     struct iovec out = {
7867a7f87e9SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
7877a7f87e9SEugenio Pérez     };
7882df4dd31SEugenio Pérez     /* in buffer used for device model */
7892df4dd31SEugenio Pérez     const struct iovec in = {
7902df4dd31SEugenio Pérez         .iov_base = &status,
7912df4dd31SEugenio Pérez         .iov_len = sizeof(status),
7922df4dd31SEugenio Pérez     };
793be4278b6SEugenio Pérez     ssize_t dev_written = -EINVAL;
794bd907ae4SEugenio Pérez 
7957a7f87e9SEugenio Pérez     out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
7967a7f87e9SEugenio Pérez                              s->cvq_cmd_out_buffer,
7977a7f87e9SEugenio Pérez                              vhost_vdpa_net_cvq_cmd_len());
7983f9a3eebSEugenio Pérez     if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
7993f9a3eebSEugenio Pérez         /*
8003f9a3eebSEugenio Pérez          * Guest announce capability is emulated by qemu, so don't forward to
8013f9a3eebSEugenio Pérez          * the device.
8023f9a3eebSEugenio Pérez          */
8033f9a3eebSEugenio Pérez         dev_written = sizeof(status);
8043f9a3eebSEugenio Pérez         *s->status = VIRTIO_NET_OK;
8053f9a3eebSEugenio Pérez     } else {
806be4278b6SEugenio Pérez         dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
807be4278b6SEugenio Pérez         if (unlikely(dev_written < 0)) {
808bd907ae4SEugenio Pérez             goto out;
809bd907ae4SEugenio Pérez         }
8103f9a3eebSEugenio Pérez     }
811bd907ae4SEugenio Pérez 
812bd907ae4SEugenio Pérez     if (unlikely(dev_written < sizeof(status))) {
813bd907ae4SEugenio Pérez         error_report("Insufficient written data (%zu)", dev_written);
8142df4dd31SEugenio Pérez         goto out;
8152df4dd31SEugenio Pérez     }
8162df4dd31SEugenio Pérez 
81717fb889fSEugenio Pérez     if (*s->status != VIRTIO_NET_OK) {
818d45243bcSEugenio Pérez         goto out;
8192df4dd31SEugenio Pérez     }
8202df4dd31SEugenio Pérez 
8212df4dd31SEugenio Pérez     status = VIRTIO_NET_ERR;
8227a7f87e9SEugenio Pérez     virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
8232df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
8242df4dd31SEugenio Pérez         error_report("Bad CVQ processing in model");
825bd907ae4SEugenio Pérez     }
826bd907ae4SEugenio Pérez 
827bd907ae4SEugenio Pérez out:
828bd907ae4SEugenio Pérez     in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
829bd907ae4SEugenio Pérez                           sizeof(status));
830bd907ae4SEugenio Pérez     if (unlikely(in_len < sizeof(status))) {
831bd907ae4SEugenio Pérez         error_report("Bad device CVQ written length");
832bd907ae4SEugenio Pérez     }
833bd907ae4SEugenio Pérez     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
834bd907ae4SEugenio Pérez     g_free(elem);
835be4278b6SEugenio Pérez     return dev_written < 0 ? dev_written : 0;
836bd907ae4SEugenio Pérez }
837bd907ae4SEugenio Pérez 
838bd907ae4SEugenio Pérez static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
839bd907ae4SEugenio Pérez     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
840bd907ae4SEugenio Pérez };
841bd907ae4SEugenio Pérez 
842152128d6SEugenio Pérez /**
843152128d6SEugenio Pérez  * Probe if CVQ is isolated
844152128d6SEugenio Pérez  *
845152128d6SEugenio Pérez  * @device_fd         The vdpa device fd
846152128d6SEugenio Pérez  * @features          Features offered by the device.
847152128d6SEugenio Pérez  * @cvq_index         The control vq pair index
848152128d6SEugenio Pérez  *
849152128d6SEugenio Pérez  * Returns <0 in case of failure, 0 if false and 1 if true.
850152128d6SEugenio Pérez  */
851152128d6SEugenio Pérez static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
852152128d6SEugenio Pérez                                           int cvq_index, Error **errp)
853152128d6SEugenio Pérez {
854152128d6SEugenio Pérez     uint64_t backend_features;
855152128d6SEugenio Pérez     int64_t cvq_group;
856152128d6SEugenio Pérez     uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
857152128d6SEugenio Pérez                      VIRTIO_CONFIG_S_DRIVER |
858152128d6SEugenio Pérez                      VIRTIO_CONFIG_S_FEATURES_OK;
859152128d6SEugenio Pérez     int r;
860152128d6SEugenio Pérez 
861152128d6SEugenio Pérez     ERRP_GUARD();
862152128d6SEugenio Pérez 
863152128d6SEugenio Pérez     r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
864152128d6SEugenio Pérez     if (unlikely(r < 0)) {
865152128d6SEugenio Pérez         error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
866152128d6SEugenio Pérez         return r;
867152128d6SEugenio Pérez     }
868152128d6SEugenio Pérez 
869152128d6SEugenio Pérez     if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
870152128d6SEugenio Pérez         return 0;
871152128d6SEugenio Pérez     }
872152128d6SEugenio Pérez 
873152128d6SEugenio Pérez     r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
874152128d6SEugenio Pérez     if (unlikely(r)) {
875152128d6SEugenio Pérez         error_setg_errno(errp, errno, "Cannot set features");
876152128d6SEugenio Pérez     }
877152128d6SEugenio Pérez 
878152128d6SEugenio Pérez     r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
879152128d6SEugenio Pérez     if (unlikely(r)) {
880152128d6SEugenio Pérez         error_setg_errno(errp, -r, "Cannot set device features");
881152128d6SEugenio Pérez         goto out;
882152128d6SEugenio Pérez     }
883152128d6SEugenio Pérez 
884152128d6SEugenio Pérez     cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
885152128d6SEugenio Pérez     if (unlikely(cvq_group < 0)) {
886152128d6SEugenio Pérez         if (cvq_group != -ENOTSUP) {
887152128d6SEugenio Pérez             r = cvq_group;
888152128d6SEugenio Pérez             goto out;
889152128d6SEugenio Pérez         }
890152128d6SEugenio Pérez 
891152128d6SEugenio Pérez         /*
892152128d6SEugenio Pérez          * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
893152128d6SEugenio Pérez          * support ASID even if the parent driver does not.  The CVQ cannot be
894152128d6SEugenio Pérez          * isolated in this case.
895152128d6SEugenio Pérez          */
896152128d6SEugenio Pérez         error_free(*errp);
897152128d6SEugenio Pérez         *errp = NULL;
898152128d6SEugenio Pérez         r = 0;
899152128d6SEugenio Pérez         goto out;
900152128d6SEugenio Pérez     }
901152128d6SEugenio Pérez 
902152128d6SEugenio Pérez     for (int i = 0; i < cvq_index; ++i) {
903152128d6SEugenio Pérez         int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
904152128d6SEugenio Pérez         if (unlikely(group < 0)) {
905152128d6SEugenio Pérez             r = group;
906152128d6SEugenio Pérez             goto out;
907152128d6SEugenio Pérez         }
908152128d6SEugenio Pérez 
909152128d6SEugenio Pérez         if (group == (int64_t)cvq_group) {
910152128d6SEugenio Pérez             r = 0;
911152128d6SEugenio Pérez             goto out;
912152128d6SEugenio Pérez         }
913152128d6SEugenio Pérez     }
914152128d6SEugenio Pérez 
915152128d6SEugenio Pérez     r = 1;
916152128d6SEugenio Pérez 
917152128d6SEugenio Pérez out:
918152128d6SEugenio Pérez     status = 0;
919152128d6SEugenio Pérez     ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
920152128d6SEugenio Pérez     return r;
921152128d6SEugenio Pérez }
922152128d6SEugenio Pérez 
923654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
924654790b6SJason Wang                                        const char *device,
925654790b6SJason Wang                                        const char *name,
92640237840SJason Wang                                        int vdpa_device_fd,
92740237840SJason Wang                                        int queue_pair_index,
92840237840SJason Wang                                        int nvqs,
9291576dbb5SEugenio Pérez                                        bool is_datapath,
9301576dbb5SEugenio Pérez                                        bool svq,
9315c1ebd4cSEugenio Pérez                                        struct vhost_vdpa_iova_range iova_range,
932152128d6SEugenio Pérez                                        uint64_t features,
933152128d6SEugenio Pérez                                        Error **errp)
9341e0a84eaSCindy Lu {
9351e0a84eaSCindy Lu     NetClientState *nc = NULL;
9361e0a84eaSCindy Lu     VhostVDPAState *s;
9371e0a84eaSCindy Lu     int ret = 0;
9381e0a84eaSCindy Lu     assert(name);
939152128d6SEugenio Pérez     int cvq_isolated;
940152128d6SEugenio Pérez 
94140237840SJason Wang     if (is_datapath) {
94240237840SJason Wang         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
94340237840SJason Wang                                  name);
94440237840SJason Wang     } else {
945152128d6SEugenio Pérez         cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
946152128d6SEugenio Pérez                                                       queue_pair_index * 2,
947152128d6SEugenio Pérez                                                       errp);
948152128d6SEugenio Pérez         if (unlikely(cvq_isolated < 0)) {
949152128d6SEugenio Pérez             return NULL;
950152128d6SEugenio Pérez         }
951152128d6SEugenio Pérez 
952f8972b56SEugenio Pérez         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
95340237840SJason Wang                                          device, name);
95440237840SJason Wang     }
95553b85d95SLaurent Vivier     qemu_set_info_str(nc, TYPE_VHOST_VDPA);
9561e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, nc);
9577327813dSJason Wang 
9581e0a84eaSCindy Lu     s->vhost_vdpa.device_fd = vdpa_device_fd;
95940237840SJason Wang     s->vhost_vdpa.index = queue_pair_index;
9607f211a28SEugenio Pérez     s->always_svq = svq;
96169498430SEugenio Pérez     s->migration_state.notify = vdpa_net_migration_state_notifier;
9621576dbb5SEugenio Pérez     s->vhost_vdpa.shadow_vqs_enabled = svq;
963a585fad2SEugenio Pérez     s->vhost_vdpa.iova_range = iova_range;
9646188d78aSEugenio Pérez     s->vhost_vdpa.shadow_data = svq;
9655c1ebd4cSEugenio Pérez     if (queue_pair_index == 0) {
9665c1ebd4cSEugenio Pérez         vhost_vdpa_net_valid_svq_features(features,
9675c1ebd4cSEugenio Pérez                                           &s->vhost_vdpa.migration_blocker);
9685c1ebd4cSEugenio Pérez     } else if (!is_datapath) {
969babf8b87SEugenio Pérez         s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
970babf8b87SEugenio Pérez                                      PROT_READ | PROT_WRITE,
971babf8b87SEugenio Pérez                                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
972babf8b87SEugenio Pérez         s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
973babf8b87SEugenio Pérez                          PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
974babf8b87SEugenio Pérez                          -1, 0);
9752df4dd31SEugenio Pérez 
976bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
977bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops_opaque = s;
978152128d6SEugenio Pérez         s->cvq_isolated = cvq_isolated;
9799c363cf6SEugenio Pérez 
9809c363cf6SEugenio Pérez         /*
9818bc0049eSEugenio Pérez          * TODO: We cannot migrate devices with CVQ and no x-svq enabled as
9828bc0049eSEugenio Pérez          * there is no way to set the device state (MAC, MQ, etc) before
9838bc0049eSEugenio Pérez          * starting the datapath.
9849c363cf6SEugenio Pérez          *
9859c363cf6SEugenio Pérez          * Migration blocker ownership now belongs to s->vhost_vdpa.
9869c363cf6SEugenio Pérez          */
9878bc0049eSEugenio Pérez         if (!svq) {
9889c363cf6SEugenio Pérez             error_setg(&s->vhost_vdpa.migration_blocker,
9899c363cf6SEugenio Pérez                        "net vdpa cannot migrate with CVQ feature");
990bd907ae4SEugenio Pérez         }
9918bc0049eSEugenio Pérez     }
99240237840SJason Wang     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
99374af5eecSJason Wang     if (ret) {
99474af5eecSJason Wang         qemu_del_net_client(nc);
995654790b6SJason Wang         return NULL;
99674af5eecSJason Wang     }
997654790b6SJason Wang     return nc;
9981e0a84eaSCindy Lu }
9991e0a84eaSCindy Lu 
10008170ab3fSEugenio Pérez static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
10018170ab3fSEugenio Pérez {
10028170ab3fSEugenio Pérez     int ret = ioctl(fd, VHOST_GET_FEATURES, features);
10038170ab3fSEugenio Pérez     if (unlikely(ret < 0)) {
10048170ab3fSEugenio Pérez         error_setg_errno(errp, errno,
10058170ab3fSEugenio Pérez                          "Fail to query features from vhost-vDPA device");
10068170ab3fSEugenio Pérez     }
10078170ab3fSEugenio Pérez     return ret;
10088170ab3fSEugenio Pérez }
10098170ab3fSEugenio Pérez 
10108170ab3fSEugenio Pérez static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
10118170ab3fSEugenio Pérez                                           int *has_cvq, Error **errp)
101240237840SJason Wang {
101340237840SJason Wang     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
1014cd523a41SStefano Garzarella     g_autofree struct vhost_vdpa_config *config = NULL;
101540237840SJason Wang     __virtio16 *max_queue_pairs;
101640237840SJason Wang     int ret;
101740237840SJason Wang 
101840237840SJason Wang     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
101940237840SJason Wang         *has_cvq = 1;
102040237840SJason Wang     } else {
102140237840SJason Wang         *has_cvq = 0;
102240237840SJason Wang     }
102340237840SJason Wang 
102440237840SJason Wang     if (features & (1 << VIRTIO_NET_F_MQ)) {
102540237840SJason Wang         config = g_malloc0(config_size + sizeof(*max_queue_pairs));
102640237840SJason Wang         config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
102740237840SJason Wang         config->len = sizeof(*max_queue_pairs);
102840237840SJason Wang 
102940237840SJason Wang         ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
103040237840SJason Wang         if (ret) {
103140237840SJason Wang             error_setg(errp, "Fail to get config from vhost-vDPA device");
103240237840SJason Wang             return -ret;
103340237840SJason Wang         }
103440237840SJason Wang 
103540237840SJason Wang         max_queue_pairs = (__virtio16 *)&config->buf;
103640237840SJason Wang 
103740237840SJason Wang         return lduw_le_p(max_queue_pairs);
103840237840SJason Wang     }
103940237840SJason Wang 
104040237840SJason Wang     return 1;
104140237840SJason Wang }
104240237840SJason Wang 
10431e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
10441e0a84eaSCindy Lu                         NetClientState *peer, Error **errp)
10451e0a84eaSCindy Lu {
10461e0a84eaSCindy Lu     const NetdevVhostVDPAOptions *opts;
10478170ab3fSEugenio Pérez     uint64_t features;
1048654790b6SJason Wang     int vdpa_device_fd;
1049eb3cb751SEugenio Pérez     g_autofree NetClientState **ncs = NULL;
1050a585fad2SEugenio Pérez     struct vhost_vdpa_iova_range iova_range;
1051eb3cb751SEugenio Pérez     NetClientState *nc;
1052aed5da45SEugenio Pérez     int queue_pairs, r, i = 0, has_cvq = 0;
10531e0a84eaSCindy Lu 
10541e0a84eaSCindy Lu     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
10551e0a84eaSCindy Lu     opts = &netdev->u.vhost_vdpa;
10567480874aSMarkus Armbruster     if (!opts->vhostdev && !opts->vhostfd) {
10578801ccd0SSi-Wei Liu         error_setg(errp,
10588801ccd0SSi-Wei Liu                    "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
1059c8295404SEugenio Pérez         return -1;
1060c8295404SEugenio Pérez     }
10617327813dSJason Wang 
10627480874aSMarkus Armbruster     if (opts->vhostdev && opts->vhostfd) {
10638801ccd0SSi-Wei Liu         error_setg(errp,
10648801ccd0SSi-Wei Liu                    "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
10658801ccd0SSi-Wei Liu         return -1;
10668801ccd0SSi-Wei Liu     }
10678801ccd0SSi-Wei Liu 
10687480874aSMarkus Armbruster     if (opts->vhostdev) {
10690351152bSEugenio Pérez         vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
10707327813dSJason Wang         if (vdpa_device_fd == -1) {
10717327813dSJason Wang             return -errno;
10727327813dSJason Wang         }
10735107fd3eSPeter Maydell     } else {
10745107fd3eSPeter Maydell         /* has_vhostfd */
10758801ccd0SSi-Wei Liu         vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
10768801ccd0SSi-Wei Liu         if (vdpa_device_fd == -1) {
10778801ccd0SSi-Wei Liu             error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
10788801ccd0SSi-Wei Liu             return -1;
10798801ccd0SSi-Wei Liu         }
10808801ccd0SSi-Wei Liu     }
10817327813dSJason Wang 
10828170ab3fSEugenio Pérez     r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
10838170ab3fSEugenio Pérez     if (unlikely(r < 0)) {
1084aed5da45SEugenio Pérez         goto err;
10858170ab3fSEugenio Pérez     }
10868170ab3fSEugenio Pérez 
10878170ab3fSEugenio Pérez     queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
108840237840SJason Wang                                                  &has_cvq, errp);
108940237840SJason Wang     if (queue_pairs < 0) {
10907327813dSJason Wang         qemu_close(vdpa_device_fd);
109140237840SJason Wang         return queue_pairs;
10927327813dSJason Wang     }
10937327813dSJason Wang 
1094bf7a2ad8SLongpeng     r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
1095bf7a2ad8SLongpeng     if (unlikely(r < 0)) {
1096bf7a2ad8SLongpeng         error_setg(errp, "vhost-vdpa: get iova range failed: %s",
1097bf7a2ad8SLongpeng                    strerror(-r));
1098bf7a2ad8SLongpeng         goto err;
1099bf7a2ad8SLongpeng     }
1100bf7a2ad8SLongpeng 
110100ef422eSEugenio Pérez     if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
110200ef422eSEugenio Pérez         goto err;
11031576dbb5SEugenio Pérez     }
11041576dbb5SEugenio Pérez 
110540237840SJason Wang     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
110640237840SJason Wang 
110740237840SJason Wang     for (i = 0; i < queue_pairs; i++) {
110840237840SJason Wang         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
11091576dbb5SEugenio Pérez                                      vdpa_device_fd, i, 2, true, opts->x_svq,
1110152128d6SEugenio Pérez                                      iova_range, features, errp);
111140237840SJason Wang         if (!ncs[i])
111240237840SJason Wang             goto err;
111340237840SJason Wang     }
111440237840SJason Wang 
111540237840SJason Wang     if (has_cvq) {
111640237840SJason Wang         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
11171576dbb5SEugenio Pérez                                  vdpa_device_fd, i, 1, false,
1118152128d6SEugenio Pérez                                  opts->x_svq, iova_range, features, errp);
111940237840SJason Wang         if (!nc)
112040237840SJason Wang             goto err;
112140237840SJason Wang     }
112240237840SJason Wang 
1123654790b6SJason Wang     return 0;
112440237840SJason Wang 
112540237840SJason Wang err:
112640237840SJason Wang     if (i) {
11279bd05507SSi-Wei Liu         for (i--; i >= 0; i--) {
11289bd05507SSi-Wei Liu             qemu_del_net_client(ncs[i]);
11299bd05507SSi-Wei Liu         }
113040237840SJason Wang     }
11311576dbb5SEugenio Pérez 
113240237840SJason Wang     qemu_close(vdpa_device_fd);
113340237840SJason Wang 
113440237840SJason Wang     return -1;
11351e0a84eaSCindy Lu }
1136