xref: /openbmc/qemu/net/vhost-vdpa.c (revision 0f2bb0bf)
11e0a84eaSCindy Lu /*
21e0a84eaSCindy Lu  * vhost-vdpa.c
31e0a84eaSCindy Lu  *
41e0a84eaSCindy Lu  * Copyright(c) 2017-2018 Intel Corporation.
51e0a84eaSCindy Lu  * Copyright(c) 2020 Red Hat, Inc.
61e0a84eaSCindy Lu  *
71e0a84eaSCindy Lu  * This work is licensed under the terms of the GNU GPL, version 2 or later.
81e0a84eaSCindy Lu  * See the COPYING file in the top-level directory.
91e0a84eaSCindy Lu  *
101e0a84eaSCindy Lu  */
111e0a84eaSCindy Lu 
121e0a84eaSCindy Lu #include "qemu/osdep.h"
131e0a84eaSCindy Lu #include "clients.h"
14bd907ae4SEugenio Pérez #include "hw/virtio/virtio-net.h"
151e0a84eaSCindy Lu #include "net/vhost_net.h"
161e0a84eaSCindy Lu #include "net/vhost-vdpa.h"
171e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h"
181e0a84eaSCindy Lu #include "qemu/config-file.h"
191e0a84eaSCindy Lu #include "qemu/error-report.h"
20bd907ae4SEugenio Pérez #include "qemu/log.h"
21bd907ae4SEugenio Pérez #include "qemu/memalign.h"
221e0a84eaSCindy Lu #include "qemu/option.h"
231e0a84eaSCindy Lu #include "qapi/error.h"
2440237840SJason Wang #include <linux/vhost.h>
251e0a84eaSCindy Lu #include <sys/ioctl.h>
261e0a84eaSCindy Lu #include <err.h>
271e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h"
281e0a84eaSCindy Lu #include "monitor/monitor.h"
2969498430SEugenio Pérez #include "migration/migration.h"
3069498430SEugenio Pérez #include "migration/misc.h"
311e0a84eaSCindy Lu #include "hw/virtio/vhost.h"
321e0a84eaSCindy Lu 
331e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */
341e0a84eaSCindy Lu typedef struct VhostVDPAState {
351e0a84eaSCindy Lu     NetClientState nc;
361e0a84eaSCindy Lu     struct vhost_vdpa vhost_vdpa;
3769498430SEugenio Pérez     Notifier migration_state;
381e0a84eaSCindy Lu     VHostNetState *vhost_net;
392df4dd31SEugenio Pérez 
402df4dd31SEugenio Pérez     /* Control commands shadow buffers */
4117fb889fSEugenio Pérez     void *cvq_cmd_out_buffer;
4217fb889fSEugenio Pérez     virtio_net_ctrl_ack *status;
4317fb889fSEugenio Pérez 
447f211a28SEugenio Pérez     /* The device always have SVQ enabled */
457f211a28SEugenio Pérez     bool always_svq;
461e0a84eaSCindy Lu     bool started;
471e0a84eaSCindy Lu } VhostVDPAState;
481e0a84eaSCindy Lu 
491e0a84eaSCindy Lu const int vdpa_feature_bits[] = {
501e0a84eaSCindy Lu     VIRTIO_F_NOTIFY_ON_EMPTY,
511e0a84eaSCindy Lu     VIRTIO_RING_F_INDIRECT_DESC,
521e0a84eaSCindy Lu     VIRTIO_RING_F_EVENT_IDX,
531e0a84eaSCindy Lu     VIRTIO_F_ANY_LAYOUT,
541e0a84eaSCindy Lu     VIRTIO_F_VERSION_1,
551e0a84eaSCindy Lu     VIRTIO_NET_F_CSUM,
561e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_CSUM,
571e0a84eaSCindy Lu     VIRTIO_NET_F_GSO,
581e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO4,
591e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO6,
601e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ECN,
611e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_UFO,
621e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO4,
631e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO6,
641e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_ECN,
651e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_UFO,
661e0a84eaSCindy Lu     VIRTIO_NET_F_MRG_RXBUF,
671e0a84eaSCindy Lu     VIRTIO_NET_F_MTU,
6840237840SJason Wang     VIRTIO_NET_F_CTRL_RX,
6940237840SJason Wang     VIRTIO_NET_F_CTRL_RX_EXTRA,
7040237840SJason Wang     VIRTIO_NET_F_CTRL_VLAN,
7140237840SJason Wang     VIRTIO_NET_F_CTRL_MAC_ADDR,
7240237840SJason Wang     VIRTIO_NET_F_RSS,
7340237840SJason Wang     VIRTIO_NET_F_MQ,
7440237840SJason Wang     VIRTIO_NET_F_CTRL_VQ,
751e0a84eaSCindy Lu     VIRTIO_F_IOMMU_PLATFORM,
761e0a84eaSCindy Lu     VIRTIO_F_RING_PACKED,
77562a7d23SStefano Garzarella     VIRTIO_F_RING_RESET,
780145c393SAndrew Melnychenko     VIRTIO_NET_F_RSS,
790145c393SAndrew Melnychenko     VIRTIO_NET_F_HASH_REPORT,
809aa47eddSSi-Wei Liu     VIRTIO_NET_F_STATUS,
811e0a84eaSCindy Lu     VHOST_INVALID_FEATURE_BIT
821e0a84eaSCindy Lu };
831e0a84eaSCindy Lu 
841576dbb5SEugenio Pérez /** Supported device specific feature bits with SVQ */
851576dbb5SEugenio Pérez static const uint64_t vdpa_svq_device_features =
861576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CSUM) |
871576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
881576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MTU) |
891576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MAC) |
901576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
911576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
921576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
931576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
941576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
951576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
961576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
971576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
981576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
991576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STATUS) |
1001576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
10172b99a87SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_MQ) |
1021576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
1031576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
104609ab4c3SEugenio Pérez     /* VHOST_F_LOG_ALL is exposed by SVQ */
105609ab4c3SEugenio Pérez     BIT_ULL(VHOST_F_LOG_ALL) |
1061576dbb5SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
1070d74e2b7SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_STANDBY) |
1080d74e2b7SEugenio Pérez     BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
1091576dbb5SEugenio Pérez 
/* Address space id assigned to the control virtqueue group when isolated */
#define VHOST_VDPA_NET_CVQ_ASID 1
111c1a10086SEugenio Pérez 
1121e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
1131e0a84eaSCindy Lu {
1141e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1151e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1161e0a84eaSCindy Lu     return s->vhost_net;
1171e0a84eaSCindy Lu }
1181e0a84eaSCindy Lu 
11936e46472SEugenio Pérez static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
12036e46472SEugenio Pérez {
12136e46472SEugenio Pérez     uint64_t invalid_dev_features =
12236e46472SEugenio Pérez         features & ~vdpa_svq_device_features &
12336e46472SEugenio Pérez         /* Transport are all accepted at this point */
12436e46472SEugenio Pérez         ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
12536e46472SEugenio Pérez                          VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
12636e46472SEugenio Pérez 
12736e46472SEugenio Pérez     if (invalid_dev_features) {
12836e46472SEugenio Pérez         error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
12936e46472SEugenio Pérez                    invalid_dev_features);
130258a0394SEugenio Pérez         return false;
13136e46472SEugenio Pérez     }
13236e46472SEugenio Pérez 
133258a0394SEugenio Pérez     return vhost_svq_valid_features(features, errp);
13436e46472SEugenio Pérez }
13536e46472SEugenio Pérez 
1361e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
1371e0a84eaSCindy Lu {
1381e0a84eaSCindy Lu     uint32_t device_id;
1391e0a84eaSCindy Lu     int ret;
1401e0a84eaSCindy Lu     struct vhost_dev *hdev;
1411e0a84eaSCindy Lu 
1421e0a84eaSCindy Lu     hdev = (struct vhost_dev *)&net->dev;
1431e0a84eaSCindy Lu     ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
1441e0a84eaSCindy Lu     if (device_id != VIRTIO_ID_NET) {
1451e0a84eaSCindy Lu         return -ENOTSUP;
1461e0a84eaSCindy Lu     }
1471e0a84eaSCindy Lu     return ret;
1481e0a84eaSCindy Lu }
1491e0a84eaSCindy Lu 
15040237840SJason Wang static int vhost_vdpa_add(NetClientState *ncs, void *be,
15140237840SJason Wang                           int queue_pair_index, int nvqs)
1521e0a84eaSCindy Lu {
1531e0a84eaSCindy Lu     VhostNetOptions options;
1541e0a84eaSCindy Lu     struct vhost_net *net = NULL;
1551e0a84eaSCindy Lu     VhostVDPAState *s;
1561e0a84eaSCindy Lu     int ret;
1571e0a84eaSCindy Lu 
1581e0a84eaSCindy Lu     options.backend_type = VHOST_BACKEND_TYPE_VDPA;
1591e0a84eaSCindy Lu     assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1601e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, ncs);
1611e0a84eaSCindy Lu     options.net_backend = ncs;
1621e0a84eaSCindy Lu     options.opaque      = be;
1631e0a84eaSCindy Lu     options.busyloop_timeout = 0;
16440237840SJason Wang     options.nvqs = nvqs;
1651e0a84eaSCindy Lu 
1661e0a84eaSCindy Lu     net = vhost_net_init(&options);
1671e0a84eaSCindy Lu     if (!net) {
1681e0a84eaSCindy Lu         error_report("failed to init vhost_net for queue");
169a97ef87aSJason Wang         goto err_init;
1701e0a84eaSCindy Lu     }
1711e0a84eaSCindy Lu     s->vhost_net = net;
1721e0a84eaSCindy Lu     ret = vhost_vdpa_net_check_device_id(net);
1731e0a84eaSCindy Lu     if (ret) {
174a97ef87aSJason Wang         goto err_check;
1751e0a84eaSCindy Lu     }
1761e0a84eaSCindy Lu     return 0;
177a97ef87aSJason Wang err_check:
1781e0a84eaSCindy Lu     vhost_net_cleanup(net);
179ab36edcfSJason Wang     g_free(net);
180a97ef87aSJason Wang err_init:
1811e0a84eaSCindy Lu     return -1;
1821e0a84eaSCindy Lu }
1831e0a84eaSCindy Lu 
1841e0a84eaSCindy Lu static void vhost_vdpa_cleanup(NetClientState *nc)
1851e0a84eaSCindy Lu {
1861e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1871e0a84eaSCindy Lu 
1882df4dd31SEugenio Pérez     qemu_vfree(s->cvq_cmd_out_buffer);
18917fb889fSEugenio Pérez     qemu_vfree(s->status);
1901e0a84eaSCindy Lu     if (s->vhost_net) {
1911e0a84eaSCindy Lu         vhost_net_cleanup(s->vhost_net);
1921e0a84eaSCindy Lu         g_free(s->vhost_net);
1931e0a84eaSCindy Lu         s->vhost_net = NULL;
1941e0a84eaSCindy Lu     }
19557b3a7d8SCindy Lu      if (s->vhost_vdpa.device_fd >= 0) {
19657b3a7d8SCindy Lu         qemu_close(s->vhost_vdpa.device_fd);
19757b3a7d8SCindy Lu         s->vhost_vdpa.device_fd = -1;
19857b3a7d8SCindy Lu     }
1991e0a84eaSCindy Lu }
2001e0a84eaSCindy Lu 
2011e0a84eaSCindy Lu static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
2021e0a84eaSCindy Lu {
2031e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
2041e0a84eaSCindy Lu 
2051e0a84eaSCindy Lu     return true;
2061e0a84eaSCindy Lu }
2071e0a84eaSCindy Lu 
2081e0a84eaSCindy Lu static bool vhost_vdpa_has_ufo(NetClientState *nc)
2091e0a84eaSCindy Lu {
2101e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
2111e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
2121e0a84eaSCindy Lu     uint64_t features = 0;
2131e0a84eaSCindy Lu     features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
2141e0a84eaSCindy Lu     features = vhost_net_get_features(s->vhost_net, features);
2151e0a84eaSCindy Lu     return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
2161e0a84eaSCindy Lu 
2171e0a84eaSCindy Lu }
2181e0a84eaSCindy Lu 
219ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
220ee8a1c63SKevin Wolf                                        Error **errp)
221ee8a1c63SKevin Wolf {
222ee8a1c63SKevin Wolf     const char *driver = object_class_get_name(oc);
223ee8a1c63SKevin Wolf 
224ee8a1c63SKevin Wolf     if (!g_str_has_prefix(driver, "virtio-net-")) {
225ee8a1c63SKevin Wolf         error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
226ee8a1c63SKevin Wolf         return false;
227ee8a1c63SKevin Wolf     }
228ee8a1c63SKevin Wolf 
229ee8a1c63SKevin Wolf     return true;
230ee8a1c63SKevin Wolf }
231ee8a1c63SKevin Wolf 
232846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */
233846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
234846a1e85SEugenio Pérez                                   size_t size)
235846a1e85SEugenio Pérez {
236bc5add1dSSi-Wei Liu     return size;
237846a1e85SEugenio Pérez }
238846a1e85SEugenio Pérez 
23900ef422eSEugenio Pérez /** From any vdpa net client, get the netclient of the first queue pair */
24000ef422eSEugenio Pérez static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
24100ef422eSEugenio Pérez {
24200ef422eSEugenio Pérez     NICState *nic = qemu_get_nic(s->nc.peer);
24300ef422eSEugenio Pérez     NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
24400ef422eSEugenio Pérez 
24500ef422eSEugenio Pérez     return DO_UPCAST(VhostVDPAState, nc, nc0);
24600ef422eSEugenio Pérez }
24700ef422eSEugenio Pérez 
24869498430SEugenio Pérez static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
24969498430SEugenio Pérez {
25069498430SEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
25169498430SEugenio Pérez     VirtIONet *n;
25269498430SEugenio Pérez     VirtIODevice *vdev;
25369498430SEugenio Pérez     int data_queue_pairs, cvq, r;
25469498430SEugenio Pérez 
25569498430SEugenio Pérez     /* We are only called on the first data vqs and only if x-svq is not set */
25669498430SEugenio Pérez     if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
25769498430SEugenio Pérez         return;
25869498430SEugenio Pérez     }
25969498430SEugenio Pérez 
26069498430SEugenio Pérez     vdev = v->dev->vdev;
26169498430SEugenio Pérez     n = VIRTIO_NET(vdev);
26269498430SEugenio Pérez     if (!n->vhost_started) {
26369498430SEugenio Pérez         return;
26469498430SEugenio Pérez     }
26569498430SEugenio Pérez 
26669498430SEugenio Pérez     data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
26769498430SEugenio Pérez     cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
26869498430SEugenio Pérez                                   n->max_ncs - n->max_queue_pairs : 0;
26969498430SEugenio Pérez     /*
27069498430SEugenio Pérez      * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
27169498430SEugenio Pérez      * in the future and resume the device if read-only operations between
27269498430SEugenio Pérez      * suspend and reset goes wrong.
27369498430SEugenio Pérez      */
27469498430SEugenio Pérez     vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
27569498430SEugenio Pérez 
27669498430SEugenio Pérez     /* Start will check migration setup_or_active to configure or not SVQ */
27769498430SEugenio Pérez     r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
27869498430SEugenio Pérez     if (unlikely(r < 0)) {
27969498430SEugenio Pérez         error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
28069498430SEugenio Pérez     }
28169498430SEugenio Pérez }
28269498430SEugenio Pérez 
28369498430SEugenio Pérez static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
28469498430SEugenio Pérez {
28569498430SEugenio Pérez     MigrationState *migration = data;
28669498430SEugenio Pérez     VhostVDPAState *s = container_of(notifier, VhostVDPAState,
28769498430SEugenio Pérez                                      migration_state);
28869498430SEugenio Pérez 
28969498430SEugenio Pérez     if (migration_in_setup(migration)) {
29069498430SEugenio Pérez         vhost_vdpa_net_log_global_enable(s, true);
29169498430SEugenio Pérez     } else if (migration_has_failed(migration)) {
29269498430SEugenio Pérez         vhost_vdpa_net_log_global_enable(s, false);
29369498430SEugenio Pérez     }
29469498430SEugenio Pérez }
29569498430SEugenio Pérez 
29600ef422eSEugenio Pérez static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
29700ef422eSEugenio Pérez {
29800ef422eSEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
29900ef422eSEugenio Pérez 
30069498430SEugenio Pérez     add_migration_state_change_notifier(&s->migration_state);
30100ef422eSEugenio Pérez     if (v->shadow_vqs_enabled) {
30200ef422eSEugenio Pérez         v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
30300ef422eSEugenio Pérez                                            v->iova_range.last);
30400ef422eSEugenio Pérez     }
30500ef422eSEugenio Pérez }
30600ef422eSEugenio Pérez 
30700ef422eSEugenio Pérez static int vhost_vdpa_net_data_start(NetClientState *nc)
30800ef422eSEugenio Pérez {
30900ef422eSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
31000ef422eSEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
31100ef422eSEugenio Pérez 
31200ef422eSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
31300ef422eSEugenio Pérez 
31469498430SEugenio Pérez     if (s->always_svq ||
31569498430SEugenio Pérez         migration_is_setup_or_active(migrate_get_current()->state)) {
31669498430SEugenio Pérez         v->shadow_vqs_enabled = true;
31769498430SEugenio Pérez         v->shadow_data = true;
31869498430SEugenio Pérez     } else {
31969498430SEugenio Pérez         v->shadow_vqs_enabled = false;
32069498430SEugenio Pérez         v->shadow_data = false;
32169498430SEugenio Pérez     }
32269498430SEugenio Pérez 
32300ef422eSEugenio Pérez     if (v->index == 0) {
32400ef422eSEugenio Pérez         vhost_vdpa_net_data_start_first(s);
32500ef422eSEugenio Pérez         return 0;
32600ef422eSEugenio Pérez     }
32700ef422eSEugenio Pérez 
32800ef422eSEugenio Pérez     if (v->shadow_vqs_enabled) {
32900ef422eSEugenio Pérez         VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
33000ef422eSEugenio Pérez         v->iova_tree = s0->vhost_vdpa.iova_tree;
33100ef422eSEugenio Pérez     }
33200ef422eSEugenio Pérez 
33300ef422eSEugenio Pérez     return 0;
33400ef422eSEugenio Pérez }
33500ef422eSEugenio Pérez 
33600ef422eSEugenio Pérez static void vhost_vdpa_net_client_stop(NetClientState *nc)
33700ef422eSEugenio Pérez {
33800ef422eSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
33900ef422eSEugenio Pérez     struct vhost_dev *dev;
34000ef422eSEugenio Pérez 
34100ef422eSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
34200ef422eSEugenio Pérez 
34369498430SEugenio Pérez     if (s->vhost_vdpa.index == 0) {
34469498430SEugenio Pérez         remove_migration_state_change_notifier(&s->migration_state);
34569498430SEugenio Pérez     }
34669498430SEugenio Pérez 
34700ef422eSEugenio Pérez     dev = s->vhost_vdpa.dev;
34800ef422eSEugenio Pérez     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
34900ef422eSEugenio Pérez         g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
35000ef422eSEugenio Pérez     }
35100ef422eSEugenio Pérez }
35200ef422eSEugenio Pérez 
3531e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = {
3541e0a84eaSCindy Lu         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
3551e0a84eaSCindy Lu         .size = sizeof(VhostVDPAState),
356846a1e85SEugenio Pérez         .receive = vhost_vdpa_receive,
35700ef422eSEugenio Pérez         .start = vhost_vdpa_net_data_start,
35800ef422eSEugenio Pérez         .stop = vhost_vdpa_net_client_stop,
3591e0a84eaSCindy Lu         .cleanup = vhost_vdpa_cleanup,
3601e0a84eaSCindy Lu         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
3611e0a84eaSCindy Lu         .has_ufo = vhost_vdpa_has_ufo,
362ee8a1c63SKevin Wolf         .check_peer_type = vhost_vdpa_check_peer_type,
3631e0a84eaSCindy Lu };
3641e0a84eaSCindy Lu 
365*0f2bb0bfSEugenio Pérez /**
366*0f2bb0bfSEugenio Pérez  * Get vring virtqueue group
367*0f2bb0bfSEugenio Pérez  *
368*0f2bb0bfSEugenio Pérez  * @device_fd  vdpa device fd
369*0f2bb0bfSEugenio Pérez  * @vq_index   Virtqueue index
370*0f2bb0bfSEugenio Pérez  *
371*0f2bb0bfSEugenio Pérez  * Return -errno in case of error, or vq group if success.
372*0f2bb0bfSEugenio Pérez  */
373c1a10086SEugenio Pérez static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
374c1a10086SEugenio Pérez {
375c1a10086SEugenio Pérez     struct vhost_vring_state state = {
376c1a10086SEugenio Pérez         .index = vq_index,
377c1a10086SEugenio Pérez     };
378c1a10086SEugenio Pérez     int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
379c1a10086SEugenio Pérez 
380c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
381*0f2bb0bfSEugenio Pérez         r = -errno;
382c1a10086SEugenio Pérez         error_report("Cannot get VQ %u group: %s", vq_index,
383c1a10086SEugenio Pérez                      g_strerror(errno));
384c1a10086SEugenio Pérez         return r;
385c1a10086SEugenio Pérez     }
386c1a10086SEugenio Pérez 
387c1a10086SEugenio Pérez     return state.num;
388c1a10086SEugenio Pérez }
389c1a10086SEugenio Pérez 
390c1a10086SEugenio Pérez static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
391c1a10086SEugenio Pérez                                            unsigned vq_group,
392c1a10086SEugenio Pérez                                            unsigned asid_num)
393c1a10086SEugenio Pérez {
394c1a10086SEugenio Pérez     struct vhost_vring_state asid = {
395c1a10086SEugenio Pérez         .index = vq_group,
396c1a10086SEugenio Pérez         .num = asid_num,
397c1a10086SEugenio Pérez     };
398c1a10086SEugenio Pérez     int r;
399c1a10086SEugenio Pérez 
400c1a10086SEugenio Pérez     r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
401c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
402c1a10086SEugenio Pérez         error_report("Can't set vq group %u asid %u, errno=%d (%s)",
403c1a10086SEugenio Pérez                      asid.index, asid.num, errno, g_strerror(errno));
404c1a10086SEugenio Pérez     }
405c1a10086SEugenio Pérez     return r;
406c1a10086SEugenio Pérez }
407c1a10086SEugenio Pérez 
4082df4dd31SEugenio Pérez static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
4092df4dd31SEugenio Pérez {
4102df4dd31SEugenio Pérez     VhostIOVATree *tree = v->iova_tree;
4112df4dd31SEugenio Pérez     DMAMap needle = {
4122df4dd31SEugenio Pérez         /*
4132df4dd31SEugenio Pérez          * No need to specify size or to look for more translations since
4142df4dd31SEugenio Pérez          * this contiguous chunk was allocated by us.
4152df4dd31SEugenio Pérez          */
4162df4dd31SEugenio Pérez         .translated_addr = (hwaddr)(uintptr_t)addr,
4172df4dd31SEugenio Pérez     };
4182df4dd31SEugenio Pérez     const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
4192df4dd31SEugenio Pérez     int r;
4202df4dd31SEugenio Pérez 
4212df4dd31SEugenio Pérez     if (unlikely(!map)) {
4222df4dd31SEugenio Pérez         error_report("Cannot locate expected map");
4232df4dd31SEugenio Pérez         return;
4242df4dd31SEugenio Pérez     }
4252df4dd31SEugenio Pérez 
426cd831ed5SEugenio Pérez     r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
4272df4dd31SEugenio Pérez     if (unlikely(r != 0)) {
4282df4dd31SEugenio Pérez         error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
4292df4dd31SEugenio Pérez     }
4302df4dd31SEugenio Pérez 
43169292a8eSEugenio Pérez     vhost_iova_tree_remove(tree, *map);
4322df4dd31SEugenio Pérez }
4332df4dd31SEugenio Pérez 
4342df4dd31SEugenio Pérez static size_t vhost_vdpa_net_cvq_cmd_len(void)
4352df4dd31SEugenio Pérez {
4362df4dd31SEugenio Pérez     /*
4372df4dd31SEugenio Pérez      * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
4382df4dd31SEugenio Pérez      * In buffer is always 1 byte, so it should fit here
4392df4dd31SEugenio Pérez      */
4402df4dd31SEugenio Pérez     return sizeof(struct virtio_net_ctrl_hdr) +
4412df4dd31SEugenio Pérez            2 * sizeof(struct virtio_net_ctrl_mac) +
4422df4dd31SEugenio Pérez            MAC_TABLE_ENTRIES * ETH_ALEN;
4432df4dd31SEugenio Pérez }
4442df4dd31SEugenio Pérez 
/* Control command buffer length rounded up to the host page size. */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
4492df4dd31SEugenio Pérez 
4507a7f87e9SEugenio Pérez /** Map CVQ buffer. */
4517a7f87e9SEugenio Pérez static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
4527a7f87e9SEugenio Pérez                                   bool write)
4532df4dd31SEugenio Pérez {
4542df4dd31SEugenio Pérez     DMAMap map = {};
4552df4dd31SEugenio Pérez     int r;
4562df4dd31SEugenio Pérez 
4572df4dd31SEugenio Pérez     map.translated_addr = (hwaddr)(uintptr_t)buf;
4587a7f87e9SEugenio Pérez     map.size = size - 1;
4592df4dd31SEugenio Pérez     map.perm = write ? IOMMU_RW : IOMMU_RO,
4602df4dd31SEugenio Pérez     r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
4612df4dd31SEugenio Pérez     if (unlikely(r != IOVA_OK)) {
4622df4dd31SEugenio Pérez         error_report("Cannot map injected element");
4637a7f87e9SEugenio Pérez         return r;
4642df4dd31SEugenio Pérez     }
4652df4dd31SEugenio Pérez 
466cd831ed5SEugenio Pérez     r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
467cd831ed5SEugenio Pérez                            vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
4682df4dd31SEugenio Pérez     if (unlikely(r < 0)) {
4692df4dd31SEugenio Pérez         goto dma_map_err;
4702df4dd31SEugenio Pérez     }
4712df4dd31SEugenio Pérez 
4727a7f87e9SEugenio Pérez     return 0;
4732df4dd31SEugenio Pérez 
4742df4dd31SEugenio Pérez dma_map_err:
47569292a8eSEugenio Pérez     vhost_iova_tree_remove(v->iova_tree, map);
4767a7f87e9SEugenio Pérez     return r;
4772df4dd31SEugenio Pérez }
4782df4dd31SEugenio Pérez 
4797a7f87e9SEugenio Pérez static int vhost_vdpa_net_cvq_start(NetClientState *nc)
4802df4dd31SEugenio Pérez {
48100ef422eSEugenio Pérez     VhostVDPAState *s, *s0;
482c1a10086SEugenio Pérez     struct vhost_vdpa *v;
483c1a10086SEugenio Pérez     uint64_t backend_features;
484c1a10086SEugenio Pérez     int64_t cvq_group;
485c1a10086SEugenio Pérez     int cvq_index, r;
4862df4dd31SEugenio Pérez 
4877a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
4887a7f87e9SEugenio Pérez 
4897a7f87e9SEugenio Pérez     s = DO_UPCAST(VhostVDPAState, nc, nc);
490c1a10086SEugenio Pérez     v = &s->vhost_vdpa;
491c1a10086SEugenio Pérez 
49269498430SEugenio Pérez     s0 = vhost_vdpa_net_first_nc_vdpa(s);
49369498430SEugenio Pérez     v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
494c1a10086SEugenio Pérez     v->shadow_vqs_enabled = s->always_svq;
495c1a10086SEugenio Pérez     s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
496c1a10086SEugenio Pérez 
49769498430SEugenio Pérez     if (s->vhost_vdpa.shadow_data) {
498c1a10086SEugenio Pérez         /* SVQ is already configured for all virtqueues */
499c1a10086SEugenio Pérez         goto out;
500c1a10086SEugenio Pérez     }
501c1a10086SEugenio Pérez 
502c1a10086SEugenio Pérez     /*
503c1a10086SEugenio Pérez      * If we early return in these cases SVQ will not be enabled. The migration
504c1a10086SEugenio Pérez      * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
505c1a10086SEugenio Pérez      *
506c1a10086SEugenio Pérez      * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
507c1a10086SEugenio Pérez      * yet.
508c1a10086SEugenio Pérez      */
509c1a10086SEugenio Pérez     r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
510c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
511c1a10086SEugenio Pérez         error_report("Cannot get vdpa backend_features: %s(%d)",
512c1a10086SEugenio Pérez             g_strerror(errno), errno);
513c1a10086SEugenio Pérez         return -1;
514c1a10086SEugenio Pérez     }
515525ae115SEugenio Pérez     if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
516c1a10086SEugenio Pérez         !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
517c1a10086SEugenio Pérez         return 0;
518c1a10086SEugenio Pérez     }
519c1a10086SEugenio Pérez 
520c1a10086SEugenio Pérez     /*
521c1a10086SEugenio Pérez      * Check if all the virtqueues of the virtio device are in a different vq
522c1a10086SEugenio Pérez      * than the last vq. VQ group of last group passed in cvq_group.
523c1a10086SEugenio Pérez      */
524c1a10086SEugenio Pérez     cvq_index = v->dev->vq_index_end - 1;
525c1a10086SEugenio Pérez     cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
526c1a10086SEugenio Pérez     if (unlikely(cvq_group < 0)) {
527c1a10086SEugenio Pérez         return cvq_group;
528c1a10086SEugenio Pérez     }
529c1a10086SEugenio Pérez     for (int i = 0; i < cvq_index; ++i) {
530c1a10086SEugenio Pérez         int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
531c1a10086SEugenio Pérez 
532c1a10086SEugenio Pérez         if (unlikely(group < 0)) {
533c1a10086SEugenio Pérez             return group;
534c1a10086SEugenio Pérez         }
535c1a10086SEugenio Pérez 
536c1a10086SEugenio Pérez         if (group == cvq_group) {
537c1a10086SEugenio Pérez             return 0;
538c1a10086SEugenio Pérez         }
539c1a10086SEugenio Pérez     }
540c1a10086SEugenio Pérez 
541c1a10086SEugenio Pérez     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
542c1a10086SEugenio Pérez     if (unlikely(r < 0)) {
543c1a10086SEugenio Pérez         return r;
544c1a10086SEugenio Pérez     }
545c1a10086SEugenio Pérez 
546c1a10086SEugenio Pérez     v->shadow_vqs_enabled = true;
547c1a10086SEugenio Pérez     s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
548c1a10086SEugenio Pérez 
549c1a10086SEugenio Pérez out:
5507a7f87e9SEugenio Pérez     if (!s->vhost_vdpa.shadow_vqs_enabled) {
5517a7f87e9SEugenio Pérez         return 0;
5522df4dd31SEugenio Pérez     }
5532df4dd31SEugenio Pérez 
55400ef422eSEugenio Pérez     if (s0->vhost_vdpa.iova_tree) {
55500ef422eSEugenio Pérez         /*
55600ef422eSEugenio Pérez          * SVQ is already configured for all virtqueues.  Reuse IOVA tree for
55700ef422eSEugenio Pérez          * simplicity, whether CVQ shares ASID with guest or not, because:
55800ef422eSEugenio Pérez          * - Memory listener need access to guest's memory addresses allocated
55900ef422eSEugenio Pérez          *   in the IOVA tree.
56000ef422eSEugenio Pérez          * - There should be plenty of IOVA address space for both ASID not to
56100ef422eSEugenio Pérez          *   worry about collisions between them.  Guest's translations are
56200ef422eSEugenio Pérez          *   still validated with virtio virtqueue_pop so there is no risk for
56300ef422eSEugenio Pérez          *   the guest to access memory that it shouldn't.
56400ef422eSEugenio Pérez          *
56500ef422eSEugenio Pérez          * To allocate a iova tree per ASID is doable but it complicates the
56600ef422eSEugenio Pérez          * code and it is not worth it for the moment.
56700ef422eSEugenio Pérez          */
56800ef422eSEugenio Pérez         v->iova_tree = s0->vhost_vdpa.iova_tree;
56900ef422eSEugenio Pérez     } else {
57000ef422eSEugenio Pérez         v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
57100ef422eSEugenio Pérez                                            v->iova_range.last);
57200ef422eSEugenio Pérez     }
57300ef422eSEugenio Pérez 
5747a7f87e9SEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
5757a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), false);
5767a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
5777a7f87e9SEugenio Pérez         return r;
5787a7f87e9SEugenio Pérez     }
5797a7f87e9SEugenio Pérez 
58017fb889fSEugenio Pérez     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
5817a7f87e9SEugenio Pérez                                vhost_vdpa_net_cvq_cmd_page_len(), true);
5827a7f87e9SEugenio Pérez     if (unlikely(r < 0)) {
5832df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
5842df4dd31SEugenio Pérez     }
5852df4dd31SEugenio Pérez 
5867a7f87e9SEugenio Pérez     return r;
5877a7f87e9SEugenio Pérez }
5887a7f87e9SEugenio Pérez 
5897a7f87e9SEugenio Pérez static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
5907a7f87e9SEugenio Pérez {
5917a7f87e9SEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
5927a7f87e9SEugenio Pérez 
5937a7f87e9SEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
5947a7f87e9SEugenio Pérez 
5957a7f87e9SEugenio Pérez     if (s->vhost_vdpa.shadow_vqs_enabled) {
5967a7f87e9SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
59717fb889fSEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
598c1a10086SEugenio Pérez     }
59900ef422eSEugenio Pérez 
60000ef422eSEugenio Pérez     vhost_vdpa_net_client_stop(nc);
6012df4dd31SEugenio Pérez }
6022df4dd31SEugenio Pérez 
603be4278b6SEugenio Pérez static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
604be4278b6SEugenio Pérez                                       size_t in_len)
605be4278b6SEugenio Pérez {
606be4278b6SEugenio Pérez     /* Buffers for the device */
607be4278b6SEugenio Pérez     const struct iovec out = {
608be4278b6SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
609be4278b6SEugenio Pérez         .iov_len = out_len,
610be4278b6SEugenio Pérez     };
611be4278b6SEugenio Pérez     const struct iovec in = {
61217fb889fSEugenio Pérez         .iov_base = s->status,
613be4278b6SEugenio Pérez         .iov_len = sizeof(virtio_net_ctrl_ack),
614be4278b6SEugenio Pérez     };
615be4278b6SEugenio Pérez     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
616be4278b6SEugenio Pérez     int r;
617be4278b6SEugenio Pérez 
618be4278b6SEugenio Pérez     r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
619be4278b6SEugenio Pérez     if (unlikely(r != 0)) {
620be4278b6SEugenio Pérez         if (unlikely(r == -ENOSPC)) {
621be4278b6SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
622be4278b6SEugenio Pérez                           __func__);
623be4278b6SEugenio Pérez         }
624be4278b6SEugenio Pérez         return r;
625be4278b6SEugenio Pérez     }
626be4278b6SEugenio Pérez 
627be4278b6SEugenio Pérez     /*
628be4278b6SEugenio Pérez      * We can poll here since we've had BQL from the time we sent the
629be4278b6SEugenio Pérez      * descriptor. Also, we need to take the answer before SVQ pulls by itself,
630be4278b6SEugenio Pérez      * when BQL is released
631be4278b6SEugenio Pérez      */
632be4278b6SEugenio Pérez     return vhost_svq_poll(svq);
633be4278b6SEugenio Pérez }
634be4278b6SEugenio Pérez 
635f73c0c43SEugenio Pérez static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
636f73c0c43SEugenio Pérez                                        uint8_t cmd, const void *data,
637f73c0c43SEugenio Pérez                                        size_t data_size)
638f73c0c43SEugenio Pérez {
639f73c0c43SEugenio Pérez     const struct virtio_net_ctrl_hdr ctrl = {
640f73c0c43SEugenio Pérez         .class = class,
641f73c0c43SEugenio Pérez         .cmd = cmd,
642f73c0c43SEugenio Pérez     };
643f73c0c43SEugenio Pérez 
644f73c0c43SEugenio Pérez     assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
645f73c0c43SEugenio Pérez 
646f73c0c43SEugenio Pérez     memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
647f73c0c43SEugenio Pérez     memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
648f73c0c43SEugenio Pérez 
649f73c0c43SEugenio Pérez     return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
650f73c0c43SEugenio Pérez                                   sizeof(virtio_net_ctrl_ack));
651f73c0c43SEugenio Pérez }
652f73c0c43SEugenio Pérez 
653f73c0c43SEugenio Pérez static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
654f73c0c43SEugenio Pérez {
655f73c0c43SEugenio Pérez     uint64_t features = n->parent_obj.guest_features;
656f73c0c43SEugenio Pérez     if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
657f73c0c43SEugenio Pérez         ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
658f73c0c43SEugenio Pérez                                                   VIRTIO_NET_CTRL_MAC_ADDR_SET,
659f73c0c43SEugenio Pérez                                                   n->mac, sizeof(n->mac));
660f73c0c43SEugenio Pérez         if (unlikely(dev_written < 0)) {
661f73c0c43SEugenio Pérez             return dev_written;
662f73c0c43SEugenio Pérez         }
663f73c0c43SEugenio Pérez 
664f73c0c43SEugenio Pérez         return *s->status != VIRTIO_NET_OK;
665f73c0c43SEugenio Pérez     }
666f73c0c43SEugenio Pérez 
667f73c0c43SEugenio Pérez     return 0;
668f73c0c43SEugenio Pérez }
669f73c0c43SEugenio Pérez 
670f64c7cdaSEugenio Pérez static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
671f64c7cdaSEugenio Pérez                                   const VirtIONet *n)
672f64c7cdaSEugenio Pérez {
673f64c7cdaSEugenio Pérez     struct virtio_net_ctrl_mq mq;
674f64c7cdaSEugenio Pérez     uint64_t features = n->parent_obj.guest_features;
675f64c7cdaSEugenio Pérez     ssize_t dev_written;
676f64c7cdaSEugenio Pérez 
677f64c7cdaSEugenio Pérez     if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
678f64c7cdaSEugenio Pérez         return 0;
679f64c7cdaSEugenio Pérez     }
680f64c7cdaSEugenio Pérez 
681f64c7cdaSEugenio Pérez     mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
682f64c7cdaSEugenio Pérez     dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
683f64c7cdaSEugenio Pérez                                           VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
684f64c7cdaSEugenio Pérez                                           sizeof(mq));
685f64c7cdaSEugenio Pérez     if (unlikely(dev_written < 0)) {
686f64c7cdaSEugenio Pérez         return dev_written;
687f64c7cdaSEugenio Pérez     }
688f64c7cdaSEugenio Pérez 
689f64c7cdaSEugenio Pérez     return *s->status != VIRTIO_NET_OK;
690f64c7cdaSEugenio Pérez }
691f64c7cdaSEugenio Pérez 
692dd036d8dSEugenio Pérez static int vhost_vdpa_net_load(NetClientState *nc)
693dd036d8dSEugenio Pérez {
694dd036d8dSEugenio Pérez     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
695f73c0c43SEugenio Pérez     struct vhost_vdpa *v = &s->vhost_vdpa;
696dd036d8dSEugenio Pérez     const VirtIONet *n;
697f73c0c43SEugenio Pérez     int r;
698dd036d8dSEugenio Pérez 
699dd036d8dSEugenio Pérez     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
700dd036d8dSEugenio Pérez 
701dd036d8dSEugenio Pérez     if (!v->shadow_vqs_enabled) {
702dd036d8dSEugenio Pérez         return 0;
703dd036d8dSEugenio Pérez     }
704dd036d8dSEugenio Pérez 
705dd036d8dSEugenio Pérez     n = VIRTIO_NET(v->dev->vdev);
706f73c0c43SEugenio Pérez     r = vhost_vdpa_net_load_mac(s, n);
707f73c0c43SEugenio Pérez     if (unlikely(r < 0)) {
708f73c0c43SEugenio Pérez         return r;
709dd036d8dSEugenio Pérez     }
710f64c7cdaSEugenio Pérez     r = vhost_vdpa_net_load_mq(s, n);
711f64c7cdaSEugenio Pérez     if (unlikely(r)) {
712f64c7cdaSEugenio Pérez         return r;
713f64c7cdaSEugenio Pérez     }
714dd036d8dSEugenio Pérez 
715dd036d8dSEugenio Pérez     return 0;
716dd036d8dSEugenio Pérez }
717dd036d8dSEugenio Pérez 
718f8972b56SEugenio Pérez static NetClientInfo net_vhost_vdpa_cvq_info = {
719f8972b56SEugenio Pérez     .type = NET_CLIENT_DRIVER_VHOST_VDPA,
720f8972b56SEugenio Pérez     .size = sizeof(VhostVDPAState),
721f8972b56SEugenio Pérez     .receive = vhost_vdpa_receive,
7227a7f87e9SEugenio Pérez     .start = vhost_vdpa_net_cvq_start,
723dd036d8dSEugenio Pérez     .load = vhost_vdpa_net_load,
7247a7f87e9SEugenio Pérez     .stop = vhost_vdpa_net_cvq_stop,
725f8972b56SEugenio Pérez     .cleanup = vhost_vdpa_cleanup,
726f8972b56SEugenio Pérez     .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
727f8972b56SEugenio Pérez     .has_ufo = vhost_vdpa_has_ufo,
728f8972b56SEugenio Pérez     .check_peer_type = vhost_vdpa_check_peer_type,
729f8972b56SEugenio Pérez };
730f8972b56SEugenio Pérez 
7312df4dd31SEugenio Pérez /**
7322df4dd31SEugenio Pérez  * Validate and copy control virtqueue commands.
7332df4dd31SEugenio Pérez  *
7342df4dd31SEugenio Pérez  * Following QEMU guidelines, we offer a copy of the buffers to the device to
7352df4dd31SEugenio Pérez  * prevent TOCTOU bugs.
736bd907ae4SEugenio Pérez  */
737bd907ae4SEugenio Pérez static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
738bd907ae4SEugenio Pérez                                             VirtQueueElement *elem,
739bd907ae4SEugenio Pérez                                             void *opaque)
740bd907ae4SEugenio Pérez {
7412df4dd31SEugenio Pérez     VhostVDPAState *s = opaque;
742be4278b6SEugenio Pérez     size_t in_len;
743bd907ae4SEugenio Pérez     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
7447a7f87e9SEugenio Pérez     /* Out buffer sent to both the vdpa device and the device model */
7457a7f87e9SEugenio Pérez     struct iovec out = {
7467a7f87e9SEugenio Pérez         .iov_base = s->cvq_cmd_out_buffer,
7477a7f87e9SEugenio Pérez     };
7482df4dd31SEugenio Pérez     /* in buffer used for device model */
7492df4dd31SEugenio Pérez     const struct iovec in = {
7502df4dd31SEugenio Pérez         .iov_base = &status,
7512df4dd31SEugenio Pérez         .iov_len = sizeof(status),
7522df4dd31SEugenio Pérez     };
753be4278b6SEugenio Pérez     ssize_t dev_written = -EINVAL;
754bd907ae4SEugenio Pérez 
7557a7f87e9SEugenio Pérez     out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
7567a7f87e9SEugenio Pérez                              s->cvq_cmd_out_buffer,
7577a7f87e9SEugenio Pérez                              vhost_vdpa_net_cvq_cmd_len());
7583f9a3eebSEugenio Pérez     if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
7593f9a3eebSEugenio Pérez         /*
7603f9a3eebSEugenio Pérez          * Guest announce capability is emulated by qemu, so don't forward to
7613f9a3eebSEugenio Pérez          * the device.
7623f9a3eebSEugenio Pérez          */
7633f9a3eebSEugenio Pérez         dev_written = sizeof(status);
7643f9a3eebSEugenio Pérez         *s->status = VIRTIO_NET_OK;
7653f9a3eebSEugenio Pérez     } else {
766be4278b6SEugenio Pérez         dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
767be4278b6SEugenio Pérez         if (unlikely(dev_written < 0)) {
768bd907ae4SEugenio Pérez             goto out;
769bd907ae4SEugenio Pérez         }
7703f9a3eebSEugenio Pérez     }
771bd907ae4SEugenio Pérez 
772bd907ae4SEugenio Pérez     if (unlikely(dev_written < sizeof(status))) {
773bd907ae4SEugenio Pérez         error_report("Insufficient written data (%zu)", dev_written);
7742df4dd31SEugenio Pérez         goto out;
7752df4dd31SEugenio Pérez     }
7762df4dd31SEugenio Pérez 
77717fb889fSEugenio Pérez     if (*s->status != VIRTIO_NET_OK) {
778be4278b6SEugenio Pérez         return VIRTIO_NET_ERR;
7792df4dd31SEugenio Pérez     }
7802df4dd31SEugenio Pérez 
7812df4dd31SEugenio Pérez     status = VIRTIO_NET_ERR;
7827a7f87e9SEugenio Pérez     virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
7832df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
7842df4dd31SEugenio Pérez         error_report("Bad CVQ processing in model");
785bd907ae4SEugenio Pérez     }
786bd907ae4SEugenio Pérez 
787bd907ae4SEugenio Pérez out:
788bd907ae4SEugenio Pérez     in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
789bd907ae4SEugenio Pérez                           sizeof(status));
790bd907ae4SEugenio Pérez     if (unlikely(in_len < sizeof(status))) {
791bd907ae4SEugenio Pérez         error_report("Bad device CVQ written length");
792bd907ae4SEugenio Pérez     }
793bd907ae4SEugenio Pérez     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
794bd907ae4SEugenio Pérez     g_free(elem);
795be4278b6SEugenio Pérez     return dev_written < 0 ? dev_written : 0;
796bd907ae4SEugenio Pérez }
797bd907ae4SEugenio Pérez 
798bd907ae4SEugenio Pérez static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
799bd907ae4SEugenio Pérez     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
800bd907ae4SEugenio Pérez };
801bd907ae4SEugenio Pérez 
802654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
803654790b6SJason Wang                                        const char *device,
804654790b6SJason Wang                                        const char *name,
80540237840SJason Wang                                        int vdpa_device_fd,
80640237840SJason Wang                                        int queue_pair_index,
80740237840SJason Wang                                        int nvqs,
8081576dbb5SEugenio Pérez                                        bool is_datapath,
8091576dbb5SEugenio Pérez                                        bool svq,
8105c1ebd4cSEugenio Pérez                                        struct vhost_vdpa_iova_range iova_range,
8115c1ebd4cSEugenio Pérez                                        uint64_t features)
8121e0a84eaSCindy Lu {
8131e0a84eaSCindy Lu     NetClientState *nc = NULL;
8141e0a84eaSCindy Lu     VhostVDPAState *s;
8151e0a84eaSCindy Lu     int ret = 0;
8161e0a84eaSCindy Lu     assert(name);
81740237840SJason Wang     if (is_datapath) {
81840237840SJason Wang         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
81940237840SJason Wang                                  name);
82040237840SJason Wang     } else {
821f8972b56SEugenio Pérez         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
82240237840SJason Wang                                          device, name);
82340237840SJason Wang     }
82453b85d95SLaurent Vivier     qemu_set_info_str(nc, TYPE_VHOST_VDPA);
8251e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, nc);
8267327813dSJason Wang 
8271e0a84eaSCindy Lu     s->vhost_vdpa.device_fd = vdpa_device_fd;
82840237840SJason Wang     s->vhost_vdpa.index = queue_pair_index;
8297f211a28SEugenio Pérez     s->always_svq = svq;
83069498430SEugenio Pérez     s->migration_state.notify = vdpa_net_migration_state_notifier;
8311576dbb5SEugenio Pérez     s->vhost_vdpa.shadow_vqs_enabled = svq;
832a585fad2SEugenio Pérez     s->vhost_vdpa.iova_range = iova_range;
8336188d78aSEugenio Pérez     s->vhost_vdpa.shadow_data = svq;
8345c1ebd4cSEugenio Pérez     if (queue_pair_index == 0) {
8355c1ebd4cSEugenio Pérez         vhost_vdpa_net_valid_svq_features(features,
8365c1ebd4cSEugenio Pérez                                           &s->vhost_vdpa.migration_blocker);
8375c1ebd4cSEugenio Pérez     } else if (!is_datapath) {
8382df4dd31SEugenio Pérez         s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
8392df4dd31SEugenio Pérez                                             vhost_vdpa_net_cvq_cmd_page_len());
8402df4dd31SEugenio Pérez         memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
84117fb889fSEugenio Pérez         s->status = qemu_memalign(qemu_real_host_page_size(),
8422df4dd31SEugenio Pérez                                   vhost_vdpa_net_cvq_cmd_page_len());
84317fb889fSEugenio Pérez         memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());
8442df4dd31SEugenio Pérez 
845bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
846bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops_opaque = s;
8479c363cf6SEugenio Pérez 
8489c363cf6SEugenio Pérez         /*
8499c363cf6SEugenio Pérez          * TODO: We cannot migrate devices with CVQ as there is no way to set
8509c363cf6SEugenio Pérez          * the device state (MAC, MQ, etc) before starting the datapath.
8519c363cf6SEugenio Pérez          *
8529c363cf6SEugenio Pérez          * Migration blocker ownership now belongs to s->vhost_vdpa.
8539c363cf6SEugenio Pérez          */
8549c363cf6SEugenio Pérez         error_setg(&s->vhost_vdpa.migration_blocker,
8559c363cf6SEugenio Pérez                    "net vdpa cannot migrate with CVQ feature");
856bd907ae4SEugenio Pérez     }
85740237840SJason Wang     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
85874af5eecSJason Wang     if (ret) {
85974af5eecSJason Wang         qemu_del_net_client(nc);
860654790b6SJason Wang         return NULL;
86174af5eecSJason Wang     }
862654790b6SJason Wang     return nc;
8631e0a84eaSCindy Lu }
8641e0a84eaSCindy Lu 
8658170ab3fSEugenio Pérez static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
8668170ab3fSEugenio Pérez {
8678170ab3fSEugenio Pérez     int ret = ioctl(fd, VHOST_GET_FEATURES, features);
8688170ab3fSEugenio Pérez     if (unlikely(ret < 0)) {
8698170ab3fSEugenio Pérez         error_setg_errno(errp, errno,
8708170ab3fSEugenio Pérez                          "Fail to query features from vhost-vDPA device");
8718170ab3fSEugenio Pérez     }
8728170ab3fSEugenio Pérez     return ret;
8738170ab3fSEugenio Pérez }
8748170ab3fSEugenio Pérez 
8758170ab3fSEugenio Pérez static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
8768170ab3fSEugenio Pérez                                           int *has_cvq, Error **errp)
87740237840SJason Wang {
87840237840SJason Wang     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
879cd523a41SStefano Garzarella     g_autofree struct vhost_vdpa_config *config = NULL;
88040237840SJason Wang     __virtio16 *max_queue_pairs;
88140237840SJason Wang     int ret;
88240237840SJason Wang 
88340237840SJason Wang     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
88440237840SJason Wang         *has_cvq = 1;
88540237840SJason Wang     } else {
88640237840SJason Wang         *has_cvq = 0;
88740237840SJason Wang     }
88840237840SJason Wang 
88940237840SJason Wang     if (features & (1 << VIRTIO_NET_F_MQ)) {
89040237840SJason Wang         config = g_malloc0(config_size + sizeof(*max_queue_pairs));
89140237840SJason Wang         config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
89240237840SJason Wang         config->len = sizeof(*max_queue_pairs);
89340237840SJason Wang 
89440237840SJason Wang         ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
89540237840SJason Wang         if (ret) {
89640237840SJason Wang             error_setg(errp, "Fail to get config from vhost-vDPA device");
89740237840SJason Wang             return -ret;
89840237840SJason Wang         }
89940237840SJason Wang 
90040237840SJason Wang         max_queue_pairs = (__virtio16 *)&config->buf;
90140237840SJason Wang 
90240237840SJason Wang         return lduw_le_p(max_queue_pairs);
90340237840SJason Wang     }
90440237840SJason Wang 
90540237840SJason Wang     return 1;
90640237840SJason Wang }
90740237840SJason Wang 
9081e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
9091e0a84eaSCindy Lu                         NetClientState *peer, Error **errp)
9101e0a84eaSCindy Lu {
9111e0a84eaSCindy Lu     const NetdevVhostVDPAOptions *opts;
9128170ab3fSEugenio Pérez     uint64_t features;
913654790b6SJason Wang     int vdpa_device_fd;
914eb3cb751SEugenio Pérez     g_autofree NetClientState **ncs = NULL;
915a585fad2SEugenio Pérez     struct vhost_vdpa_iova_range iova_range;
916eb3cb751SEugenio Pérez     NetClientState *nc;
917aed5da45SEugenio Pérez     int queue_pairs, r, i = 0, has_cvq = 0;
9181e0a84eaSCindy Lu 
9191e0a84eaSCindy Lu     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
9201e0a84eaSCindy Lu     opts = &netdev->u.vhost_vdpa;
9217480874aSMarkus Armbruster     if (!opts->vhostdev && !opts->vhostfd) {
9228801ccd0SSi-Wei Liu         error_setg(errp,
9238801ccd0SSi-Wei Liu                    "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
924c8295404SEugenio Pérez         return -1;
925c8295404SEugenio Pérez     }
9267327813dSJason Wang 
9277480874aSMarkus Armbruster     if (opts->vhostdev && opts->vhostfd) {
9288801ccd0SSi-Wei Liu         error_setg(errp,
9298801ccd0SSi-Wei Liu                    "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
9308801ccd0SSi-Wei Liu         return -1;
9318801ccd0SSi-Wei Liu     }
9328801ccd0SSi-Wei Liu 
9337480874aSMarkus Armbruster     if (opts->vhostdev) {
9340351152bSEugenio Pérez         vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
9357327813dSJason Wang         if (vdpa_device_fd == -1) {
9367327813dSJason Wang             return -errno;
9377327813dSJason Wang         }
9385107fd3eSPeter Maydell     } else {
9395107fd3eSPeter Maydell         /* has_vhostfd */
9408801ccd0SSi-Wei Liu         vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
9418801ccd0SSi-Wei Liu         if (vdpa_device_fd == -1) {
9428801ccd0SSi-Wei Liu             error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
9438801ccd0SSi-Wei Liu             return -1;
9448801ccd0SSi-Wei Liu         }
9458801ccd0SSi-Wei Liu     }
9467327813dSJason Wang 
9478170ab3fSEugenio Pérez     r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
9488170ab3fSEugenio Pérez     if (unlikely(r < 0)) {
949aed5da45SEugenio Pérez         goto err;
9508170ab3fSEugenio Pérez     }
9518170ab3fSEugenio Pérez 
9528170ab3fSEugenio Pérez     queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
95340237840SJason Wang                                                  &has_cvq, errp);
95440237840SJason Wang     if (queue_pairs < 0) {
9557327813dSJason Wang         qemu_close(vdpa_device_fd);
95640237840SJason Wang         return queue_pairs;
9577327813dSJason Wang     }
9587327813dSJason Wang 
959bf7a2ad8SLongpeng     r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
960bf7a2ad8SLongpeng     if (unlikely(r < 0)) {
961bf7a2ad8SLongpeng         error_setg(errp, "vhost-vdpa: get iova range failed: %s",
962bf7a2ad8SLongpeng                    strerror(-r));
963bf7a2ad8SLongpeng         goto err;
964bf7a2ad8SLongpeng     }
965bf7a2ad8SLongpeng 
96600ef422eSEugenio Pérez     if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
96700ef422eSEugenio Pérez         goto err;
9681576dbb5SEugenio Pérez     }
9691576dbb5SEugenio Pérez 
97040237840SJason Wang     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
97140237840SJason Wang 
97240237840SJason Wang     for (i = 0; i < queue_pairs; i++) {
97340237840SJason Wang         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
9741576dbb5SEugenio Pérez                                      vdpa_device_fd, i, 2, true, opts->x_svq,
9755c1ebd4cSEugenio Pérez                                      iova_range, features);
97640237840SJason Wang         if (!ncs[i])
97740237840SJason Wang             goto err;
97840237840SJason Wang     }
97940237840SJason Wang 
98040237840SJason Wang     if (has_cvq) {
98140237840SJason Wang         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
9821576dbb5SEugenio Pérez                                  vdpa_device_fd, i, 1, false,
9835c1ebd4cSEugenio Pérez                                  opts->x_svq, iova_range, features);
98440237840SJason Wang         if (!nc)
98540237840SJason Wang             goto err;
98640237840SJason Wang     }
98740237840SJason Wang 
988654790b6SJason Wang     return 0;
98940237840SJason Wang 
99040237840SJason Wang err:
99140237840SJason Wang     if (i) {
9929bd05507SSi-Wei Liu         for (i--; i >= 0; i--) {
9939bd05507SSi-Wei Liu             qemu_del_net_client(ncs[i]);
9949bd05507SSi-Wei Liu         }
99540237840SJason Wang     }
9961576dbb5SEugenio Pérez 
99740237840SJason Wang     qemu_close(vdpa_device_fd);
99840237840SJason Wang 
99940237840SJason Wang     return -1;
10001e0a84eaSCindy Lu }
1001