11e0a84eaSCindy Lu /*
21e0a84eaSCindy Lu * vhost-vdpa.c
31e0a84eaSCindy Lu *
41e0a84eaSCindy Lu * Copyright(c) 2017-2018 Intel Corporation.
51e0a84eaSCindy Lu * Copyright(c) 2020 Red Hat, Inc.
61e0a84eaSCindy Lu *
71e0a84eaSCindy Lu * This work is licensed under the terms of the GNU GPL, version 2 or later.
81e0a84eaSCindy Lu * See the COPYING file in the top-level directory.
91e0a84eaSCindy Lu *
101e0a84eaSCindy Lu */
111e0a84eaSCindy Lu
121e0a84eaSCindy Lu #include "qemu/osdep.h"
131e0a84eaSCindy Lu #include "clients.h"
14bd907ae4SEugenio Pérez #include "hw/virtio/virtio-net.h"
151e0a84eaSCindy Lu #include "net/vhost_net.h"
161e0a84eaSCindy Lu #include "net/vhost-vdpa.h"
171e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h"
181e0a84eaSCindy Lu #include "qemu/config-file.h"
191e0a84eaSCindy Lu #include "qemu/error-report.h"
20bd907ae4SEugenio Pérez #include "qemu/log.h"
21bd907ae4SEugenio Pérez #include "qemu/memalign.h"
221e0a84eaSCindy Lu #include "qemu/option.h"
231e0a84eaSCindy Lu #include "qapi/error.h"
2440237840SJason Wang #include <linux/vhost.h>
251e0a84eaSCindy Lu #include <sys/ioctl.h>
261e0a84eaSCindy Lu #include <err.h>
271e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h"
281e0a84eaSCindy Lu #include "monitor/monitor.h"
2969498430SEugenio Pérez #include "migration/misc.h"
301e0a84eaSCindy Lu #include "hw/virtio/vhost.h"
3162845d32SSi-Wei Liu #include "trace.h"
321e0a84eaSCindy Lu
331e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */
341e0a84eaSCindy Lu typedef struct VhostVDPAState {
351e0a84eaSCindy Lu NetClientState nc;
361e0a84eaSCindy Lu struct vhost_vdpa vhost_vdpa;
373e775730SSteve Sistare NotifierWithReturn migration_state;
381e0a84eaSCindy Lu VHostNetState *vhost_net;
392df4dd31SEugenio Pérez
402df4dd31SEugenio Pérez /* Control commands shadow buffers */
4117fb889fSEugenio Pérez void *cvq_cmd_out_buffer;
4217fb889fSEugenio Pérez virtio_net_ctrl_ack *status;
4317fb889fSEugenio Pérez
447f211a28SEugenio Pérez /* The device always have SVQ enabled */
457f211a28SEugenio Pérez bool always_svq;
46152128d6SEugenio Pérez
47152128d6SEugenio Pérez /* The device can isolate CVQ in its own ASID */
48152128d6SEugenio Pérez bool cvq_isolated;
49152128d6SEugenio Pérez
501e0a84eaSCindy Lu bool started;
511e0a84eaSCindy Lu } VhostVDPAState;
521e0a84eaSCindy Lu
532875a0caSHawkins Jiawei /*
542875a0caSHawkins Jiawei * The array is sorted alphabetically in ascending order,
552875a0caSHawkins Jiawei * with the exception of VHOST_INVALID_FEATURE_BIT,
562875a0caSHawkins Jiawei * which should always be the last entry.
572875a0caSHawkins Jiawei */
581e0a84eaSCindy Lu const int vdpa_feature_bits[] = {
591e0a84eaSCindy Lu VIRTIO_F_ANY_LAYOUT,
602875a0caSHawkins Jiawei VIRTIO_F_IOMMU_PLATFORM,
612875a0caSHawkins Jiawei VIRTIO_F_NOTIFY_ON_EMPTY,
622875a0caSHawkins Jiawei VIRTIO_F_RING_PACKED,
632875a0caSHawkins Jiawei VIRTIO_F_RING_RESET,
641e0a84eaSCindy Lu VIRTIO_F_VERSION_1,
65c03213fdSJonah Palmer VIRTIO_F_IN_ORDER,
66b937fa89SJonah Palmer VIRTIO_F_NOTIFICATION_DATA,
671e0a84eaSCindy Lu VIRTIO_NET_F_CSUM,
6851e84244SEugenio Pérez VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
692875a0caSHawkins Jiawei VIRTIO_NET_F_CTRL_MAC_ADDR,
7040237840SJason Wang VIRTIO_NET_F_CTRL_RX,
7140237840SJason Wang VIRTIO_NET_F_CTRL_RX_EXTRA,
7240237840SJason Wang VIRTIO_NET_F_CTRL_VLAN,
7340237840SJason Wang VIRTIO_NET_F_CTRL_VQ,
742875a0caSHawkins Jiawei VIRTIO_NET_F_GSO,
752875a0caSHawkins Jiawei VIRTIO_NET_F_GUEST_CSUM,
762875a0caSHawkins Jiawei VIRTIO_NET_F_GUEST_ECN,
772875a0caSHawkins Jiawei VIRTIO_NET_F_GUEST_TSO4,
782875a0caSHawkins Jiawei VIRTIO_NET_F_GUEST_TSO6,
792875a0caSHawkins Jiawei VIRTIO_NET_F_GUEST_UFO,
809da16849SAndrew Melnychenko VIRTIO_NET_F_GUEST_USO4,
819da16849SAndrew Melnychenko VIRTIO_NET_F_GUEST_USO6,
820145c393SAndrew Melnychenko VIRTIO_NET_F_HASH_REPORT,
832875a0caSHawkins Jiawei VIRTIO_NET_F_HOST_ECN,
842875a0caSHawkins Jiawei VIRTIO_NET_F_HOST_TSO4,
852875a0caSHawkins Jiawei VIRTIO_NET_F_HOST_TSO6,
862875a0caSHawkins Jiawei VIRTIO_NET_F_HOST_UFO,
879da16849SAndrew Melnychenko VIRTIO_NET_F_HOST_USO,
882875a0caSHawkins Jiawei VIRTIO_NET_F_MQ,
892875a0caSHawkins Jiawei VIRTIO_NET_F_MRG_RXBUF,
902875a0caSHawkins Jiawei VIRTIO_NET_F_MTU,
91f8e09b97SAkihiko Odaki VIRTIO_NET_F_RSC_EXT,
922875a0caSHawkins Jiawei VIRTIO_NET_F_RSS,
939aa47eddSSi-Wei Liu VIRTIO_NET_F_STATUS,
942875a0caSHawkins Jiawei VIRTIO_RING_F_EVENT_IDX,
952875a0caSHawkins Jiawei VIRTIO_RING_F_INDIRECT_DESC,
962875a0caSHawkins Jiawei
972875a0caSHawkins Jiawei /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
981e0a84eaSCindy Lu VHOST_INVALID_FEATURE_BIT
991e0a84eaSCindy Lu };
1001e0a84eaSCindy Lu
1011576dbb5SEugenio Pérez /** Supported device specific feature bits with SVQ */
1021576dbb5SEugenio Pérez static const uint64_t vdpa_svq_device_features =
1031576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_CSUM) |
1041576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
1054b4a1378SHawkins Jiawei BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
1061576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_MTU) |
1071576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_MAC) |
1081576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
1091576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
1101576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
1111576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
1121576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
1131576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
1141576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
1151576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
1161576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
1171576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_STATUS) |
1181576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
119ea6eec49SHawkins Jiawei BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
120e213c45aSHawkins Jiawei BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |
121d669b7bbSHawkins Jiawei BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) |
12272b99a87SEugenio Pérez BIT_ULL(VIRTIO_NET_F_MQ) |
1231576dbb5SEugenio Pérez BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
1241576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
125609ab4c3SEugenio Pérez /* VHOST_F_LOG_ALL is exposed by SVQ */
126609ab4c3SEugenio Pérez BIT_ULL(VHOST_F_LOG_ALL) |
127556b67d4SHawkins Jiawei BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |
12807eba949SHawkins Jiawei BIT_ULL(VIRTIO_NET_F_RSS) |
1291576dbb5SEugenio Pérez BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
1300d74e2b7SEugenio Pérez BIT_ULL(VIRTIO_NET_F_STANDBY) |
1310d74e2b7SEugenio Pérez BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
1321576dbb5SEugenio Pérez
/*
 * Address space id assigned to the control virtqueue group when CVQ is
 * isolated from the guest's address space (see vhost_vdpa_net_cvq_start).
 */
#define VHOST_VDPA_NET_CVQ_ASID 1
134c1a10086SEugenio Pérez
vhost_vdpa_get_vhost_net(NetClientState * nc)1351e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
1361e0a84eaSCindy Lu {
1371e0a84eaSCindy Lu VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1381e0a84eaSCindy Lu assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1391e0a84eaSCindy Lu return s->vhost_net;
1401e0a84eaSCindy Lu }
1411e0a84eaSCindy Lu
vhost_vdpa_net_cvq_cmd_len(void)142915bf6ccSEugenio Pérez static size_t vhost_vdpa_net_cvq_cmd_len(void)
143915bf6ccSEugenio Pérez {
144915bf6ccSEugenio Pérez /*
145915bf6ccSEugenio Pérez * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
146915bf6ccSEugenio Pérez * In buffer is always 1 byte, so it should fit here
147915bf6ccSEugenio Pérez */
148915bf6ccSEugenio Pérez return sizeof(struct virtio_net_ctrl_hdr) +
149915bf6ccSEugenio Pérez 2 * sizeof(struct virtio_net_ctrl_mac) +
150915bf6ccSEugenio Pérez MAC_TABLE_ENTRIES * ETH_ALEN;
151915bf6ccSEugenio Pérez }
152915bf6ccSEugenio Pérez
/**
 * Control command buffer length rounded up to the host page size.
 */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
157915bf6ccSEugenio Pérez
vhost_vdpa_net_valid_svq_features(uint64_t features,Error ** errp)15836e46472SEugenio Pérez static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
15936e46472SEugenio Pérez {
16036e46472SEugenio Pérez uint64_t invalid_dev_features =
16136e46472SEugenio Pérez features & ~vdpa_svq_device_features &
16236e46472SEugenio Pérez /* Transport are all accepted at this point */
16336e46472SEugenio Pérez ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
16436e46472SEugenio Pérez VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
16536e46472SEugenio Pérez
16636e46472SEugenio Pérez if (invalid_dev_features) {
16736e46472SEugenio Pérez error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
16836e46472SEugenio Pérez invalid_dev_features);
169258a0394SEugenio Pérez return false;
17036e46472SEugenio Pérez }
17136e46472SEugenio Pérez
172258a0394SEugenio Pérez return vhost_svq_valid_features(features, errp);
17336e46472SEugenio Pérez }
17436e46472SEugenio Pérez
vhost_vdpa_net_check_device_id(struct vhost_net * net)1751e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
1761e0a84eaSCindy Lu {
1771e0a84eaSCindy Lu uint32_t device_id;
1781e0a84eaSCindy Lu int ret;
1791e0a84eaSCindy Lu struct vhost_dev *hdev;
1801e0a84eaSCindy Lu
1811e0a84eaSCindy Lu hdev = (struct vhost_dev *)&net->dev;
1821e0a84eaSCindy Lu ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
1831e0a84eaSCindy Lu if (device_id != VIRTIO_ID_NET) {
1841e0a84eaSCindy Lu return -ENOTSUP;
1851e0a84eaSCindy Lu }
1861e0a84eaSCindy Lu return ret;
1871e0a84eaSCindy Lu }
1881e0a84eaSCindy Lu
/**
 * Create the vhost-net instance of a vhost-vdpa net client.
 *
 * @ncs: net client; must be of type NET_CLIENT_DRIVER_VHOST_VDPA (asserted)
 * @be: opaque backend pointer handed to vhost_net_init()
 * @queue_pair_index: not used by this function
 * @nvqs: number of virtqueues handed to vhost_net_init()
 *
 * On success the new vhost-net is stored in the client state and 0 is
 * returned. On failure -1 is returned and the client state holds no
 * vhost-net pointer.
 */
static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    /*
     * Drop the reference before freeing so vhost_vdpa_cleanup() does not
     * clean up and free the same vhost-net a second time.
     */
    s->vhost_net = NULL;
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}
2221e0a84eaSCindy Lu
/**
 * Release the resources owned by a vhost-vdpa net client.
 *
 * Unmaps the control command shadow buffers and frees the vhost-net state.
 * The client with index 0 additionally closes the device fd and frees the
 * shared vhost-vdpa state.
 */
static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    NetClientState *peer = nc->peer;
    size_t cvq_buf_len;

    /*
     * If a peer NIC is attached, do not cleanup anything.
     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
     * when the guest is shutting down.
     */
    if (peer && peer->info->type == NET_CLIENT_DRIVER_NIC) {
        return;
    }

    cvq_buf_len = vhost_vdpa_net_cvq_cmd_page_len();
    munmap(s->cvq_cmd_out_buffer, cvq_buf_len);
    munmap(s->status, cvq_buf_len);

    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }

    /* Only the client with index 0 releases the shared state */
    if (s->vhost_vdpa.index == 0) {
        qemu_close(s->vhost_vdpa.shared->device_fd);
        g_free(s->vhost_vdpa.shared);
    }
}
2481e0a84eaSCindy Lu
249d1fd2d31SHawkins Jiawei /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */
vhost_vdpa_set_steering_ebpf(NetClientState * nc,int prog_fd)250d1fd2d31SHawkins Jiawei static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd)
251d1fd2d31SHawkins Jiawei {
252d1fd2d31SHawkins Jiawei return true;
253d1fd2d31SHawkins Jiawei }
254d1fd2d31SHawkins Jiawei
/**
 * Report vnet header support; always true for a vhost-vdpa client.
 *
 * @nc: net client; must be of type NET_CLIENT_DRIVER_VHOST_VDPA (asserted).
 */
static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    const bool has_hdr = true;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return has_hdr;
}
2611e0a84eaSCindy Lu
/**
 * Report whether VIRTIO_NET_F_HOST_UFO survives vhost_net_get_features().
 *
 * @nc: net client; must be of type NET_CLIENT_DRIVER_VHOST_VDPA (asserted).
 */
static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    const uint64_t ufo_bit = 1ULL << VIRTIO_NET_F_HOST_UFO;
    VhostVDPAState *s;
    uint64_t acked;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    /* Offer only the UFO bit and see whether the backend keeps it */
    acked = vhost_net_get_features(s->vhost_net, ufo_bit);
    return (acked & ufo_bit) != 0;
}
2721e0a84eaSCindy Lu
vhost_vdpa_check_peer_type(NetClientState * nc,ObjectClass * oc,Error ** errp)273ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
274ee8a1c63SKevin Wolf Error **errp)
275ee8a1c63SKevin Wolf {
276ee8a1c63SKevin Wolf const char *driver = object_class_get_name(oc);
277ee8a1c63SKevin Wolf
278ee8a1c63SKevin Wolf if (!g_str_has_prefix(driver, "virtio-net-")) {
279ee8a1c63SKevin Wolf error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
280ee8a1c63SKevin Wolf return false;
281ee8a1c63SKevin Wolf }
282ee8a1c63SKevin Wolf
283ee8a1c63SKevin Wolf return true;
284ee8a1c63SKevin Wolf }
285ee8a1c63SKevin Wolf
286846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */
vhost_vdpa_receive(NetClientState * nc,const uint8_t * buf,size_t size)287846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
288846a1e85SEugenio Pérez size_t size)
289846a1e85SEugenio Pérez {
290bc5add1dSSi-Wei Liu return size;
291846a1e85SEugenio Pérez }
292846a1e85SEugenio Pérez
293573581b1SSi-Wei Liu
29477c3a336SSi-Wei Liu /** From any vdpa net client, get the netclient of the i-th queue pair */
vhost_vdpa_net_get_nc_vdpa(VhostVDPAState * s,int i)29577c3a336SSi-Wei Liu static VhostVDPAState *vhost_vdpa_net_get_nc_vdpa(VhostVDPAState *s, int i)
296573581b1SSi-Wei Liu {
297573581b1SSi-Wei Liu NICState *nic = qemu_get_nic(s->nc.peer);
29877c3a336SSi-Wei Liu NetClientState *nc_i = qemu_get_peer(nic->ncs, i);
299573581b1SSi-Wei Liu
30077c3a336SSi-Wei Liu return DO_UPCAST(VhostVDPAState, nc, nc_i);
30177c3a336SSi-Wei Liu }
30277c3a336SSi-Wei Liu
/** From any vdpa net client, get the vdpa state of the first queue pair */
static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
{
    VhostVDPAState *first = vhost_vdpa_net_get_nc_vdpa(s, 0);

    return first;
}
307573581b1SSi-Wei Liu
/**
 * Switch shadow virtqueues on or off by restarting vhost-net.
 *
 * @s: vdpa state the migration notifier belongs to
 * @enable: whether SVQ should be enabled after the restart
 *
 * Does nothing if SVQ is already in the requested state or if vhost has not
 * been started on the virtio-net device.
 */
static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;
    VirtIODevice *vdev;
    VirtIONet *n;
    int data_queue_pairs = 1;
    int cvq = 0;
    int r;

    /* We are only called on the first data vqs and only if x-svq is not set */
    if (v->shadow_vqs_enabled == enable) {
        return;
    }

    vdev = v->dev->vdev;
    n = VIRTIO_NET(vdev);
    if (!n->vhost_started) {
        return;
    }

    if (n->multiqueue) {
        data_queue_pairs = n->max_queue_pairs;
    }
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        cvq = n->max_ncs - n->max_queue_pairs;
    }

    if (enable) {
        v->shared->svq_switching = SVQ_TSTATE_ENABLING;
    } else {
        v->shared->svq_switching = SVQ_TSTATE_DISABLING;
    }

    /*
     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
     * in the future and resume the device if read-only operations between
     * suspend and reset goes wrong.
     */
    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);

    /* Start will check migration setup_or_active to configure or not SVQ */
    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
    if (unlikely(r < 0)) {
        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
    }

    v->shared->svq_switching = SVQ_TSTATE_DONE;
}
34569498430SEugenio Pérez
vdpa_net_migration_state_notifier(NotifierWithReturn * notifier,MigrationEvent * e,Error ** errp)3463e775730SSteve Sistare static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier,
3475663dd3fSSteve Sistare MigrationEvent *e, Error **errp)
34869498430SEugenio Pérez {
3495663dd3fSSteve Sistare VhostVDPAState *s = container_of(notifier, VhostVDPAState, migration_state);
35069498430SEugenio Pérez
3519d9babf7SSteve Sistare if (e->type == MIG_EVENT_PRECOPY_SETUP) {
35269498430SEugenio Pérez vhost_vdpa_net_log_global_enable(s, true);
3539d9babf7SSteve Sistare } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
35469498430SEugenio Pérez vhost_vdpa_net_log_global_enable(s, false);
35569498430SEugenio Pérez }
3563e775730SSteve Sistare return 0;
35769498430SEugenio Pérez }
35869498430SEugenio Pérez
/**
 * Start-up work done only for the first data client: register the migration
 * notifier and, if SVQ is enabled, create the shared IOVA tree.
 */
static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;

    migration_add_notifier(&s->migration_state,
                           vdpa_net_migration_state_notifier);

    if (!v->shadow_vqs_enabled) {
        return;
    }

    v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
                                               v->shared->iova_range.last);
}
37000ef422eSEugenio Pérez
/**
 * Start callback of a data net client.
 *
 * Enables shadow virtqueues if they are always requested or a migration is
 * running. The client with index 0 also publishes that choice in the shared
 * state and runs the first-client start-up work.
 *
 * Always returns 0.
 */
static int vhost_vdpa_net_data_start(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    v->shadow_vqs_enabled = s->always_svq || migration_is_running();

    if (v->index == 0) {
        v->shared->shadow_data = v->shadow_vqs_enabled;
        vhost_vdpa_net_data_start_first(s);
    }

    return 0;
}
39200ef422eSEugenio Pérez
/**
 * Load callback of a data net client.
 *
 * When the device has no control virtqueue (vq_index_end is even), mark
 * every virtqueue of this client as ready. With a control virtqueue nothing
 * is done here.
 *
 * Returns 0 on success or the first negative error reported by
 * vhost_vdpa_set_vring_ready().
 */
static int vhost_vdpa_net_data_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    /* An odd vq_index_end means a control virtqueue follows the data vqs */
    bool has_cvq = (v->dev->vq_index_end % 2) != 0;
    int i;

    if (has_cvq) {
        return 0;
    }

    for (i = 0; i < v->dev->nvqs; ++i) {
        int r = vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);

        if (r < 0) {
            return r;
        }
    }

    return 0;
}
4116c482547SEugenio Pérez
/**
 * Stop callback shared by data and control net clients.
 *
 * The client with index 0 removes the migration notifier. The client that
 * owns the last virtqueues of the device deletes the shared IOVA tree.
 */
static void vhost_vdpa_net_client_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    struct vhost_dev *dev;
    bool is_last;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (v->index == 0) {
        migration_remove_notifier(&s->migration_state);
    }

    dev = v->dev;
    is_last = dev->vq_index + dev->nvqs == dev->vq_index_end;
    if (is_last) {
        g_clear_pointer(&v->shared->iova_tree, vhost_iova_tree_delete);
    }
}
42900ef422eSEugenio Pérez
4301e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = {
4311e0a84eaSCindy Lu .type = NET_CLIENT_DRIVER_VHOST_VDPA,
4321e0a84eaSCindy Lu .size = sizeof(VhostVDPAState),
433846a1e85SEugenio Pérez .receive = vhost_vdpa_receive,
43400ef422eSEugenio Pérez .start = vhost_vdpa_net_data_start,
4356c482547SEugenio Pérez .load = vhost_vdpa_net_data_load,
43600ef422eSEugenio Pérez .stop = vhost_vdpa_net_client_stop,
4371e0a84eaSCindy Lu .cleanup = vhost_vdpa_cleanup,
4381e0a84eaSCindy Lu .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
4391e0a84eaSCindy Lu .has_ufo = vhost_vdpa_has_ufo,
440ee8a1c63SKevin Wolf .check_peer_type = vhost_vdpa_check_peer_type,
441d1fd2d31SHawkins Jiawei .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
4421e0a84eaSCindy Lu };
4431e0a84eaSCindy Lu
vhost_vdpa_get_vring_group(int device_fd,unsigned vq_index,Error ** errp)444152128d6SEugenio Pérez static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
445152128d6SEugenio Pérez Error **errp)
446c1a10086SEugenio Pérez {
447c1a10086SEugenio Pérez struct vhost_vring_state state = {
448c1a10086SEugenio Pérez .index = vq_index,
449c1a10086SEugenio Pérez };
450c1a10086SEugenio Pérez int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
451c1a10086SEugenio Pérez
452c1a10086SEugenio Pérez if (unlikely(r < 0)) {
4530f2bb0bfSEugenio Pérez r = -errno;
454152128d6SEugenio Pérez error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
455c1a10086SEugenio Pérez return r;
456c1a10086SEugenio Pérez }
457c1a10086SEugenio Pérez
458c1a10086SEugenio Pérez return state.num;
459c1a10086SEugenio Pérez }
460c1a10086SEugenio Pérez
vhost_vdpa_set_address_space_id(struct vhost_vdpa * v,unsigned vq_group,unsigned asid_num)461c1a10086SEugenio Pérez static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
462c1a10086SEugenio Pérez unsigned vq_group,
463c1a10086SEugenio Pérez unsigned asid_num)
464c1a10086SEugenio Pérez {
465c1a10086SEugenio Pérez struct vhost_vring_state asid = {
466c1a10086SEugenio Pérez .index = vq_group,
467c1a10086SEugenio Pérez .num = asid_num,
468c1a10086SEugenio Pérez };
469c1a10086SEugenio Pérez int r;
470c1a10086SEugenio Pérez
47162845d32SSi-Wei Liu trace_vhost_vdpa_set_address_space_id(v, vq_group, asid_num);
47262845d32SSi-Wei Liu
473f12b2498SEugenio Pérez r = ioctl(v->shared->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
474c1a10086SEugenio Pérez if (unlikely(r < 0)) {
475c1a10086SEugenio Pérez error_report("Can't set vq group %u asid %u, errno=%d (%s)",
476c1a10086SEugenio Pérez asid.index, asid.num, errno, g_strerror(errno));
477c1a10086SEugenio Pérez }
478c1a10086SEugenio Pérez return r;
479c1a10086SEugenio Pérez }
480c1a10086SEugenio Pérez
vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa * v,void * addr)4812df4dd31SEugenio Pérez static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
4822df4dd31SEugenio Pérez {
4835edb02e8SEugenio Pérez VhostIOVATree *tree = v->shared->iova_tree;
4842df4dd31SEugenio Pérez DMAMap needle = {
4852df4dd31SEugenio Pérez /*
4862df4dd31SEugenio Pérez * No need to specify size or to look for more translations since
4872df4dd31SEugenio Pérez * this contiguous chunk was allocated by us.
4882df4dd31SEugenio Pérez */
4892df4dd31SEugenio Pérez .translated_addr = (hwaddr)(uintptr_t)addr,
4902df4dd31SEugenio Pérez };
4912df4dd31SEugenio Pérez const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
4922df4dd31SEugenio Pérez int r;
4932df4dd31SEugenio Pérez
4942df4dd31SEugenio Pérez if (unlikely(!map)) {
4952df4dd31SEugenio Pérez error_report("Cannot locate expected map");
4962df4dd31SEugenio Pérez return;
4972df4dd31SEugenio Pérez }
4982df4dd31SEugenio Pérez
4996f03d9efSEugenio Pérez r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, map->iova,
5006f03d9efSEugenio Pérez map->size + 1);
5012df4dd31SEugenio Pérez if (unlikely(r != 0)) {
5022df4dd31SEugenio Pérez error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
5032df4dd31SEugenio Pérez }
5042df4dd31SEugenio Pérez
50569292a8eSEugenio Pérez vhost_iova_tree_remove(tree, *map);
5062df4dd31SEugenio Pérez }
5072df4dd31SEugenio Pérez
5087a7f87e9SEugenio Pérez /** Map CVQ buffer. */
vhost_vdpa_cvq_map_buf(struct vhost_vdpa * v,void * buf,size_t size,bool write)5097a7f87e9SEugenio Pérez static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
5107a7f87e9SEugenio Pérez bool write)
5112df4dd31SEugenio Pérez {
5122df4dd31SEugenio Pérez DMAMap map = {};
5132df4dd31SEugenio Pérez int r;
5142df4dd31SEugenio Pérez
5152df4dd31SEugenio Pérez map.translated_addr = (hwaddr)(uintptr_t)buf;
5167a7f87e9SEugenio Pérez map.size = size - 1;
5172df4dd31SEugenio Pérez map.perm = write ? IOMMU_RW : IOMMU_RO,
5185edb02e8SEugenio Pérez r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map);
5192df4dd31SEugenio Pérez if (unlikely(r != IOVA_OK)) {
5202df4dd31SEugenio Pérez error_report("Cannot map injected element");
5217a7f87e9SEugenio Pérez return r;
5222df4dd31SEugenio Pérez }
5232df4dd31SEugenio Pérez
5246f03d9efSEugenio Pérez r = vhost_vdpa_dma_map(v->shared, v->address_space_id, map.iova,
525cd831ed5SEugenio Pérez vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
5262df4dd31SEugenio Pérez if (unlikely(r < 0)) {
5272df4dd31SEugenio Pérez goto dma_map_err;
5282df4dd31SEugenio Pérez }
5292df4dd31SEugenio Pérez
5307a7f87e9SEugenio Pérez return 0;
5312df4dd31SEugenio Pérez
5322df4dd31SEugenio Pérez dma_map_err:
5335edb02e8SEugenio Pérez vhost_iova_tree_remove(v->shared->iova_tree, map);
5347a7f87e9SEugenio Pérez return r;
5352df4dd31SEugenio Pérez }
5362df4dd31SEugenio Pérez
/**
 * Start callback of the control virtqueue net client.
 *
 * Decides whether the control virtqueue is shadowed and, if so, maps the
 * control command shadow buffers into the device.
 *
 * Returns 0 on success (including when CVQ is not shadowed) or a negative
 * error from the failing step.
 */
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s, *first;
    struct vhost_vdpa *v;
    int64_t group;
    int ret;
    Error *local_err = NULL;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    v = &s->vhost_vdpa;

    /* Start from the SVQ setting of the first data client */
    first = vhost_vdpa_net_first_nc_vdpa(s);
    v->shadow_vqs_enabled = first->vhost_vdpa.shadow_vqs_enabled;
    v->address_space_id = VHOST_VDPA_GUEST_PA_ASID;

    /* With shadow_data set, SVQ is already configured for all virtqueues */
    if (!v->shared->shadow_data) {
        /*
         * If we early return in these cases SVQ will not be enabled. The
         * migration will be blocked as long as vhost-vdpa backends will not
         * offer _F_LOG.
         */
        if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
            return 0;
        }

        if (!s->cvq_isolated) {
            return 0;
        }

        group = vhost_vdpa_get_vring_group(v->shared->device_fd,
                                           v->dev->vq_index_end - 1,
                                           &local_err);
        if (unlikely(group < 0)) {
            error_report_err(local_err);
            return group;
        }

        ret = vhost_vdpa_set_address_space_id(v, group,
                                              VHOST_VDPA_NET_CVQ_ASID);
        if (unlikely(ret < 0)) {
            return ret;
        }

        v->shadow_vqs_enabled = true;
        v->address_space_id = VHOST_VDPA_NET_CVQ_ASID;
    }

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    /*
     * If other vhost_vdpa already have an iova_tree, reuse it for simplicity,
     * whether CVQ shares ASID with guest or not, because:
     * - Memory listener need access to guest's memory addresses allocated in
     *   the IOVA tree.
     * - There should be plenty of IOVA address space for both ASID not to
     *   worry about collisions between them.  Guest's translations are still
     *   validated with virtio virtqueue_pop so there is no risk for the guest
     *   to access memory that it shouldn't.
     *
     * To allocate a iova tree per ASID is doable but it complicates the code
     * and it is not worth it for the moment.
     */
    if (!v->shared->iova_tree) {
        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
                                                   v->shared->iova_range.last);
    }

    /* The out buffer is mapped read-only for the device */
    ret = vhost_vdpa_cvq_map_buf(v, s->cvq_cmd_out_buffer,
                                 vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(ret < 0)) {
        return ret;
    }

    /* The status buffer is written by the device */
    ret = vhost_vdpa_cvq_map_buf(v, s->status,
                                 vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(ret < 0)) {
        /* Roll back the first mapping */
        vhost_vdpa_cvq_unmap_buf(v, s->cvq_cmd_out_buffer);
    }

    return ret;
}
6247a7f87e9SEugenio Pérez
/*
 * Stop callback for the control virtqueue net client.
 *
 * If shadow virtqueues are enabled, the command-out and status shadow
 * buffers are unmapped first. vhost_vdpa_net_client_stop() is then called
 * unconditionally.
 *
 * @nc: The net client; its type must be NET_CLIENT_DRIVER_VHOST_VDPA
 */
static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *state;
    struct vhost_vdpa *vdpa;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    state = DO_UPCAST(VhostVDPAState, nc, nc);
    vdpa = &state->vhost_vdpa;

    if (vdpa->shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(vdpa, state->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(vdpa, state->status);
    }

    vhost_vdpa_net_client_stop(nc);
}
6382df4dd31SEugenio Pérez
/*
 * Add one control command to the first shadow virtqueue.
 *
 * The scatter-gather lists are handed to vhost_svq_add() as they are and its
 * return value is passed back to the caller. A return of -ENOSPC is also
 * reported through qemu_log_mask() as a guest error.
 *
 * @s: The VhostVDPAState
 * @out_sg: Device-readable buffers
 * @out_num: Number of entries in @out_sg
 * @in_sg: Device-writable buffers
 * @in_num: Number of entries in @in_sg
 */
static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
                                      const struct iovec *out_sg,
                                      size_t out_num,
                                      const struct iovec *in_sg,
                                      size_t in_num)
{
    VhostShadowVirtqueue *cvq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int ret = vhost_svq_add(cvq, out_sg, out_num, in_sg, in_num, NULL);

    if (unlikely(ret == -ENOSPC)) {
        qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                      __func__);
    }

    return ret;
}
656be4278b6SEugenio Pérez
657be4278b6SEugenio Pérez /*
658a864a321SHawkins Jiawei * Convenience wrapper to poll SVQ for multiple control commands.
659a864a321SHawkins Jiawei *
660a864a321SHawkins Jiawei * Caller should hold the BQL when invoking this function, and should take
661a864a321SHawkins Jiawei * the answer before SVQ pulls by itself when BQL is released.
662be4278b6SEugenio Pérez */
vhost_vdpa_net_svq_poll(VhostVDPAState * s,size_t cmds_in_flight)663a864a321SHawkins Jiawei static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight)
664a864a321SHawkins Jiawei {
665a864a321SHawkins Jiawei VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
666a864a321SHawkins Jiawei return vhost_svq_poll(svq, cmds_in_flight);
667be4278b6SEugenio Pérez }
668be4278b6SEugenio Pérez
/*
 * Rewind both load cursors to the beginning of their shadow buffers.
 *
 * @s: The VhostVDPAState owning the shadow buffers
 * @out_cursor: Set to cover s->cvq_cmd_out_buffer
 * @in_cursor: Set to cover s->status
 *
 * Both cursors get a length of vhost_vdpa_net_cvq_cmd_page_len().
 */
static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s,
                                             struct iovec *out_cursor,
                                             struct iovec *in_cursor)
{
    /* cursor over the buffer the device reads commands from */
    *out_cursor = (struct iovec) {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = vhost_vdpa_net_cvq_cmd_page_len(),
    };

    /* cursor over the buffer the device writes its acks to */
    *in_cursor = (struct iovec) {
        .iov_base = s->status,
        .iov_len = vhost_vdpa_net_cvq_cmd_page_len(),
    };
}
6811d7e2a8fSHawkins Jiawei
682acec5f68SHawkins Jiawei /*
683acec5f68SHawkins Jiawei * Poll SVQ for multiple pending control commands and check the device's ack.
684acec5f68SHawkins Jiawei *
685acec5f68SHawkins Jiawei * Caller should hold the BQL when invoking this function.
686acec5f68SHawkins Jiawei *
687acec5f68SHawkins Jiawei * @s: The VhostVDPAState
688acec5f68SHawkins Jiawei * @len: The length of the pending status shadow buffer
689acec5f68SHawkins Jiawei */
vhost_vdpa_net_svq_flush(VhostVDPAState * s,size_t len)690acec5f68SHawkins Jiawei static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len)
691acec5f68SHawkins Jiawei {
692acec5f68SHawkins Jiawei /* device uses a one-byte length ack for each control command */
693acec5f68SHawkins Jiawei ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len);
694acec5f68SHawkins Jiawei if (unlikely(dev_written != len)) {
695acec5f68SHawkins Jiawei return -EIO;
696acec5f68SHawkins Jiawei }
697acec5f68SHawkins Jiawei
698acec5f68SHawkins Jiawei /* check the device's ack */
699acec5f68SHawkins Jiawei for (int i = 0; i < len; ++i) {
700acec5f68SHawkins Jiawei if (s->status[i] != VIRTIO_NET_OK) {
701acec5f68SHawkins Jiawei return -EIO;
702acec5f68SHawkins Jiawei }
703acec5f68SHawkins Jiawei }
704acec5f68SHawkins Jiawei return 0;
705acec5f68SHawkins Jiawei }
706acec5f68SHawkins Jiawei
/*
 * Pack one control command at the cursors' current position and add it to
 * the control shadow virtqueue.
 *
 * If the SVQ has fewer than two free slots, or the out cursor has less room
 * than the command needs, all pending commands are flushed first and both
 * cursors are reset to the start of the shadow buffers.
 *
 * @s: The VhostVDPAState
 * @out_cursor: Cursor into the command-out shadow buffer; advanced by the
 *              size of the packed command on success
 * @in_cursor: Cursor into the status shadow buffer; advanced by one status
 *             entry on success
 * @class: Control command class
 * @cmd: Control command within @class
 * @data_sg: Command-specific data
 * @data_num: Number of entries in @data_sg
 *
 * Returns 0 on success, or the negative value returned by the flush or by
 * vhost_vdpa_net_cvq_add() on failure.
 */
static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
                                       struct iovec *out_cursor,
                                       struct iovec *in_cursor, uint8_t class,
                                       uint8_t cmd, const struct iovec *data_sg,
                                       size_t data_num)
{
    const struct virtio_net_ctrl_hdr hdr = {
        .class = class,
        .cmd = cmd,
    };
    const size_t payload_len = iov_size(data_sg, data_num);
    const size_t cmd_len = sizeof(hdr) + payload_len;
    VhostShadowVirtqueue *cvq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    struct iovec out_sg, in_sg;
    unsigned cursor_cnt;
    ssize_t ret;

    assert(payload_len < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(hdr));
    trace_vhost_vdpa_net_load_cmd(s, class, cmd, data_num, payload_len);

    if (vhost_svq_available_slots(cvq) < 2 ||
        iov_size(out_cursor, 1) < cmd_len) {
        /*
         * SVQ or the shadow buffers are full: flush every pending command.
         * One status byte is consumed per queued command, so the distance
         * the in cursor has moved is the number of acks to wait for.
         *
         * Polling is fine here since we've had BQL from the time
         * we sent the descriptor.
         */
        size_t pending = in_cursor->iov_base - (void *)s->status;

        ret = vhost_vdpa_net_svq_flush(s, pending);
        if (unlikely(ret < 0)) {
            return ret;
        }

        vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor);
    }

    /* header first, then the command-specific data right behind it */
    iov_from_buf(out_cursor, 1, 0, &hdr, sizeof(hdr));
    iov_to_buf(data_sg, data_num, 0,
               out_cursor->iov_base + sizeof(hdr), payload_len);

    /* carve the device-readable and device-writable parts off the cursors */
    iov_copy(&out_sg, 1, out_cursor, 1, 0, cmd_len);
    iov_copy(&in_sg, 1, in_cursor, 1, 0, sizeof(*s->status));

    ret = vhost_vdpa_net_cvq_add(s, &out_sg, 1, &in_sg, 1);
    if (unlikely(ret < 0)) {
        trace_vhost_vdpa_net_load_cmd_retval(s, class, cmd, ret);
        return ret;
    }

    /* step both cursors past what was just queued */
    cursor_cnt = 1;
    iov_discard_front(&out_cursor, &cursor_cnt, cmd_len);
    cursor_cnt = 1;
    iov_discard_front(&in_cursor, &cursor_cnt, sizeof(*s->status));

    return 0;
}
769f73c0c43SEugenio Pérez
/*
 * Replay the MAC address and the MAC filter table through the CVQ.
 *
 * @s: The VhostVDPAState
 * @n: The VirtIONet whose MAC state is restored
 * @out_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 * @in_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 *
 * Returns 0 on success or the negative value from
 * vhost_vdpa_net_load_cmd() on failure.
 */
static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
                                   struct iovec *out_cursor,
                                   struct iovec *in_cursor)
{
    ssize_t ret;

    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        const struct iovec mac_sg = {
            .iov_base = (void *)n->mac,
            .iov_len = sizeof(n->mac),
        };

        ret = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                      VIRTIO_NET_CTRL_MAC,
                                      VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                      &mac_sg, 1);
        if (unlikely(ret < 0)) {
            return ret;
        }
    }

    /*
     * According to VirtIO standard, "The device MUST have an
     * empty MAC filtering table on reset.".
     *
     * An empty filter table on the driver side therefore already matches
     * the device's defaults and the CVQ command can be skipped.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) ||
        n->mac_table.in_use == 0) {
        return 0;
    }

    /* unicast entries come first in macs[], multicast entries follow */
    const uint32_t uni_cnt = n->mac_table.first_multi;
    const uint32_t uni_bytes = uni_cnt * ETH_ALEN;
    const uint32_t mul_cnt = n->mac_table.in_use - uni_cnt;
    const uint32_t mul_bytes = mul_cnt * ETH_ALEN;
    struct virtio_net_ctrl_mac uni_hdr = {
        .entries = cpu_to_le32(uni_cnt),
    };
    struct virtio_net_ctrl_mac mul_hdr = {
        .entries = cpu_to_le32(mul_cnt),
    };
    const struct iovec table_sg[] = {
        { .iov_base = &uni_hdr, .iov_len = sizeof(uni_hdr) },
        { .iov_base = n->mac_table.macs, .iov_len = uni_bytes },
        { .iov_base = &mul_hdr, .iov_len = sizeof(mul_hdr) },
        { .iov_base = &n->mac_table.macs[uni_bytes], .iov_len = mul_bytes },
    };

    ret = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                  VIRTIO_NET_CTRL_MAC,
                                  VIRTIO_NET_CTRL_MAC_TABLE_SET,
                                  table_sg, ARRAY_SIZE(table_sg));
    if (unlikely(ret < 0)) {
        return ret;
    }

    return 0;
}
839f73c0c43SEugenio Pérez
/*
 * Replay the RSS or the hash-report configuration through the CVQ.
 *
 * @s: The VhostVDPAState
 * @n: The VirtIONet whose rss_data is restored
 * @out_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 * @in_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 * @do_rss: true sends VIRTIO_NET_CTRL_MQ_RSS_CONFIG, false sends
 *          VIRTIO_NET_CTRL_MQ_HASH_CONFIG
 *
 * Returns 0 if nothing had to be sent or the command was queued, or the
 * negative value from vhost_vdpa_net_load_cmd() on failure.
 */
static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n,
                                   struct iovec *out_cursor,
                                   struct iovec *in_cursor, bool do_rss)
{
    struct virtio_net_rss_config cfg = {};
    ssize_t r;
    g_autofree uint16_t *table = NULL;

    /*
     * According to VirtIO standard, "Initially the device has all hash
     * types disabled and reports only VIRTIO_NET_HASH_REPORT_NONE.".
     *
     * Therefore, there is no need to send this CVQ command if the
     * driver disables all the hash types, which aligns with
     * the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!n->rss_data.enabled ||
        n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) {
        return 0;
    }

    /*
     * Zero-allocate the table: every entry is sent to the device below, and
     * the hash-config path does not fill it, so no uninitialized heap bytes
     * must be left in it.
     */
    table = g_malloc0_n(n->rss_data.indirections_len,
                        sizeof(n->rss_data.indirections_table[0]));
    cfg.hash_types = cpu_to_le32(n->rss_data.hash_types);

    if (do_rss) {
        /*
         * According to VirtIO standard, "Number of entries in indirection_table
         * is (indirection_table_mask + 1)".
         */
        cfg.indirection_table_mask = cpu_to_le16(n->rss_data.indirections_len -
                                                 1);
        cfg.unclassified_queue = cpu_to_le16(n->rss_data.default_queue);
        for (int i = 0; i < n->rss_data.indirections_len; ++i) {
            table[i] = cpu_to_le16(n->rss_data.indirections_table[i]);
        }
        cfg.max_tx_vq = cpu_to_le16(n->curr_queue_pairs);
    }
    /*
     * Otherwise (hash config): according to VirtIO standard, "Field reserved
     * MUST contain zeroes. It is defined to make the structure to match the
     * layout of virtio_net_rss_config structure, defined in 5.1.6.5.7.".
     *
     * The fields of struct virtio_net_rss_config that correspond to the
     * `reserved` field in struct virtio_net_hash_config are zeroed at the
     * definition of `cfg`. The `indirection_table` data lives in `table`,
     * which is zero-filled by its allocation above.
     */

    /*
     * Considering that virtio_net_handle_rss() currently does not restore
     * the hash key length parsed from the CVQ command sent from the guest
     * into n->rss_data and uses the maximum key length in other code, so
     * we also employ the maximum key length here.
     */
    cfg.hash_key_length = sizeof(n->rss_data.key);

    /* cfg is split around the variable-length indirection table */
    const struct iovec data[] = {
        {
            .iov_base = &cfg,
            .iov_len = offsetof(struct virtio_net_rss_config,
                                indirection_table),
        }, {
            .iov_base = table,
            .iov_len = n->rss_data.indirections_len *
                       sizeof(n->rss_data.indirections_table[0]),
        }, {
            .iov_base = &cfg.max_tx_vq,
            .iov_len = offsetof(struct virtio_net_rss_config, hash_key_data) -
                       offsetof(struct virtio_net_rss_config, max_tx_vq),
        }, {
            .iov_base = (void *)n->rss_data.key,
            .iov_len = sizeof(n->rss_data.key),
        }
    };

    r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                VIRTIO_NET_CTRL_MQ,
                                do_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG :
                                VIRTIO_NET_CTRL_MQ_HASH_CONFIG,
                                data, ARRAY_SIZE(data));
    if (unlikely(r < 0)) {
        return r;
    }

    return 0;
}
9368b98c15fSHawkins Jiawei
/*
 * Replay the multiqueue state through the CVQ.
 *
 * Does nothing unless VIRTIO_NET_F_MQ is negotiated. Otherwise the current
 * number of queue pairs is sent, followed by the RSS configuration when
 * VIRTIO_NET_F_RSS is negotiated, or else by the hash configuration when
 * VIRTIO_NET_F_HASH_REPORT is negotiated.
 *
 * @s: The VhostVDPAState
 * @n: The VirtIONet whose queue state is restored
 * @out_cursor: Forwarded to the command loaders
 * @in_cursor: Forwarded to the command loaders
 *
 * Returns 0 on success or the first negative value reported by a loader.
 */
static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n,
                                  struct iovec *out_cursor,
                                  struct iovec *in_cursor)
{
    ssize_t ret;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
        return 0;
    }

    trace_vhost_vdpa_net_load_mq(s, n->curr_queue_pairs);

    struct virtio_net_ctrl_mq pairs = {
        .virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs),
    };
    const struct iovec pairs_sg = {
        .iov_base = &pairs,
        .iov_len = sizeof(pairs),
    };

    ret = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                  VIRTIO_NET_CTRL_MQ,
                                  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
                                  &pairs_sg, 1);
    if (unlikely(ret < 0)) {
        return ret;
    }

    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_RSS)) {
        /* load the receive-side scaling state */
        ret = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, true);
    } else if (virtio_vdev_has_feature(&n->parent_obj,
                                       VIRTIO_NET_F_HASH_REPORT)) {
        /* load the hash calculation state */
        ret = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, false);
    } else {
        ret = 0;
    }

    return unlikely(ret < 0) ? ret : 0;
}
981f64c7cdaSEugenio Pérez
/*
 * Replay the guest offloads configuration through the CVQ.
 *
 * @s: The VhostVDPAState
 * @n: The VirtIONet whose curr_guest_offloads is restored
 * @out_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 * @in_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 *
 * Returns 0 if nothing had to be sent or the command was queued, or the
 * negative value from vhost_vdpa_net_load_cmd() on failure.
 */
static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
                                        const VirtIONet *n,
                                        struct iovec *out_cursor,
                                        struct iovec *in_cursor)
{
    /*
     * Nothing to do without VIRTIO_NET_F_CTRL_GUEST_OFFLOADS.
     *
     * Also, according to VirtIO standard, "Upon feature negotiation
     * corresponding offload gets enabled to preserve
     * backward compatibility.".
     *
     * So when the driver has every supported offload enabled it already
     * matches the device's defaults and the CVQ command can be skipped.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!virtio_vdev_has_feature(&n->parent_obj,
                                 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) ||
        n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
        return 0;
    }

    uint64_t le_offloads = cpu_to_le64(n->curr_guest_offloads);
    const struct iovec offloads_sg = {
        .iov_base = &le_offloads,
        .iov_len = sizeof(le_offloads),
    };
    ssize_t ret = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                          VIRTIO_NET_CTRL_GUEST_OFFLOADS,
                                          VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
                                          &offloads_sg, 1);

    if (unlikely(ret < 0)) {
        return ret;
    }

    return 0;
}
10260b58d368SHawkins Jiawei
/*
 * Send a single VIRTIO_NET_CTRL_RX command carrying a one-byte on/off value.
 *
 * @s: The VhostVDPAState
 * @out_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 * @in_cursor: Forwarded to vhost_vdpa_net_load_cmd()
 * @cmd: The command within the VIRTIO_NET_CTRL_RX class
 * @on: The byte sent as command-specific data
 *
 * Returns 0 on success or the negative value from
 * vhost_vdpa_net_load_cmd() on failure.
 */
static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s,
                                       struct iovec *out_cursor,
                                       struct iovec *in_cursor,
                                       uint8_t cmd,
                                       uint8_t on)
{
    const struct iovec on_sg = {
        .iov_base = &on,
        .iov_len = sizeof(on),
    };
    ssize_t ret = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                          VIRTIO_NET_CTRL_RX, cmd, &on_sg, 1);

    if (unlikely(ret < 0)) {
        return ret;
    }

    return 0;
}
1047b12f907eSHawkins Jiawei
/*
 * Replay the receive filtering modes through the CVQ.
 *
 * Only modes that differ from the device's defaults are sent. Nothing is
 * sent without VIRTIO_NET_F_CTRL_RX, and the all-unicast, non-multicast,
 * non-unicast and non-broadcast modes additionally require
 * VIRTIO_NET_F_CTRL_RX_EXTRA.
 *
 * @s: The VhostVDPAState
 * @n: The VirtIONet whose rx modes are restored
 * @out_cursor: Forwarded to vhost_vdpa_net_load_rx_mode()
 * @in_cursor: Forwarded to vhost_vdpa_net_load_rx_mode()
 *
 * Returns 0 on success or the first negative value from
 * vhost_vdpa_net_load_rx_mode().
 */
static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
                                  const VirtIONet *n,
                                  struct iovec *out_cursor,
                                  struct iovec *in_cursor)
{
    ssize_t r;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) {
        return 0;
    }

    /*
     * According to virtio_net_reset(), device turns promiscuous mode
     * on by default.
     *
     * Additionally, according to VirtIO standard, "Since there are
     * no guarantees, it can use a hash filter or silently switch to
     * allmulti or promiscuous mode if it is given too many addresses.".
     * QEMU marks `n->mac_table.uni_overflow` if guest sets too many
     * non-multicast MAC addresses, indicating that promiscuous mode
     * should be enabled.
     *
     * Therefore, QEMU should only send this CVQ command if the
     * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off,
     * which sets promiscuous mode off, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!n->mac_table.uni_overflow && !n->promisc) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_PROMISC, 0);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    /*
     * According to virtio_net_reset(), device turns all-multicast mode
     * off by default.
     *
     * According to VirtIO standard, "Since there are no guarantees,
     * it can use a hash filter or silently switch to allmulti or
     * promiscuous mode if it is given too many addresses.". QEMU marks
     * `n->mac_table.multi_overflow` if guest sets too many
     * multicast MAC addresses.
     *
     * Therefore, QEMU should only send this CVQ command if the
     * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on,
     * which sets all-multicast mode on, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (n->mac_table.multi_overflow || n->allmulti) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) {
        return 0;
    }

    /*
     * According to virtio_net_reset(), device turns all-unicast,
     * non-multicast, non-unicast and non-broadcast modes off by default.
     *
     * Therefore, QEMU should only send the corresponding CVQ command if the
     * driver sets that mode on, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     *
     * The commands are sent in the order listed here.
     */
    const struct {
        uint8_t cmd;
        bool enabled;
    } extra_modes[] = {
        { VIRTIO_NET_CTRL_RX_ALLUNI,  n->alluni },
        { VIRTIO_NET_CTRL_RX_NOMULTI, n->nomulti },
        { VIRTIO_NET_CTRL_RX_NOUNI,   n->nouni },
        { VIRTIO_NET_CTRL_RX_NOBCAST, n->nobcast },
    };

    for (size_t i = 0; i < ARRAY_SIZE(extra_modes); i++) {
        if (!extra_modes[i].enabled) {
            continue;
        }

        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        extra_modes[i].cmd, 1);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    return 0;
}
1188b12f907eSHawkins Jiawei
/*
 * Queue one VIRTIO_NET_CTRL_VLAN_ADD command for @vid on the control
 * virtqueue.
 *
 * @s: vhost-vdpa net state owning the CVQ shadow buffers
 * @n: virtio-net device model (not read by this helper)
 * @out_cursor: cursor handed to vhost_vdpa_net_load_cmd() for the out buffer
 * @in_cursor: cursor handed to vhost_vdpa_net_load_cmd() for the in buffer
 * @vid: VLAN id to add, in host byte order
 *
 * Returns 0 on success, or the negative value reported by
 * vhost_vdpa_net_load_cmd().
 */
static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s,
                                           const VirtIONet *n,
                                           struct iovec *out_cursor,
                                           struct iovec *in_cursor,
                                           uint16_t vid)
{
    /*
     * Multi-byte CVQ fields are little-endian (see the le32 handling of
     * virtio_net_ctrl_mac.entries elsewhere in this file), so serialise the
     * id byte by byte instead of handing out the host-endian integer, which
     * would be byte-swapped on big-endian hosts.
     */
    uint8_t vid_le[sizeof(vid)] = { vid & 0xff, vid >> 8 };
    const struct iovec data = {
        .iov_base = vid_le,
        .iov_len = sizeof(vid_le),
    };
    ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_VLAN,
                                        VIRTIO_NET_CTRL_VLAN_ADD,
                                        &data, 1);

    return unlikely(r < 0) ? r : 0;
}
12098f7e9967SHawkins Jiawei
/*
 * Replay the VLAN filter table of the device model through the CVQ.
 *
 * Nothing is sent unless VIRTIO_NET_F_CTRL_VLAN is set on the device.
 * Otherwise one VLAN_ADD command is queued for every bit set in n->vlans,
 * which is walked as an array of 32-bit words.
 *
 * Returns 0 on success or the first non-zero value returned by
 * vhost_vdpa_net_load_single_vlan().
 */
static int vhost_vdpa_net_load_vlan(VhostVDPAState *s,
                                    const VirtIONet *n,
                                    struct iovec *out_cursor,
                                    struct iovec *in_cursor)
{
    int word, bit, ret;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) {
        return 0;
    }

    for (word = 0; word < MAX_VLAN >> 5; word++) {
        /* An all-zero word holds no VLAN: skip its 32 bits at once. */
        for (bit = 0; n->vlans[word] && bit <= 0x1f; bit++) {
            if (!(n->vlans[word] & (1U << bit))) {
                continue;
            }

            /* VLAN id = word index * 32 + bit position */
            ret = vhost_vdpa_net_load_single_vlan(s, n, out_cursor,
                                                  in_cursor,
                                                  (word << 5) + bit);
            if (unlikely(ret != 0)) {
                return ret;
            }
        }
    }

    return 0;
}
12358f7e9967SHawkins Jiawei
/*
 * .load callback of the CVQ net client.
 *
 * Marks the CVQ vring ready first. When shadow virtqueues are enabled it then
 * replays the device model state (MAC, MQ, offloads, RX mode, VLAN) through
 * the CVQ and flushes the pending used buffers. Finally every vring with an
 * index lower than the CVQ one is marked ready.
 *
 * Returns 0 on success, or the first error reported by a step.
 */
static int vhost_vdpa_net_cvq_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    int ret;
    int idx;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    ret = vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
    if (unlikely(ret < 0)) {
        return ret;
    }

    if (v->shadow_vqs_enabled) {
        const VirtIONet *n = VIRTIO_NET(v->dev->vdev);
        struct iovec out_cursor, in_cursor;

        vhost_vdpa_net_load_cursor_reset(s, &out_cursor, &in_cursor);

        ret = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
        if (unlikely(ret < 0)) {
            return ret;
        }

        ret = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
        if (unlikely(ret)) {
            return ret;
        }

        ret = vhost_vdpa_net_load_offloads(s, n, &out_cursor, &in_cursor);
        if (unlikely(ret)) {
            return ret;
        }

        ret = vhost_vdpa_net_load_rx(s, n, &out_cursor, &in_cursor);
        if (unlikely(ret)) {
            return ret;
        }

        ret = vhost_vdpa_net_load_vlan(s, n, &out_cursor, &in_cursor);
        if (unlikely(ret)) {
            return ret;
        }

        /*
         * We need to poll and check all pending device's used buffers.
         * The distance the in cursor moved from the start of s->status is
         * the length handed to the flush helper.
         *
         * We can poll here since we've had BQL from the time
         * we sent the descriptor.
         */
        ret = vhost_vdpa_net_svq_flush(s,
                                       in_cursor.iov_base - (void *)s->status);
        if (unlikely(ret)) {
            return ret;
        }
    }

    /* Enable the remaining vrings only once the CVQ state is in place. */
    idx = 0;
    while (idx < v->dev->vq_index) {
        ret = vhost_vdpa_set_vring_ready(v, idx);
        if (unlikely(ret < 0)) {
            return ret;
        }
        ++idx;
    }

    return 0;
}
1296dd036d8dSEugenio Pérez
1297f8972b56SEugenio Pérez static NetClientInfo net_vhost_vdpa_cvq_info = {
1298f8972b56SEugenio Pérez .type = NET_CLIENT_DRIVER_VHOST_VDPA,
1299f8972b56SEugenio Pérez .size = sizeof(VhostVDPAState),
1300f8972b56SEugenio Pérez .receive = vhost_vdpa_receive,
13017a7f87e9SEugenio Pérez .start = vhost_vdpa_net_cvq_start,
1302f3fada59SEugenio Pérez .load = vhost_vdpa_net_cvq_load,
13037a7f87e9SEugenio Pérez .stop = vhost_vdpa_net_cvq_stop,
1304f8972b56SEugenio Pérez .cleanup = vhost_vdpa_cleanup,
1305f8972b56SEugenio Pérez .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
1306f8972b56SEugenio Pérez .has_ufo = vhost_vdpa_has_ufo,
1307f8972b56SEugenio Pérez .check_peer_type = vhost_vdpa_check_peer_type,
1308d1fd2d31SHawkins Jiawei .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
1309f8972b56SEugenio Pérez };
1310f8972b56SEugenio Pérez
1311fee364e4SHawkins Jiawei /*
1312fee364e4SHawkins Jiawei * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to
1313fee364e4SHawkins Jiawei * vdpa device.
1314fee364e4SHawkins Jiawei *
1315fee364e4SHawkins Jiawei * Considering that QEMU cannot send the entire filter table to the
1316fee364e4SHawkins Jiawei * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ
1317fee364e4SHawkins Jiawei * command to enable promiscuous mode to receive all packets,
1318fee364e4SHawkins Jiawei * according to VirtIO standard, "Since there are no guarantees,
1319fee364e4SHawkins Jiawei * it can use a hash filter or silently switch to allmulti or
1320fee364e4SHawkins Jiawei * promiscuous mode if it is given too many addresses.".
1321fee364e4SHawkins Jiawei *
1322fee364e4SHawkins Jiawei * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and
1323fee364e4SHawkins Jiawei * marks `n->mac_table.x_overflow` accordingly, it should have
1324fee364e4SHawkins Jiawei * the same effect on the device model to receive
1325fee364e4SHawkins Jiawei * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses.
1326fee364e4SHawkins Jiawei * The same applies to multicast MAC addresses.
1327fee364e4SHawkins Jiawei *
1328fee364e4SHawkins Jiawei * Therefore, QEMU can provide the device model with a fake
1329fee364e4SHawkins Jiawei * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1)
1330fee364e4SHawkins Jiawei * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast
1331fee364e4SHawkins Jiawei * MAC addresses. This ensures that the device model marks
1332fee364e4SHawkins Jiawei * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`,
1333fee364e4SHawkins Jiawei * allowing all packets to be received, which aligns with the
1334fee364e4SHawkins Jiawei * state of the vdpa device.
1335fee364e4SHawkins Jiawei */
vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState * s,VirtQueueElement * elem,struct iovec * out,const struct iovec * in)1336fee364e4SHawkins Jiawei static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
1337fee364e4SHawkins Jiawei VirtQueueElement *elem,
1338327dedb8SHawkins Jiawei struct iovec *out,
1339327dedb8SHawkins Jiawei const struct iovec *in)
1340fee364e4SHawkins Jiawei {
1341fee364e4SHawkins Jiawei struct virtio_net_ctrl_mac mac_data, *mac_ptr;
1342fee364e4SHawkins Jiawei struct virtio_net_ctrl_hdr *hdr_ptr;
1343fee364e4SHawkins Jiawei uint32_t cursor;
1344fee364e4SHawkins Jiawei ssize_t r;
1345327dedb8SHawkins Jiawei uint8_t on = 1;
1346fee364e4SHawkins Jiawei
1347fee364e4SHawkins Jiawei /* parse the non-multicast MAC address entries from CVQ command */
1348fee364e4SHawkins Jiawei cursor = sizeof(*hdr_ptr);
1349fee364e4SHawkins Jiawei r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
1350fee364e4SHawkins Jiawei &mac_data, sizeof(mac_data));
1351fee364e4SHawkins Jiawei if (unlikely(r != sizeof(mac_data))) {
1352fee364e4SHawkins Jiawei /*
1353fee364e4SHawkins Jiawei * If the CVQ command is invalid, we should simulate the vdpa device
1354fee364e4SHawkins Jiawei * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1355fee364e4SHawkins Jiawei */
1356fee364e4SHawkins Jiawei *s->status = VIRTIO_NET_ERR;
1357fee364e4SHawkins Jiawei return sizeof(*s->status);
1358fee364e4SHawkins Jiawei }
1359fee364e4SHawkins Jiawei cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
1360fee364e4SHawkins Jiawei
1361fee364e4SHawkins Jiawei /* parse the multicast MAC address entries from CVQ command */
1362fee364e4SHawkins Jiawei r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
1363fee364e4SHawkins Jiawei &mac_data, sizeof(mac_data));
1364fee364e4SHawkins Jiawei if (r != sizeof(mac_data)) {
1365fee364e4SHawkins Jiawei /*
1366fee364e4SHawkins Jiawei * If the CVQ command is invalid, we should simulate the vdpa device
1367fee364e4SHawkins Jiawei * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1368fee364e4SHawkins Jiawei */
1369fee364e4SHawkins Jiawei *s->status = VIRTIO_NET_ERR;
1370fee364e4SHawkins Jiawei return sizeof(*s->status);
1371fee364e4SHawkins Jiawei }
1372fee364e4SHawkins Jiawei cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
1373fee364e4SHawkins Jiawei
1374fee364e4SHawkins Jiawei /* validate the CVQ command */
1375fee364e4SHawkins Jiawei if (iov_size(elem->out_sg, elem->out_num) != cursor) {
1376fee364e4SHawkins Jiawei /*
1377fee364e4SHawkins Jiawei * If the CVQ command is invalid, we should simulate the vdpa device
1378fee364e4SHawkins Jiawei * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1379fee364e4SHawkins Jiawei */
1380fee364e4SHawkins Jiawei *s->status = VIRTIO_NET_ERR;
1381fee364e4SHawkins Jiawei return sizeof(*s->status);
1382fee364e4SHawkins Jiawei }
1383fee364e4SHawkins Jiawei
1384fee364e4SHawkins Jiawei /*
1385fee364e4SHawkins Jiawei * According to VirtIO standard, "Since there are no guarantees,
1386fee364e4SHawkins Jiawei * it can use a hash filter or silently switch to allmulti or
1387fee364e4SHawkins Jiawei * promiscuous mode if it is given too many addresses.".
1388fee364e4SHawkins Jiawei *
1389fee364e4SHawkins Jiawei * Therefore, considering that QEMU is unable to send the entire
1390fee364e4SHawkins Jiawei * filter table to the vdpa device, it should send the
1391fee364e4SHawkins Jiawei * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode
1392fee364e4SHawkins Jiawei */
1393327dedb8SHawkins Jiawei hdr_ptr = out->iov_base;
1394327dedb8SHawkins Jiawei out->iov_len = sizeof(*hdr_ptr) + sizeof(on);
1395327dedb8SHawkins Jiawei
1396327dedb8SHawkins Jiawei hdr_ptr->class = VIRTIO_NET_CTRL_RX;
1397327dedb8SHawkins Jiawei hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC;
1398327dedb8SHawkins Jiawei iov_from_buf(out, 1, sizeof(*hdr_ptr), &on, sizeof(on));
1399327dedb8SHawkins Jiawei r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1);
1400fee364e4SHawkins Jiawei if (unlikely(r < 0)) {
1401fee364e4SHawkins Jiawei return r;
1402fee364e4SHawkins Jiawei }
1403a864a321SHawkins Jiawei
1404a864a321SHawkins Jiawei /*
1405a864a321SHawkins Jiawei * We can poll here since we've had BQL from the time
1406a864a321SHawkins Jiawei * we sent the descriptor.
1407a864a321SHawkins Jiawei */
1408a864a321SHawkins Jiawei r = vhost_vdpa_net_svq_poll(s, 1);
1409a864a321SHawkins Jiawei if (unlikely(r < sizeof(*s->status))) {
1410a864a321SHawkins Jiawei return r;
1411a864a321SHawkins Jiawei }
1412fee364e4SHawkins Jiawei if (*s->status != VIRTIO_NET_OK) {
1413fee364e4SHawkins Jiawei return sizeof(*s->status);
1414fee364e4SHawkins Jiawei }
1415fee364e4SHawkins Jiawei
1416fee364e4SHawkins Jiawei /*
1417fee364e4SHawkins Jiawei * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ
1418fee364e4SHawkins Jiawei * command to the device model, including (`MAC_TABLE_ENTRIES` + 1)
1419fee364e4SHawkins Jiawei * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1)
1420fee364e4SHawkins Jiawei * multicast MAC addresses.
1421fee364e4SHawkins Jiawei *
1422fee364e4SHawkins Jiawei * By doing so, the device model can mark `n->mac_table.uni_overflow`
1423fee364e4SHawkins Jiawei * and `n->mac_table.multi_overflow`, enabling all packets to be
1424fee364e4SHawkins Jiawei * received, which aligns with the state of the vdpa device.
1425fee364e4SHawkins Jiawei */
1426fee364e4SHawkins Jiawei cursor = 0;
1427fee364e4SHawkins Jiawei uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1,
1428fee364e4SHawkins Jiawei fake_mul_entries = MAC_TABLE_ENTRIES + 1,
1429fee364e4SHawkins Jiawei fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) +
1430fee364e4SHawkins Jiawei sizeof(mac_data) + fake_uni_entries * ETH_ALEN +
1431fee364e4SHawkins Jiawei sizeof(mac_data) + fake_mul_entries * ETH_ALEN;
1432fee364e4SHawkins Jiawei
1433fee364e4SHawkins Jiawei assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len());
1434fee364e4SHawkins Jiawei out->iov_len = fake_cvq_size;
1435fee364e4SHawkins Jiawei
1436fee364e4SHawkins Jiawei /* pack the header for fake CVQ command */
1437fee364e4SHawkins Jiawei hdr_ptr = out->iov_base + cursor;
1438fee364e4SHawkins Jiawei hdr_ptr->class = VIRTIO_NET_CTRL_MAC;
1439fee364e4SHawkins Jiawei hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1440fee364e4SHawkins Jiawei cursor += sizeof(*hdr_ptr);
1441fee364e4SHawkins Jiawei
1442fee364e4SHawkins Jiawei /*
1443fee364e4SHawkins Jiawei * Pack the non-multicast MAC addresses part for fake CVQ command.
1444fee364e4SHawkins Jiawei *
1445fee364e4SHawkins Jiawei * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
14460a19d879SMichael Tokarev * addresses provided in CVQ command. Therefore, only the entries
1447fee364e4SHawkins Jiawei * field need to be prepared in the CVQ command.
1448fee364e4SHawkins Jiawei */
1449fee364e4SHawkins Jiawei mac_ptr = out->iov_base + cursor;
1450fee364e4SHawkins Jiawei mac_ptr->entries = cpu_to_le32(fake_uni_entries);
1451fee364e4SHawkins Jiawei cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN;
1452fee364e4SHawkins Jiawei
1453fee364e4SHawkins Jiawei /*
1454fee364e4SHawkins Jiawei * Pack the multicast MAC addresses part for fake CVQ command.
1455fee364e4SHawkins Jiawei *
1456fee364e4SHawkins Jiawei * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
14570a19d879SMichael Tokarev * addresses provided in CVQ command. Therefore, only the entries
1458fee364e4SHawkins Jiawei * field need to be prepared in the CVQ command.
1459fee364e4SHawkins Jiawei */
1460fee364e4SHawkins Jiawei mac_ptr = out->iov_base + cursor;
1461fee364e4SHawkins Jiawei mac_ptr->entries = cpu_to_le32(fake_mul_entries);
1462fee364e4SHawkins Jiawei
1463fee364e4SHawkins Jiawei /*
1464fee364e4SHawkins Jiawei * Simulating QEMU poll a vdpa device used buffer
1465fee364e4SHawkins Jiawei * for VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command
1466fee364e4SHawkins Jiawei */
1467fee364e4SHawkins Jiawei return sizeof(*s->status);
1468fee364e4SHawkins Jiawei }
1469fee364e4SHawkins Jiawei
14702df4dd31SEugenio Pérez /**
14712df4dd31SEugenio Pérez * Validate and copy control virtqueue commands.
14722df4dd31SEugenio Pérez *
14732df4dd31SEugenio Pérez * Following QEMU guidelines, we offer a copy of the buffers to the device to
14742df4dd31SEugenio Pérez * prevent TOCTOU bugs.
1475bd907ae4SEugenio Pérez */
vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue * svq,VirtQueueElement * elem,void * opaque)1476bd907ae4SEugenio Pérez static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
1477bd907ae4SEugenio Pérez VirtQueueElement *elem,
1478bd907ae4SEugenio Pérez void *opaque)
1479bd907ae4SEugenio Pérez {
14802df4dd31SEugenio Pérez VhostVDPAState *s = opaque;
1481be4278b6SEugenio Pérez size_t in_len;
148245c41018SHawkins Jiawei const struct virtio_net_ctrl_hdr *ctrl;
1483bd907ae4SEugenio Pérez virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
14847a7f87e9SEugenio Pérez /* Out buffer sent to both the vdpa device and the device model */
14857a7f87e9SEugenio Pérez struct iovec out = {
14867a7f87e9SEugenio Pérez .iov_base = s->cvq_cmd_out_buffer,
14877a7f87e9SEugenio Pérez };
14882df4dd31SEugenio Pérez /* in buffer used for device model */
14890e6bff0dSHawkins Jiawei const struct iovec model_in = {
14902df4dd31SEugenio Pérez .iov_base = &status,
14912df4dd31SEugenio Pérez .iov_len = sizeof(status),
14922df4dd31SEugenio Pérez };
14930e6bff0dSHawkins Jiawei /* in buffer used for vdpa device */
14940e6bff0dSHawkins Jiawei const struct iovec vdpa_in = {
14950e6bff0dSHawkins Jiawei .iov_base = s->status,
14960e6bff0dSHawkins Jiawei .iov_len = sizeof(*s->status),
14970e6bff0dSHawkins Jiawei };
1498be4278b6SEugenio Pérez ssize_t dev_written = -EINVAL;
1499bd907ae4SEugenio Pérez
15007a7f87e9SEugenio Pérez out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
15017a7f87e9SEugenio Pérez s->cvq_cmd_out_buffer,
1502fee364e4SHawkins Jiawei vhost_vdpa_net_cvq_cmd_page_len());
150345c41018SHawkins Jiawei
150445c41018SHawkins Jiawei ctrl = s->cvq_cmd_out_buffer;
150545c41018SHawkins Jiawei if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) {
15063f9a3eebSEugenio Pérez /*
15073f9a3eebSEugenio Pérez * Guest announce capability is emulated by qemu, so don't forward to
15083f9a3eebSEugenio Pérez * the device.
15093f9a3eebSEugenio Pérez */
15103f9a3eebSEugenio Pérez dev_written = sizeof(status);
15113f9a3eebSEugenio Pérez *s->status = VIRTIO_NET_OK;
1512fee364e4SHawkins Jiawei } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC &&
1513fee364e4SHawkins Jiawei ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET &&
1514fee364e4SHawkins Jiawei iov_size(elem->out_sg, elem->out_num) > out.iov_len)) {
1515fee364e4SHawkins Jiawei /*
1516fee364e4SHawkins Jiawei * Due to the size limitation of the out buffer sent to the vdpa device,
1517fee364e4SHawkins Jiawei * which is determined by vhost_vdpa_net_cvq_cmd_page_len(), excessive
1518fee364e4SHawkins Jiawei * MAC addresses set by the driver for the filter table can cause
1519fee364e4SHawkins Jiawei * truncation of the CVQ command in QEMU. As a result, the vdpa device
1520fee364e4SHawkins Jiawei * rejects the flawed CVQ command.
1521fee364e4SHawkins Jiawei *
1522fee364e4SHawkins Jiawei * Therefore, QEMU must handle this situation instead of sending
15230a19d879SMichael Tokarev * the CVQ command directly.
1524fee364e4SHawkins Jiawei */
1525fee364e4SHawkins Jiawei dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
1526327dedb8SHawkins Jiawei &out, &vdpa_in);
1527fee364e4SHawkins Jiawei if (unlikely(dev_written < 0)) {
1528fee364e4SHawkins Jiawei goto out;
1529fee364e4SHawkins Jiawei }
15303f9a3eebSEugenio Pérez } else {
1531a864a321SHawkins Jiawei ssize_t r;
1532a864a321SHawkins Jiawei r = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1);
1533a864a321SHawkins Jiawei if (unlikely(r < 0)) {
1534a864a321SHawkins Jiawei dev_written = r;
1535bd907ae4SEugenio Pérez goto out;
1536bd907ae4SEugenio Pérez }
1537a864a321SHawkins Jiawei
1538a864a321SHawkins Jiawei /*
1539a864a321SHawkins Jiawei * We can poll here since we've had BQL from the time
1540a864a321SHawkins Jiawei * we sent the descriptor.
1541a864a321SHawkins Jiawei */
1542a864a321SHawkins Jiawei dev_written = vhost_vdpa_net_svq_poll(s, 1);
15433f9a3eebSEugenio Pérez }
1544bd907ae4SEugenio Pérez
1545bd907ae4SEugenio Pérez if (unlikely(dev_written < sizeof(status))) {
1546bd907ae4SEugenio Pérez error_report("Insufficient written data (%zu)", dev_written);
15472df4dd31SEugenio Pérez goto out;
15482df4dd31SEugenio Pérez }
15492df4dd31SEugenio Pérez
155017fb889fSEugenio Pérez if (*s->status != VIRTIO_NET_OK) {
1551d45243bcSEugenio Pérez goto out;
15522df4dd31SEugenio Pérez }
15532df4dd31SEugenio Pérez
15542df4dd31SEugenio Pérez status = VIRTIO_NET_ERR;
15550e6bff0dSHawkins Jiawei virtio_net_handle_ctrl_iov(svq->vdev, &model_in, 1, &out, 1);
15562df4dd31SEugenio Pérez if (status != VIRTIO_NET_OK) {
15572df4dd31SEugenio Pérez error_report("Bad CVQ processing in model");
1558bd907ae4SEugenio Pérez }
1559bd907ae4SEugenio Pérez
1560bd907ae4SEugenio Pérez out:
1561bd907ae4SEugenio Pérez in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
1562bd907ae4SEugenio Pérez sizeof(status));
1563bd907ae4SEugenio Pérez if (unlikely(in_len < sizeof(status))) {
1564bd907ae4SEugenio Pérez error_report("Bad device CVQ written length");
1565bd907ae4SEugenio Pérez }
1566bd907ae4SEugenio Pérez vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
1567031b1abaSHawkins Jiawei /*
1568031b1abaSHawkins Jiawei * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when
1569031b1abaSHawkins Jiawei * the function successfully forwards the CVQ command, indicated
1570031b1abaSHawkins Jiawei * by a non-negative value of `dev_written`. Otherwise, it still
1571031b1abaSHawkins Jiawei * belongs to SVQ.
1572031b1abaSHawkins Jiawei * This function should only free the `elem` when it owns.
1573031b1abaSHawkins Jiawei */
1574031b1abaSHawkins Jiawei if (dev_written >= 0) {
1575bd907ae4SEugenio Pérez g_free(elem);
1576031b1abaSHawkins Jiawei }
1577be4278b6SEugenio Pérez return dev_written < 0 ? dev_written : 0;
1578bd907ae4SEugenio Pérez }
1579bd907ae4SEugenio Pérez
1580bd907ae4SEugenio Pérez static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
1581bd907ae4SEugenio Pérez .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
1582bd907ae4SEugenio Pérez };
1583bd907ae4SEugenio Pérez
1584152128d6SEugenio Pérez /**
1585152128d6SEugenio Pérez * Probe if CVQ is isolated
1586152128d6SEugenio Pérez *
1587152128d6SEugenio Pérez * @device_fd The vdpa device fd
1588152128d6SEugenio Pérez * @features Features offered by the device.
1589152128d6SEugenio Pérez * @cvq_index The control vq pair index
1590152128d6SEugenio Pérez *
1591152128d6SEugenio Pérez * Returns <0 in case of failure, 0 if false and 1 if true.
1592152128d6SEugenio Pérez */
vhost_vdpa_probe_cvq_isolation(int device_fd,uint64_t features,int cvq_index,Error ** errp)1593152128d6SEugenio Pérez static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
1594152128d6SEugenio Pérez int cvq_index, Error **errp)
1595152128d6SEugenio Pérez {
159646ff64a8SZhao Liu ERRP_GUARD();
1597152128d6SEugenio Pérez uint64_t backend_features;
1598152128d6SEugenio Pérez int64_t cvq_group;
1599152128d6SEugenio Pérez uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
1600845ec38aSEugenio Pérez VIRTIO_CONFIG_S_DRIVER;
1601152128d6SEugenio Pérez int r;
1602152128d6SEugenio Pérez
1603152128d6SEugenio Pérez r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
1604152128d6SEugenio Pérez if (unlikely(r < 0)) {
1605152128d6SEugenio Pérez error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
1606152128d6SEugenio Pérez return r;
1607152128d6SEugenio Pérez }
1608152128d6SEugenio Pérez
1609152128d6SEugenio Pérez if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
1610152128d6SEugenio Pérez return 0;
1611152128d6SEugenio Pérez }
1612152128d6SEugenio Pérez
1613845ec38aSEugenio Pérez r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1614152128d6SEugenio Pérez if (unlikely(r)) {
1615845ec38aSEugenio Pérez error_setg_errno(errp, -r, "Cannot set device status");
1616f1085882SEugenio Pérez goto out;
1617152128d6SEugenio Pérez }
1618152128d6SEugenio Pérez
1619845ec38aSEugenio Pérez r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
1620845ec38aSEugenio Pérez if (unlikely(r)) {
1621845ec38aSEugenio Pérez error_setg_errno(errp, -r, "Cannot set features");
1622845ec38aSEugenio Pérez goto out;
1623845ec38aSEugenio Pérez }
1624845ec38aSEugenio Pérez
1625845ec38aSEugenio Pérez status |= VIRTIO_CONFIG_S_FEATURES_OK;
1626152128d6SEugenio Pérez r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1627152128d6SEugenio Pérez if (unlikely(r)) {
1628845ec38aSEugenio Pérez error_setg_errno(errp, -r, "Cannot set device status");
1629152128d6SEugenio Pérez goto out;
1630152128d6SEugenio Pérez }
1631152128d6SEugenio Pérez
1632152128d6SEugenio Pérez cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
1633152128d6SEugenio Pérez if (unlikely(cvq_group < 0)) {
1634152128d6SEugenio Pérez if (cvq_group != -ENOTSUP) {
1635152128d6SEugenio Pérez r = cvq_group;
1636152128d6SEugenio Pérez goto out;
1637152128d6SEugenio Pérez }
1638152128d6SEugenio Pérez
1639152128d6SEugenio Pérez /*
1640152128d6SEugenio Pérez * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
1641152128d6SEugenio Pérez * support ASID even if the parent driver does not. The CVQ cannot be
1642152128d6SEugenio Pérez * isolated in this case.
1643152128d6SEugenio Pérez */
1644152128d6SEugenio Pérez error_free(*errp);
1645152128d6SEugenio Pérez *errp = NULL;
1646152128d6SEugenio Pérez r = 0;
1647152128d6SEugenio Pérez goto out;
1648152128d6SEugenio Pérez }
1649152128d6SEugenio Pérez
1650152128d6SEugenio Pérez for (int i = 0; i < cvq_index; ++i) {
1651152128d6SEugenio Pérez int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
1652152128d6SEugenio Pérez if (unlikely(group < 0)) {
1653152128d6SEugenio Pérez r = group;
1654152128d6SEugenio Pérez goto out;
1655152128d6SEugenio Pérez }
1656152128d6SEugenio Pérez
1657152128d6SEugenio Pérez if (group == (int64_t)cvq_group) {
1658152128d6SEugenio Pérez r = 0;
1659152128d6SEugenio Pérez goto out;
1660152128d6SEugenio Pérez }
1661152128d6SEugenio Pérez }
1662152128d6SEugenio Pérez
1663152128d6SEugenio Pérez r = 1;
1664152128d6SEugenio Pérez
1665152128d6SEugenio Pérez out:
1666152128d6SEugenio Pérez status = 0;
1667152128d6SEugenio Pérez ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1668152128d6SEugenio Pérez return r;
1669152128d6SEugenio Pérez }
1670152128d6SEugenio Pérez
net_vhost_vdpa_init(NetClientState * peer,const char * device,const char * name,int vdpa_device_fd,int queue_pair_index,int nvqs,bool is_datapath,bool svq,struct vhost_vdpa_iova_range iova_range,uint64_t features,VhostVDPAShared * shared,Error ** errp)1671654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
1672654790b6SJason Wang const char *device,
1673654790b6SJason Wang const char *name,
167440237840SJason Wang int vdpa_device_fd,
167540237840SJason Wang int queue_pair_index,
167640237840SJason Wang int nvqs,
16771576dbb5SEugenio Pérez bool is_datapath,
16781576dbb5SEugenio Pérez bool svq,
16795c1ebd4cSEugenio Pérez struct vhost_vdpa_iova_range iova_range,
1680152128d6SEugenio Pérez uint64_t features,
16818c5e9809SEugenio Pérez VhostVDPAShared *shared,
1682152128d6SEugenio Pérez Error **errp)
16831e0a84eaSCindy Lu {
16841e0a84eaSCindy Lu NetClientState *nc = NULL;
16851e0a84eaSCindy Lu VhostVDPAState *s;
16861e0a84eaSCindy Lu int ret = 0;
16871e0a84eaSCindy Lu assert(name);
1688e77db790SStefan Hajnoczi int cvq_isolated = 0;
1689152128d6SEugenio Pérez
169040237840SJason Wang if (is_datapath) {
169140237840SJason Wang nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
169240237840SJason Wang name);
169340237840SJason Wang } else {
1694152128d6SEugenio Pérez cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
1695152128d6SEugenio Pérez queue_pair_index * 2,
1696152128d6SEugenio Pérez errp);
1697152128d6SEugenio Pérez if (unlikely(cvq_isolated < 0)) {
1698152128d6SEugenio Pérez return NULL;
1699152128d6SEugenio Pérez }
1700152128d6SEugenio Pérez
1701f8972b56SEugenio Pérez nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
170240237840SJason Wang device, name);
170340237840SJason Wang }
170453b85d95SLaurent Vivier qemu_set_info_str(nc, TYPE_VHOST_VDPA);
17051e0a84eaSCindy Lu s = DO_UPCAST(VhostVDPAState, nc, nc);
17067327813dSJason Wang
170740237840SJason Wang s->vhost_vdpa.index = queue_pair_index;
17087f211a28SEugenio Pérez s->always_svq = svq;
1709d9cda213SSteve Sistare s->migration_state.notify = NULL;
17101576dbb5SEugenio Pérez s->vhost_vdpa.shadow_vqs_enabled = svq;
17115c1ebd4cSEugenio Pérez if (queue_pair_index == 0) {
17125c1ebd4cSEugenio Pérez vhost_vdpa_net_valid_svq_features(features,
17135c1ebd4cSEugenio Pérez &s->vhost_vdpa.migration_blocker);
17148c5e9809SEugenio Pérez s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1);
1715f12b2498SEugenio Pérez s->vhost_vdpa.shared->device_fd = vdpa_device_fd;
1716ae25ff41SEugenio Pérez s->vhost_vdpa.shared->iova_range = iova_range;
1717a6e823d4SEugenio Pérez s->vhost_vdpa.shared->shadow_data = svq;
17185c1ebd4cSEugenio Pérez } else if (!is_datapath) {
1719babf8b87SEugenio Pérez s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
1720babf8b87SEugenio Pérez PROT_READ | PROT_WRITE,
1721babf8b87SEugenio Pérez MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1722babf8b87SEugenio Pérez s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
1723babf8b87SEugenio Pérez PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
1724babf8b87SEugenio Pérez -1, 0);
17252df4dd31SEugenio Pérez
1726bd907ae4SEugenio Pérez s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
1727bd907ae4SEugenio Pérez s->vhost_vdpa.shadow_vq_ops_opaque = s;
1728152128d6SEugenio Pérez s->cvq_isolated = cvq_isolated;
17298bc0049eSEugenio Pérez }
17308c5e9809SEugenio Pérez if (queue_pair_index != 0) {
17318c5e9809SEugenio Pérez s->vhost_vdpa.shared = shared;
17328c5e9809SEugenio Pérez }
17338c5e9809SEugenio Pérez
173440237840SJason Wang ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
173574af5eecSJason Wang if (ret) {
173674af5eecSJason Wang qemu_del_net_client(nc);
1737654790b6SJason Wang return NULL;
173874af5eecSJason Wang }
17398c5e9809SEugenio Pérez
1740654790b6SJason Wang return nc;
17411e0a84eaSCindy Lu }
17421e0a84eaSCindy Lu
vhost_vdpa_get_features(int fd,uint64_t * features,Error ** errp)17438170ab3fSEugenio Pérez static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
17448170ab3fSEugenio Pérez {
17458170ab3fSEugenio Pérez int ret = ioctl(fd, VHOST_GET_FEATURES, features);
17468170ab3fSEugenio Pérez if (unlikely(ret < 0)) {
17478170ab3fSEugenio Pérez error_setg_errno(errp, errno,
17488170ab3fSEugenio Pérez "Fail to query features from vhost-vDPA device");
17498170ab3fSEugenio Pérez }
17508170ab3fSEugenio Pérez return ret;
17518170ab3fSEugenio Pérez }
17528170ab3fSEugenio Pérez
vhost_vdpa_get_max_queue_pairs(int fd,uint64_t features,int * has_cvq,Error ** errp)17538170ab3fSEugenio Pérez static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
17548170ab3fSEugenio Pérez int *has_cvq, Error **errp)
175540237840SJason Wang {
175640237840SJason Wang unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
1757cd523a41SStefano Garzarella g_autofree struct vhost_vdpa_config *config = NULL;
175840237840SJason Wang __virtio16 *max_queue_pairs;
175940237840SJason Wang int ret;
176040237840SJason Wang
176140237840SJason Wang if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
176240237840SJason Wang *has_cvq = 1;
176340237840SJason Wang } else {
176440237840SJason Wang *has_cvq = 0;
176540237840SJason Wang }
176640237840SJason Wang
176740237840SJason Wang if (features & (1 << VIRTIO_NET_F_MQ)) {
176840237840SJason Wang config = g_malloc0(config_size + sizeof(*max_queue_pairs));
176940237840SJason Wang config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
177040237840SJason Wang config->len = sizeof(*max_queue_pairs);
177140237840SJason Wang
177240237840SJason Wang ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
177340237840SJason Wang if (ret) {
177440237840SJason Wang error_setg(errp, "Fail to get config from vhost-vDPA device");
177540237840SJason Wang return -ret;
177640237840SJason Wang }
177740237840SJason Wang
177840237840SJason Wang max_queue_pairs = (__virtio16 *)&config->buf;
177940237840SJason Wang
178040237840SJason Wang return lduw_le_p(max_queue_pairs);
178140237840SJason Wang }
178240237840SJason Wang
178340237840SJason Wang return 1;
178440237840SJason Wang }
178540237840SJason Wang
net_init_vhost_vdpa(const Netdev * netdev,const char * name,NetClientState * peer,Error ** errp)17861e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
17871e0a84eaSCindy Lu NetClientState *peer, Error **errp)
17881e0a84eaSCindy Lu {
17893dd5fc53SZhao Liu ERRP_GUARD();
17901e0a84eaSCindy Lu const NetdevVhostVDPAOptions *opts;
17918170ab3fSEugenio Pérez uint64_t features;
1792654790b6SJason Wang int vdpa_device_fd;
1793eb3cb751SEugenio Pérez g_autofree NetClientState **ncs = NULL;
1794a585fad2SEugenio Pérez struct vhost_vdpa_iova_range iova_range;
1795eb3cb751SEugenio Pérez NetClientState *nc;
1796aed5da45SEugenio Pérez int queue_pairs, r, i = 0, has_cvq = 0;
17971e0a84eaSCindy Lu
17981e0a84eaSCindy Lu assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
17991e0a84eaSCindy Lu opts = &netdev->u.vhost_vdpa;
18007480874aSMarkus Armbruster if (!opts->vhostdev && !opts->vhostfd) {
18018801ccd0SSi-Wei Liu error_setg(errp,
18028801ccd0SSi-Wei Liu "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
1803c8295404SEugenio Pérez return -1;
1804c8295404SEugenio Pérez }
18057327813dSJason Wang
18067480874aSMarkus Armbruster if (opts->vhostdev && opts->vhostfd) {
18078801ccd0SSi-Wei Liu error_setg(errp,
18088801ccd0SSi-Wei Liu "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
18098801ccd0SSi-Wei Liu return -1;
18108801ccd0SSi-Wei Liu }
18118801ccd0SSi-Wei Liu
18127480874aSMarkus Armbruster if (opts->vhostdev) {
18130351152bSEugenio Pérez vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
18147327813dSJason Wang if (vdpa_device_fd == -1) {
18157327813dSJason Wang return -errno;
18167327813dSJason Wang }
18175107fd3eSPeter Maydell } else {
18185107fd3eSPeter Maydell /* has_vhostfd */
18198801ccd0SSi-Wei Liu vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
18208801ccd0SSi-Wei Liu if (vdpa_device_fd == -1) {
18218801ccd0SSi-Wei Liu error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
18228801ccd0SSi-Wei Liu return -1;
18238801ccd0SSi-Wei Liu }
18248801ccd0SSi-Wei Liu }
18257327813dSJason Wang
18268170ab3fSEugenio Pérez r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
18278170ab3fSEugenio Pérez if (unlikely(r < 0)) {
1828aed5da45SEugenio Pérez goto err;
18298170ab3fSEugenio Pérez }
18308170ab3fSEugenio Pérez
18318170ab3fSEugenio Pérez queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
183240237840SJason Wang &has_cvq, errp);
183340237840SJason Wang if (queue_pairs < 0) {
18347327813dSJason Wang qemu_close(vdpa_device_fd);
183540237840SJason Wang return queue_pairs;
18367327813dSJason Wang }
18377327813dSJason Wang
1838bf7a2ad8SLongpeng r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
1839bf7a2ad8SLongpeng if (unlikely(r < 0)) {
1840bf7a2ad8SLongpeng error_setg(errp, "vhost-vdpa: get iova range failed: %s",
1841bf7a2ad8SLongpeng strerror(-r));
1842bf7a2ad8SLongpeng goto err;
1843bf7a2ad8SLongpeng }
1844bf7a2ad8SLongpeng
184500ef422eSEugenio Pérez if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
184600ef422eSEugenio Pérez goto err;
18471576dbb5SEugenio Pérez }
18481576dbb5SEugenio Pérez
184940237840SJason Wang ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
185040237840SJason Wang
185140237840SJason Wang for (i = 0; i < queue_pairs; i++) {
18528c5e9809SEugenio Pérez VhostVDPAShared *shared = NULL;
18538c5e9809SEugenio Pérez
18548c5e9809SEugenio Pérez if (i) {
18558c5e9809SEugenio Pérez shared = DO_UPCAST(VhostVDPAState, nc, ncs[0])->vhost_vdpa.shared;
18568c5e9809SEugenio Pérez }
185740237840SJason Wang ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
18581576dbb5SEugenio Pérez vdpa_device_fd, i, 2, true, opts->x_svq,
18598c5e9809SEugenio Pérez iova_range, features, shared, errp);
186040237840SJason Wang if (!ncs[i])
186140237840SJason Wang goto err;
186240237840SJason Wang }
186340237840SJason Wang
186440237840SJason Wang if (has_cvq) {
18658c5e9809SEugenio Pérez VhostVDPAState *s0 = DO_UPCAST(VhostVDPAState, nc, ncs[0]);
18668c5e9809SEugenio Pérez VhostVDPAShared *shared = s0->vhost_vdpa.shared;
18678c5e9809SEugenio Pérez
186840237840SJason Wang nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
18691576dbb5SEugenio Pérez vdpa_device_fd, i, 1, false,
18708c5e9809SEugenio Pérez opts->x_svq, iova_range, features, shared,
18718c5e9809SEugenio Pérez errp);
187240237840SJason Wang if (!nc)
187340237840SJason Wang goto err;
187440237840SJason Wang }
187540237840SJason Wang
1876654790b6SJason Wang return 0;
187740237840SJason Wang
187840237840SJason Wang err:
187940237840SJason Wang if (i) {
18809bd05507SSi-Wei Liu for (i--; i >= 0; i--) {
18819bd05507SSi-Wei Liu qemu_del_net_client(ncs[i]);
18829bd05507SSi-Wei Liu }
188340237840SJason Wang }
18841576dbb5SEugenio Pérez
188540237840SJason Wang qemu_close(vdpa_device_fd);
188640237840SJason Wang
188740237840SJason Wang return -1;
18881e0a84eaSCindy Lu }
1889