xref: /openbmc/qemu/net/vhost-vdpa.c (revision 2df4dd31)
11e0a84eaSCindy Lu /*
21e0a84eaSCindy Lu  * vhost-vdpa.c
31e0a84eaSCindy Lu  *
41e0a84eaSCindy Lu  * Copyright(c) 2017-2018 Intel Corporation.
51e0a84eaSCindy Lu  * Copyright(c) 2020 Red Hat, Inc.
61e0a84eaSCindy Lu  *
71e0a84eaSCindy Lu  * This work is licensed under the terms of the GNU GPL, version 2 or later.
81e0a84eaSCindy Lu  * See the COPYING file in the top-level directory.
91e0a84eaSCindy Lu  *
101e0a84eaSCindy Lu  */
111e0a84eaSCindy Lu 
121e0a84eaSCindy Lu #include "qemu/osdep.h"
131e0a84eaSCindy Lu #include "clients.h"
14bd907ae4SEugenio Pérez #include "hw/virtio/virtio-net.h"
151e0a84eaSCindy Lu #include "net/vhost_net.h"
161e0a84eaSCindy Lu #include "net/vhost-vdpa.h"
171e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h"
181e0a84eaSCindy Lu #include "qemu/config-file.h"
191e0a84eaSCindy Lu #include "qemu/error-report.h"
20bd907ae4SEugenio Pérez #include "qemu/log.h"
21bd907ae4SEugenio Pérez #include "qemu/memalign.h"
221e0a84eaSCindy Lu #include "qemu/option.h"
231e0a84eaSCindy Lu #include "qapi/error.h"
2440237840SJason Wang #include <linux/vhost.h>
251e0a84eaSCindy Lu #include <sys/ioctl.h>
261e0a84eaSCindy Lu #include <err.h>
271e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h"
281e0a84eaSCindy Lu #include "monitor/monitor.h"
291e0a84eaSCindy Lu #include "hw/virtio/vhost.h"
301e0a84eaSCindy Lu 
311e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */
321e0a84eaSCindy Lu typedef struct VhostVDPAState {
331e0a84eaSCindy Lu     NetClientState nc;
341e0a84eaSCindy Lu     struct vhost_vdpa vhost_vdpa;
351e0a84eaSCindy Lu     VHostNetState *vhost_net;
36*2df4dd31SEugenio Pérez 
37*2df4dd31SEugenio Pérez     /* Control commands shadow buffers */
38*2df4dd31SEugenio Pérez     void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
391e0a84eaSCindy Lu     bool started;
401e0a84eaSCindy Lu } VhostVDPAState;
411e0a84eaSCindy Lu 
421e0a84eaSCindy Lu const int vdpa_feature_bits[] = {
431e0a84eaSCindy Lu     VIRTIO_F_NOTIFY_ON_EMPTY,
441e0a84eaSCindy Lu     VIRTIO_RING_F_INDIRECT_DESC,
451e0a84eaSCindy Lu     VIRTIO_RING_F_EVENT_IDX,
461e0a84eaSCindy Lu     VIRTIO_F_ANY_LAYOUT,
471e0a84eaSCindy Lu     VIRTIO_F_VERSION_1,
481e0a84eaSCindy Lu     VIRTIO_NET_F_CSUM,
491e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_CSUM,
501e0a84eaSCindy Lu     VIRTIO_NET_F_GSO,
511e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO4,
521e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_TSO6,
531e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ECN,
541e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_UFO,
551e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO4,
561e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_TSO6,
571e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_ECN,
581e0a84eaSCindy Lu     VIRTIO_NET_F_HOST_UFO,
591e0a84eaSCindy Lu     VIRTIO_NET_F_MRG_RXBUF,
601e0a84eaSCindy Lu     VIRTIO_NET_F_MTU,
6140237840SJason Wang     VIRTIO_NET_F_CTRL_RX,
6240237840SJason Wang     VIRTIO_NET_F_CTRL_RX_EXTRA,
6340237840SJason Wang     VIRTIO_NET_F_CTRL_VLAN,
6440237840SJason Wang     VIRTIO_NET_F_GUEST_ANNOUNCE,
6540237840SJason Wang     VIRTIO_NET_F_CTRL_MAC_ADDR,
6640237840SJason Wang     VIRTIO_NET_F_RSS,
6740237840SJason Wang     VIRTIO_NET_F_MQ,
6840237840SJason Wang     VIRTIO_NET_F_CTRL_VQ,
691e0a84eaSCindy Lu     VIRTIO_F_IOMMU_PLATFORM,
701e0a84eaSCindy Lu     VIRTIO_F_RING_PACKED,
710145c393SAndrew Melnychenko     VIRTIO_NET_F_RSS,
720145c393SAndrew Melnychenko     VIRTIO_NET_F_HASH_REPORT,
731e0a84eaSCindy Lu     VIRTIO_NET_F_GUEST_ANNOUNCE,
749aa47eddSSi-Wei Liu     VIRTIO_NET_F_STATUS,
751e0a84eaSCindy Lu     VHOST_INVALID_FEATURE_BIT
761e0a84eaSCindy Lu };
771e0a84eaSCindy Lu 
781e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
791e0a84eaSCindy Lu {
801e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
811e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
821e0a84eaSCindy Lu     return s->vhost_net;
831e0a84eaSCindy Lu }
841e0a84eaSCindy Lu 
851e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
861e0a84eaSCindy Lu {
871e0a84eaSCindy Lu     uint32_t device_id;
881e0a84eaSCindy Lu     int ret;
891e0a84eaSCindy Lu     struct vhost_dev *hdev;
901e0a84eaSCindy Lu 
911e0a84eaSCindy Lu     hdev = (struct vhost_dev *)&net->dev;
921e0a84eaSCindy Lu     ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
931e0a84eaSCindy Lu     if (device_id != VIRTIO_ID_NET) {
941e0a84eaSCindy Lu         return -ENOTSUP;
951e0a84eaSCindy Lu     }
961e0a84eaSCindy Lu     return ret;
971e0a84eaSCindy Lu }
981e0a84eaSCindy Lu 
9940237840SJason Wang static int vhost_vdpa_add(NetClientState *ncs, void *be,
10040237840SJason Wang                           int queue_pair_index, int nvqs)
1011e0a84eaSCindy Lu {
1021e0a84eaSCindy Lu     VhostNetOptions options;
1031e0a84eaSCindy Lu     struct vhost_net *net = NULL;
1041e0a84eaSCindy Lu     VhostVDPAState *s;
1051e0a84eaSCindy Lu     int ret;
1061e0a84eaSCindy Lu 
1071e0a84eaSCindy Lu     options.backend_type = VHOST_BACKEND_TYPE_VDPA;
1081e0a84eaSCindy Lu     assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1091e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, ncs);
1101e0a84eaSCindy Lu     options.net_backend = ncs;
1111e0a84eaSCindy Lu     options.opaque      = be;
1121e0a84eaSCindy Lu     options.busyloop_timeout = 0;
11340237840SJason Wang     options.nvqs = nvqs;
1141e0a84eaSCindy Lu 
1151e0a84eaSCindy Lu     net = vhost_net_init(&options);
1161e0a84eaSCindy Lu     if (!net) {
1171e0a84eaSCindy Lu         error_report("failed to init vhost_net for queue");
118a97ef87aSJason Wang         goto err_init;
1191e0a84eaSCindy Lu     }
1201e0a84eaSCindy Lu     s->vhost_net = net;
1211e0a84eaSCindy Lu     ret = vhost_vdpa_net_check_device_id(net);
1221e0a84eaSCindy Lu     if (ret) {
123a97ef87aSJason Wang         goto err_check;
1241e0a84eaSCindy Lu     }
1251e0a84eaSCindy Lu     return 0;
126a97ef87aSJason Wang err_check:
1271e0a84eaSCindy Lu     vhost_net_cleanup(net);
128ab36edcfSJason Wang     g_free(net);
129a97ef87aSJason Wang err_init:
1301e0a84eaSCindy Lu     return -1;
1311e0a84eaSCindy Lu }
1321e0a84eaSCindy Lu 
1331e0a84eaSCindy Lu static void vhost_vdpa_cleanup(NetClientState *nc)
1341e0a84eaSCindy Lu {
1351e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1361e0a84eaSCindy Lu 
137*2df4dd31SEugenio Pérez     qemu_vfree(s->cvq_cmd_out_buffer);
138*2df4dd31SEugenio Pérez     qemu_vfree(s->cvq_cmd_in_buffer);
1391e0a84eaSCindy Lu     if (s->vhost_net) {
1401e0a84eaSCindy Lu         vhost_net_cleanup(s->vhost_net);
1411e0a84eaSCindy Lu         g_free(s->vhost_net);
1421e0a84eaSCindy Lu         s->vhost_net = NULL;
1431e0a84eaSCindy Lu     }
14457b3a7d8SCindy Lu      if (s->vhost_vdpa.device_fd >= 0) {
14557b3a7d8SCindy Lu         qemu_close(s->vhost_vdpa.device_fd);
14657b3a7d8SCindy Lu         s->vhost_vdpa.device_fd = -1;
14757b3a7d8SCindy Lu     }
1481e0a84eaSCindy Lu }
1491e0a84eaSCindy Lu 
1501e0a84eaSCindy Lu static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
1511e0a84eaSCindy Lu {
1521e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1531e0a84eaSCindy Lu 
1541e0a84eaSCindy Lu     return true;
1551e0a84eaSCindy Lu }
1561e0a84eaSCindy Lu 
1571e0a84eaSCindy Lu static bool vhost_vdpa_has_ufo(NetClientState *nc)
1581e0a84eaSCindy Lu {
1591e0a84eaSCindy Lu     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1601e0a84eaSCindy Lu     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1611e0a84eaSCindy Lu     uint64_t features = 0;
1621e0a84eaSCindy Lu     features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
1631e0a84eaSCindy Lu     features = vhost_net_get_features(s->vhost_net, features);
1641e0a84eaSCindy Lu     return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
1651e0a84eaSCindy Lu 
1661e0a84eaSCindy Lu }
1671e0a84eaSCindy Lu 
168ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
169ee8a1c63SKevin Wolf                                        Error **errp)
170ee8a1c63SKevin Wolf {
171ee8a1c63SKevin Wolf     const char *driver = object_class_get_name(oc);
172ee8a1c63SKevin Wolf 
173ee8a1c63SKevin Wolf     if (!g_str_has_prefix(driver, "virtio-net-")) {
174ee8a1c63SKevin Wolf         error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
175ee8a1c63SKevin Wolf         return false;
176ee8a1c63SKevin Wolf     }
177ee8a1c63SKevin Wolf 
178ee8a1c63SKevin Wolf     return true;
179ee8a1c63SKevin Wolf }
180ee8a1c63SKevin Wolf 
181846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */
182846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
183846a1e85SEugenio Pérez                                   size_t size)
184846a1e85SEugenio Pérez {
185846a1e85SEugenio Pérez     return 0;
186846a1e85SEugenio Pérez }
187846a1e85SEugenio Pérez 
1881e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = {
1891e0a84eaSCindy Lu         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
1901e0a84eaSCindy Lu         .size = sizeof(VhostVDPAState),
191846a1e85SEugenio Pérez         .receive = vhost_vdpa_receive,
1921e0a84eaSCindy Lu         .cleanup = vhost_vdpa_cleanup,
1931e0a84eaSCindy Lu         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
1941e0a84eaSCindy Lu         .has_ufo = vhost_vdpa_has_ufo,
195ee8a1c63SKevin Wolf         .check_peer_type = vhost_vdpa_check_peer_type,
1961e0a84eaSCindy Lu };
1971e0a84eaSCindy Lu 
198*2df4dd31SEugenio Pérez static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
199*2df4dd31SEugenio Pérez {
200*2df4dd31SEugenio Pérez     VhostIOVATree *tree = v->iova_tree;
201*2df4dd31SEugenio Pérez     DMAMap needle = {
202*2df4dd31SEugenio Pérez         /*
203*2df4dd31SEugenio Pérez          * No need to specify size or to look for more translations since
204*2df4dd31SEugenio Pérez          * this contiguous chunk was allocated by us.
205*2df4dd31SEugenio Pérez          */
206*2df4dd31SEugenio Pérez         .translated_addr = (hwaddr)(uintptr_t)addr,
207*2df4dd31SEugenio Pérez     };
208*2df4dd31SEugenio Pérez     const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
209*2df4dd31SEugenio Pérez     int r;
210*2df4dd31SEugenio Pérez 
211*2df4dd31SEugenio Pérez     if (unlikely(!map)) {
212*2df4dd31SEugenio Pérez         error_report("Cannot locate expected map");
213*2df4dd31SEugenio Pérez         return;
214*2df4dd31SEugenio Pérez     }
215*2df4dd31SEugenio Pérez 
216*2df4dd31SEugenio Pérez     r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
217*2df4dd31SEugenio Pérez     if (unlikely(r != 0)) {
218*2df4dd31SEugenio Pérez         error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
219*2df4dd31SEugenio Pérez     }
220*2df4dd31SEugenio Pérez 
221*2df4dd31SEugenio Pérez     vhost_iova_tree_remove(tree, map);
222*2df4dd31SEugenio Pérez }
223*2df4dd31SEugenio Pérez 
224*2df4dd31SEugenio Pérez static size_t vhost_vdpa_net_cvq_cmd_len(void)
225*2df4dd31SEugenio Pérez {
226*2df4dd31SEugenio Pérez     /*
227*2df4dd31SEugenio Pérez      * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
228*2df4dd31SEugenio Pérez      * In buffer is always 1 byte, so it should fit here
229*2df4dd31SEugenio Pérez      */
230*2df4dd31SEugenio Pérez     return sizeof(struct virtio_net_ctrl_hdr) +
231*2df4dd31SEugenio Pérez            2 * sizeof(struct virtio_net_ctrl_mac) +
232*2df4dd31SEugenio Pérez            MAC_TABLE_ENTRIES * ETH_ALEN;
233*2df4dd31SEugenio Pérez }
234*2df4dd31SEugenio Pérez 
/* Maximum CVQ command length rounded up to a multiple of the host page size */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
239*2df4dd31SEugenio Pérez 
240*2df4dd31SEugenio Pérez /** Copy and map a guest buffer. */
241*2df4dd31SEugenio Pérez static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
242*2df4dd31SEugenio Pérez                                    const struct iovec *out_data,
243*2df4dd31SEugenio Pérez                                    size_t out_num, size_t data_len, void *buf,
244*2df4dd31SEugenio Pérez                                    size_t *written, bool write)
245*2df4dd31SEugenio Pérez {
246*2df4dd31SEugenio Pérez     DMAMap map = {};
247*2df4dd31SEugenio Pérez     int r;
248*2df4dd31SEugenio Pérez 
249*2df4dd31SEugenio Pérez     if (unlikely(!data_len)) {
250*2df4dd31SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
251*2df4dd31SEugenio Pérez                       __func__, write ? "in" : "out");
252*2df4dd31SEugenio Pérez         return false;
253*2df4dd31SEugenio Pérez     }
254*2df4dd31SEugenio Pérez 
255*2df4dd31SEugenio Pérez     *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
256*2df4dd31SEugenio Pérez     map.translated_addr = (hwaddr)(uintptr_t)buf;
257*2df4dd31SEugenio Pérez     map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
258*2df4dd31SEugenio Pérez     map.perm = write ? IOMMU_RW : IOMMU_RO,
259*2df4dd31SEugenio Pérez     r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
260*2df4dd31SEugenio Pérez     if (unlikely(r != IOVA_OK)) {
261*2df4dd31SEugenio Pérez         error_report("Cannot map injected element");
262*2df4dd31SEugenio Pérez         return false;
263*2df4dd31SEugenio Pérez     }
264*2df4dd31SEugenio Pérez 
265*2df4dd31SEugenio Pérez     r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
266*2df4dd31SEugenio Pérez                            !write);
267*2df4dd31SEugenio Pérez     if (unlikely(r < 0)) {
268*2df4dd31SEugenio Pérez         goto dma_map_err;
269*2df4dd31SEugenio Pérez     }
270*2df4dd31SEugenio Pérez 
271*2df4dd31SEugenio Pérez     return true;
272*2df4dd31SEugenio Pérez 
273*2df4dd31SEugenio Pérez dma_map_err:
274*2df4dd31SEugenio Pérez     vhost_iova_tree_remove(v->iova_tree, &map);
275*2df4dd31SEugenio Pérez     return false;
276*2df4dd31SEugenio Pérez }
277*2df4dd31SEugenio Pérez 
278bd907ae4SEugenio Pérez /**
279*2df4dd31SEugenio Pérez  * Copy the guest element into a dedicated buffer suitable to be sent to NIC
280*2df4dd31SEugenio Pérez  *
281*2df4dd31SEugenio Pérez  * @iov: [0] is the out buffer, [1] is the in one
282*2df4dd31SEugenio Pérez  */
283*2df4dd31SEugenio Pérez static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
284*2df4dd31SEugenio Pérez                                         VirtQueueElement *elem,
285*2df4dd31SEugenio Pérez                                         struct iovec *iov)
286*2df4dd31SEugenio Pérez {
287*2df4dd31SEugenio Pérez     size_t in_copied;
288*2df4dd31SEugenio Pérez     bool ok;
289*2df4dd31SEugenio Pérez 
290*2df4dd31SEugenio Pérez     iov[0].iov_base = s->cvq_cmd_out_buffer;
291*2df4dd31SEugenio Pérez     ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
292*2df4dd31SEugenio Pérez                                 vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
293*2df4dd31SEugenio Pérez                                 &iov[0].iov_len, false);
294*2df4dd31SEugenio Pérez     if (unlikely(!ok)) {
295*2df4dd31SEugenio Pérez         return false;
296*2df4dd31SEugenio Pérez     }
297*2df4dd31SEugenio Pérez 
298*2df4dd31SEugenio Pérez     iov[1].iov_base = s->cvq_cmd_in_buffer;
299*2df4dd31SEugenio Pérez     ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
300*2df4dd31SEugenio Pérez                                 sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
301*2df4dd31SEugenio Pérez                                 &in_copied, true);
302*2df4dd31SEugenio Pérez     if (unlikely(!ok)) {
303*2df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
304*2df4dd31SEugenio Pérez         return false;
305*2df4dd31SEugenio Pérez     }
306*2df4dd31SEugenio Pérez 
307*2df4dd31SEugenio Pérez     iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
308*2df4dd31SEugenio Pérez     return true;
309*2df4dd31SEugenio Pérez }
310*2df4dd31SEugenio Pérez 
311*2df4dd31SEugenio Pérez /**
312*2df4dd31SEugenio Pérez  * Do not forward commands not supported by SVQ. Otherwise, the device could
313*2df4dd31SEugenio Pérez  * accept it and qemu would not know how to update the device model.
314*2df4dd31SEugenio Pérez  */
315*2df4dd31SEugenio Pérez static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
316*2df4dd31SEugenio Pérez                                             size_t out_num)
317*2df4dd31SEugenio Pérez {
318*2df4dd31SEugenio Pérez     struct virtio_net_ctrl_hdr ctrl;
319*2df4dd31SEugenio Pérez     size_t n;
320*2df4dd31SEugenio Pérez 
321*2df4dd31SEugenio Pérez     n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
322*2df4dd31SEugenio Pérez     if (unlikely(n < sizeof(ctrl))) {
323*2df4dd31SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR,
324*2df4dd31SEugenio Pérez                       "%s: invalid legnth of out buffer %zu\n", __func__, n);
325*2df4dd31SEugenio Pérez         return false;
326*2df4dd31SEugenio Pérez     }
327*2df4dd31SEugenio Pérez 
328*2df4dd31SEugenio Pérez     switch (ctrl.class) {
329*2df4dd31SEugenio Pérez     case VIRTIO_NET_CTRL_MAC:
330*2df4dd31SEugenio Pérez         switch (ctrl.cmd) {
331*2df4dd31SEugenio Pérez         case VIRTIO_NET_CTRL_MAC_ADDR_SET:
332*2df4dd31SEugenio Pérez             return true;
333*2df4dd31SEugenio Pérez         default:
334*2df4dd31SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
335*2df4dd31SEugenio Pérez                           __func__, ctrl.cmd);
336*2df4dd31SEugenio Pérez         };
337*2df4dd31SEugenio Pérez         break;
338*2df4dd31SEugenio Pérez     default:
339*2df4dd31SEugenio Pérez         qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
340*2df4dd31SEugenio Pérez                       __func__, ctrl.class);
341*2df4dd31SEugenio Pérez     };
342*2df4dd31SEugenio Pérez 
343*2df4dd31SEugenio Pérez     return false;
344*2df4dd31SEugenio Pérez }
345*2df4dd31SEugenio Pérez 
346*2df4dd31SEugenio Pérez /**
347*2df4dd31SEugenio Pérez  * Validate and copy control virtqueue commands.
348*2df4dd31SEugenio Pérez  *
349*2df4dd31SEugenio Pérez  * Following QEMU guidelines, we offer a copy of the buffers to the device to
350*2df4dd31SEugenio Pérez  * prevent TOCTOU bugs.
351bd907ae4SEugenio Pérez  */
352bd907ae4SEugenio Pérez static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
353bd907ae4SEugenio Pérez                                             VirtQueueElement *elem,
354bd907ae4SEugenio Pérez                                             void *opaque)
355bd907ae4SEugenio Pérez {
356*2df4dd31SEugenio Pérez     VhostVDPAState *s = opaque;
357bd907ae4SEugenio Pérez     size_t in_len, dev_written;
358bd907ae4SEugenio Pérez     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
359*2df4dd31SEugenio Pérez     /* out and in buffers sent to the device */
360*2df4dd31SEugenio Pérez     struct iovec dev_buffers[2] = {
361*2df4dd31SEugenio Pérez         { .iov_base = s->cvq_cmd_out_buffer },
362*2df4dd31SEugenio Pérez         { .iov_base = s->cvq_cmd_in_buffer },
363*2df4dd31SEugenio Pérez     };
364*2df4dd31SEugenio Pérez     /* in buffer used for device model */
365*2df4dd31SEugenio Pérez     const struct iovec in = {
366*2df4dd31SEugenio Pérez         .iov_base = &status,
367*2df4dd31SEugenio Pérez         .iov_len = sizeof(status),
368*2df4dd31SEugenio Pérez     };
369*2df4dd31SEugenio Pérez     int r = -EINVAL;
370*2df4dd31SEugenio Pérez     bool ok;
371bd907ae4SEugenio Pérez 
372*2df4dd31SEugenio Pérez     ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
373*2df4dd31SEugenio Pérez     if (unlikely(!ok)) {
374*2df4dd31SEugenio Pérez         goto out;
375*2df4dd31SEugenio Pérez     }
376bd907ae4SEugenio Pérez 
377*2df4dd31SEugenio Pérez     ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
378*2df4dd31SEugenio Pérez     if (unlikely(!ok)) {
379*2df4dd31SEugenio Pérez         goto out;
380*2df4dd31SEugenio Pérez     }
381*2df4dd31SEugenio Pérez 
382*2df4dd31SEugenio Pérez     r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
383bd907ae4SEugenio Pérez     if (unlikely(r != 0)) {
384bd907ae4SEugenio Pérez         if (unlikely(r == -ENOSPC)) {
385bd907ae4SEugenio Pérez             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
386bd907ae4SEugenio Pérez                           __func__);
387bd907ae4SEugenio Pérez         }
388bd907ae4SEugenio Pérez         goto out;
389bd907ae4SEugenio Pérez     }
390bd907ae4SEugenio Pérez 
391bd907ae4SEugenio Pérez     /*
392bd907ae4SEugenio Pérez      * We can poll here since we've had BQL from the time we sent the
393bd907ae4SEugenio Pérez      * descriptor. Also, we need to take the answer before SVQ pulls by itself,
394bd907ae4SEugenio Pérez      * when BQL is released
395bd907ae4SEugenio Pérez      */
396bd907ae4SEugenio Pérez     dev_written = vhost_svq_poll(svq);
397bd907ae4SEugenio Pérez     if (unlikely(dev_written < sizeof(status))) {
398bd907ae4SEugenio Pérez         error_report("Insufficient written data (%zu)", dev_written);
399*2df4dd31SEugenio Pérez         goto out;
400*2df4dd31SEugenio Pérez     }
401*2df4dd31SEugenio Pérez 
402*2df4dd31SEugenio Pérez     memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
403*2df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
404*2df4dd31SEugenio Pérez         goto out;
405*2df4dd31SEugenio Pérez     }
406*2df4dd31SEugenio Pérez 
407*2df4dd31SEugenio Pérez     status = VIRTIO_NET_ERR;
408*2df4dd31SEugenio Pérez     virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
409*2df4dd31SEugenio Pérez     if (status != VIRTIO_NET_OK) {
410*2df4dd31SEugenio Pérez         error_report("Bad CVQ processing in model");
411bd907ae4SEugenio Pérez     }
412bd907ae4SEugenio Pérez 
413bd907ae4SEugenio Pérez out:
414bd907ae4SEugenio Pérez     in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
415bd907ae4SEugenio Pérez                           sizeof(status));
416bd907ae4SEugenio Pérez     if (unlikely(in_len < sizeof(status))) {
417bd907ae4SEugenio Pérez         error_report("Bad device CVQ written length");
418bd907ae4SEugenio Pérez     }
419bd907ae4SEugenio Pérez     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
420bd907ae4SEugenio Pérez     g_free(elem);
421*2df4dd31SEugenio Pérez     if (dev_buffers[0].iov_base) {
422*2df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
423*2df4dd31SEugenio Pérez     }
424*2df4dd31SEugenio Pérez     if (dev_buffers[1].iov_base) {
425*2df4dd31SEugenio Pérez         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
426*2df4dd31SEugenio Pérez     }
427bd907ae4SEugenio Pérez     return r;
428bd907ae4SEugenio Pérez }
429bd907ae4SEugenio Pérez 
430bd907ae4SEugenio Pérez static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
431bd907ae4SEugenio Pérez     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
432bd907ae4SEugenio Pérez };
433bd907ae4SEugenio Pérez 
434654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
435654790b6SJason Wang                                            const char *device,
436654790b6SJason Wang                                            const char *name,
43740237840SJason Wang                                            int vdpa_device_fd,
43840237840SJason Wang                                            int queue_pair_index,
43940237840SJason Wang                                            int nvqs,
44040237840SJason Wang                                            bool is_datapath)
4411e0a84eaSCindy Lu {
4421e0a84eaSCindy Lu     NetClientState *nc = NULL;
4431e0a84eaSCindy Lu     VhostVDPAState *s;
4441e0a84eaSCindy Lu     int ret = 0;
4451e0a84eaSCindy Lu     assert(name);
44640237840SJason Wang     if (is_datapath) {
44740237840SJason Wang         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
44840237840SJason Wang                                  name);
44940237840SJason Wang     } else {
45040237840SJason Wang         nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer,
45140237840SJason Wang                                          device, name);
45240237840SJason Wang     }
45356e6f594SJason Wang     snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA);
4541e0a84eaSCindy Lu     s = DO_UPCAST(VhostVDPAState, nc, nc);
4557327813dSJason Wang 
4561e0a84eaSCindy Lu     s->vhost_vdpa.device_fd = vdpa_device_fd;
45740237840SJason Wang     s->vhost_vdpa.index = queue_pair_index;
458bd907ae4SEugenio Pérez     if (!is_datapath) {
459*2df4dd31SEugenio Pérez         s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
460*2df4dd31SEugenio Pérez                                             vhost_vdpa_net_cvq_cmd_page_len());
461*2df4dd31SEugenio Pérez         memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
462*2df4dd31SEugenio Pérez         s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
463*2df4dd31SEugenio Pérez                                             vhost_vdpa_net_cvq_cmd_page_len());
464*2df4dd31SEugenio Pérez         memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
465*2df4dd31SEugenio Pérez 
466bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
467bd907ae4SEugenio Pérez         s->vhost_vdpa.shadow_vq_ops_opaque = s;
468bd907ae4SEugenio Pérez     }
46940237840SJason Wang     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
47074af5eecSJason Wang     if (ret) {
47174af5eecSJason Wang         qemu_del_net_client(nc);
472654790b6SJason Wang         return NULL;
47374af5eecSJason Wang     }
474654790b6SJason Wang     return nc;
4751e0a84eaSCindy Lu }
4761e0a84eaSCindy Lu 
47740237840SJason Wang static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
47840237840SJason Wang {
47940237840SJason Wang     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
480cd523a41SStefano Garzarella     g_autofree struct vhost_vdpa_config *config = NULL;
48140237840SJason Wang     __virtio16 *max_queue_pairs;
48240237840SJason Wang     uint64_t features;
48340237840SJason Wang     int ret;
48440237840SJason Wang 
48540237840SJason Wang     ret = ioctl(fd, VHOST_GET_FEATURES, &features);
48640237840SJason Wang     if (ret) {
48740237840SJason Wang         error_setg(errp, "Fail to query features from vhost-vDPA device");
48840237840SJason Wang         return ret;
48940237840SJason Wang     }
49040237840SJason Wang 
49140237840SJason Wang     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
49240237840SJason Wang         *has_cvq = 1;
49340237840SJason Wang     } else {
49440237840SJason Wang         *has_cvq = 0;
49540237840SJason Wang     }
49640237840SJason Wang 
49740237840SJason Wang     if (features & (1 << VIRTIO_NET_F_MQ)) {
49840237840SJason Wang         config = g_malloc0(config_size + sizeof(*max_queue_pairs));
49940237840SJason Wang         config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
50040237840SJason Wang         config->len = sizeof(*max_queue_pairs);
50140237840SJason Wang 
50240237840SJason Wang         ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
50340237840SJason Wang         if (ret) {
50440237840SJason Wang             error_setg(errp, "Fail to get config from vhost-vDPA device");
50540237840SJason Wang             return -ret;
50640237840SJason Wang         }
50740237840SJason Wang 
50840237840SJason Wang         max_queue_pairs = (__virtio16 *)&config->buf;
50940237840SJason Wang 
51040237840SJason Wang         return lduw_le_p(max_queue_pairs);
51140237840SJason Wang     }
51240237840SJason Wang 
51340237840SJason Wang     return 1;
51440237840SJason Wang }
51540237840SJason Wang 
5161e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
5171e0a84eaSCindy Lu                         NetClientState *peer, Error **errp)
5181e0a84eaSCindy Lu {
5191e0a84eaSCindy Lu     const NetdevVhostVDPAOptions *opts;
520654790b6SJason Wang     int vdpa_device_fd;
521eb3cb751SEugenio Pérez     g_autofree NetClientState **ncs = NULL;
522eb3cb751SEugenio Pérez     NetClientState *nc;
52340237840SJason Wang     int queue_pairs, i, has_cvq = 0;
5241e0a84eaSCindy Lu 
5251e0a84eaSCindy Lu     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
5261e0a84eaSCindy Lu     opts = &netdev->u.vhost_vdpa;
527c8295404SEugenio Pérez     if (!opts->vhostdev) {
528c8295404SEugenio Pérez         error_setg(errp, "vdpa character device not specified with vhostdev");
529c8295404SEugenio Pérez         return -1;
530c8295404SEugenio Pérez     }
5317327813dSJason Wang 
5320351152bSEugenio Pérez     vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
5337327813dSJason Wang     if (vdpa_device_fd == -1) {
5347327813dSJason Wang         return -errno;
5357327813dSJason Wang     }
5367327813dSJason Wang 
53740237840SJason Wang     queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
53840237840SJason Wang                                                  &has_cvq, errp);
53940237840SJason Wang     if (queue_pairs < 0) {
5407327813dSJason Wang         qemu_close(vdpa_device_fd);
54140237840SJason Wang         return queue_pairs;
5427327813dSJason Wang     }
5437327813dSJason Wang 
54440237840SJason Wang     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
54540237840SJason Wang 
54640237840SJason Wang     for (i = 0; i < queue_pairs; i++) {
54740237840SJason Wang         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
54840237840SJason Wang                                      vdpa_device_fd, i, 2, true);
54940237840SJason Wang         if (!ncs[i])
55040237840SJason Wang             goto err;
55140237840SJason Wang     }
55240237840SJason Wang 
55340237840SJason Wang     if (has_cvq) {
55440237840SJason Wang         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
55540237840SJason Wang                                  vdpa_device_fd, i, 1, false);
55640237840SJason Wang         if (!nc)
55740237840SJason Wang             goto err;
55840237840SJason Wang     }
55940237840SJason Wang 
560654790b6SJason Wang     return 0;
56140237840SJason Wang 
56240237840SJason Wang err:
56340237840SJason Wang     if (i) {
5649bd05507SSi-Wei Liu         for (i--; i >= 0; i--) {
5659bd05507SSi-Wei Liu             qemu_del_net_client(ncs[i]);
5669bd05507SSi-Wei Liu         }
56740237840SJason Wang     }
56840237840SJason Wang     qemu_close(vdpa_device_fd);
56940237840SJason Wang 
57040237840SJason Wang     return -1;
5711e0a84eaSCindy Lu }
572