11e0a84eaSCindy Lu /* 21e0a84eaSCindy Lu * vhost-vdpa.c 31e0a84eaSCindy Lu * 41e0a84eaSCindy Lu * Copyright(c) 2017-2018 Intel Corporation. 51e0a84eaSCindy Lu * Copyright(c) 2020 Red Hat, Inc. 61e0a84eaSCindy Lu * 71e0a84eaSCindy Lu * This work is licensed under the terms of the GNU GPL, version 2 or later. 81e0a84eaSCindy Lu * See the COPYING file in the top-level directory. 91e0a84eaSCindy Lu * 101e0a84eaSCindy Lu */ 111e0a84eaSCindy Lu 121e0a84eaSCindy Lu #include "qemu/osdep.h" 131e0a84eaSCindy Lu #include "clients.h" 141e0a84eaSCindy Lu #include "net/vhost_net.h" 151e0a84eaSCindy Lu #include "net/vhost-vdpa.h" 161e0a84eaSCindy Lu #include "hw/virtio/vhost-vdpa.h" 171e0a84eaSCindy Lu #include "qemu/config-file.h" 181e0a84eaSCindy Lu #include "qemu/error-report.h" 191e0a84eaSCindy Lu #include "qemu/option.h" 201e0a84eaSCindy Lu #include "qapi/error.h" 2140237840SJason Wang #include <linux/vhost.h> 221e0a84eaSCindy Lu #include <sys/ioctl.h> 231e0a84eaSCindy Lu #include <err.h> 241e0a84eaSCindy Lu #include "standard-headers/linux/virtio_net.h" 251e0a84eaSCindy Lu #include "monitor/monitor.h" 261e0a84eaSCindy Lu #include "hw/virtio/vhost.h" 271e0a84eaSCindy Lu 281e0a84eaSCindy Lu /* Todo:need to add the multiqueue support here */ 291e0a84eaSCindy Lu typedef struct VhostVDPAState { 301e0a84eaSCindy Lu NetClientState nc; 311e0a84eaSCindy Lu struct vhost_vdpa vhost_vdpa; 321e0a84eaSCindy Lu VHostNetState *vhost_net; 331e0a84eaSCindy Lu bool started; 341e0a84eaSCindy Lu } VhostVDPAState; 351e0a84eaSCindy Lu 361e0a84eaSCindy Lu const int vdpa_feature_bits[] = { 371e0a84eaSCindy Lu VIRTIO_F_NOTIFY_ON_EMPTY, 381e0a84eaSCindy Lu VIRTIO_RING_F_INDIRECT_DESC, 391e0a84eaSCindy Lu VIRTIO_RING_F_EVENT_IDX, 401e0a84eaSCindy Lu VIRTIO_F_ANY_LAYOUT, 411e0a84eaSCindy Lu VIRTIO_F_VERSION_1, 421e0a84eaSCindy Lu VIRTIO_NET_F_CSUM, 431e0a84eaSCindy Lu VIRTIO_NET_F_GUEST_CSUM, 441e0a84eaSCindy Lu VIRTIO_NET_F_GSO, 451e0a84eaSCindy Lu VIRTIO_NET_F_GUEST_TSO4, 461e0a84eaSCindy Lu VIRTIO_NET_F_GUEST_TSO6, 471e0a84eaSCindy Lu VIRTIO_NET_F_GUEST_ECN, 481e0a84eaSCindy Lu VIRTIO_NET_F_GUEST_UFO, 491e0a84eaSCindy Lu VIRTIO_NET_F_HOST_TSO4, 501e0a84eaSCindy Lu VIRTIO_NET_F_HOST_TSO6, 511e0a84eaSCindy Lu VIRTIO_NET_F_HOST_ECN, 521e0a84eaSCindy Lu VIRTIO_NET_F_HOST_UFO, 531e0a84eaSCindy Lu VIRTIO_NET_F_MRG_RXBUF, 541e0a84eaSCindy Lu VIRTIO_NET_F_MTU, 5540237840SJason Wang VIRTIO_NET_F_CTRL_RX, 5640237840SJason Wang VIRTIO_NET_F_CTRL_RX_EXTRA, 5740237840SJason Wang VIRTIO_NET_F_CTRL_VLAN, 5840237840SJason Wang VIRTIO_NET_F_GUEST_ANNOUNCE, 5940237840SJason Wang VIRTIO_NET_F_CTRL_MAC_ADDR, 6040237840SJason Wang VIRTIO_NET_F_RSS, 6140237840SJason Wang VIRTIO_NET_F_MQ, 6240237840SJason Wang VIRTIO_NET_F_CTRL_VQ, 631e0a84eaSCindy Lu VIRTIO_F_IOMMU_PLATFORM, 641e0a84eaSCindy Lu VIRTIO_F_RING_PACKED, 650145c393SAndrew Melnychenko VIRTIO_NET_F_RSS, 660145c393SAndrew Melnychenko VIRTIO_NET_F_HASH_REPORT, 671e0a84eaSCindy Lu VIRTIO_NET_F_GUEST_ANNOUNCE, 689aa47eddSSi-Wei Liu VIRTIO_NET_F_STATUS, 691e0a84eaSCindy Lu VHOST_INVALID_FEATURE_BIT 701e0a84eaSCindy Lu }; 711e0a84eaSCindy Lu 721e0a84eaSCindy Lu VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) 731e0a84eaSCindy Lu { 741e0a84eaSCindy Lu VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 751e0a84eaSCindy Lu assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 761e0a84eaSCindy Lu return s->vhost_net; 771e0a84eaSCindy Lu } 781e0a84eaSCindy Lu 791e0a84eaSCindy Lu static int vhost_vdpa_net_check_device_id(struct vhost_net *net) 801e0a84eaSCindy Lu { 811e0a84eaSCindy Lu uint32_t device_id; 821e0a84eaSCindy Lu int ret; 831e0a84eaSCindy Lu struct vhost_dev *hdev; 841e0a84eaSCindy Lu 851e0a84eaSCindy Lu hdev = (struct vhost_dev *)&net->dev; 861e0a84eaSCindy Lu ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id); 871e0a84eaSCindy Lu if (device_id != VIRTIO_ID_NET) { 881e0a84eaSCindy Lu return -ENOTSUP; 891e0a84eaSCindy Lu } 901e0a84eaSCindy Lu return ret; 911e0a84eaSCindy Lu } 921e0a84eaSCindy Lu 9340237840SJason Wang static int vhost_vdpa_add(NetClientState *ncs, void *be, 9440237840SJason Wang int queue_pair_index, int nvqs) 951e0a84eaSCindy Lu { 961e0a84eaSCindy Lu VhostNetOptions options; 971e0a84eaSCindy Lu struct vhost_net *net = NULL; 981e0a84eaSCindy Lu VhostVDPAState *s; 991e0a84eaSCindy Lu int ret; 1001e0a84eaSCindy Lu 1011e0a84eaSCindy Lu options.backend_type = VHOST_BACKEND_TYPE_VDPA; 1021e0a84eaSCindy Lu assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 1031e0a84eaSCindy Lu s = DO_UPCAST(VhostVDPAState, nc, ncs); 1041e0a84eaSCindy Lu options.net_backend = ncs; 1051e0a84eaSCindy Lu options.opaque = be; 1061e0a84eaSCindy Lu options.busyloop_timeout = 0; 10740237840SJason Wang options.nvqs = nvqs; 1081e0a84eaSCindy Lu 1091e0a84eaSCindy Lu net = vhost_net_init(&options); 1101e0a84eaSCindy Lu if (!net) { 1111e0a84eaSCindy Lu error_report("failed to init vhost_net for queue"); 112a97ef87aSJason Wang goto err_init; 1131e0a84eaSCindy Lu } 1141e0a84eaSCindy Lu s->vhost_net = net; 1151e0a84eaSCindy Lu ret = vhost_vdpa_net_check_device_id(net); 1161e0a84eaSCindy Lu if (ret) { 117a97ef87aSJason Wang goto err_check; 1181e0a84eaSCindy Lu } 1191e0a84eaSCindy Lu return 0; 120a97ef87aSJason Wang err_check: 1211e0a84eaSCindy Lu vhost_net_cleanup(net); 122ab36edcfSJason Wang g_free(net); 123a97ef87aSJason Wang err_init: 1241e0a84eaSCindy Lu return -1; 1251e0a84eaSCindy Lu } 1261e0a84eaSCindy Lu 1271e0a84eaSCindy Lu static void vhost_vdpa_cleanup(NetClientState *nc) 1281e0a84eaSCindy Lu { 1291e0a84eaSCindy Lu VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 1301e0a84eaSCindy Lu 1311e0a84eaSCindy Lu if (s->vhost_net) { 1321e0a84eaSCindy Lu vhost_net_cleanup(s->vhost_net); 1331e0a84eaSCindy Lu g_free(s->vhost_net); 1341e0a84eaSCindy Lu s->vhost_net = NULL; 1351e0a84eaSCindy Lu } 13657b3a7d8SCindy Lu if (s->vhost_vdpa.device_fd >= 0) { 13757b3a7d8SCindy Lu qemu_close(s->vhost_vdpa.device_fd); 13857b3a7d8SCindy Lu s->vhost_vdpa.device_fd = -1; 13957b3a7d8SCindy Lu } 1401e0a84eaSCindy Lu } 1411e0a84eaSCindy Lu 1421e0a84eaSCindy Lu static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc) 1431e0a84eaSCindy Lu { 1441e0a84eaSCindy Lu assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 1451e0a84eaSCindy Lu 1461e0a84eaSCindy Lu return true; 1471e0a84eaSCindy Lu } 1481e0a84eaSCindy Lu 1491e0a84eaSCindy Lu static bool vhost_vdpa_has_ufo(NetClientState *nc) 1501e0a84eaSCindy Lu { 1511e0a84eaSCindy Lu assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); 1521e0a84eaSCindy Lu VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); 1531e0a84eaSCindy Lu uint64_t features = 0; 1541e0a84eaSCindy Lu features |= (1ULL << VIRTIO_NET_F_HOST_UFO); 1551e0a84eaSCindy Lu features = vhost_net_get_features(s->vhost_net, features); 1561e0a84eaSCindy Lu return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO)); 1571e0a84eaSCindy Lu 1581e0a84eaSCindy Lu } 1591e0a84eaSCindy Lu 160ee8a1c63SKevin Wolf static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc, 161ee8a1c63SKevin Wolf Error **errp) 162ee8a1c63SKevin Wolf { 163ee8a1c63SKevin Wolf const char *driver = object_class_get_name(oc); 164ee8a1c63SKevin Wolf 165ee8a1c63SKevin Wolf if (!g_str_has_prefix(driver, "virtio-net-")) { 166ee8a1c63SKevin Wolf error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); 167ee8a1c63SKevin Wolf return false; 168ee8a1c63SKevin Wolf } 169ee8a1c63SKevin Wolf 170ee8a1c63SKevin Wolf return true; 171ee8a1c63SKevin Wolf } 172ee8a1c63SKevin Wolf 173846a1e85SEugenio Pérez /** Dummy receive in case qemu falls back to userland tap networking */ 174846a1e85SEugenio Pérez static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, 175846a1e85SEugenio Pérez size_t size) 176846a1e85SEugenio Pérez { 177846a1e85SEugenio Pérez return 0; 178846a1e85SEugenio Pérez } 179846a1e85SEugenio Pérez 1801e0a84eaSCindy Lu static NetClientInfo net_vhost_vdpa_info = { 1811e0a84eaSCindy Lu .type = NET_CLIENT_DRIVER_VHOST_VDPA, 1821e0a84eaSCindy Lu .size = sizeof(VhostVDPAState), 183846a1e85SEugenio Pérez .receive = vhost_vdpa_receive, 1841e0a84eaSCindy Lu .cleanup = vhost_vdpa_cleanup, 1851e0a84eaSCindy Lu .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, 1861e0a84eaSCindy Lu .has_ufo = vhost_vdpa_has_ufo, 187ee8a1c63SKevin Wolf .check_peer_type = vhost_vdpa_check_peer_type, 1881e0a84eaSCindy Lu }; 1891e0a84eaSCindy Lu 190654790b6SJason Wang static NetClientState *net_vhost_vdpa_init(NetClientState *peer, 191654790b6SJason Wang const char *device, 192654790b6SJason Wang const char *name, 19340237840SJason Wang int vdpa_device_fd, 19440237840SJason Wang int queue_pair_index, 19540237840SJason Wang int nvqs, 19640237840SJason Wang bool is_datapath) 1971e0a84eaSCindy Lu { 1981e0a84eaSCindy Lu NetClientState *nc = NULL; 1991e0a84eaSCindy Lu VhostVDPAState *s; 2001e0a84eaSCindy Lu int ret = 0; 2011e0a84eaSCindy Lu assert(name); 20240237840SJason Wang if (is_datapath) { 20340237840SJason Wang nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, 20440237840SJason Wang name); 20540237840SJason Wang } else { 20640237840SJason Wang nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, 20740237840SJason Wang device, name); 20840237840SJason Wang } 20956e6f594SJason Wang snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); 2101e0a84eaSCindy Lu s = DO_UPCAST(VhostVDPAState, nc, nc); 2117327813dSJason Wang 2121e0a84eaSCindy Lu s->vhost_vdpa.device_fd = vdpa_device_fd; 21340237840SJason Wang s->vhost_vdpa.index = queue_pair_index; 21440237840SJason Wang ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); 21574af5eecSJason Wang if (ret) { 21674af5eecSJason Wang qemu_del_net_client(nc); 217654790b6SJason Wang return NULL; 21874af5eecSJason Wang } 219654790b6SJason Wang return nc; 2201e0a84eaSCindy Lu } 2211e0a84eaSCindy Lu 22240237840SJason Wang static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) 22340237840SJason Wang { 22440237840SJason Wang unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 225cd523a41SStefano Garzarella g_autofree struct vhost_vdpa_config *config = NULL; 22640237840SJason Wang __virtio16 *max_queue_pairs; 22740237840SJason Wang uint64_t features; 22840237840SJason Wang int ret; 22940237840SJason Wang 23040237840SJason Wang ret = ioctl(fd, VHOST_GET_FEATURES, &features); 23140237840SJason Wang if (ret) { 23240237840SJason Wang error_setg(errp, "Fail to query features from vhost-vDPA device"); 23340237840SJason Wang return ret; 23440237840SJason Wang } 23540237840SJason Wang 23640237840SJason Wang if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { 23740237840SJason Wang *has_cvq = 1; 23840237840SJason Wang } else { 23940237840SJason Wang *has_cvq = 0; 24040237840SJason Wang } 24140237840SJason Wang 24240237840SJason Wang if (features & (1 << VIRTIO_NET_F_MQ)) { 24340237840SJason Wang config = g_malloc0(config_size + sizeof(*max_queue_pairs)); 24440237840SJason Wang config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs); 24540237840SJason Wang config->len = sizeof(*max_queue_pairs); 24640237840SJason Wang 24740237840SJason Wang ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config); 24840237840SJason Wang if (ret) { 24940237840SJason Wang error_setg(errp, "Fail to get config from vhost-vDPA device"); 25040237840SJason Wang return -ret; 25140237840SJason Wang } 25240237840SJason Wang 25340237840SJason Wang max_queue_pairs = (__virtio16 *)&config->buf; 25440237840SJason Wang 25540237840SJason Wang return lduw_le_p(max_queue_pairs); 25640237840SJason Wang } 25740237840SJason Wang 25840237840SJason Wang return 1; 25940237840SJason Wang } 26040237840SJason Wang 2611e0a84eaSCindy Lu int net_init_vhost_vdpa(const Netdev *netdev, const char *name, 2621e0a84eaSCindy Lu NetClientState *peer, Error **errp) 2631e0a84eaSCindy Lu { 2641e0a84eaSCindy Lu const NetdevVhostVDPAOptions *opts; 265654790b6SJason Wang int vdpa_device_fd; 266eb3cb751SEugenio Pérez g_autofree NetClientState **ncs = NULL; 267eb3cb751SEugenio Pérez NetClientState *nc; 26840237840SJason Wang int queue_pairs, i, has_cvq = 0; 2691e0a84eaSCindy Lu 2701e0a84eaSCindy Lu assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); 2711e0a84eaSCindy Lu opts = &netdev->u.vhost_vdpa; 272c8295404SEugenio Pérez if (!opts->vhostdev) { 273c8295404SEugenio Pérez error_setg(errp, "vdpa character device not specified with vhostdev"); 274c8295404SEugenio Pérez return -1; 275c8295404SEugenio Pérez } 2767327813dSJason Wang 2770351152bSEugenio Pérez vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); 2787327813dSJason Wang if (vdpa_device_fd == -1) { 2797327813dSJason Wang return -errno; 2807327813dSJason Wang } 2817327813dSJason Wang 28240237840SJason Wang queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, 28340237840SJason Wang &has_cvq, errp); 28440237840SJason Wang if (queue_pairs < 0) { 2857327813dSJason Wang qemu_close(vdpa_device_fd); 28640237840SJason Wang return queue_pairs; 2877327813dSJason Wang } 2887327813dSJason Wang 28940237840SJason Wang ncs = g_malloc0(sizeof(*ncs) * queue_pairs); 29040237840SJason Wang 29140237840SJason Wang for (i = 0; i < queue_pairs; i++) { 29240237840SJason Wang ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, 29340237840SJason Wang vdpa_device_fd, i, 2, true); 29440237840SJason Wang if (!ncs[i]) 29540237840SJason Wang goto err; 29640237840SJason Wang } 29740237840SJason Wang 29840237840SJason Wang if (has_cvq) { 29940237840SJason Wang nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, 30040237840SJason Wang vdpa_device_fd, i, 1, false); 30140237840SJason Wang if (!nc) 30240237840SJason Wang goto err; 30340237840SJason Wang } 30440237840SJason Wang 305654790b6SJason Wang return 0; 30640237840SJason Wang 30740237840SJason Wang err: 30840237840SJason Wang if (i) { 309*9bd05507SSi-Wei Liu for (i--; i >= 0; i--) { 310*9bd05507SSi-Wei Liu qemu_del_net_client(ncs[i]); 311*9bd05507SSi-Wei Liu } 31240237840SJason Wang } 31340237840SJason Wang qemu_close(vdpa_device_fd); 31440237840SJason Wang 31540237840SJason Wang return -1; 3161e0a84eaSCindy Lu } 317