15ee3dc7aSYi Liu /* 25ee3dc7aSYi Liu * iommufd container backend 35ee3dc7aSYi Liu * 45ee3dc7aSYi Liu * Copyright (C) 2023 Intel Corporation. 55ee3dc7aSYi Liu * Copyright Red Hat, Inc. 2023 65ee3dc7aSYi Liu * 75ee3dc7aSYi Liu * Authors: Yi Liu <yi.l.liu@intel.com> 85ee3dc7aSYi Liu * Eric Auger <eric.auger@redhat.com> 95ee3dc7aSYi Liu * 105ee3dc7aSYi Liu * SPDX-License-Identifier: GPL-2.0-or-later 115ee3dc7aSYi Liu */ 125ee3dc7aSYi Liu 135ee3dc7aSYi Liu #include "qemu/osdep.h" 145ee3dc7aSYi Liu #include <sys/ioctl.h> 155ee3dc7aSYi Liu #include <linux/vfio.h> 165ee3dc7aSYi Liu #include <linux/iommufd.h> 175ee3dc7aSYi Liu 185ee3dc7aSYi Liu #include "hw/vfio/vfio-common.h" 195ee3dc7aSYi Liu #include "qemu/error-report.h" 205ee3dc7aSYi Liu #include "trace.h" 215ee3dc7aSYi Liu #include "qapi/error.h" 225ee3dc7aSYi Liu #include "sysemu/iommufd.h" 235ee3dc7aSYi Liu #include "hw/qdev-core.h" 245ee3dc7aSYi Liu #include "sysemu/reset.h" 255ee3dc7aSYi Liu #include "qemu/cutils.h" 265ee3dc7aSYi Liu #include "qemu/chardev_open.h" 275ee3dc7aSYi Liu 285ee3dc7aSYi Liu static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, 295ee3dc7aSYi Liu ram_addr_t size, void *vaddr, bool readonly) 305ee3dc7aSYi Liu { 315ee3dc7aSYi Liu VFIOIOMMUFDContainer *container = 325ee3dc7aSYi Liu container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); 335ee3dc7aSYi Liu 345ee3dc7aSYi Liu return iommufd_backend_map_dma(container->be, 355ee3dc7aSYi Liu container->ioas_id, 365ee3dc7aSYi Liu iova, size, vaddr, readonly); 375ee3dc7aSYi Liu } 385ee3dc7aSYi Liu 395ee3dc7aSYi Liu static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, 405ee3dc7aSYi Liu hwaddr iova, ram_addr_t size, 415ee3dc7aSYi Liu IOMMUTLBEntry *iotlb) 425ee3dc7aSYi Liu { 435ee3dc7aSYi Liu VFIOIOMMUFDContainer *container = 445ee3dc7aSYi Liu container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); 455ee3dc7aSYi Liu 465ee3dc7aSYi Liu /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ 475ee3dc7aSYi Liu return iommufd_backend_unmap_dma(container->be, 485ee3dc7aSYi Liu container->ioas_id, iova, size); 495ee3dc7aSYi Liu } 505ee3dc7aSYi Liu 515ee3dc7aSYi Liu static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp) 525ee3dc7aSYi Liu { 535ee3dc7aSYi Liu return vfio_kvm_device_add_fd(vbasedev->fd, errp); 545ee3dc7aSYi Liu } 555ee3dc7aSYi Liu 565ee3dc7aSYi Liu static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) 575ee3dc7aSYi Liu { 585ee3dc7aSYi Liu Error *err = NULL; 595ee3dc7aSYi Liu 605ee3dc7aSYi Liu if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { 615ee3dc7aSYi Liu error_report_err(err); 625ee3dc7aSYi Liu } 635ee3dc7aSYi Liu } 645ee3dc7aSYi Liu 655ee3dc7aSYi Liu static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) 665ee3dc7aSYi Liu { 675ee3dc7aSYi Liu IOMMUFDBackend *iommufd = vbasedev->iommufd; 685ee3dc7aSYi Liu struct vfio_device_bind_iommufd bind = { 695ee3dc7aSYi Liu .argsz = sizeof(bind), 705ee3dc7aSYi Liu .flags = 0, 715ee3dc7aSYi Liu }; 725ee3dc7aSYi Liu int ret; 735ee3dc7aSYi Liu 745ee3dc7aSYi Liu ret = iommufd_backend_connect(iommufd, errp); 755ee3dc7aSYi Liu if (ret) { 765ee3dc7aSYi Liu return ret; 775ee3dc7aSYi Liu } 785ee3dc7aSYi Liu 795ee3dc7aSYi Liu /* 805ee3dc7aSYi Liu * Add device to kvm-vfio to be prepared for the tracking 815ee3dc7aSYi Liu * in KVM. Especially for some emulated devices, it requires 825ee3dc7aSYi Liu * to have kvm information in the device open. 835ee3dc7aSYi Liu */ 845ee3dc7aSYi Liu ret = iommufd_cdev_kvm_device_add(vbasedev, errp); 855ee3dc7aSYi Liu if (ret) { 865ee3dc7aSYi Liu goto err_kvm_device_add; 875ee3dc7aSYi Liu } 885ee3dc7aSYi Liu 895ee3dc7aSYi Liu /* Bind device to iommufd */ 905ee3dc7aSYi Liu bind.iommufd = iommufd->fd; 915ee3dc7aSYi Liu ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); 925ee3dc7aSYi Liu if (ret) { 935ee3dc7aSYi Liu error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d", 945ee3dc7aSYi Liu vbasedev->fd, bind.iommufd); 955ee3dc7aSYi Liu goto err_bind; 965ee3dc7aSYi Liu } 975ee3dc7aSYi Liu 985ee3dc7aSYi Liu vbasedev->devid = bind.out_devid; 995ee3dc7aSYi Liu trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, 1005ee3dc7aSYi Liu vbasedev->fd, vbasedev->devid); 1015ee3dc7aSYi Liu return ret; 1025ee3dc7aSYi Liu err_bind: 1035ee3dc7aSYi Liu iommufd_cdev_kvm_device_del(vbasedev); 1045ee3dc7aSYi Liu err_kvm_device_add: 1055ee3dc7aSYi Liu iommufd_backend_disconnect(iommufd); 1065ee3dc7aSYi Liu return ret; 1075ee3dc7aSYi Liu } 1085ee3dc7aSYi Liu 1095ee3dc7aSYi Liu static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) 1105ee3dc7aSYi Liu { 1115ee3dc7aSYi Liu /* Unbind is automatically conducted when device fd is closed */ 1125ee3dc7aSYi Liu iommufd_cdev_kvm_device_del(vbasedev); 1135ee3dc7aSYi Liu iommufd_backend_disconnect(vbasedev->iommufd); 1145ee3dc7aSYi Liu } 1155ee3dc7aSYi Liu 1165ee3dc7aSYi Liu static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) 1175ee3dc7aSYi Liu { 1185ee3dc7aSYi Liu long int ret = -ENOTTY; 1195ee3dc7aSYi Liu char *path, *vfio_dev_path = NULL, *vfio_path = NULL; 1205ee3dc7aSYi Liu DIR *dir = NULL; 1215ee3dc7aSYi Liu struct dirent *dent; 1225ee3dc7aSYi Liu gchar *contents; 1235ee3dc7aSYi Liu struct stat st; 1245ee3dc7aSYi Liu gsize length; 1255ee3dc7aSYi Liu int major, minor; 1265ee3dc7aSYi Liu dev_t vfio_devt; 1275ee3dc7aSYi Liu 1285ee3dc7aSYi Liu path = g_strdup_printf("%s/vfio-dev", sysfs_path); 1295ee3dc7aSYi Liu if (stat(path, &st) < 0) { 1305ee3dc7aSYi Liu error_setg_errno(errp, errno, "no such host device"); 1315ee3dc7aSYi Liu goto out_free_path; 1325ee3dc7aSYi Liu } 1335ee3dc7aSYi Liu 1345ee3dc7aSYi Liu dir = opendir(path); 1355ee3dc7aSYi Liu if (!dir) { 1365ee3dc7aSYi Liu error_setg_errno(errp, errno, "couldn't open directory %s", path); 1375ee3dc7aSYi Liu goto out_free_path; 1385ee3dc7aSYi Liu } 1395ee3dc7aSYi Liu 1405ee3dc7aSYi Liu while ((dent = readdir(dir))) { 1415ee3dc7aSYi Liu if (!strncmp(dent->d_name, "vfio", 4)) { 1425ee3dc7aSYi Liu vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name); 1435ee3dc7aSYi Liu break; 1445ee3dc7aSYi Liu } 1455ee3dc7aSYi Liu } 1465ee3dc7aSYi Liu 1475ee3dc7aSYi Liu if (!vfio_dev_path) { 1485ee3dc7aSYi Liu error_setg(errp, "failed to find vfio-dev/vfioX/dev"); 1495ee3dc7aSYi Liu goto out_close_dir; 1505ee3dc7aSYi Liu } 1515ee3dc7aSYi Liu 1525ee3dc7aSYi Liu if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) { 1535ee3dc7aSYi Liu error_setg(errp, "failed to load \"%s\"", vfio_dev_path); 1545ee3dc7aSYi Liu goto out_free_dev_path; 1555ee3dc7aSYi Liu } 1565ee3dc7aSYi Liu 1575ee3dc7aSYi Liu if (sscanf(contents, "%d:%d", &major, &minor) != 2) { 1585ee3dc7aSYi Liu error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path); 1595ee3dc7aSYi Liu goto out_free_dev_path; 1605ee3dc7aSYi Liu } 1615ee3dc7aSYi Liu g_free(contents); 1625ee3dc7aSYi Liu vfio_devt = makedev(major, minor); 1635ee3dc7aSYi Liu 1645ee3dc7aSYi Liu vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name); 1655ee3dc7aSYi Liu ret = open_cdev(vfio_path, vfio_devt); 1665ee3dc7aSYi Liu if (ret < 0) { 1675ee3dc7aSYi Liu error_setg(errp, "Failed to open %s", vfio_path); 1685ee3dc7aSYi Liu } 1695ee3dc7aSYi Liu 1705ee3dc7aSYi Liu trace_iommufd_cdev_getfd(vfio_path, ret); 1715ee3dc7aSYi Liu g_free(vfio_path); 1725ee3dc7aSYi Liu 1735ee3dc7aSYi Liu out_free_dev_path: 1745ee3dc7aSYi Liu g_free(vfio_dev_path); 1755ee3dc7aSYi Liu out_close_dir: 1765ee3dc7aSYi Liu closedir(dir); 1775ee3dc7aSYi Liu out_free_path: 1785ee3dc7aSYi Liu if (*errp) { 1795ee3dc7aSYi Liu error_prepend(errp, VFIO_MSG_PREFIX, path); 1805ee3dc7aSYi Liu } 1815ee3dc7aSYi Liu g_free(path); 1825ee3dc7aSYi Liu 1835ee3dc7aSYi Liu return ret; 1845ee3dc7aSYi Liu } 1855ee3dc7aSYi Liu 1865ee3dc7aSYi Liu static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, 1875ee3dc7aSYi Liu Error **errp) 1885ee3dc7aSYi Liu { 1895ee3dc7aSYi Liu int ret, iommufd = vbasedev->iommufd->fd; 1905ee3dc7aSYi Liu struct vfio_device_attach_iommufd_pt attach_data = { 1915ee3dc7aSYi Liu .argsz = sizeof(attach_data), 1925ee3dc7aSYi Liu .flags = 0, 1935ee3dc7aSYi Liu .pt_id = id, 1945ee3dc7aSYi Liu }; 1955ee3dc7aSYi Liu 1965ee3dc7aSYi Liu /* Attach device to an IOAS or hwpt within iommufd */ 1975ee3dc7aSYi Liu ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); 1985ee3dc7aSYi Liu if (ret) { 1995ee3dc7aSYi Liu error_setg_errno(errp, errno, 2005ee3dc7aSYi Liu "[iommufd=%d] error attach %s (%d) to id=%d", 2015ee3dc7aSYi Liu iommufd, vbasedev->name, vbasedev->fd, id); 2025ee3dc7aSYi Liu } else { 2035ee3dc7aSYi Liu trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, 2045ee3dc7aSYi Liu vbasedev->fd, id); 2055ee3dc7aSYi Liu } 2065ee3dc7aSYi Liu return ret; 2075ee3dc7aSYi Liu } 2085ee3dc7aSYi Liu 2095ee3dc7aSYi Liu static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) 2105ee3dc7aSYi Liu { 2115ee3dc7aSYi Liu int ret, iommufd = vbasedev->iommufd->fd; 2125ee3dc7aSYi Liu struct vfio_device_detach_iommufd_pt detach_data = { 2135ee3dc7aSYi Liu .argsz = sizeof(detach_data), 2145ee3dc7aSYi Liu .flags = 0, 2155ee3dc7aSYi Liu }; 2165ee3dc7aSYi Liu 2175ee3dc7aSYi Liu ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data); 2185ee3dc7aSYi Liu if (ret) { 2195ee3dc7aSYi Liu error_setg_errno(errp, errno, "detach %s failed", vbasedev->name); 2205ee3dc7aSYi Liu } else { 2215ee3dc7aSYi Liu trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name); 2225ee3dc7aSYi Liu } 2235ee3dc7aSYi Liu return ret; 2245ee3dc7aSYi Liu } 2255ee3dc7aSYi Liu 2265ee3dc7aSYi Liu static int iommufd_cdev_attach_container(VFIODevice *vbasedev, 2275ee3dc7aSYi Liu VFIOIOMMUFDContainer *container, 2285ee3dc7aSYi Liu Error **errp) 2295ee3dc7aSYi Liu { 2305ee3dc7aSYi Liu return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); 2315ee3dc7aSYi Liu } 2325ee3dc7aSYi Liu 2335ee3dc7aSYi Liu static void iommufd_cdev_detach_container(VFIODevice *vbasedev, 2345ee3dc7aSYi Liu VFIOIOMMUFDContainer *container) 2355ee3dc7aSYi Liu { 2365ee3dc7aSYi Liu Error *err = NULL; 2375ee3dc7aSYi Liu 2385ee3dc7aSYi Liu if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { 2395ee3dc7aSYi Liu error_report_err(err); 2405ee3dc7aSYi Liu } 2415ee3dc7aSYi Liu } 2425ee3dc7aSYi Liu 2435ee3dc7aSYi Liu static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) 2445ee3dc7aSYi Liu { 2455ee3dc7aSYi Liu VFIOContainerBase *bcontainer = &container->bcontainer; 2465ee3dc7aSYi Liu 2475ee3dc7aSYi Liu if (!QLIST_EMPTY(&bcontainer->device_list)) { 2485ee3dc7aSYi Liu return; 2495ee3dc7aSYi Liu } 2505ee3dc7aSYi Liu memory_listener_unregister(&bcontainer->listener); 2515ee3dc7aSYi Liu vfio_container_destroy(bcontainer); 2525ee3dc7aSYi Liu iommufd_backend_free_id(container->be, container->ioas_id); 2535ee3dc7aSYi Liu g_free(container); 2545ee3dc7aSYi Liu } 2555ee3dc7aSYi Liu 2565ee3dc7aSYi Liu static int iommufd_cdev_ram_block_discard_disable(bool state) 2575ee3dc7aSYi Liu { 2585ee3dc7aSYi Liu /* 2595ee3dc7aSYi Liu * We support coordinated discarding of RAM via the RamDiscardManager. 2605ee3dc7aSYi Liu */ 2615ee3dc7aSYi Liu return ram_block_uncoordinated_discard_disable(state); 2625ee3dc7aSYi Liu } 2635ee3dc7aSYi Liu 264*714e9affSZhenzhong Duan static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, 265*714e9affSZhenzhong Duan uint32_t ioas_id, Error **errp) 266*714e9affSZhenzhong Duan { 267*714e9affSZhenzhong Duan VFIOContainerBase *bcontainer = &container->bcontainer; 268*714e9affSZhenzhong Duan struct iommu_ioas_iova_ranges *info; 269*714e9affSZhenzhong Duan struct iommu_iova_range *iova_ranges; 270*714e9affSZhenzhong Duan int ret, sz, fd = container->be->fd; 271*714e9affSZhenzhong Duan 272*714e9affSZhenzhong Duan info = g_malloc0(sizeof(*info)); 273*714e9affSZhenzhong Duan info->size = sizeof(*info); 274*714e9affSZhenzhong Duan info->ioas_id = ioas_id; 275*714e9affSZhenzhong Duan 276*714e9affSZhenzhong Duan ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); 277*714e9affSZhenzhong Duan if (ret && errno != EMSGSIZE) { 278*714e9affSZhenzhong Duan goto error; 279*714e9affSZhenzhong Duan } 280*714e9affSZhenzhong Duan 281*714e9affSZhenzhong Duan sz = info->num_iovas * sizeof(struct iommu_iova_range); 282*714e9affSZhenzhong Duan info = g_realloc(info, sizeof(*info) + sz); 283*714e9affSZhenzhong Duan info->allowed_iovas = (uintptr_t)(info + 1); 284*714e9affSZhenzhong Duan 285*714e9affSZhenzhong Duan ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); 286*714e9affSZhenzhong Duan if (ret) { 287*714e9affSZhenzhong Duan goto error; 288*714e9affSZhenzhong Duan } 289*714e9affSZhenzhong Duan 290*714e9affSZhenzhong Duan iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas; 291*714e9affSZhenzhong Duan 292*714e9affSZhenzhong Duan for (int i = 0; i < info->num_iovas; i++) { 293*714e9affSZhenzhong Duan Range *range = g_new(Range, 1); 294*714e9affSZhenzhong Duan 295*714e9affSZhenzhong Duan range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last); 296*714e9affSZhenzhong Duan bcontainer->iova_ranges = 297*714e9affSZhenzhong Duan range_list_insert(bcontainer->iova_ranges, range); 298*714e9affSZhenzhong Duan } 299*714e9affSZhenzhong Duan bcontainer->pgsizes = info->out_iova_alignment; 300*714e9affSZhenzhong Duan 301*714e9affSZhenzhong Duan g_free(info); 302*714e9affSZhenzhong Duan return 0; 303*714e9affSZhenzhong Duan 304*714e9affSZhenzhong Duan error: 305*714e9affSZhenzhong Duan ret = -errno; 306*714e9affSZhenzhong Duan g_free(info); 307*714e9affSZhenzhong Duan error_setg_errno(errp, errno, "Cannot get IOVA ranges"); 308*714e9affSZhenzhong Duan return ret; 309*714e9affSZhenzhong Duan } 310*714e9affSZhenzhong Duan 3115ee3dc7aSYi Liu static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, 3125ee3dc7aSYi Liu AddressSpace *as, Error **errp) 3135ee3dc7aSYi Liu { 3145ee3dc7aSYi Liu VFIOContainerBase *bcontainer; 3155ee3dc7aSYi Liu VFIOIOMMUFDContainer *container; 3165ee3dc7aSYi Liu VFIOAddressSpace *space; 3175ee3dc7aSYi Liu struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; 3185ee3dc7aSYi Liu int ret, devfd; 3195ee3dc7aSYi Liu uint32_t ioas_id; 3205ee3dc7aSYi Liu Error *err = NULL; 3215ee3dc7aSYi Liu 3225ee3dc7aSYi Liu devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); 3235ee3dc7aSYi Liu if (devfd < 0) { 3245ee3dc7aSYi Liu return devfd; 3255ee3dc7aSYi Liu } 3265ee3dc7aSYi Liu vbasedev->fd = devfd; 3275ee3dc7aSYi Liu 3285ee3dc7aSYi Liu ret = iommufd_cdev_connect_and_bind(vbasedev, errp); 3295ee3dc7aSYi Liu if (ret) { 3305ee3dc7aSYi Liu goto err_connect_bind; 3315ee3dc7aSYi Liu } 3325ee3dc7aSYi Liu 3335ee3dc7aSYi Liu space = vfio_get_address_space(as); 3345ee3dc7aSYi Liu 3355ee3dc7aSYi Liu /* try to attach to an existing container in this space */ 3365ee3dc7aSYi Liu QLIST_FOREACH(bcontainer, &space->containers, next) { 3375ee3dc7aSYi Liu container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); 3385ee3dc7aSYi Liu if (bcontainer->ops != &vfio_iommufd_ops || 3395ee3dc7aSYi Liu vbasedev->iommufd != container->be) { 3405ee3dc7aSYi Liu continue; 3415ee3dc7aSYi Liu } 3425ee3dc7aSYi Liu if (iommufd_cdev_attach_container(vbasedev, container, &err)) { 3435ee3dc7aSYi Liu const char *msg = error_get_pretty(err); 3445ee3dc7aSYi Liu 3455ee3dc7aSYi Liu trace_iommufd_cdev_fail_attach_existing_container(msg); 3465ee3dc7aSYi Liu error_free(err); 3475ee3dc7aSYi Liu err = NULL; 3485ee3dc7aSYi Liu } else { 3495ee3dc7aSYi Liu ret = iommufd_cdev_ram_block_discard_disable(true); 3505ee3dc7aSYi Liu if (ret) { 3515ee3dc7aSYi Liu error_setg(errp, 3525ee3dc7aSYi Liu "Cannot set discarding of RAM broken (%d)", ret); 3535ee3dc7aSYi Liu goto err_discard_disable; 3545ee3dc7aSYi Liu } 3555ee3dc7aSYi Liu goto found_container; 3565ee3dc7aSYi Liu } 3575ee3dc7aSYi Liu } 3585ee3dc7aSYi Liu 3595ee3dc7aSYi Liu /* Need to allocate a new dedicated container */ 3605ee3dc7aSYi Liu ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp); 3615ee3dc7aSYi Liu if (ret < 0) { 3625ee3dc7aSYi Liu goto err_alloc_ioas; 3635ee3dc7aSYi Liu } 3645ee3dc7aSYi Liu 3655ee3dc7aSYi Liu trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); 3665ee3dc7aSYi Liu 3675ee3dc7aSYi Liu container = g_malloc0(sizeof(*container)); 3685ee3dc7aSYi Liu container->be = vbasedev->iommufd; 3695ee3dc7aSYi Liu container->ioas_id = ioas_id; 3705ee3dc7aSYi Liu 3715ee3dc7aSYi Liu bcontainer = &container->bcontainer; 3725ee3dc7aSYi Liu vfio_container_init(bcontainer, space, &vfio_iommufd_ops); 3735ee3dc7aSYi Liu QLIST_INSERT_HEAD(&space->containers, bcontainer, next); 3745ee3dc7aSYi Liu 3755ee3dc7aSYi Liu ret = iommufd_cdev_attach_container(vbasedev, container, errp); 3765ee3dc7aSYi Liu if (ret) { 3775ee3dc7aSYi Liu goto err_attach_container; 3785ee3dc7aSYi Liu } 3795ee3dc7aSYi Liu 3805ee3dc7aSYi Liu ret = iommufd_cdev_ram_block_discard_disable(true); 3815ee3dc7aSYi Liu if (ret) { 3825ee3dc7aSYi Liu goto err_discard_disable; 3835ee3dc7aSYi Liu } 3845ee3dc7aSYi Liu 385*714e9affSZhenzhong Duan ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err); 386*714e9affSZhenzhong Duan if (ret) { 387*714e9affSZhenzhong Duan error_append_hint(&err, 388*714e9affSZhenzhong Duan "Fallback to default 64bit IOVA range and 4K page size\n"); 389*714e9affSZhenzhong Duan warn_report_err(err); 390*714e9affSZhenzhong Duan err = NULL; 3915ee3dc7aSYi Liu bcontainer->pgsizes = qemu_real_host_page_size(); 392*714e9affSZhenzhong Duan } 3935ee3dc7aSYi Liu 3945ee3dc7aSYi Liu bcontainer->listener = vfio_memory_listener; 3955ee3dc7aSYi Liu memory_listener_register(&bcontainer->listener, bcontainer->space->as); 3965ee3dc7aSYi Liu 3975ee3dc7aSYi Liu if (bcontainer->error) { 3985ee3dc7aSYi Liu ret = -1; 3995ee3dc7aSYi Liu error_propagate_prepend(errp, bcontainer->error, 4005ee3dc7aSYi Liu "memory listener initialization failed: "); 4015ee3dc7aSYi Liu goto err_listener_register; 4025ee3dc7aSYi Liu } 4035ee3dc7aSYi Liu 4045ee3dc7aSYi Liu bcontainer->initialized = true; 4055ee3dc7aSYi Liu 4065ee3dc7aSYi Liu found_container: 4075ee3dc7aSYi Liu ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info); 4085ee3dc7aSYi Liu if (ret) { 4095ee3dc7aSYi Liu error_setg_errno(errp, errno, "error getting device info"); 4105ee3dc7aSYi Liu goto err_listener_register; 4115ee3dc7aSYi Liu } 4125ee3dc7aSYi Liu 4135ee3dc7aSYi Liu /* 4145ee3dc7aSYi Liu * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level 4155ee3dc7aSYi Liu * for discarding incompatibility check as well? 4165ee3dc7aSYi Liu */ 4175ee3dc7aSYi Liu if (vbasedev->ram_block_discard_allowed) { 4185ee3dc7aSYi Liu iommufd_cdev_ram_block_discard_disable(false); 4195ee3dc7aSYi Liu } 4205ee3dc7aSYi Liu 4215ee3dc7aSYi Liu vbasedev->group = 0; 4225ee3dc7aSYi Liu vbasedev->num_irqs = dev_info.num_irqs; 4235ee3dc7aSYi Liu vbasedev->num_regions = dev_info.num_regions; 4245ee3dc7aSYi Liu vbasedev->flags = dev_info.flags; 4255ee3dc7aSYi Liu vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); 4265ee3dc7aSYi Liu vbasedev->bcontainer = bcontainer; 4275ee3dc7aSYi Liu QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); 4285ee3dc7aSYi Liu QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); 4295ee3dc7aSYi Liu 4305ee3dc7aSYi Liu trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, 4315ee3dc7aSYi Liu vbasedev->num_regions, vbasedev->flags); 4325ee3dc7aSYi Liu return 0; 4335ee3dc7aSYi Liu 4345ee3dc7aSYi Liu err_listener_register: 4355ee3dc7aSYi Liu iommufd_cdev_ram_block_discard_disable(false); 4365ee3dc7aSYi Liu err_discard_disable: 4375ee3dc7aSYi Liu iommufd_cdev_detach_container(vbasedev, container); 4385ee3dc7aSYi Liu err_attach_container: 4395ee3dc7aSYi Liu iommufd_cdev_container_destroy(container); 4405ee3dc7aSYi Liu err_alloc_ioas: 4415ee3dc7aSYi Liu vfio_put_address_space(space); 4425ee3dc7aSYi Liu iommufd_cdev_unbind_and_disconnect(vbasedev); 4435ee3dc7aSYi Liu err_connect_bind: 4445ee3dc7aSYi Liu close(vbasedev->fd); 4455ee3dc7aSYi Liu return ret; 4465ee3dc7aSYi Liu } 4475ee3dc7aSYi Liu 4485ee3dc7aSYi Liu static void iommufd_cdev_detach(VFIODevice *vbasedev) 4495ee3dc7aSYi Liu { 4505ee3dc7aSYi Liu VFIOContainerBase *bcontainer = vbasedev->bcontainer; 4515ee3dc7aSYi Liu VFIOAddressSpace *space = bcontainer->space; 4525ee3dc7aSYi Liu VFIOIOMMUFDContainer *container = container_of(bcontainer, 4535ee3dc7aSYi Liu VFIOIOMMUFDContainer, 4545ee3dc7aSYi Liu bcontainer); 4555ee3dc7aSYi Liu QLIST_REMOVE(vbasedev, global_next); 4565ee3dc7aSYi Liu QLIST_REMOVE(vbasedev, container_next); 4575ee3dc7aSYi Liu vbasedev->bcontainer = NULL; 4585ee3dc7aSYi Liu 4595ee3dc7aSYi Liu if (!vbasedev->ram_block_discard_allowed) { 4605ee3dc7aSYi Liu iommufd_cdev_ram_block_discard_disable(false); 4615ee3dc7aSYi Liu } 4625ee3dc7aSYi Liu 4635ee3dc7aSYi Liu iommufd_cdev_detach_container(vbasedev, container); 4645ee3dc7aSYi Liu iommufd_cdev_container_destroy(container); 4655ee3dc7aSYi Liu vfio_put_address_space(space); 4665ee3dc7aSYi Liu 4675ee3dc7aSYi Liu iommufd_cdev_unbind_and_disconnect(vbasedev); 4685ee3dc7aSYi Liu close(vbasedev->fd); 4695ee3dc7aSYi Liu } 4705ee3dc7aSYi Liu 4715ee3dc7aSYi Liu const VFIOIOMMUOps vfio_iommufd_ops = { 4725ee3dc7aSYi Liu .dma_map = iommufd_cdev_map, 4735ee3dc7aSYi Liu .dma_unmap = iommufd_cdev_unmap, 4745ee3dc7aSYi Liu .attach_device = iommufd_cdev_attach, 4755ee3dc7aSYi Liu .detach_device = iommufd_cdev_detach, 4765ee3dc7aSYi Liu }; 477