/*
 * VFIO utility
 *
 * Copyright 2016 - 2018 Red Hat, Inc.
 *
 * Authors:
 *   Fam Zheng <famz@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/cpu-common.h"
#include "exec/memory.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "standard-headers/linux/pci_regs.h"
#include "qemu/event_notifier.h"
#include "qemu/vfio-helpers.h"
#include "qemu/lockable.h"
/* NOTE(review): duplicate of the "trace.h" include above; harmless but removable */
#include "trace.h"

#define QEMU_VFIO_DEBUG 0

#define QEMU_VFIO_IOVA_MIN 0x10000ULL
/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
 * we can use a runtime limit; alternatively it's also possible to do platform
 * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
 **/
#define QEMU_VFIO_IOVA_MAX (1ULL << 39)

/* One host-virtual <-> IO-virtual address mapping tracked by QEMUVFIOState. */
typedef struct {
    /* Page aligned addr. */
    void *host;
    size_t size;    /* length of the mapped region in bytes */
    uint64_t iova;  /* IO virtual address the host range is mapped at */
} IOVAMapping;

/* One usable IOVA window reported by the kernel; bounds are inclusive. */
struct IOVARange {
    uint64_t start;
    uint64_t end;
};

struct QEMUVFIOState {
    QemuMutex lock;

    /* These fields are protected by BQL */
    int container;   /* /dev/vfio/vfio fd */
    int group;       /* /dev/vfio/<group> fd */
    int device;      /* VFIO device fd */
    RAMBlockNotifier ram_notifier;
    struct vfio_region_info config_region_info, bar_region_info[6];
    struct IOVARange *usable_iova_ranges;
    uint8_t nb_iova_ranges;

    /* These fields are protected by @lock */
    /* VFIO's IO virtual address space is managed by splitting into a few
     * sections:
     *
     * ---------------       <= 0
     * |xxxxxxxxxxxxx|
     * |-------------|       <= QEMU_VFIO_IOVA_MIN
     * |             |
     * |    Fixed    |
     * |             |
     * |-------------|       <= low_water_mark
     * |             |
     * |    Free     |
     * |             |
     * |-------------|       <= high_water_mark
     * |             |
     * |    Temp     |
     * |             |
     * |-------------|       <= QEMU_VFIO_IOVA_MAX
     * |xxxxxxxxxxxxx|
     * |xxxxxxxxxxxxx|
     * ---------------
     *
     * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
     *
     * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
     *   [QEMU_VFIO_IOVA_MIN, low_water_mark).  Once allocated they will not be
     *   reclaimed - low_water_mark never shrinks;
     *
     * - IOVAs in range [low_water_mark, high_water_mark) are free;
     *
     * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
     *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
     *   is recycled. The caller should make sure I/O's depending on these
     *   mappings are completed before calling.
     **/
    uint64_t low_water_mark;
    uint64_t high_water_mark;
    IOVAMapping *mappings;   /* sorted by ->host, ascending, non-overlapping */
    int nr_mappings;
};

/**
 * Find group file by PCI device address as specified @device, and return the
 * path. The returned string is owned by caller and should be g_free'ed later.
 */
static char *sysfs_find_group_file(const char *device, Error **errp)
{
    char *sysfs_link;
    char *sysfs_group;
    char *p;
    char *path = NULL;

    sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
    sysfs_group = g_malloc0(PATH_MAX);
    /* readlink() does not NUL-terminate; g_malloc0 + PATH_MAX-1 guarantees it */
    if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
        error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
        goto out;
    }
    /* The link target ends in ".../iommu_groups/<n>"; extract the <n> part */
    p = strrchr(sysfs_group, '/');
    if (!p) {
        error_setg(errp, "Failed to find iommu group number");
        goto out;
    }

    path = g_strdup_printf("/dev/vfio/%s", p + 1);
out:
    g_free(sysfs_link);
    g_free(sysfs_group);
    return path;
}

/* Bounds-check a BAR index against bar_region_info[]. */
static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
{
    assert(index >= 0 && index < ARRAY_SIZE(s->bar_region_info));
}

/* Query the VFIO region info for BAR @index and cache it in @s. */
static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
{
    g_autofree char *barname = NULL;
    assert_bar_index_valid(s, index);
    s->bar_region_info[index] = (struct vfio_region_info) {
        .index = VFIO_PCI_BAR0_REGION_INDEX + index,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->bar_region_info[index])) {
        error_setg_errno(errp, errno, "Failed to get BAR region info");
        return -errno;
    }
    barname = g_strdup_printf("bar[%d]", index);
    trace_qemu_vfio_region_info(barname, s->bar_region_info[index].offset,
                                s->bar_region_info[index].size,
                                s->bar_region_info[index].cap_offset);

    return 0;
}

/**
 * Map a PCI bar area.
 */
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
                            uint64_t offset, uint64_t size, int prot,
                            Error **errp)
{
    void *p;
    assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size()));
    assert_bar_index_valid(s, index);
    /* Clamp the length so the mapping never extends past the BAR region */
    p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
             prot, MAP_SHARED,
             s->device, s->bar_region_info[index].offset + offset);
    trace_qemu_vfio_pci_map_bar(index, s->bar_region_info[index].offset,
                                size, offset, p);
    if (p == MAP_FAILED) {
        error_setg_errno(errp, errno, "Failed to map BAR region");
        p = NULL;
    }
    return p;
}

/**
 * Unmap a PCI bar area.
 */
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
                             uint64_t offset, uint64_t size)
{
    if (bar) {
        /* Mirror the clamping done at map time so lengths match */
        munmap(bar, MIN(size, s->bar_region_info[index].size - offset));
    }
}

/**
 * Initialize device IRQ with @irq_type and register an event notifier.
 */
int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
                           int irq_type, Error **errp)
{
    int r;
    struct vfio_irq_set *irq_set;
    size_t irq_set_size;
    struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };

    irq_info.index = irq_type;
    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
        error_setg_errno(errp, errno, "Failed to get device interrupt info");
        return -errno;
    }
    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
        error_setg(errp, "Device interrupt doesn't support eventfd");
        return -EINVAL;
    }

    /* Variable-size payload: one eventfd descriptor after the header */
    irq_set_size = sizeof(*irq_set) + sizeof(int);
    irq_set = g_malloc0(irq_set_size);

    /* Get to a known IRQ state */
    *irq_set = (struct vfio_irq_set) {
        .argsz = irq_set_size,
        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = irq_info.index,
        .start = 0,
        .count = 1,
    };

    *(int *)&irq_set->data = event_notifier_get_fd(e);
    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);
    if (r) {
        error_setg_errno(errp, errno, "Failed to setup device interrupt");
        return -errno;
    }
    return 0;
}

/* Read @size bytes at @ofs from the device's PCI config space region. */
static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
                                     int size, int ofs)
{
    int ret;

    trace_qemu_vfio_pci_read_config(buf, ofs, size,
                                    s->config_region_info.offset,
                                    s->config_region_info.size);
    assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
    ret = RETRY_ON_EINTR(
        pread(s->device, buf, size, s->config_region_info.offset + ofs)
    );
    /* A short read is treated as failure; errno is from the failing pread */
    return ret == size ? 0 : -errno;
}

/* Write @size bytes at @ofs into the device's PCI config space region. */
static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int ofs)
{
    int ret;

    trace_qemu_vfio_pci_write_config(buf, ofs, size,
                                     s->config_region_info.offset,
                                     s->config_region_info.size);
    assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
    ret = RETRY_ON_EINTR(
        pwrite(s->device, buf, size, s->config_region_info.offset + ofs)
    );
    return ret == size ? 0 : -errno;
}

/*
 * Walk the capability chain in the VFIO_IOMMU_GET_INFO reply @buf and, if a
 * VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE capability is present, copy the
 * kernel-reported usable IOVA windows into @s.  Leaves @s untouched when the
 * capability is absent.
 */
static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
{
    struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
    struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
    int i;

    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
        if (!cap->next) {
            return;
        }
        cap = buf + cap->next;
    }

    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;

    s->nb_iova_ranges = cap_iova_range->nr_iovas;
    if (s->nb_iova_ranges > 1) {
        /* the caller pre-allocated a single-entry array; grow it */
        s->usable_iova_ranges =
            g_renew(struct IOVARange, s->usable_iova_ranges,
                    s->nb_iova_ranges);
    }

    for (i = 0; i < s->nb_iova_ranges; i++) {
        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
    }
}

/* Open container/group/device fds and initialize PCI state for @device. */
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
                              Error **errp)
{
    int ret;
    int i;
    uint16_t pci_cmd;
    struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
    struct vfio_iommu_type1_info *iommu_info = NULL;
Auger size_t iommu_info_size = sizeof(*iommu_info); 301418026caSFam Zheng struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; 302418026caSFam Zheng char *group_file = NULL; 303418026caSFam Zheng 3044487d420SEric Auger s->usable_iova_ranges = NULL; 3054487d420SEric Auger 306418026caSFam Zheng /* Create a new container */ 307418026caSFam Zheng s->container = open("/dev/vfio/vfio", O_RDWR); 308418026caSFam Zheng 309418026caSFam Zheng if (s->container == -1) { 310418026caSFam Zheng error_setg_errno(errp, errno, "Failed to open /dev/vfio/vfio"); 311418026caSFam Zheng return -errno; 312418026caSFam Zheng } 313418026caSFam Zheng if (ioctl(s->container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) { 314418026caSFam Zheng error_setg(errp, "Invalid VFIO version"); 315418026caSFam Zheng ret = -EINVAL; 316418026caSFam Zheng goto fail_container; 317418026caSFam Zheng } 318418026caSFam Zheng 319418026caSFam Zheng if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { 320a4bc212aSPhilippe Mathieu-Daudé error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported"); 321418026caSFam Zheng ret = -EINVAL; 322418026caSFam Zheng goto fail_container; 323418026caSFam Zheng } 324418026caSFam Zheng 325418026caSFam Zheng /* Open the group */ 326418026caSFam Zheng group_file = sysfs_find_group_file(device, errp); 327418026caSFam Zheng if (!group_file) { 328418026caSFam Zheng ret = -EINVAL; 329418026caSFam Zheng goto fail_container; 330418026caSFam Zheng } 331418026caSFam Zheng 332418026caSFam Zheng s->group = open(group_file, O_RDWR); 333418026caSFam Zheng if (s->group == -1) { 334418026caSFam Zheng error_setg_errno(errp, errno, "Failed to open VFIO group file: %s", 335418026caSFam Zheng group_file); 336418026caSFam Zheng g_free(group_file); 337418026caSFam Zheng ret = -errno; 338418026caSFam Zheng goto fail_container; 339418026caSFam Zheng } 340418026caSFam Zheng g_free(group_file); 341418026caSFam Zheng 342418026caSFam Zheng /* Test the group is viable 
and available */ 343418026caSFam Zheng if (ioctl(s->group, VFIO_GROUP_GET_STATUS, &group_status)) { 344418026caSFam Zheng error_setg_errno(errp, errno, "Failed to get VFIO group status"); 345418026caSFam Zheng ret = -errno; 346418026caSFam Zheng goto fail; 347418026caSFam Zheng } 348418026caSFam Zheng 349418026caSFam Zheng if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { 350418026caSFam Zheng error_setg(errp, "VFIO group is not viable"); 351418026caSFam Zheng ret = -EINVAL; 352418026caSFam Zheng goto fail; 353418026caSFam Zheng } 354418026caSFam Zheng 355418026caSFam Zheng /* Add the group to the container */ 356418026caSFam Zheng if (ioctl(s->group, VFIO_GROUP_SET_CONTAINER, &s->container)) { 357418026caSFam Zheng error_setg_errno(errp, errno, "Failed to add group to VFIO container"); 358418026caSFam Zheng ret = -errno; 359418026caSFam Zheng goto fail; 360418026caSFam Zheng } 361418026caSFam Zheng 362418026caSFam Zheng /* Enable the IOMMU model we want */ 363418026caSFam Zheng if (ioctl(s->container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) { 364418026caSFam Zheng error_setg_errno(errp, errno, "Failed to set VFIO IOMMU type"); 365418026caSFam Zheng ret = -errno; 366418026caSFam Zheng goto fail; 367418026caSFam Zheng } 368418026caSFam Zheng 3694487d420SEric Auger iommu_info = g_malloc0(iommu_info_size); 3704487d420SEric Auger iommu_info->argsz = iommu_info_size; 3714487d420SEric Auger 372418026caSFam Zheng /* Get additional IOMMU info */ 3734487d420SEric Auger if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) { 374418026caSFam Zheng error_setg_errno(errp, errno, "Failed to get IOMMU info"); 375418026caSFam Zheng ret = -errno; 376418026caSFam Zheng goto fail; 377418026caSFam Zheng } 378418026caSFam Zheng 3794487d420SEric Auger /* 3804487d420SEric Auger * if the kernel does not report usable IOVA regions, choose 3814487d420SEric Auger * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region 3824487d420SEric Auger */ 3834487d420SEric Auger 
s->nb_iova_ranges = 1; 3844487d420SEric Auger s->usable_iova_ranges = g_new0(struct IOVARange, 1); 3854487d420SEric Auger s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN; 3864487d420SEric Auger s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1; 3874487d420SEric Auger 3884487d420SEric Auger if (iommu_info->argsz > iommu_info_size) { 3894487d420SEric Auger iommu_info_size = iommu_info->argsz; 3904487d420SEric Auger iommu_info = g_realloc(iommu_info, iommu_info_size); 3914487d420SEric Auger if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) { 3924487d420SEric Auger ret = -errno; 3934487d420SEric Auger goto fail; 3944487d420SEric Auger } 3954487d420SEric Auger collect_usable_iova_ranges(s, iommu_info); 3964487d420SEric Auger } 3974487d420SEric Auger 398418026caSFam Zheng s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device); 399418026caSFam Zheng 400418026caSFam Zheng if (s->device < 0) { 401418026caSFam Zheng error_setg_errno(errp, errno, "Failed to get device fd"); 402418026caSFam Zheng ret = -errno; 403418026caSFam Zheng goto fail; 404418026caSFam Zheng } 405418026caSFam Zheng 406418026caSFam Zheng /* Test and setup the device */ 407418026caSFam Zheng if (ioctl(s->device, VFIO_DEVICE_GET_INFO, &device_info)) { 408418026caSFam Zheng error_setg_errno(errp, errno, "Failed to get device info"); 409418026caSFam Zheng ret = -errno; 410418026caSFam Zheng goto fail; 411418026caSFam Zheng } 412418026caSFam Zheng 413418026caSFam Zheng if (device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) { 414418026caSFam Zheng error_setg(errp, "Invalid device regions"); 415418026caSFam Zheng ret = -EINVAL; 416418026caSFam Zheng goto fail; 417418026caSFam Zheng } 418418026caSFam Zheng 419418026caSFam Zheng s->config_region_info = (struct vfio_region_info) { 420418026caSFam Zheng .index = VFIO_PCI_CONFIG_REGION_INDEX, 421418026caSFam Zheng .argsz = sizeof(struct vfio_region_info), 422418026caSFam Zheng }; 423418026caSFam Zheng if (ioctl(s->device, 
VFIO_DEVICE_GET_REGION_INFO, &s->config_region_info)) { 424418026caSFam Zheng error_setg_errno(errp, errno, "Failed to get config region info"); 425418026caSFam Zheng ret = -errno; 426418026caSFam Zheng goto fail; 427418026caSFam Zheng } 428df058222SPhilippe Mathieu-Daudé trace_qemu_vfio_region_info("config", s->config_region_info.offset, 429df058222SPhilippe Mathieu-Daudé s->config_region_info.size, 430df058222SPhilippe Mathieu-Daudé s->config_region_info.cap_offset); 431418026caSFam Zheng 4329e722ebcSLi Qiang for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) { 433418026caSFam Zheng ret = qemu_vfio_pci_init_bar(s, i, errp); 434418026caSFam Zheng if (ret) { 435418026caSFam Zheng goto fail; 436418026caSFam Zheng } 437418026caSFam Zheng } 438418026caSFam Zheng 439418026caSFam Zheng /* Enable bus master */ 440418026caSFam Zheng ret = qemu_vfio_pci_read_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND); 441418026caSFam Zheng if (ret) { 442418026caSFam Zheng goto fail; 443418026caSFam Zheng } 444418026caSFam Zheng pci_cmd |= PCI_COMMAND_MASTER; 445418026caSFam Zheng ret = qemu_vfio_pci_write_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND); 446418026caSFam Zheng if (ret) { 447418026caSFam Zheng goto fail; 448418026caSFam Zheng } 4494487d420SEric Auger g_free(iommu_info); 450418026caSFam Zheng return 0; 451418026caSFam Zheng fail: 4524487d420SEric Auger g_free(s->usable_iova_ranges); 4534487d420SEric Auger s->usable_iova_ranges = NULL; 4544487d420SEric Auger s->nb_iova_ranges = 0; 4554487d420SEric Auger g_free(iommu_info); 456418026caSFam Zheng close(s->group); 457418026caSFam Zheng fail_container: 458418026caSFam Zheng close(s->container); 459418026caSFam Zheng return ret; 460418026caSFam Zheng } 461418026caSFam Zheng 4628f44304cSDavid Hildenbrand static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host, 4638f44304cSDavid Hildenbrand size_t size, size_t max_size) 464418026caSFam Zheng { 465418026caSFam Zheng QEMUVFIOState *s = container_of(n, 
QEMUVFIOState, ram_notifier); 466521b97cdSPhilippe Mathieu-Daudé Error *local_err = NULL; 467082851a3SDavid Hildenbrand int ret; 468082851a3SDavid Hildenbrand 4698f44304cSDavid Hildenbrand trace_qemu_vfio_ram_block_added(s, host, max_size); 470521b97cdSPhilippe Mathieu-Daudé ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, &local_err); 471082851a3SDavid Hildenbrand if (ret) { 472521b97cdSPhilippe Mathieu-Daudé error_reportf_err(local_err, 473521b97cdSPhilippe Mathieu-Daudé "qemu_vfio_dma_map(%p, %zu) failed: ", 474521b97cdSPhilippe Mathieu-Daudé host, max_size); 475082851a3SDavid Hildenbrand } 476418026caSFam Zheng } 477418026caSFam Zheng 4788f44304cSDavid Hildenbrand static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host, 4798f44304cSDavid Hildenbrand size_t size, size_t max_size) 480418026caSFam Zheng { 481418026caSFam Zheng QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier); 482418026caSFam Zheng if (host) { 4838f44304cSDavid Hildenbrand trace_qemu_vfio_ram_block_removed(s, host, max_size); 484418026caSFam Zheng qemu_vfio_dma_unmap(s, host); 485418026caSFam Zheng } 486418026caSFam Zheng } 487418026caSFam Zheng 488418026caSFam Zheng static void qemu_vfio_open_common(QEMUVFIOState *s) 489418026caSFam Zheng { 490549b50a3SMarkus Armbruster qemu_mutex_init(&s->lock); 491418026caSFam Zheng s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added; 492418026caSFam Zheng s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed; 493418026caSFam Zheng s->low_water_mark = QEMU_VFIO_IOVA_MIN; 494418026caSFam Zheng s->high_water_mark = QEMU_VFIO_IOVA_MAX; 495082851a3SDavid Hildenbrand ram_block_notifier_add(&s->ram_notifier); 496418026caSFam Zheng } 497418026caSFam Zheng 498418026caSFam Zheng /** 499418026caSFam Zheng * Open a PCI device, e.g. "0000:00:01.0". 
 */
QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
{
    int r;
    QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);

    /*
     * VFIO may pin all memory inside mappings, resulting it in pinning
     * all memory inside RAM blocks unconditionally.
     */
    r = ram_block_discard_disable(true);
    if (r) {
        error_setg_errno(errp, -r, "Cannot set discarding of RAM broken");
        g_free(s);
        return NULL;
    }

    r = qemu_vfio_init_pci(s, device, errp);
    if (r) {
        ram_block_discard_disable(false);
        g_free(s);
        return NULL;
    }
    qemu_vfio_open_common(s);
    return s;
}

/* Trace every mapping currently tracked in @s (debugging aid). */
static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
{
    for (int i = 0; i < s->nr_mappings; ++i) {
        trace_qemu_vfio_dump_mapping(s->mappings[i].host,
                                     s->mappings[i].iova,
                                     s->mappings[i].size);
    }
}

/**
 * Find the mapping entry that contains [host, host + size) and set @index to
 * the position. If no entry contains it, @index is the position _after_ which
 * to insert the new mapping. IOW, it is the index of the largest element that
 * is smaller than @host, or -1 if no entry is.
 */
static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host,
                                           int *index)
{
    IOVAMapping *p = s->mappings;
    IOVAMapping *q = p ? p + s->nr_mappings - 1 : NULL;
    IOVAMapping *mid;
    trace_qemu_vfio_find_mapping(s, host);
    if (!p) {
        *index = -1;
        return NULL;
    }
    /* Binary search over the host-address-sorted mappings array */
    while (true) {
        mid = p + (q - p) / 2;
        if (mid == p) {
            break;
        }
        if (mid->host > host) {
            q = mid;
        } else if (mid->host < host) {
            p = mid;
        } else {
            break;
        }
    }
    /* Nudge mid so it is the last entry with ->host <= @host */
    if (mid->host > host) {
        mid--;
    } else if (mid < &s->mappings[s->nr_mappings - 1]
               && (mid + 1)->host <= host) {
        mid++;
    }
    *index = mid - &s->mappings[0];
    if (mid >= &s->mappings[0] &&
        mid->host <= host && mid->host + mid->size > host) {
        assert(mid < &s->mappings[s->nr_mappings]);
        return mid;
    }
    /* At this point *index + 1 is the right position to insert the new
     * mapping.*/
    return NULL;
}

/**
 * Allocate IOVA and create a new mapping record and insert it in @s.
 */
static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
                                          void *host, size_t size,
                                          int index, uint64_t iova)
{
    int shift;
    IOVAMapping m = {.host = host, .size = size, .iova = iova};
    IOVAMapping *insert;

    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size()));
    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size()));
    trace_qemu_vfio_new_mapping(s, host, size, index, iova);

    assert(index >= 0);
    s->nr_mappings++;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
    insert = &s->mappings[index];
    shift = s->nr_mappings - index - 1;
    if (shift) {
        /* Shift the tail right one slot so the array stays sorted */
        memmove(insert + 1, insert, shift * sizeof(s->mappings[0]));
    }
    *insert = m;
    return insert;
}

/* Do the DMA mapping with VFIO. */
static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
                                uint64_t iova, Error **errp)
{
    struct vfio_iommu_type1_dma_map dma_map = {
        .argsz = sizeof(dma_map),
        .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
        .iova = iova,
        .vaddr = (uintptr_t)host,
        .size = size,
    };
    trace_qemu_vfio_do_mapping(s, host, iova, size);

    if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
        error_setg_errno(errp, errno, "VFIO_MAP_DMA failed");
        return -errno;
    }
    return 0;
}

/**
 * Undo the DMA mapping from @s with VFIO, and remove from mapping list.
 */
static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
                                   Error **errp)
{
    int index;
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = mapping->iova,
        .size = mapping->size,
    };

    index = mapping - s->mappings;
    assert(mapping->size > 0);
    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size()));
    assert(index >= 0 && index < s->nr_mappings);
    /* The list entry is removed even if the kernel unmap fails */
    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
    }
    /* Close the gap left by the removed entry */
    memmove(mapping, &s->mappings[index + 1],
            sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
    s->nr_mappings--;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
}

/* Check if the mapping list is (ascending) ordered.
 */
static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
{
    int i;
    /* Debug-only consistency check: verifies the mapping list is sorted by
     * host address with no overlap.  Compiled out unless QEMU_VFIO_DEBUG. */
    if (QEMU_VFIO_DEBUG) {
        for (i = 0; i < s->nr_mappings - 1; ++i) {
            if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
                error_report("item %d not sorted!", i);
                qemu_vfio_dump_mappings(s);
                return false;
            }
            if (!(s->mappings[i].host + s->mappings[i].size <=
                  s->mappings[i + 1].host)) {
                error_report("item %d overlap with next!", i);
                qemu_vfio_dump_mappings(s);
                return false;
            }
        }
    }
    return true;
}

/*
 * Allocate a "fixed" (persistent) IOVA region of @size bytes from the low,
 * upward-growing area, skipping holes not listed in s->usable_iova_ranges.
 * On success, stores the start address in *iova, advances low_water_mark
 * past the allocation and returns true; otherwise sets @errp and returns
 * false (low_water_mark may still have been advanced past unusable holes).
 */
static bool qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size,
                                      uint64_t *iova, Error **errp)
{
    int i;

    for (i = 0; i < s->nb_iova_ranges; i++) {
        /* Skip usable ranges lying entirely below the current watermark. */
        if (s->usable_iova_ranges[i].end < s->low_water_mark) {
            continue;
        }
        s->low_water_mark =
            MAX(s->low_water_mark, s->usable_iova_ranges[i].start);

        /* The "+ 1 == 0" term handles uint64 wrap-around when the usable
         * range extends to the very top of the 64-bit address space
         * (end - low_water_mark + 1 overflows to 0). */
        if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
            s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
            *iova = s->low_water_mark;
            s->low_water_mark += size;
            return true;
        }
    }
    error_setg(errp, "fixed iova range not found");

    return false;
}

/*
 * Allocate a "temporary" IOVA region of @size bytes from the high,
 * downward-growing area, skipping holes not listed in
 * s->usable_iova_ranges (ranges are scanned from highest to lowest).
 * On success, stores the start address in *iova, lowers high_water_mark
 * to it and returns true; otherwise sets @errp and returns false.
 */
static bool qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size,
                                     uint64_t *iova, Error **errp)
{
    int i;

    for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
        /* Skip usable ranges lying entirely above the current watermark. */
        if (s->usable_iova_ranges[i].start > s->high_water_mark) {
            continue;
        }
        s->high_water_mark =
            MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);

        /* As in qemu_vfio_find_fixed_iova(), the "+ 1 == 0" term handles
         * uint64 wrap-around at the edge of the address space. */
        if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
            s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
            *iova = s->high_water_mark - size;
            s->high_water_mark = *iova;
            return true;
        }
    }
    error_setg(errp, "temporary iova range not found");

    return false;
}

/**
 * qemu_vfio_water_mark_reached:
 *
 * Returns %true if high watermark has been reached, %false otherwise
 * (i.e. true when the free area between the water marks is too small
 * to hold @size bytes).
 */
static bool qemu_vfio_water_mark_reached(QEMUVFIOState *s, size_t size,
                                         Error **errp)
{
    if (s->high_water_mark - s->low_water_mark + 1 < size) {
        error_setg(errp, "iova exhausted (water mark reached)");
        return true;
    }
    return false;
}

/* Map [host, host + size) area into a contiguous IOVA address space, and store
 * the result in @iova if not NULL. The caller needs to make sure the area is
 * aligned to page size, and mustn't overlap with existing mapping areas (split
 * mapping status within this area is not allowed).
 * Returns 0 on success, a negative errno value on failure.
 */
int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
                      bool temporary, uint64_t *iova, Error **errp)
{
    int index;
    IOVAMapping *mapping;
    uint64_t iova0;

    assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size()));
    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
    trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
    QEMU_LOCK_GUARD(&s->lock);
    mapping = qemu_vfio_find_mapping(s, host, &index);
    if (mapping) {
        /* Area already covered by a persistent mapping: reuse its IOVA,
         * offset by @host's position inside the mapped region. */
        iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
    } else {
        int ret;

        if (qemu_vfio_water_mark_reached(s, size, errp)) {
            return -ENOMEM;
        }
        if (!temporary) {
            /* Persistent mapping: allocate from the low (fixed) region and
             * track it in s->mappings so later calls can reuse it. */
            if (!qemu_vfio_find_fixed_iova(s, size, &iova0, errp)) {
                return -ENOMEM;
            }

            mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
            assert(qemu_vfio_verify_mappings(s));
            ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
            if (ret < 0) {
                /* Drop the bookkeeping entry added above; the kernel-side
                 * mapping was presumably never established (errp NULL: the
                 * original map error is the one worth reporting). */
                qemu_vfio_undo_mapping(s, mapping, NULL);
                return ret;
            }
            qemu_vfio_dump_mappings(s);
        } else {
            /* Temporary mapping: allocate from the high region; not tracked
             * in s->mappings — released wholesale by
             * qemu_vfio_dma_reset_temporary(). */
            if (!qemu_vfio_find_temp_iova(s, size, &iova0, errp)) {
                return -ENOMEM;
            }
            ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
            if (ret < 0) {
                return ret;
            }
        }
    }
    trace_qemu_vfio_dma_mapped(s, host, iova0, size);
    if (iova) {
        *iova = iova0;
    }
    return 0;
}

/* Reset the high watermark and free all "temporary" mappings.
*/ 799418026caSFam Zheng int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s) 800418026caSFam Zheng { 801418026caSFam Zheng struct vfio_iommu_type1_dma_unmap unmap = { 802418026caSFam Zheng .argsz = sizeof(unmap), 803418026caSFam Zheng .flags = 0, 804418026caSFam Zheng .iova = s->high_water_mark, 805418026caSFam Zheng .size = QEMU_VFIO_IOVA_MAX - s->high_water_mark, 806418026caSFam Zheng }; 807418026caSFam Zheng trace_qemu_vfio_dma_reset_temporary(s); 8086e8a355dSDaniel Brodsky QEMU_LOCK_GUARD(&s->lock); 809418026caSFam Zheng if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) { 810b09d51c9SMichal Privoznik error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); 811418026caSFam Zheng return -errno; 812418026caSFam Zheng } 813418026caSFam Zheng s->high_water_mark = QEMU_VFIO_IOVA_MAX; 814418026caSFam Zheng return 0; 815418026caSFam Zheng } 816418026caSFam Zheng 817418026caSFam Zheng /* Unmapping the whole area that was previously mapped with 818418026caSFam Zheng * qemu_vfio_dma_map(). */ 819418026caSFam Zheng void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host) 820418026caSFam Zheng { 821418026caSFam Zheng int index = 0; 822418026caSFam Zheng IOVAMapping *m; 823418026caSFam Zheng 824418026caSFam Zheng if (!host) { 825418026caSFam Zheng return; 826418026caSFam Zheng } 827418026caSFam Zheng 828418026caSFam Zheng trace_qemu_vfio_dma_unmap(s, host); 829a990858bSPhilippe Mathieu-Daudé QEMU_LOCK_GUARD(&s->lock); 830418026caSFam Zheng m = qemu_vfio_find_mapping(s, host, &index); 831418026caSFam Zheng if (!m) { 832a990858bSPhilippe Mathieu-Daudé return; 833418026caSFam Zheng } 834418026caSFam Zheng qemu_vfio_undo_mapping(s, m, NULL); 835418026caSFam Zheng } 836418026caSFam Zheng 837418026caSFam Zheng static void qemu_vfio_reset(QEMUVFIOState *s) 838418026caSFam Zheng { 839418026caSFam Zheng ioctl(s->device, VFIO_DEVICE_RESET); 840418026caSFam Zheng } 841418026caSFam Zheng 842418026caSFam Zheng /* Close and free the VFIO resources. 
*/ 843418026caSFam Zheng void qemu_vfio_close(QEMUVFIOState *s) 844418026caSFam Zheng { 845418026caSFam Zheng int i; 846418026caSFam Zheng 847418026caSFam Zheng if (!s) { 848418026caSFam Zheng return; 849418026caSFam Zheng } 8501f0fea38SStefan Hajnoczi 8511f0fea38SStefan Hajnoczi ram_block_notifier_remove(&s->ram_notifier); 8521f0fea38SStefan Hajnoczi 853418026caSFam Zheng for (i = 0; i < s->nr_mappings; ++i) { 854418026caSFam Zheng qemu_vfio_undo_mapping(s, &s->mappings[i], NULL); 855418026caSFam Zheng } 8561f0fea38SStefan Hajnoczi 8574487d420SEric Auger g_free(s->usable_iova_ranges); 8584487d420SEric Auger s->nb_iova_ranges = 0; 859418026caSFam Zheng qemu_vfio_reset(s); 860418026caSFam Zheng close(s->device); 861418026caSFam Zheng close(s->group); 862418026caSFam Zheng close(s->container); 863b430b513SDavid Hildenbrand ram_block_discard_disable(false); 864418026caSFam Zheng } 865