xref: /openbmc/qemu/util/vfio-helpers.c (revision a4bc212add9bd9d7a7baaa095f875d9212eb1fc9)
1418026caSFam Zheng /*
2418026caSFam Zheng  * VFIO utility
3418026caSFam Zheng  *
4418026caSFam Zheng  * Copyright 2016 - 2018 Red Hat, Inc.
5418026caSFam Zheng  *
6418026caSFam Zheng  * Authors:
7418026caSFam Zheng  *   Fam Zheng <famz@redhat.com>
8418026caSFam Zheng  *
9418026caSFam Zheng  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10418026caSFam Zheng  * See the COPYING file in the top-level directory.
11418026caSFam Zheng  */
12418026caSFam Zheng 
13418026caSFam Zheng #include "qemu/osdep.h"
14418026caSFam Zheng #include <sys/ioctl.h>
15418026caSFam Zheng #include <linux/vfio.h>
16418026caSFam Zheng #include "qapi/error.h"
17418026caSFam Zheng #include "exec/ramlist.h"
18418026caSFam Zheng #include "exec/cpu-common.h"
19418026caSFam Zheng #include "trace.h"
20418026caSFam Zheng #include "qemu/error-report.h"
21418026caSFam Zheng #include "standard-headers/linux/pci_regs.h"
22418026caSFam Zheng #include "qemu/event_notifier.h"
23418026caSFam Zheng #include "qemu/vfio-helpers.h"
246e8a355dSDaniel Brodsky #include "qemu/lockable.h"
25418026caSFam Zheng #include "trace.h"
26418026caSFam Zheng 
/* Set to non-zero to enable the debug-only mapping dump/verify code. */
#define QEMU_VFIO_DEBUG 0

/* Lowest IOVA handed out; addresses below it are reserved as invalid. */
#define QEMU_VFIO_IOVA_MIN 0x10000ULL

/*
 * XXX: Once VFIO exposes the iova bit width in the IOMMU capability
 * interface, we can use a runtime limit; alternatively it's also possible
 * to do platform specific detection by reading sysfs entries. Until then,
 * 39 is a safe bet.
 */
#define QEMU_VFIO_IOVA_MAX (1ULL << 39)
35418026caSFam Zheng 
/* One record of the mapping table: a host address range and its IOVA. */
typedef struct {
    void *host;     /* Page aligned host address. */
    size_t size;    /* Length of the range in bytes. */
    uint64_t iova;  /* IO virtual address the range is mapped at. */
} IOVAMapping;
42418026caSFam Zheng 
/*
 * A range of IOVAs usable for DMA mappings.
 * NOTE(review): @end looks inclusive (the legacy default range ends at
 * QEMU_VFIO_IOVA_MAX - 1) - confirm against the users of this struct.
 */
struct IOVARange {
    uint64_t start;
    uint64_t end;
};
474487d420SEric Auger 
48418026caSFam Zheng struct QEMUVFIOState {
49418026caSFam Zheng     QemuMutex lock;
50418026caSFam Zheng 
51418026caSFam Zheng     /* These fields are protected by BQL */
52418026caSFam Zheng     int container;
53418026caSFam Zheng     int group;
54418026caSFam Zheng     int device;
55418026caSFam Zheng     RAMBlockNotifier ram_notifier;
56418026caSFam Zheng     struct vfio_region_info config_region_info, bar_region_info[6];
574487d420SEric Auger     struct IOVARange *usable_iova_ranges;
584487d420SEric Auger     uint8_t nb_iova_ranges;
59418026caSFam Zheng 
60418026caSFam Zheng     /* These fields are protected by @lock */
61418026caSFam Zheng     /* VFIO's IO virtual address space is managed by splitting into a few
62418026caSFam Zheng      * sections:
63418026caSFam Zheng      *
64418026caSFam Zheng      * ---------------       <= 0
65418026caSFam Zheng      * |xxxxxxxxxxxxx|
66418026caSFam Zheng      * |-------------|       <= QEMU_VFIO_IOVA_MIN
67418026caSFam Zheng      * |             |
68418026caSFam Zheng      * |    Fixed    |
69418026caSFam Zheng      * |             |
70418026caSFam Zheng      * |-------------|       <= low_water_mark
71418026caSFam Zheng      * |             |
72418026caSFam Zheng      * |    Free     |
73418026caSFam Zheng      * |             |
74418026caSFam Zheng      * |-------------|       <= high_water_mark
75418026caSFam Zheng      * |             |
76418026caSFam Zheng      * |    Temp     |
77418026caSFam Zheng      * |             |
78418026caSFam Zheng      * |-------------|       <= QEMU_VFIO_IOVA_MAX
79418026caSFam Zheng      * |xxxxxxxxxxxxx|
80418026caSFam Zheng      * |xxxxxxxxxxxxx|
81418026caSFam Zheng      * ---------------
82418026caSFam Zheng      *
83418026caSFam Zheng      * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
84418026caSFam Zheng      *
85418026caSFam Zheng      * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
86418026caSFam Zheng      *   [QEMU_VFIO_IOVA_MIN, low_water_mark).  Once allocated they will not be
87418026caSFam Zheng      *   reclaimed - low_water_mark never shrinks;
88418026caSFam Zheng      *
89418026caSFam Zheng      * - IOVAs in range [low_water_mark, high_water_mark) are free;
90418026caSFam Zheng      *
91418026caSFam Zheng      * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
92418026caSFam Zheng      *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
93418026caSFam Zheng      *   is recycled. The caller should make sure I/O's depending on these
94418026caSFam Zheng      *   mappings are completed before calling.
95418026caSFam Zheng      **/
96418026caSFam Zheng     uint64_t low_water_mark;
97418026caSFam Zheng     uint64_t high_water_mark;
98418026caSFam Zheng     IOVAMapping *mappings;
99418026caSFam Zheng     int nr_mappings;
100418026caSFam Zheng };
101418026caSFam Zheng 
102418026caSFam Zheng /**
103418026caSFam Zheng  * Find group file by PCI device address as specified @device, and return the
104418026caSFam Zheng  * path. The returned string is owned by caller and should be g_free'ed later.
105418026caSFam Zheng  */
106418026caSFam Zheng static char *sysfs_find_group_file(const char *device, Error **errp)
107418026caSFam Zheng {
108418026caSFam Zheng     char *sysfs_link;
109418026caSFam Zheng     char *sysfs_group;
110418026caSFam Zheng     char *p;
111418026caSFam Zheng     char *path = NULL;
112418026caSFam Zheng 
113418026caSFam Zheng     sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
11478d8c99eSPaolo Bonzini     sysfs_group = g_malloc0(PATH_MAX);
115418026caSFam Zheng     if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
116418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
117418026caSFam Zheng         goto out;
118418026caSFam Zheng     }
119418026caSFam Zheng     p = strrchr(sysfs_group, '/');
120418026caSFam Zheng     if (!p) {
121418026caSFam Zheng         error_setg(errp, "Failed to find iommu group number");
122418026caSFam Zheng         goto out;
123418026caSFam Zheng     }
124418026caSFam Zheng 
125418026caSFam Zheng     path = g_strdup_printf("/dev/vfio/%s", p + 1);
126418026caSFam Zheng out:
127418026caSFam Zheng     g_free(sysfs_link);
128418026caSFam Zheng     g_free(sysfs_group);
129418026caSFam Zheng     return path;
130418026caSFam Zheng }
131418026caSFam Zheng 
132418026caSFam Zheng static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
133418026caSFam Zheng {
134418026caSFam Zheng     assert(index >= 0 && index < ARRAY_SIZE(s->bar_region_info));
135418026caSFam Zheng }
136418026caSFam Zheng 
137418026caSFam Zheng static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
138418026caSFam Zheng {
139418026caSFam Zheng     assert_bar_index_valid(s, index);
140418026caSFam Zheng     s->bar_region_info[index] = (struct vfio_region_info) {
141418026caSFam Zheng         .index = VFIO_PCI_BAR0_REGION_INDEX + index,
142418026caSFam Zheng         .argsz = sizeof(struct vfio_region_info),
143418026caSFam Zheng     };
144418026caSFam Zheng     if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->bar_region_info[index])) {
145418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get BAR region info");
146418026caSFam Zheng         return -errno;
147418026caSFam Zheng     }
148418026caSFam Zheng 
149418026caSFam Zheng     return 0;
150418026caSFam Zheng }
151418026caSFam Zheng 
152418026caSFam Zheng /**
153418026caSFam Zheng  * Map a PCI bar area.
154418026caSFam Zheng  */
155418026caSFam Zheng void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
156b02c01a5SPhilippe Mathieu-Daudé                             uint64_t offset, uint64_t size, int prot,
157418026caSFam Zheng                             Error **errp)
158418026caSFam Zheng {
159418026caSFam Zheng     void *p;
160418026caSFam Zheng     assert_bar_index_valid(s, index);
161418026caSFam Zheng     p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
162b02c01a5SPhilippe Mathieu-Daudé              prot, MAP_SHARED,
163418026caSFam Zheng              s->device, s->bar_region_info[index].offset + offset);
164418026caSFam Zheng     if (p == MAP_FAILED) {
165418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to map BAR region");
166418026caSFam Zheng         p = NULL;
167418026caSFam Zheng     }
168418026caSFam Zheng     return p;
169418026caSFam Zheng }
170418026caSFam Zheng 
171418026caSFam Zheng /**
172418026caSFam Zheng  * Unmap a PCI bar area.
173418026caSFam Zheng  */
174418026caSFam Zheng void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
175418026caSFam Zheng                              uint64_t offset, uint64_t size)
176418026caSFam Zheng {
177418026caSFam Zheng     if (bar) {
178418026caSFam Zheng         munmap(bar, MIN(size, s->bar_region_info[index].size - offset));
179418026caSFam Zheng     }
180418026caSFam Zheng }
181418026caSFam Zheng 
182418026caSFam Zheng /**
183a6da793aSPhilippe Mathieu-Daudé  * Initialize device IRQ with @irq_type and register an event notifier.
184418026caSFam Zheng  */
185418026caSFam Zheng int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
186418026caSFam Zheng                            int irq_type, Error **errp)
187418026caSFam Zheng {
188418026caSFam Zheng     int r;
189418026caSFam Zheng     struct vfio_irq_set *irq_set;
190418026caSFam Zheng     size_t irq_set_size;
191418026caSFam Zheng     struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
192418026caSFam Zheng 
193418026caSFam Zheng     irq_info.index = irq_type;
194418026caSFam Zheng     if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
195418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get device interrupt info");
196418026caSFam Zheng         return -errno;
197418026caSFam Zheng     }
198418026caSFam Zheng     if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
199418026caSFam Zheng         error_setg(errp, "Device interrupt doesn't support eventfd");
200418026caSFam Zheng         return -EINVAL;
201418026caSFam Zheng     }
202418026caSFam Zheng 
203418026caSFam Zheng     irq_set_size = sizeof(*irq_set) + sizeof(int);
204418026caSFam Zheng     irq_set = g_malloc0(irq_set_size);
205418026caSFam Zheng 
206418026caSFam Zheng     /* Get to a known IRQ state */
207418026caSFam Zheng     *irq_set = (struct vfio_irq_set) {
208418026caSFam Zheng         .argsz = irq_set_size,
209418026caSFam Zheng         .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
210418026caSFam Zheng         .index = irq_info.index,
211418026caSFam Zheng         .start = 0,
212418026caSFam Zheng         .count = 1,
213418026caSFam Zheng     };
214418026caSFam Zheng 
215418026caSFam Zheng     *(int *)&irq_set->data = event_notifier_get_fd(e);
216418026caSFam Zheng     r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
217418026caSFam Zheng     g_free(irq_set);
218418026caSFam Zheng     if (r) {
219418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to setup device interrupt");
220418026caSFam Zheng         return -errno;
221418026caSFam Zheng     }
222418026caSFam Zheng     return 0;
223418026caSFam Zheng }
224418026caSFam Zheng 
225418026caSFam Zheng static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
226418026caSFam Zheng                                      int size, int ofs)
227418026caSFam Zheng {
228418026caSFam Zheng     int ret;
229418026caSFam Zheng 
230418026caSFam Zheng     do {
231418026caSFam Zheng         ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
232418026caSFam Zheng     } while (ret == -1 && errno == EINTR);
233418026caSFam Zheng     return ret == size ? 0 : -errno;
234418026caSFam Zheng }
235418026caSFam Zheng 
236418026caSFam Zheng static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int ofs)
237418026caSFam Zheng {
238418026caSFam Zheng     int ret;
239418026caSFam Zheng 
240418026caSFam Zheng     do {
241418026caSFam Zheng         ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
242418026caSFam Zheng     } while (ret == -1 && errno == EINTR);
243418026caSFam Zheng     return ret == size ? 0 : -errno;
244418026caSFam Zheng }
245418026caSFam Zheng 
2464487d420SEric Auger static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
2474487d420SEric Auger {
2484487d420SEric Auger     struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
2494487d420SEric Auger     struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
2504487d420SEric Auger     struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
2514487d420SEric Auger     int i;
2524487d420SEric Auger 
2534487d420SEric Auger     while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
2544487d420SEric Auger         if (!cap->next) {
2554487d420SEric Auger             return;
2564487d420SEric Auger         }
2574487d420SEric Auger         cap = (struct vfio_info_cap_header *)(buf + cap->next);
2584487d420SEric Auger     }
2594487d420SEric Auger 
2604487d420SEric Auger     cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
2614487d420SEric Auger 
2624487d420SEric Auger     s->nb_iova_ranges = cap_iova_range->nr_iovas;
2634487d420SEric Auger     if (s->nb_iova_ranges > 1) {
2644487d420SEric Auger         s->usable_iova_ranges =
2654487d420SEric Auger             g_realloc(s->usable_iova_ranges,
2664487d420SEric Auger                       s->nb_iova_ranges * sizeof(struct IOVARange));
2674487d420SEric Auger     }
2684487d420SEric Auger 
2694487d420SEric Auger     for (i = 0; i < s->nb_iova_ranges; i++) {
2704487d420SEric Auger         s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
2714487d420SEric Auger         s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
2724487d420SEric Auger     }
2734487d420SEric Auger }
2744487d420SEric Auger 
275418026caSFam Zheng static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
276418026caSFam Zheng                               Error **errp)
277418026caSFam Zheng {
278418026caSFam Zheng     int ret;
279418026caSFam Zheng     int i;
280418026caSFam Zheng     uint16_t pci_cmd;
281418026caSFam Zheng     struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
2824487d420SEric Auger     struct vfio_iommu_type1_info *iommu_info = NULL;
2834487d420SEric Auger     size_t iommu_info_size = sizeof(*iommu_info);
284418026caSFam Zheng     struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
285418026caSFam Zheng     char *group_file = NULL;
286418026caSFam Zheng 
2874487d420SEric Auger     s->usable_iova_ranges = NULL;
2884487d420SEric Auger 
289418026caSFam Zheng     /* Create a new container */
290418026caSFam Zheng     s->container = open("/dev/vfio/vfio", O_RDWR);
291418026caSFam Zheng 
292418026caSFam Zheng     if (s->container == -1) {
293418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to open /dev/vfio/vfio");
294418026caSFam Zheng         return -errno;
295418026caSFam Zheng     }
296418026caSFam Zheng     if (ioctl(s->container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) {
297418026caSFam Zheng         error_setg(errp, "Invalid VFIO version");
298418026caSFam Zheng         ret = -EINVAL;
299418026caSFam Zheng         goto fail_container;
300418026caSFam Zheng     }
301418026caSFam Zheng 
302418026caSFam Zheng     if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
303*a4bc212aSPhilippe Mathieu-Daudé         error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported");
304418026caSFam Zheng         ret = -EINVAL;
305418026caSFam Zheng         goto fail_container;
306418026caSFam Zheng     }
307418026caSFam Zheng 
308418026caSFam Zheng     /* Open the group */
309418026caSFam Zheng     group_file = sysfs_find_group_file(device, errp);
310418026caSFam Zheng     if (!group_file) {
311418026caSFam Zheng         ret = -EINVAL;
312418026caSFam Zheng         goto fail_container;
313418026caSFam Zheng     }
314418026caSFam Zheng 
315418026caSFam Zheng     s->group = open(group_file, O_RDWR);
316418026caSFam Zheng     if (s->group == -1) {
317418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to open VFIO group file: %s",
318418026caSFam Zheng                          group_file);
319418026caSFam Zheng         g_free(group_file);
320418026caSFam Zheng         ret = -errno;
321418026caSFam Zheng         goto fail_container;
322418026caSFam Zheng     }
323418026caSFam Zheng     g_free(group_file);
324418026caSFam Zheng 
325418026caSFam Zheng     /* Test the group is viable and available */
326418026caSFam Zheng     if (ioctl(s->group, VFIO_GROUP_GET_STATUS, &group_status)) {
327418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get VFIO group status");
328418026caSFam Zheng         ret = -errno;
329418026caSFam Zheng         goto fail;
330418026caSFam Zheng     }
331418026caSFam Zheng 
332418026caSFam Zheng     if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
333418026caSFam Zheng         error_setg(errp, "VFIO group is not viable");
334418026caSFam Zheng         ret = -EINVAL;
335418026caSFam Zheng         goto fail;
336418026caSFam Zheng     }
337418026caSFam Zheng 
338418026caSFam Zheng     /* Add the group to the container */
339418026caSFam Zheng     if (ioctl(s->group, VFIO_GROUP_SET_CONTAINER, &s->container)) {
340418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to add group to VFIO container");
341418026caSFam Zheng         ret = -errno;
342418026caSFam Zheng         goto fail;
343418026caSFam Zheng     }
344418026caSFam Zheng 
345418026caSFam Zheng     /* Enable the IOMMU model we want */
346418026caSFam Zheng     if (ioctl(s->container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) {
347418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to set VFIO IOMMU type");
348418026caSFam Zheng         ret = -errno;
349418026caSFam Zheng         goto fail;
350418026caSFam Zheng     }
351418026caSFam Zheng 
3524487d420SEric Auger     iommu_info = g_malloc0(iommu_info_size);
3534487d420SEric Auger     iommu_info->argsz = iommu_info_size;
3544487d420SEric Auger 
355418026caSFam Zheng     /* Get additional IOMMU info */
3564487d420SEric Auger     if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
357418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get IOMMU info");
358418026caSFam Zheng         ret = -errno;
359418026caSFam Zheng         goto fail;
360418026caSFam Zheng     }
361418026caSFam Zheng 
3624487d420SEric Auger     /*
3634487d420SEric Auger      * if the kernel does not report usable IOVA regions, choose
3644487d420SEric Auger      * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
3654487d420SEric Auger      */
3664487d420SEric Auger     s->nb_iova_ranges = 1;
3674487d420SEric Auger     s->usable_iova_ranges = g_new0(struct IOVARange, 1);
3684487d420SEric Auger     s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
3694487d420SEric Auger     s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
3704487d420SEric Auger 
3714487d420SEric Auger     if (iommu_info->argsz > iommu_info_size) {
3724487d420SEric Auger         iommu_info_size = iommu_info->argsz;
3734487d420SEric Auger         iommu_info = g_realloc(iommu_info, iommu_info_size);
3744487d420SEric Auger         if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
3754487d420SEric Auger             ret = -errno;
3764487d420SEric Auger             goto fail;
3774487d420SEric Auger         }
3784487d420SEric Auger         collect_usable_iova_ranges(s, iommu_info);
3794487d420SEric Auger     }
3804487d420SEric Auger 
381418026caSFam Zheng     s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
382418026caSFam Zheng 
383418026caSFam Zheng     if (s->device < 0) {
384418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get device fd");
385418026caSFam Zheng         ret = -errno;
386418026caSFam Zheng         goto fail;
387418026caSFam Zheng     }
388418026caSFam Zheng 
389418026caSFam Zheng     /* Test and setup the device */
390418026caSFam Zheng     if (ioctl(s->device, VFIO_DEVICE_GET_INFO, &device_info)) {
391418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get device info");
392418026caSFam Zheng         ret = -errno;
393418026caSFam Zheng         goto fail;
394418026caSFam Zheng     }
395418026caSFam Zheng 
396418026caSFam Zheng     if (device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) {
397418026caSFam Zheng         error_setg(errp, "Invalid device regions");
398418026caSFam Zheng         ret = -EINVAL;
399418026caSFam Zheng         goto fail;
400418026caSFam Zheng     }
401418026caSFam Zheng 
402418026caSFam Zheng     s->config_region_info = (struct vfio_region_info) {
403418026caSFam Zheng         .index = VFIO_PCI_CONFIG_REGION_INDEX,
404418026caSFam Zheng         .argsz = sizeof(struct vfio_region_info),
405418026caSFam Zheng     };
406418026caSFam Zheng     if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->config_region_info)) {
407418026caSFam Zheng         error_setg_errno(errp, errno, "Failed to get config region info");
408418026caSFam Zheng         ret = -errno;
409418026caSFam Zheng         goto fail;
410418026caSFam Zheng     }
411418026caSFam Zheng 
4129e722ebcSLi Qiang     for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
413418026caSFam Zheng         ret = qemu_vfio_pci_init_bar(s, i, errp);
414418026caSFam Zheng         if (ret) {
415418026caSFam Zheng             goto fail;
416418026caSFam Zheng         }
417418026caSFam Zheng     }
418418026caSFam Zheng 
419418026caSFam Zheng     /* Enable bus master */
420418026caSFam Zheng     ret = qemu_vfio_pci_read_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
421418026caSFam Zheng     if (ret) {
422418026caSFam Zheng         goto fail;
423418026caSFam Zheng     }
424418026caSFam Zheng     pci_cmd |= PCI_COMMAND_MASTER;
425418026caSFam Zheng     ret = qemu_vfio_pci_write_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
426418026caSFam Zheng     if (ret) {
427418026caSFam Zheng         goto fail;
428418026caSFam Zheng     }
4294487d420SEric Auger     g_free(iommu_info);
430418026caSFam Zheng     return 0;
431418026caSFam Zheng fail:
4324487d420SEric Auger     g_free(s->usable_iova_ranges);
4334487d420SEric Auger     s->usable_iova_ranges = NULL;
4344487d420SEric Auger     s->nb_iova_ranges = 0;
4354487d420SEric Auger     g_free(iommu_info);
436418026caSFam Zheng     close(s->group);
437418026caSFam Zheng fail_container:
438418026caSFam Zheng     close(s->container);
439418026caSFam Zheng     return ret;
440418026caSFam Zheng }
441418026caSFam Zheng 
442418026caSFam Zheng static void qemu_vfio_ram_block_added(RAMBlockNotifier *n,
443418026caSFam Zheng                                       void *host, size_t size)
444418026caSFam Zheng {
445418026caSFam Zheng     QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
446418026caSFam Zheng     trace_qemu_vfio_ram_block_added(s, host, size);
447418026caSFam Zheng     qemu_vfio_dma_map(s, host, size, false, NULL);
448418026caSFam Zheng }
449418026caSFam Zheng 
450418026caSFam Zheng static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n,
451418026caSFam Zheng                                         void *host, size_t size)
452418026caSFam Zheng {
453418026caSFam Zheng     QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
454418026caSFam Zheng     if (host) {
455418026caSFam Zheng         trace_qemu_vfio_ram_block_removed(s, host, size);
456418026caSFam Zheng         qemu_vfio_dma_unmap(s, host);
457418026caSFam Zheng     }
458418026caSFam Zheng }
459418026caSFam Zheng 
460754cb9c0SYury Kotov static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque)
461418026caSFam Zheng {
462754cb9c0SYury Kotov     void *host_addr = qemu_ram_get_host_addr(rb);
463754cb9c0SYury Kotov     ram_addr_t length = qemu_ram_get_used_length(rb);
464418026caSFam Zheng     int ret;
465418026caSFam Zheng     QEMUVFIOState *s = opaque;
466418026caSFam Zheng 
467418026caSFam Zheng     if (!host_addr) {
468418026caSFam Zheng         return 0;
469418026caSFam Zheng     }
470418026caSFam Zheng     ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL);
471418026caSFam Zheng     if (ret) {
472418026caSFam Zheng         fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRId64 "\n",
473418026caSFam Zheng                 host_addr, (uint64_t)length);
474418026caSFam Zheng     }
475418026caSFam Zheng     return 0;
476418026caSFam Zheng }
477418026caSFam Zheng 
478418026caSFam Zheng static void qemu_vfio_open_common(QEMUVFIOState *s)
479418026caSFam Zheng {
480549b50a3SMarkus Armbruster     qemu_mutex_init(&s->lock);
481418026caSFam Zheng     s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
482418026caSFam Zheng     s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
483418026caSFam Zheng     ram_block_notifier_add(&s->ram_notifier);
484418026caSFam Zheng     s->low_water_mark = QEMU_VFIO_IOVA_MIN;
485418026caSFam Zheng     s->high_water_mark = QEMU_VFIO_IOVA_MAX;
486418026caSFam Zheng     qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
487418026caSFam Zheng }
488418026caSFam Zheng 
489418026caSFam Zheng /**
490418026caSFam Zheng  * Open a PCI device, e.g. "0000:00:01.0".
491418026caSFam Zheng  */
492418026caSFam Zheng QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
493418026caSFam Zheng {
494418026caSFam Zheng     int r;
495418026caSFam Zheng     QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);
496418026caSFam Zheng 
497418026caSFam Zheng     r = qemu_vfio_init_pci(s, device, errp);
498418026caSFam Zheng     if (r) {
499418026caSFam Zheng         g_free(s);
500418026caSFam Zheng         return NULL;
501418026caSFam Zheng     }
502418026caSFam Zheng     qemu_vfio_open_common(s);
503418026caSFam Zheng     return s;
504418026caSFam Zheng }
505418026caSFam Zheng 
506418026caSFam Zheng static void qemu_vfio_dump_mapping(IOVAMapping *m)
507418026caSFam Zheng {
508418026caSFam Zheng     if (QEMU_VFIO_DEBUG) {
509418026caSFam Zheng         printf("  vfio mapping %p %" PRIx64 " to %" PRIx64 "\n", m->host,
510418026caSFam Zheng                (uint64_t)m->size, (uint64_t)m->iova);
511418026caSFam Zheng     }
512418026caSFam Zheng }
513418026caSFam Zheng 
514418026caSFam Zheng static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
515418026caSFam Zheng {
516418026caSFam Zheng     int i;
517418026caSFam Zheng 
518418026caSFam Zheng     if (QEMU_VFIO_DEBUG) {
519418026caSFam Zheng         printf("vfio mappings\n");
520418026caSFam Zheng         for (i = 0; i < s->nr_mappings; ++i) {
521418026caSFam Zheng             qemu_vfio_dump_mapping(&s->mappings[i]);
522418026caSFam Zheng         }
523418026caSFam Zheng     }
524418026caSFam Zheng }
525418026caSFam Zheng 
526418026caSFam Zheng /**
527418026caSFam Zheng  * Find the mapping entry that contains [host, host + size) and set @index to
528418026caSFam Zheng  * the position. If no entry contains it, @index is the position _after_ which
529418026caSFam Zheng  * to insert the new mapping. IOW, it is the index of the largest element that
530418026caSFam Zheng  * is smaller than @host, or -1 if no entry is.
531418026caSFam Zheng  */
532418026caSFam Zheng static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host,
533418026caSFam Zheng                                            int *index)
534418026caSFam Zheng {
535418026caSFam Zheng     IOVAMapping *p = s->mappings;
536418026caSFam Zheng     IOVAMapping *q = p ? p + s->nr_mappings - 1 : NULL;
537418026caSFam Zheng     IOVAMapping *mid;
538418026caSFam Zheng     trace_qemu_vfio_find_mapping(s, host);
539418026caSFam Zheng     if (!p) {
540418026caSFam Zheng         *index = -1;
541418026caSFam Zheng         return NULL;
542418026caSFam Zheng     }
543418026caSFam Zheng     while (true) {
544418026caSFam Zheng         mid = p + (q - p) / 2;
545418026caSFam Zheng         if (mid == p) {
546418026caSFam Zheng             break;
547418026caSFam Zheng         }
548418026caSFam Zheng         if (mid->host > host) {
549418026caSFam Zheng             q = mid;
550418026caSFam Zheng         } else if (mid->host < host) {
551418026caSFam Zheng             p = mid;
552418026caSFam Zheng         } else {
553418026caSFam Zheng             break;
554418026caSFam Zheng         }
555418026caSFam Zheng     }
556418026caSFam Zheng     if (mid->host > host) {
557418026caSFam Zheng         mid--;
558418026caSFam Zheng     } else if (mid < &s->mappings[s->nr_mappings - 1]
559418026caSFam Zheng                && (mid + 1)->host <= host) {
560418026caSFam Zheng         mid++;
561418026caSFam Zheng     }
562418026caSFam Zheng     *index = mid - &s->mappings[0];
563418026caSFam Zheng     if (mid >= &s->mappings[0] &&
564418026caSFam Zheng         mid->host <= host && mid->host + mid->size > host) {
565418026caSFam Zheng         assert(mid < &s->mappings[s->nr_mappings]);
566418026caSFam Zheng         return mid;
567418026caSFam Zheng     }
568418026caSFam Zheng     /* At this point *index + 1 is the right position to insert the new
569418026caSFam Zheng      * mapping.*/
570418026caSFam Zheng     return NULL;
571418026caSFam Zheng }
572418026caSFam Zheng 
573418026caSFam Zheng /**
574a6da793aSPhilippe Mathieu-Daudé  * Allocate IOVA and create a new mapping record and insert it in @s.
575418026caSFam Zheng  */
576418026caSFam Zheng static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
577418026caSFam Zheng                                           void *host, size_t size,
578418026caSFam Zheng                                           int index, uint64_t iova)
579418026caSFam Zheng {
580418026caSFam Zheng     int shift;
581418026caSFam Zheng     IOVAMapping m = {.host = host, .size = size, .iova = iova};
582418026caSFam Zheng     IOVAMapping *insert;
583418026caSFam Zheng 
584038adc2fSWei Yang     assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
585038adc2fSWei Yang     assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
586038adc2fSWei Yang     assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
587418026caSFam Zheng     trace_qemu_vfio_new_mapping(s, host, size, index, iova);
588418026caSFam Zheng 
589418026caSFam Zheng     assert(index >= 0);
590418026caSFam Zheng     s->nr_mappings++;
591d29eb678SOlaf Hering     s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
592418026caSFam Zheng     insert = &s->mappings[index];
593418026caSFam Zheng     shift = s->nr_mappings - index - 1;
594418026caSFam Zheng     if (shift) {
595418026caSFam Zheng         memmove(insert + 1, insert, shift * sizeof(s->mappings[0]));
596418026caSFam Zheng     }
597418026caSFam Zheng     *insert = m;
598418026caSFam Zheng     return insert;
599418026caSFam Zheng }
600418026caSFam Zheng 
601418026caSFam Zheng /* Do the DMA mapping with VFIO. */
602418026caSFam Zheng static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
603418026caSFam Zheng                                 uint64_t iova)
604418026caSFam Zheng {
605418026caSFam Zheng     struct vfio_iommu_type1_dma_map dma_map = {
606418026caSFam Zheng         .argsz = sizeof(dma_map),
607418026caSFam Zheng         .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
608418026caSFam Zheng         .iova = iova,
609418026caSFam Zheng         .vaddr = (uintptr_t)host,
610418026caSFam Zheng         .size = size,
611418026caSFam Zheng     };
612418026caSFam Zheng     trace_qemu_vfio_do_mapping(s, host, size, iova);
613418026caSFam Zheng 
614418026caSFam Zheng     if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
615b09d51c9SMichal Privoznik         error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
616418026caSFam Zheng         return -errno;
617418026caSFam Zheng     }
618418026caSFam Zheng     return 0;
619418026caSFam Zheng }
620418026caSFam Zheng 
621418026caSFam Zheng /**
622418026caSFam Zheng  * Undo the DMA mapping from @s with VFIO, and remove from mapping list.
623418026caSFam Zheng  */
624418026caSFam Zheng static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
625418026caSFam Zheng                                    Error **errp)
626418026caSFam Zheng {
627418026caSFam Zheng     int index;
628418026caSFam Zheng     struct vfio_iommu_type1_dma_unmap unmap = {
629418026caSFam Zheng         .argsz = sizeof(unmap),
630418026caSFam Zheng         .flags = 0,
631418026caSFam Zheng         .iova = mapping->iova,
632418026caSFam Zheng         .size = mapping->size,
633418026caSFam Zheng     };
634418026caSFam Zheng 
635418026caSFam Zheng     index = mapping - s->mappings;
636418026caSFam Zheng     assert(mapping->size > 0);
637038adc2fSWei Yang     assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
638418026caSFam Zheng     assert(index >= 0 && index < s->nr_mappings);
639418026caSFam Zheng     if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
640b09d51c9SMichal Privoznik         error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
641418026caSFam Zheng     }
642418026caSFam Zheng     memmove(mapping, &s->mappings[index + 1],
643418026caSFam Zheng             sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
644418026caSFam Zheng     s->nr_mappings--;
645d29eb678SOlaf Hering     s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
646418026caSFam Zheng }
647418026caSFam Zheng 
648418026caSFam Zheng /* Check if the mapping list is (ascending) ordered. */
649418026caSFam Zheng static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
650418026caSFam Zheng {
651418026caSFam Zheng     int i;
652418026caSFam Zheng     if (QEMU_VFIO_DEBUG) {
653418026caSFam Zheng         for (i = 0; i < s->nr_mappings - 1; ++i) {
654418026caSFam Zheng             if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
655418026caSFam Zheng                 fprintf(stderr, "item %d not sorted!\n", i);
656418026caSFam Zheng                 qemu_vfio_dump_mappings(s);
657418026caSFam Zheng                 return false;
658418026caSFam Zheng             }
659418026caSFam Zheng             if (!(s->mappings[i].host + s->mappings[i].size <=
660418026caSFam Zheng                   s->mappings[i + 1].host)) {
661418026caSFam Zheng                 fprintf(stderr, "item %d overlap with next!\n", i);
662418026caSFam Zheng                 qemu_vfio_dump_mappings(s);
663418026caSFam Zheng                 return false;
664418026caSFam Zheng             }
665418026caSFam Zheng         }
666418026caSFam Zheng     }
667418026caSFam Zheng     return true;
668418026caSFam Zheng }
669418026caSFam Zheng 
6709ab57411SEric Auger static int
6719ab57411SEric Auger qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
6729ab57411SEric Auger {
6739ab57411SEric Auger     int i;
6749ab57411SEric Auger 
6759ab57411SEric Auger     for (i = 0; i < s->nb_iova_ranges; i++) {
6769ab57411SEric Auger         if (s->usable_iova_ranges[i].end < s->low_water_mark) {
6779ab57411SEric Auger             continue;
6789ab57411SEric Auger         }
6799ab57411SEric Auger         s->low_water_mark =
6809ab57411SEric Auger             MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
6819ab57411SEric Auger 
6829ab57411SEric Auger         if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
6839ab57411SEric Auger             s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
6849ab57411SEric Auger             *iova = s->low_water_mark;
6859ab57411SEric Auger             s->low_water_mark += size;
6869ab57411SEric Auger             return 0;
6879ab57411SEric Auger         }
6889ab57411SEric Auger     }
6899ab57411SEric Auger     return -ENOMEM;
6909ab57411SEric Auger }
6919ab57411SEric Auger 
6929ab57411SEric Auger static int
6939ab57411SEric Auger qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
6949ab57411SEric Auger {
6959ab57411SEric Auger     int i;
6969ab57411SEric Auger 
6979ab57411SEric Auger     for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
6989ab57411SEric Auger         if (s->usable_iova_ranges[i].start > s->high_water_mark) {
6999ab57411SEric Auger             continue;
7009ab57411SEric Auger         }
7019ab57411SEric Auger         s->high_water_mark =
7029ab57411SEric Auger             MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);
7039ab57411SEric Auger 
7049ab57411SEric Auger         if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
7059ab57411SEric Auger             s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
7069ab57411SEric Auger             *iova = s->high_water_mark - size;
7079ab57411SEric Auger             s->high_water_mark = *iova;
7089ab57411SEric Auger             return 0;
7099ab57411SEric Auger         }
7109ab57411SEric Auger     }
7119ab57411SEric Auger     return -ENOMEM;
7129ab57411SEric Auger }
7139ab57411SEric Auger 
714418026caSFam Zheng /* Map [host, host + size) area into a contiguous IOVA address space, and store
715418026caSFam Zheng  * the result in @iova if not NULL. The caller need to make sure the area is
716418026caSFam Zheng  * aligned to page size, and mustn't overlap with existing mapping areas (split
717418026caSFam Zheng  * mapping status within this area is not allowed).
718418026caSFam Zheng  */
719418026caSFam Zheng int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
720418026caSFam Zheng                       bool temporary, uint64_t *iova)
721418026caSFam Zheng {
722418026caSFam Zheng     int ret = 0;
723418026caSFam Zheng     int index;
724418026caSFam Zheng     IOVAMapping *mapping;
725418026caSFam Zheng     uint64_t iova0;
726418026caSFam Zheng 
727038adc2fSWei Yang     assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
728038adc2fSWei Yang     assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
729418026caSFam Zheng     trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
730418026caSFam Zheng     qemu_mutex_lock(&s->lock);
731418026caSFam Zheng     mapping = qemu_vfio_find_mapping(s, host, &index);
732418026caSFam Zheng     if (mapping) {
733418026caSFam Zheng         iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
734418026caSFam Zheng     } else {
735418026caSFam Zheng         if (s->high_water_mark - s->low_water_mark + 1 < size) {
736418026caSFam Zheng             ret = -ENOMEM;
737418026caSFam Zheng             goto out;
738418026caSFam Zheng         }
739418026caSFam Zheng         if (!temporary) {
7409ab57411SEric Auger             if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
7419ab57411SEric Auger                 ret = -ENOMEM;
7429ab57411SEric Auger                 goto out;
7439ab57411SEric Auger             }
7449ab57411SEric Auger 
745418026caSFam Zheng             mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
746418026caSFam Zheng             if (!mapping) {
747418026caSFam Zheng                 ret = -ENOMEM;
748418026caSFam Zheng                 goto out;
749418026caSFam Zheng             }
750418026caSFam Zheng             assert(qemu_vfio_verify_mappings(s));
751418026caSFam Zheng             ret = qemu_vfio_do_mapping(s, host, size, iova0);
752418026caSFam Zheng             if (ret) {
753418026caSFam Zheng                 qemu_vfio_undo_mapping(s, mapping, NULL);
754418026caSFam Zheng                 goto out;
755418026caSFam Zheng             }
756418026caSFam Zheng             qemu_vfio_dump_mappings(s);
757418026caSFam Zheng         } else {
7589ab57411SEric Auger             if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
7599ab57411SEric Auger                 ret = -ENOMEM;
7609ab57411SEric Auger                 goto out;
7619ab57411SEric Auger             }
762418026caSFam Zheng             ret = qemu_vfio_do_mapping(s, host, size, iova0);
763418026caSFam Zheng             if (ret) {
764418026caSFam Zheng                 goto out;
765418026caSFam Zheng             }
766418026caSFam Zheng         }
767418026caSFam Zheng     }
768418026caSFam Zheng     if (iova) {
769418026caSFam Zheng         *iova = iova0;
770418026caSFam Zheng     }
771418026caSFam Zheng out:
772418026caSFam Zheng     qemu_mutex_unlock(&s->lock);
773418026caSFam Zheng     return ret;
774418026caSFam Zheng }
775418026caSFam Zheng 
776418026caSFam Zheng /* Reset the high watermark and free all "temporary" mappings. */
777418026caSFam Zheng int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
778418026caSFam Zheng {
779418026caSFam Zheng     struct vfio_iommu_type1_dma_unmap unmap = {
780418026caSFam Zheng         .argsz = sizeof(unmap),
781418026caSFam Zheng         .flags = 0,
782418026caSFam Zheng         .iova = s->high_water_mark,
783418026caSFam Zheng         .size = QEMU_VFIO_IOVA_MAX - s->high_water_mark,
784418026caSFam Zheng     };
785418026caSFam Zheng     trace_qemu_vfio_dma_reset_temporary(s);
7866e8a355dSDaniel Brodsky     QEMU_LOCK_GUARD(&s->lock);
787418026caSFam Zheng     if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
788b09d51c9SMichal Privoznik         error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
789418026caSFam Zheng         return -errno;
790418026caSFam Zheng     }
791418026caSFam Zheng     s->high_water_mark = QEMU_VFIO_IOVA_MAX;
792418026caSFam Zheng     return 0;
793418026caSFam Zheng }
794418026caSFam Zheng 
795418026caSFam Zheng /* Unmapping the whole area that was previously mapped with
796418026caSFam Zheng  * qemu_vfio_dma_map(). */
797418026caSFam Zheng void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
798418026caSFam Zheng {
799418026caSFam Zheng     int index = 0;
800418026caSFam Zheng     IOVAMapping *m;
801418026caSFam Zheng 
802418026caSFam Zheng     if (!host) {
803418026caSFam Zheng         return;
804418026caSFam Zheng     }
805418026caSFam Zheng 
806418026caSFam Zheng     trace_qemu_vfio_dma_unmap(s, host);
807418026caSFam Zheng     qemu_mutex_lock(&s->lock);
808418026caSFam Zheng     m = qemu_vfio_find_mapping(s, host, &index);
809418026caSFam Zheng     if (!m) {
810418026caSFam Zheng         goto out;
811418026caSFam Zheng     }
812418026caSFam Zheng     qemu_vfio_undo_mapping(s, m, NULL);
813418026caSFam Zheng out:
814418026caSFam Zheng     qemu_mutex_unlock(&s->lock);
815418026caSFam Zheng }
816418026caSFam Zheng 
817418026caSFam Zheng static void qemu_vfio_reset(QEMUVFIOState *s)
818418026caSFam Zheng {
819418026caSFam Zheng     ioctl(s->device, VFIO_DEVICE_RESET);
820418026caSFam Zheng }
821418026caSFam Zheng 
822418026caSFam Zheng /* Close and free the VFIO resources. */
823418026caSFam Zheng void qemu_vfio_close(QEMUVFIOState *s)
824418026caSFam Zheng {
825418026caSFam Zheng     int i;
826418026caSFam Zheng 
827418026caSFam Zheng     if (!s) {
828418026caSFam Zheng         return;
829418026caSFam Zheng     }
830418026caSFam Zheng     for (i = 0; i < s->nr_mappings; ++i) {
831418026caSFam Zheng         qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
832418026caSFam Zheng     }
833418026caSFam Zheng     ram_block_notifier_remove(&s->ram_notifier);
8344487d420SEric Auger     g_free(s->usable_iova_ranges);
8354487d420SEric Auger     s->nb_iova_ranges = 0;
836418026caSFam Zheng     qemu_vfio_reset(s);
837418026caSFam Zheng     close(s->device);
838418026caSFam Zheng     close(s->group);
839418026caSFam Zheng     close(s->container);
840418026caSFam Zheng }
841