xref: /openbmc/qemu/backends/iommufd.c (revision 14997d521d1cd0bb36c902ef1032f0d3f2a3c912)
1 /*
2  * iommufd container backend
3  *
4  * Copyright (C) 2023 Intel Corporation.
5  * Copyright Red Hat, Inc. 2023
6  *
7  * Authors: Yi Liu <yi.l.liu@intel.com>
8  *          Eric Auger <eric.auger@redhat.com>
9  *
10  * SPDX-License-Identifier: GPL-2.0-or-later
11  */
12 
13 #include "qemu/osdep.h"
14 #include "system/iommufd.h"
15 #include "qapi/error.h"
16 #include "qemu/module.h"
17 #include "qom/object_interfaces.h"
18 #include "qemu/error-report.h"
19 #include "monitor/monitor.h"
20 #include "trace.h"
21 #include "hw/vfio/vfio-device.h"
22 #include <sys/ioctl.h>
23 #include <linux/iommufd.h>
24 
25 static void iommufd_backend_init(Object *obj)
26 {
27     IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
28 
29     be->fd = -1;
30     be->users = 0;
31     be->owned = true;
32 }
33 
34 static void iommufd_backend_finalize(Object *obj)
35 {
36     IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
37 
38     if (be->owned) {
39         close(be->fd);
40         be->fd = -1;
41     }
42 }
43 
44 static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
45 {
46     ERRP_GUARD();
47     IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
48     int fd = -1;
49 
50     fd = monitor_fd_param(monitor_cur(), str, errp);
51     if (fd == -1) {
52         error_prepend(errp, "Could not parse remote object fd %s:", str);
53         return;
54     }
55     be->fd = fd;
56     be->owned = false;
57     trace_iommu_backend_set_fd(be->fd);
58 }
59 
60 static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
61 {
62     IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
63 
64     return !be->users;
65 }
66 
67 static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
68 {
69     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
70 
71     ucc->can_be_deleted = iommufd_backend_can_be_deleted;
72 
73     object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
74 }
75 
76 bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
77 {
78     int fd;
79 
80     if (be->owned && !be->users) {
81         fd = qemu_open("/dev/iommu", O_RDWR, errp);
82         if (fd < 0) {
83             return false;
84         }
85         be->fd = fd;
86     }
87     be->users++;
88 
89     trace_iommufd_backend_connect(be->fd, be->owned, be->users);
90     return true;
91 }
92 
93 void iommufd_backend_disconnect(IOMMUFDBackend *be)
94 {
95     if (!be->users) {
96         goto out;
97     }
98     be->users--;
99     if (!be->users && be->owned) {
100         close(be->fd);
101         be->fd = -1;
102     }
103 out:
104     trace_iommufd_backend_disconnect(be->fd, be->users);
105 }
106 
107 bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
108                                 Error **errp)
109 {
110     int fd = be->fd;
111     struct iommu_ioas_alloc alloc_data  = {
112         .size = sizeof(alloc_data),
113         .flags = 0,
114     };
115 
116     if (ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data)) {
117         error_setg_errno(errp, errno, "Failed to allocate ioas");
118         return false;
119     }
120 
121     *ioas_id = alloc_data.out_ioas_id;
122     trace_iommufd_backend_alloc_ioas(fd, *ioas_id);
123 
124     return true;
125 }
126 
127 void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
128 {
129     int ret, fd = be->fd;
130     struct iommu_destroy des = {
131         .size = sizeof(des),
132         .id = id,
133     };
134 
135     ret = ioctl(fd, IOMMU_DESTROY, &des);
136     trace_iommufd_backend_free_id(fd, id, ret);
137     if (ret) {
138         error_report("Failed to free id: %u %m", id);
139     }
140 }
141 
142 int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
143                             ram_addr_t size, void *vaddr, bool readonly)
144 {
145     int ret, fd = be->fd;
146     struct iommu_ioas_map map = {
147         .size = sizeof(map),
148         .flags = IOMMU_IOAS_MAP_READABLE |
149                  IOMMU_IOAS_MAP_FIXED_IOVA,
150         .ioas_id = ioas_id,
151         .__reserved = 0,
152         .user_va = (uintptr_t)vaddr,
153         .iova = iova,
154         .length = size,
155     };
156 
157     if (!readonly) {
158         map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
159     }
160 
161     ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
162     trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
163                                   vaddr, readonly, ret);
164     if (ret) {
165         ret = -errno;
166 
167         /* TODO: Not support mapping hardware PCI BAR region for now. */
168         if (errno == EFAULT) {
169             warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
170         }
171     }
172     return ret;
173 }
174 
175 int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
176                               hwaddr iova, ram_addr_t size)
177 {
178     int ret, fd = be->fd;
179     struct iommu_ioas_unmap unmap = {
180         .size = sizeof(unmap),
181         .ioas_id = ioas_id,
182         .iova = iova,
183         .length = size,
184     };
185 
186     ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
187     /*
188      * IOMMUFD takes mapping as some kind of object, unmapping
189      * nonexistent mapping is treated as deleting a nonexistent
190      * object and return ENOENT. This is different from legacy
191      * backend which allows it. vIOMMU may trigger a lot of
192      * redundant unmapping, to avoid flush the log, treat them
193      * as succeess for IOMMUFD just like legacy backend.
194      */
195     if (ret && errno == ENOENT) {
196         trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
197         ret = 0;
198     } else {
199         trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
200     }
201 
202     if (ret) {
203         ret = -errno;
204     }
205     return ret;
206 }
207 
208 bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
209                                 uint32_t pt_id, uint32_t flags,
210                                 uint32_t data_type, uint32_t data_len,
211                                 void *data_ptr, uint32_t *out_hwpt,
212                                 Error **errp)
213 {
214     int ret, fd = be->fd;
215     struct iommu_hwpt_alloc alloc_hwpt = {
216         .size = sizeof(struct iommu_hwpt_alloc),
217         .flags = flags,
218         .dev_id = dev_id,
219         .pt_id = pt_id,
220         .data_type = data_type,
221         .data_len = data_len,
222         .data_uptr = (uintptr_t)data_ptr,
223     };
224 
225     ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
226     trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
227                                      data_len, (uintptr_t)data_ptr,
228                                      alloc_hwpt.out_hwpt_id, ret);
229     if (ret) {
230         error_setg_errno(errp, errno, "Failed to allocate hwpt");
231         return false;
232     }
233 
234     *out_hwpt = alloc_hwpt.out_hwpt_id;
235     return true;
236 }
237 
238 bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
239                                         uint32_t hwpt_id, bool start,
240                                         Error **errp)
241 {
242     int ret;
243     struct iommu_hwpt_set_dirty_tracking set_dirty = {
244             .size = sizeof(set_dirty),
245             .hwpt_id = hwpt_id,
246             .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
247     };
248 
249     ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
250     trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
251     if (ret) {
252         error_setg_errno(errp, errno,
253                          "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
254                          hwpt_id);
255         return false;
256     }
257 
258     return true;
259 }
260 
261 bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
262                                       uint32_t hwpt_id,
263                                       uint64_t iova, ram_addr_t size,
264                                       uint64_t page_size, uint64_t *data,
265                                       Error **errp)
266 {
267     int ret;
268     struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
269         .size = sizeof(get_dirty_bitmap),
270         .hwpt_id = hwpt_id,
271         .iova = iova,
272         .length = size,
273         .page_size = page_size,
274         .data = (uintptr_t)data,
275     };
276 
277     ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
278     trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
279                                            page_size, ret ? errno : 0);
280     if (ret) {
281         error_setg_errno(errp, errno,
282                          "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx
283                          " size: 0x"RAM_ADDR_FMT") failed", iova, size);
284         return false;
285     }
286 
287     return true;
288 }
289 
290 bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
291                                      uint32_t *type, void *data, uint32_t len,
292                                      uint64_t *caps, Error **errp)
293 {
294     struct iommu_hw_info info = {
295         .size = sizeof(info),
296         .dev_id = devid,
297         .data_len = len,
298         .data_uptr = (uintptr_t)data,
299     };
300 
301     if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) {
302         error_setg_errno(errp, errno, "Failed to get hardware info");
303         return false;
304     }
305 
306     g_assert(type);
307     *type = info.out_data_type;
308     g_assert(caps);
309     *caps = info.out_capabilities;
310 
311     return true;
312 }
313 
314 bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
315                                       uint32_t data_type, uint32_t entry_len,
316                                       uint32_t *entry_num, void *data,
317                                       Error **errp)
318 {
319     int ret, fd = be->fd;
320     uint32_t total_entries = *entry_num;
321     struct iommu_hwpt_invalidate cache = {
322         .size = sizeof(cache),
323         .hwpt_id = id,
324         .data_type = data_type,
325         .entry_len = entry_len,
326         .entry_num = total_entries,
327         .data_uptr = (uintptr_t)data,
328     };
329 
330     ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
331     trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
332                                            total_entries, cache.entry_num,
333                                            (uintptr_t)data, ret ? errno : 0);
334     *entry_num = cache.entry_num;
335 
336     if (ret) {
337         error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
338                          " total %d entries, processed %d entries",
339                          total_entries, cache.entry_num);
340     } else if (total_entries != cache.entry_num) {
341         error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed"
342                          " entries: total %d entries, processed %d entries."
343                          " Kernel BUG?!", total_entries, cache.entry_num);
344         return false;
345     }
346 
347     return !ret;
348 }
349 
350 bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
351                                            uint32_t hwpt_id, Error **errp)
352 {
353     HostIOMMUDeviceIOMMUFDClass *idevc =
354         HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
355 
356     g_assert(idevc->attach_hwpt);
357     return idevc->attach_hwpt(idev, hwpt_id, errp);
358 }
359 
360 bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
361                                            Error **errp)
362 {
363     HostIOMMUDeviceIOMMUFDClass *idevc =
364         HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
365 
366     g_assert(idevc->detach_hwpt);
367     return idevc->detach_hwpt(idev, errp);
368 }
369 
370 static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
371 {
372     HostIOMMUDeviceCaps *caps = &hiod->caps;
373 
374     switch (cap) {
375     case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
376         return caps->type;
377     case HOST_IOMMU_DEVICE_CAP_AW_BITS:
378         return vfio_device_get_aw_bits(hiod->agent);
379     default:
380         error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
381         return -EINVAL;
382     }
383 }
384 
385 static void hiod_iommufd_class_init(ObjectClass *oc, const void *data)
386 {
387     HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
388 
389     hioc->get_cap = hiod_iommufd_get_cap;
390 };
391 
392 static const TypeInfo types[] = {
393     {
394         .name = TYPE_IOMMUFD_BACKEND,
395         .parent = TYPE_OBJECT,
396         .instance_size = sizeof(IOMMUFDBackend),
397         .instance_init = iommufd_backend_init,
398         .instance_finalize = iommufd_backend_finalize,
399         .class_size = sizeof(IOMMUFDBackendClass),
400         .class_init = iommufd_backend_class_init,
401         .interfaces = (const InterfaceInfo[]) {
402             { TYPE_USER_CREATABLE },
403             { }
404         }
405     }, {
406         .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
407         .parent = TYPE_HOST_IOMMU_DEVICE,
408         .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
409         .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
410         .class_init = hiod_iommufd_class_init,
411         .abstract = true,
412     }
413 };
414 
415 DEFINE_TYPES(types)
416