xref: /openbmc/qemu/hw/vfio-user/container.c (revision 84fe49d94ac72d7fd226a65d2250c6294885561d)
1 /*
2  * Container for vfio-user IOMMU type: rather than communicating with the kernel
3  * vfio driver, we communicate over a socket to a server using the vfio-user
4  * protocol.
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include <sys/ioctl.h>
10 #include <linux/vfio.h>
11 #include "qemu/osdep.h"
12 
13 #include "hw/vfio-user/container.h"
14 #include "hw/vfio-user/device.h"
15 #include "hw/vfio-user/trace.h"
16 #include "hw/vfio/vfio-device.h"
17 #include "hw/vfio/vfio-listener.h"
18 #include "qapi/error.h"
19 
20 /*
21  * When DMA space is the physical address space, the region add/del listeners
22  * will fire during memory update transactions.  These depend on BQL being held,
23  * so do any resulting map/demap ops async while keeping BQL.
24  */
25 static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
26 {
27     VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
28                                                  bcontainer);
29 
30     container->proxy->async_ops = true;
31 }
32 
33 static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
34 {
35     VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
36                                             bcontainer);
37 
38     /* wait here for any async requests sent during the transaction */
39     container->proxy->async_ops = false;
40     vfio_user_wait_reqs(container->proxy);
41 }
42 
43 static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
44                                hwaddr iova, ram_addr_t size,
45                                IOMMUTLBEntry *iotlb, bool unmap_all)
46 {
47     VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
48                                             bcontainer);
49     Error *local_err = NULL;
50     int ret = 0;
51 
52     VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp));
53 
54     vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
55     msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
56     msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
57     msgp->iova = iova;
58     msgp->size = size;
59     trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
60                               container->proxy->async_ops);
61 
62     if (container->proxy->async_ops) {
63         if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
64                               0, &local_err)) {
65             error_report_err(local_err);
66             ret = -EFAULT;
67         } else {
68             ret = 0;
69         }
70     } else {
71         if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
72                                  0, &local_err)) {
73                 error_report_err(local_err);
74                 ret = -EFAULT;
75         }
76 
77         if (msgp->hdr.flags & VFIO_USER_ERROR) {
78             ret = -msgp->hdr.error_reply;
79         }
80 
81         g_free(msgp);
82     }
83 
84     return ret;
85 }
86 
87 static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
88                              ram_addr_t size, void *vaddr, bool readonly,
89                              MemoryRegion *mrp)
90 {
91     VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
92                                                 bcontainer);
93     int fd = memory_region_get_fd(mrp);
94     Error *local_err = NULL;
95     int ret;
96 
97     VFIOUserFDs *fds = NULL;
98     VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));
99 
100     vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
101     msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
102     msgp->flags = VFIO_DMA_MAP_FLAG_READ;
103     msgp->offset = 0;
104     msgp->iova = iova;
105     msgp->size = size;
106 
107     /*
108      * vaddr enters as a QEMU process address; make it either a file offset
109      * for mapped areas or leave as 0.
110      */
111     if (fd != -1) {
112         msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
113     }
114 
115     if (!readonly) {
116         msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
117     }
118 
119     trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
120                             container->proxy->async_ops);
121 
122     /*
123      * The async_ops case sends without blocking. They're later waited for in
124      * vfio_send_wait_reqs.
125      */
126     if (container->proxy->async_ops) {
127         /* can't use auto variable since we don't block */
128         if (fd != -1) {
129             fds = vfio_user_getfds(1);
130             fds->send_fds = 1;
131             fds->fds[0] = fd;
132         }
133 
134         if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
135                               0, &local_err)) {
136             error_report_err(local_err);
137             ret = -EFAULT;
138         } else {
139             ret = 0;
140         }
141     } else {
142         VFIOUserFDs local_fds = { 1, 0, &fd };
143 
144         fds = fd != -1 ? &local_fds : NULL;
145 
146         if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
147                                  0, &local_err)) {
148                 error_report_err(local_err);
149                 ret = -EFAULT;
150         }
151 
152         if (msgp->hdr.flags & VFIO_USER_ERROR) {
153             ret = -msgp->hdr.error_reply;
154         }
155 
156         g_free(msgp);
157     }
158 
159     return ret;
160 }
161 
162 static int
163 vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
164                                     bool start, Error **errp)
165 {
166     error_setg_errno(errp, ENOTSUP, "Not supported");
167     return -ENOTSUP;
168 }
169 
170 static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
171                                          VFIOBitmap *vbmap, hwaddr iova,
172                                          hwaddr size, Error **errp)
173 {
174     error_setg_errno(errp, ENOTSUP, "Not supported");
175     return -ENOTSUP;
176 }
177 
178 static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
179 {
180     VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
181                                                 bcontainer);
182 
183     assert(container->proxy->dma_pgsizes != 0);
184     bcontainer->pgsizes = container->proxy->dma_pgsizes;
185     bcontainer->dma_max_mappings = container->proxy->max_dma;
186 
187     /* No live migration support yet. */
188     bcontainer->dirty_pages_supported = false;
189     bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
190     bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;
191 
192     return true;
193 }
194 
195 static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
196                                                      Error **errp)
197 {
198     VFIOUserContainer *container;
199 
200     container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
201     container->proxy = vbasedev->proxy;
202     return container;
203 }
204 
205 /*
206  * Try to mirror vfio_container_connect() as much as possible.
207  */
208 static VFIOUserContainer *
209 vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
210                             Error **errp)
211 {
212     VFIOContainerBase *bcontainer;
213     VFIOUserContainer *container;
214     VFIOAddressSpace *space;
215     VFIOIOMMUClass *vioc;
216     int ret;
217 
218     space = vfio_address_space_get(as);
219 
220     container = vfio_user_create_container(vbasedev, errp);
221     if (!container) {
222         goto put_space_exit;
223     }
224 
225     bcontainer = &container->bcontainer;
226 
227     ret = ram_block_uncoordinated_discard_disable(true);
228     if (ret) {
229         error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
230         goto free_container_exit;
231     }
232 
233     vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
234     assert(vioc->setup);
235 
236     if (!vioc->setup(bcontainer, errp)) {
237         goto enable_discards_exit;
238     }
239 
240     vfio_address_space_insert(space, bcontainer);
241 
242     if (!vfio_listener_register(bcontainer, errp)) {
243         goto listener_release_exit;
244     }
245 
246     bcontainer->initialized = true;
247 
248     return container;
249 
250 listener_release_exit:
251     vfio_listener_unregister(bcontainer);
252     if (vioc->release) {
253         vioc->release(bcontainer);
254     }
255 
256 enable_discards_exit:
257     ram_block_uncoordinated_discard_disable(false);
258 
259 free_container_exit:
260     object_unref(container);
261 
262 put_space_exit:
263     vfio_address_space_put(space);
264 
265     return NULL;
266 }
267 
268 static void vfio_user_container_disconnect(VFIOUserContainer *container)
269 {
270     VFIOContainerBase *bcontainer = &container->bcontainer;
271     VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
272     VFIOAddressSpace *space = bcontainer->space;
273 
274     ram_block_uncoordinated_discard_disable(false);
275 
276     vfio_listener_unregister(bcontainer);
277     if (vioc->release) {
278         vioc->release(bcontainer);
279     }
280 
281     object_unref(container);
282 
283     vfio_address_space_put(space);
284 }
285 
286 static bool vfio_user_device_get(VFIOUserContainer *container,
287                                  VFIODevice *vbasedev, Error **errp)
288 {
289     struct vfio_device_info info = { .argsz = sizeof(info) };
290 
291 
292     if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
293         return false;
294     }
295 
296     vbasedev->fd = -1;
297 
298     vfio_device_prepare(vbasedev, &container->bcontainer, &info);
299 
300     return true;
301 }
302 
303 /*
304  * vfio_user_device_attach: attach a device to a new container.
305  */
306 static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
307                                     AddressSpace *as, Error **errp)
308 {
309     VFIOUserContainer *container;
310 
311     container = vfio_user_container_connect(as, vbasedev, errp);
312     if (container == NULL) {
313         error_prepend(errp, "failed to connect proxy");
314         return false;
315     }
316 
317     return vfio_user_device_get(container, vbasedev, errp);
318 }
319 
320 static void vfio_user_device_detach(VFIODevice *vbasedev)
321 {
322     VFIOUserContainer *container = container_of(vbasedev->bcontainer,
323                                                 VFIOUserContainer, bcontainer);
324 
325     vfio_device_unprepare(vbasedev);
326 
327     vfio_user_container_disconnect(container);
328 }
329 
330 static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
331 {
332     /* ->needs_reset is always false for vfio-user. */
333     return 0;
334 }
335 
336 static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
337 {
338     VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
339 
340     vioc->setup = vfio_user_setup;
341     vioc->listener_begin = vfio_user_listener_begin,
342     vioc->listener_commit = vfio_user_listener_commit,
343     vioc->dma_map = vfio_user_dma_map;
344     vioc->dma_unmap = vfio_user_dma_unmap;
345     vioc->attach_device = vfio_user_device_attach;
346     vioc->detach_device = vfio_user_device_detach;
347     vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
348     vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
349     vioc->pci_hot_reset = vfio_user_pci_hot_reset;
350 };
351 
352 static const TypeInfo types[] = {
353     {
354         .name = TYPE_VFIO_IOMMU_USER,
355         .parent = TYPE_VFIO_IOMMU,
356         .instance_size = sizeof(VFIOUserContainer),
357         .class_init = vfio_iommu_user_class_init,
358     },
359 };
360 
361 DEFINE_TYPES(types)
362