xref: /openbmc/qemu/hw/vfio-user/container.c (revision f79afdf7dafd5fc9551c002de0f4139af4e9f5aa)
1 /*
2  * Container for vfio-user IOMMU type: rather than communicating with the kernel
3  * vfio driver, we communicate over a socket to a server using the vfio-user
4  * protocol.
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include <sys/ioctl.h>
10 #include <linux/vfio.h>
11 #include "qemu/osdep.h"
12 
13 #include "hw/vfio-user/container.h"
14 #include "hw/vfio-user/device.h"
15 #include "hw/vfio-user/trace.h"
16 #include "hw/vfio/vfio-device.h"
17 #include "hw/vfio/vfio-listener.h"
18 #include "qapi/error.h"
19 
20 /*
21  * When DMA space is the physical address space, the region add/del listeners
22  * will fire during memory update transactions.  These depend on BQL being held,
23  * so do any resulting map/demap ops async while keeping BQL.
24  */
/*
 * Entering a memory transaction: switch the proxy to async mode so DMA
 * map/unmap requests issued from the region listeners do not block while
 * the BQL is held (see the comment above).
 */
static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
{
    container_of(bcontainer, VFIOUserContainer, bcontainer)->proxy->async_ops =
        true;
}
32 
/*
 * Memory transaction finished: leave async mode, then block until every
 * request queued during the transaction has completed.
 */
static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *uc = container_of(bcontainer, VFIOUserContainer,
                                         bcontainer);

    uc->proxy->async_ops = false;
    vfio_user_wait_reqs(uc->proxy);
}
42 
/*
 * Send a VFIO_USER_DMA_UNMAP request for [iova, iova + size).
 *
 * Returns 0 on success, or a negative errno value on failure.  While the
 * proxy is in async mode (inside a memory transaction, see
 * vfio_user_listener_begin) the request is sent without waiting for the
 * reply; vfio_user_listener_commit() later waits for all such requests.
 */
static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
                               hwaddr iova, ram_addr_t size,
                               IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                            bcontainer);
    Error *local_err = NULL;
    int ret = 0;

    /* Heap-allocated: in the async path the message outlives this call. */
    VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
    /* argsz mirrors the kernel vfio type1 unmap structure, per protocol. */
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
    msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
    msgp->iova = iova;
    msgp->size = size;
    trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
                              container->proxy->async_ops);

    if (container->proxy->async_ops) {
        /*
         * Fire-and-forget: msgp is intentionally not freed here; ownership
         * presumably passes to the proxy layer until the reply arrives.
         * NOTE(review): on a send failure msgp looks leaked unless
         * vfio_user_send_nowait frees it internally — confirm against the
         * proxy implementation.
         */
        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
                              0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }
    } else {
        /* Synchronous: block for the reply, then check for a server error. */
        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
                                 0, &local_err)) {
                error_report_err(local_err);
                ret = -EFAULT;
        }

        /* The server reports failure via VFIO_USER_ERROR in the reply. */
        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}
84 
/*
 * Send a VFIO_USER_DMA_MAP request for [iova, iova + size).
 *
 * If the backing MemoryRegion has a file descriptor, that fd plus a file
 * offset is passed so the server can mmap the region directly; otherwise
 * offset stays 0 and no fd is sent.  Returns 0 on success, negative errno
 * on failure.
 */
static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                             ram_addr_t size, void *vaddr, bool readonly,
                             MemoryRegion *mrp)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    int fd = memory_region_get_fd(mrp);
    Error *local_err = NULL;
    int ret = 0;

    VFIOUserFDs *fds = NULL;
    VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
    /* argsz mirrors the kernel vfio type1 map structure, per protocol. */
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
    msgp->flags = VFIO_DMA_MAP_FLAG_READ;
    msgp->offset = 0;
    msgp->iova = iova;
    msgp->size = size;

    /*
     * vaddr enters as a QEMU process address; make it either a file offset
     * for mapped areas or leave as 0.
     */
    if (fd != -1) {
        msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
    }

    if (!readonly) {
        msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
                            container->proxy->async_ops);

    /*
     * The async_ops case sends without blocking. They're later waited for in
     * vfio_send_wait_reqs.
     */
    if (container->proxy->async_ops) {
        /* can't use auto variable since we don't block */
        if (fd != -1) {
            fds = vfio_user_getfds(1);
            fds->send_fds = 1;
            fds->fds[0] = fd;
        }

        /*
         * msgp (and fds) are not freed here; ownership presumably passes to
         * the proxy until the reply arrives.  NOTE(review): on a send
         * failure both look leaked unless vfio_user_send_nowait cleans up —
         * confirm against the proxy implementation.
         */
        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
                              0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }
    } else {
        /* Blocking path: the fd array can live on the stack. */
        VFIOUserFDs local_fds = { 1, 0, &fd };

        fds = fd != -1 ? &local_fds : NULL;

        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
                                 0, &local_err)) {
                error_report_err(local_err);
                ret = -EFAULT;
        }

        /* The server reports failure via VFIO_USER_ERROR in the reply. */
        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}
157 
/*
 * Dirty-page tracking is not implemented for vfio-user containers
 * (no live migration support yet); always fails with ENOTSUP.
 */
static int
vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
                                    bool start, Error **errp)
{
    error_setg_errno(errp, ENOTSUP, "Not supported");
    return -ENOTSUP;
}
165 
/*
 * Dirty-bitmap queries are not implemented for vfio-user containers
 * (no live migration support yet); always fails with ENOTSUP.
 */
static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                         VFIOBitmap *vbmap, hwaddr iova,
                                         hwaddr size, Error **errp)
{
    error_setg_errno(errp, ENOTSUP, "Not supported");
    return -ENOTSUP;
}
173 
vfio_user_setup(VFIOContainerBase * bcontainer,Error ** errp)174 static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
175 {
176     VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
177                                                 bcontainer);
178 
179     assert(container->proxy->dma_pgsizes != 0);
180     bcontainer->pgsizes = container->proxy->dma_pgsizes;
181     bcontainer->dma_max_mappings = container->proxy->max_dma;
182 
183     /* No live migration support yet. */
184     bcontainer->dirty_pages_supported = false;
185     bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
186     bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;
187 
188     return true;
189 }
190 
vfio_user_create_container(VFIODevice * vbasedev,Error ** errp)191 static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
192                                                      Error **errp)
193 {
194     VFIOUserContainer *container;
195 
196     container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
197     container->proxy = vbasedev->proxy;
198     return container;
199 }
200 
/*
 * Try to mirror vfio_container_connect() as much as possible.
 */
/*
 * Create a vfio-user container in the given address space and bring it up:
 * disable uncoordinated RAM discards, run the IOMMU class setup hook,
 * insert the container into the address space, and register the memory
 * listener.  On failure, unwinds in reverse order via the goto ladder and
 * returns NULL (errp set); on success returns the initialized container.
 */
static VFIOUserContainer *
vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
                            Error **errp)
{
    VFIOContainerBase *bcontainer;
    VFIOUserContainer *container;
    VFIOAddressSpace *space;
    VFIOIOMMUClass *vioc;
    int ret;

    space = vfio_address_space_get(as);

    container = vfio_user_create_container(vbasedev, errp);
    if (!container) {
        goto put_space_exit;
    }

    bcontainer = &container->bcontainer;

    /* Virtio-balloon style discards would invalidate server-side mappings. */
    ret = ram_block_uncoordinated_discard_disable(true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto free_container_exit;
    }

    vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    assert(vioc->setup);

    if (!vioc->setup(bcontainer, errp)) {
        goto enable_discards_exit;
    }

    vfio_address_space_insert(space, bcontainer);

    if (!vfio_listener_register(bcontainer, errp)) {
        goto listener_release_exit;
    }

    bcontainer->initialized = true;

    return container;

/*
 * NOTE(review): this label runs vfio_listener_unregister even though
 * vfio_listener_register just failed — assumed idempotent/safe on a
 * partially registered listener; the address-space insert is not visibly
 * undone here either.  Both mirror vfio_container_connect(); confirm there.
 */
listener_release_exit:
    vfio_listener_unregister(bcontainer);
    if (vioc->release) {
        vioc->release(bcontainer);
    }

enable_discards_exit:
    ram_block_uncoordinated_discard_disable(false);

free_container_exit:
    object_unref(container);

put_space_exit:
    vfio_address_space_put(space);

    return NULL;
}
263 
/*
 * Tear down a container set up by vfio_user_container_connect(), undoing
 * its steps in reverse order.  Note that 'space' must be loaded before
 * object_unref() potentially destroys the container that points at it.
 */
static void vfio_user_container_disconnect(VFIOUserContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    VFIOAddressSpace *space = bcontainer->space;

    ram_block_uncoordinated_discard_disable(false);

    vfio_listener_unregister(bcontainer);
    if (vioc->release) {
        vioc->release(bcontainer);
    }

    object_unref(container);

    vfio_address_space_put(space);
}
281 
vfio_user_device_get(VFIOUserContainer * container,VFIODevice * vbasedev,Error ** errp)282 static bool vfio_user_device_get(VFIOUserContainer *container,
283                                  VFIODevice *vbasedev, Error **errp)
284 {
285     struct vfio_device_info info = { .argsz = sizeof(info) };
286 
287 
288     if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
289         return false;
290     }
291 
292     vbasedev->fd = -1;
293 
294     vfio_device_prepare(vbasedev, &container->bcontainer, &info);
295 
296     return true;
297 }
298 
299 /*
300  * vfio_user_device_attach: attach a device to a new container.
301  */
vfio_user_device_attach(const char * name,VFIODevice * vbasedev,AddressSpace * as,Error ** errp)302 static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
303                                     AddressSpace *as, Error **errp)
304 {
305     VFIOUserContainer *container;
306 
307     container = vfio_user_container_connect(as, vbasedev, errp);
308     if (container == NULL) {
309         error_prepend(errp, "failed to connect proxy");
310         return false;
311     }
312 
313     return vfio_user_device_get(container, vbasedev, errp);
314 }
315 
/*
 * Detach a device and tear down the container it was attached to.
 * Inverse of vfio_user_device_attach().
 */
static void vfio_user_device_detach(VFIODevice *vbasedev)
{
    VFIOUserContainer *uc = container_of(vbasedev->bcontainer,
                                         VFIOUserContainer, bcontainer);

    vfio_device_unprepare(vbasedev);
    vfio_user_container_disconnect(uc);
}
325 
/*
 * Hot-reset hook: nothing to do for vfio-user, so report success.
 */
static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    /* ->needs_reset is always false for vfio-user. */
    return 0;
}
331 
/*
 * Class init: wire the vfio-user implementations into the VFIOIOMMUClass
 * vtable.  Hooks not set here keep the base class behavior.
 */
static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->setup = vfio_user_setup;
    /*
     * These two previously ended in the comma operator (`,`) instead of
     * `;` — harmless by accident in an expression statement, fixed for
     * consistency with the rest of the assignments.
     */
    vioc->listener_begin = vfio_user_listener_begin;
    vioc->listener_commit = vfio_user_listener_commit;
    vioc->dma_map = vfio_user_dma_map;
    vioc->dma_unmap = vfio_user_dma_unmap;
    vioc->attach_device = vfio_user_device_attach;
    vioc->detach_device = vfio_user_device_detach;
    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
}
347 
/* QOM registration of the vfio-user IOMMU container type. */
static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU_USER,
        .parent = TYPE_VFIO_IOMMU,
        .instance_size = sizeof(VFIOUserContainer),
        .class_init = vfio_iommu_user_class_init,
    },
};

DEFINE_TYPES(types)
358