/*
 * Container for vfio-user IOMMU type: rather than communicating with the kernel
 * vfio driver, we communicate over a socket to a server using the vfio-user
 * protocol.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qemu/osdep.h"

#include "hw/vfio-user/container.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "qapi/error.h"
/*
 * When DMA space is the physical address space, the region add/del listeners
 * will fire during memory update transactions. These depend on the BQL being
 * held, so do any resulting map/unmap operations asynchronously while keeping
 * the BQL.
 */
static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);

    container->proxy->async_ops = true;
}

static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);

    /* wait here for any async requests sent during the transaction */
    container->proxy->async_ops = false;
    vfio_user_wait_reqs(container->proxy);
}

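/*
 * Send a VFIO_USER_DMA_UNMAP request. The message body mirrors the kernel's
 * struct vfio_iommu_type1_dma_unmap; VFIO_DMA_UNMAP_FLAG_ALL asks the server
 * to drop every mapping at once. In the async (nowait) case the reply has not
 * arrived by the time we return, so msgp remains owned by the proxy rather
 * than being freed here.
 */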
static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
                               hwaddr iova, ram_addr_t size,
                               IOMMUTLBEntry *iotlb, bool unmap_all)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    Error *local_err = NULL;
    int ret = 0;

    VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
    msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
    msgp->iova = iova;
    msgp->size = size;
    trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
                              container->proxy->async_ops);

    if (container->proxy->async_ops) {
        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
                                   0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }
    } else {
        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
                                 0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}

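/*
 * Send a VFIO_USER_DMA_MAP request. When the region has a backing file
 * descriptor, the fd is passed along with the message so the server can mmap
 * the memory directly, and the client virtual address is translated into an
 * offset within that fd.
 */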
static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                             ram_addr_t size, void *vaddr, bool readonly,
                             MemoryRegion *mrp)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);
    int fd = memory_region_get_fd(mrp);
    Error *local_err = NULL;
    int ret = 0;

    VFIOUserFDs *fds = NULL;
    VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
    msgp->flags = VFIO_DMA_MAP_FLAG_READ;
    msgp->offset = 0;
    msgp->iova = iova;
    msgp->size = size;

    /*
     * vaddr enters as a QEMU process address; make it either a file offset
     * for mapped areas or leave it as 0.
     */
    if (fd != -1) {
        msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
    }

    if (!readonly) {
        msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
                            container->proxy->async_ops);

    /*
     * The async_ops case sends without blocking; such requests are waited
     * for later in vfio_user_wait_reqs().
     */
    if (container->proxy->async_ops) {
        /* can't use an auto variable since we don't block */
        if (fd != -1) {
            fds = vfio_user_getfds(1);
            fds->send_fds = 1;
            fds->fds[0] = fd;
        }

        if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
                                   0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }
    } else {
        VFIOUserFDs local_fds = { 1, 0, &fd };

        fds = fd != -1 ? &local_fds : NULL;

        if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
                                 0, &local_err)) {
            error_report_err(local_err);
            ret = -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            ret = -msgp->hdr.error_reply;
        }

        g_free(msgp);
    }

    return ret;
}

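/*
 * Dirty page tracking is not supported over vfio-user yet (no live migration
 * support), so both dirty-tracking callbacks simply fail with ENOTSUP.
 */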
static int
vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
                                  bool start, Error **errp)
{
    error_setg_errno(errp, ENOTSUP, "Not supported");
    return -ENOTSUP;
}

static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                        VFIOBitmap *vbmap, hwaddr iova,
                                        hwaddr size, Error **errp)
{
    error_setg_errno(errp, ENOTSUP, "Not supported");
    return -ENOTSUP;
}

static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
{
    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
                                                bcontainer);

    assert(container->proxy->dma_pgsizes != 0);
    bcontainer->pgsizes = container->proxy->dma_pgsizes;
    bcontainer->dma_max_mappings = container->proxy->max_dma;

    /* No live migration support yet. */
    bcontainer->dirty_pages_supported = false;
    bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
    bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;

    return true;
}

static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
                                                     Error **errp)
{
    VFIOUserContainer *container;

    container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
    container->proxy = vbasedev->proxy;
    return container;
}

/*
 * Try to mirror vfio_container_connect() as much as possible.
 */
static VFIOUserContainer *
vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
                            Error **errp)
{
    VFIOContainerBase *bcontainer;
    VFIOUserContainer *container;
    VFIOAddressSpace *space;
    VFIOIOMMUClass *vioc;
    int ret;

    space = vfio_address_space_get(as);

    container = vfio_user_create_container(vbasedev, errp);
    if (!container) {
        goto put_space_exit;
    }

    bcontainer = &container->bcontainer;

    ret = ram_block_uncoordinated_discard_disable(true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto free_container_exit;
    }

    vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    assert(vioc->setup);

    if (!vioc->setup(bcontainer, errp)) {
        goto enable_discards_exit;
    }

    vfio_address_space_insert(space, bcontainer);

    if (!vfio_listener_register(bcontainer, errp)) {
        goto listener_release_exit;
    }

    bcontainer->initialized = true;

    return container;

listener_release_exit:
    vfio_listener_unregister(bcontainer);
    if (vioc->release) {
        vioc->release(bcontainer);
    }

enable_discards_exit:
    ram_block_uncoordinated_discard_disable(false);

free_container_exit:
    object_unref(container);

put_space_exit:
    vfio_address_space_put(space);

    return NULL;
}

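/*
 * Tear down everything vfio_user_container_connect() set up.
 */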
static void vfio_user_container_disconnect(VFIOUserContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    VFIOAddressSpace *space = bcontainer->space;

    ram_block_uncoordinated_discard_disable(false);

    vfio_listener_unregister(bcontainer);
    if (vioc->release) {
        vioc->release(bcontainer);
    }

    object_unref(container);

    vfio_address_space_put(space);
}

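/*
 * Fetch the device info from the server and prepare the device. There is no
 * kernel device fd for vfio-user, so vbasedev->fd is set to -1; all access
 * goes over the proxy socket instead.
 */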
static bool vfio_user_device_get(VFIOUserContainer *container,
                                 VFIODevice *vbasedev, Error **errp)
{
    struct vfio_device_info info = { .argsz = sizeof(info) };

    if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
        return false;
    }

    vbasedev->fd = -1;

    vfio_device_prepare(vbasedev, &container->bcontainer, &info);

    return true;
}

/*
 * vfio_user_device_attach: attach a device to a new container.
 */
static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
                                    AddressSpace *as, Error **errp)
{
    VFIOUserContainer *container;

    container = vfio_user_container_connect(as, vbasedev, errp);
    if (container == NULL) {
        error_prepend(errp, "failed to connect proxy: ");
        return false;
    }

    return vfio_user_device_get(container, vbasedev, errp);
}

static void vfio_user_device_detach(VFIODevice *vbasedev)
{
    VFIOUserContainer *container = container_of(vbasedev->bcontainer,
                                                VFIOUserContainer, bcontainer);

    vfio_device_unprepare(vbasedev);

    vfio_user_container_disconnect(container);
}

static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    /* ->needs_reset is always false for vfio-user. */
    return 0;
}

static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->setup = vfio_user_setup;
    vioc->listener_begin = vfio_user_listener_begin;
    vioc->listener_commit = vfio_user_listener_commit;
    vioc->dma_map = vfio_user_dma_map;
    vioc->dma_unmap = vfio_user_dma_unmap;
    vioc->attach_device = vfio_user_device_attach;
    vioc->detach_device = vfio_user_device_detach;
    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
}

static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU_USER,
        .parent = TYPE_VFIO_IOMMU,
        .instance_size = sizeof(VFIOUserContainer),
        .class_init = vfio_iommu_user_class_init,
    },
};

DEFINE_TYPES(types)