/*
 * hw/vfio/cpr-legacy.c
 *
 * Copyright (c) 2021-2025 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "migration/blocker.h"
#include "migration/cpr.h"
#include "migration/migration.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"

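/*
 * Invalidate the host virtual address of every DMA mapping in the
 * container with a single VFIO_IOMMU_UNMAP_DMA call, using the VADDR
 * and ALL flags with iova = 0 and size = 0 to cover the whole address
 * space.  The iova mappings and pinned pages remain intact in the
 * kernel, so device DMA keeps working; only the stale vaddr is dropped
 * until the new process re-registers it.
 */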
static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
        .iova = 0,
        .size = 0,
    };

    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
        return false;
    }
    container->cpr.vaddr_unmapped = true;
    return true;
}

/*
 * Set the new @vaddr for any mappings registered during cpr load.
 * The incoming state is cleared thereafter.
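 *
 * VFIO_DMA_MAP_FLAG_VADDR tells the kernel to update the host virtual
 * address of an already-mapped iova range rather than create a new
 * mapping, so the DMA state established before CPR is preserved.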
 */
static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
                                   hwaddr iova, ram_addr_t size, void *vaddr,
                                   bool readonly, MemoryRegion *mr)
{
    const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
                                                  bcontainer);
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_VADDR,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    g_assert(cpr_is_incoming());

    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
        return -errno;
    }

    return 0;
}

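/*
 * MemoryListener callback used for CPR failure recovery: replay the
 * section through vfio_container_region_add() with its final boolean
 * argument (remap mode) set to true, so the diverted dma_map callback
 * restores the vaddr for every mapping in the section.
 */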
static void vfio_region_remap(MemoryListener *listener,
                              MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            cpr.remap_listener);
    vfio_container_region_add(&container->bcontainer, section, true);
}

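/*
 * CPR transfer requires a kernel that can update the vaddr of existing
 * mappings (VFIO_UPDATE_VADDR) and unmap all mappings in a single call
 * (VFIO_UNMAP_ALL); probe both via VFIO_CHECK_EXTENSION.
 */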
static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
{
    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
        error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
        return false;

    } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
        error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
        return false;

    } else {
        return true;
    }
}

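/*
 * Outgoing CPR: invalidate the vaddr of all mappings before state is
 * saved, so the kernel stops referencing the old process's virtual
 * addresses while the iova mappings and pinned pages are preserved for
 * the new process.
 */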
static int vfio_container_pre_save(void *opaque)
{
    VFIOContainer *container = opaque;
    Error *local_err = NULL;

    if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
        error_report_err(local_err);
        return -1;
    }
    return 0;
}

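/*
 * Incoming CPR: register the memory listener with dma_map temporarily
 * diverted to vfio_legacy_cpr_dma_map, so the sections replayed during
 * registration update the kernel's vaddr for mappings that already
 * exist, instead of trying to create them a second time.
 */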
static int vfio_container_post_load(void *opaque, int version_id)
{
    VFIOContainer *container = opaque;
    VFIOContainerBase *bcontainer = &container->bcontainer;
    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
    dma_map_fn saved_dma_map = vioc->dma_map;
    Error *local_err = NULL;

    /* During incoming CPR, divert calls to dma_map. */
    vioc->dma_map = vfio_legacy_cpr_dma_map;

    if (!vfio_listener_register(bcontainer, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    /* Restore the original dma_map function. */
    vioc->dma_map = saved_dma_map;

    return 0;
}

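/*
 * No guest-visible state is transferred; this vmstate exists to run
 * the pre_save and post_load hooks at the right points in the CPR
 * stream.  cpr_incoming_needed gates its inclusion.
 */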
static const VMStateDescription vfio_container_vmstate = {
    .name = "vfio-container",
    .version_id = 0,
    .minimum_version_id = 0,
    .priority = MIG_PRI_LOW,  /* Must happen after devices and groups */
    .pre_save = vfio_container_pre_save,
    .post_load = vfio_container_post_load,
    .needed = cpr_incoming_needed,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    }
};

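/*
 * If CPR transfer fails after pre_save has already unmapped the vaddrs,
 * the old QEMU resumes the guest and must first make its mappings
 * usable again.  MIG_EVENT_PRECOPY_FAILED triggers that recovery here.
 */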
static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
                                  MigrationEvent *e, Error **errp)
{
    VFIOContainer *container =
        container_of(notifier, VFIOContainer, cpr.transfer_notifier);
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (e->type != MIG_EVENT_PRECOPY_FAILED) {
        return 0;
    }

    if (container->cpr.vaddr_unmapped) {
        /*
         * Force a call to vfio_region_remap for each mapped section by
         * temporarily registering a listener, and temporarily diverting
         * dma_map to vfio_legacy_cpr_dma_map.  The latter restores vaddr.
         */

        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
        dma_map_fn saved_dma_map = vioc->dma_map;
        vioc->dma_map = vfio_legacy_cpr_dma_map;

        container->cpr.remap_listener = (MemoryListener) {
            .name = "vfio cpr recover",
            .region_add = vfio_region_remap
        };
        memory_listener_register(&container->cpr.remap_listener,
                                 bcontainer->space->as);
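        /*
         * Registration replays region_add for every section already in
         * the address space, which is all that is needed, so the
         * listener can be dropped immediately.
         */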
        memory_listener_unregister(&container->cpr.remap_listener);
        container->cpr.vaddr_unmapped = false;
        vioc->dma_map = saved_dma_map;
    }
    return 0;
}

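/*
 * Register CPR support for the container.  The reboot-mode notifier is
 * always added; for cpr-transfer mode, either the vmstate handlers and
 * the failure notifier are registered, or a migration blocker is added
 * when the kernel lacks the required VFIO extensions.
 */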
bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    Error **cpr_blocker = &container->cpr.blocker;

    migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
                                vfio_cpr_reboot_notifier,
                                MIG_MODE_CPR_REBOOT);

    if (!vfio_cpr_supported(container, cpr_blocker)) {
        return migrate_add_blocker_modes(cpr_blocker, errp,
                                         MIG_MODE_CPR_TRANSFER, -1) == 0;
    }

    vmstate_register(NULL, -1, &vfio_container_vmstate, container);

    migration_add_notifier_mode(&container->cpr.transfer_notifier,
                                vfio_cpr_fail_notifier,
                                MIG_MODE_CPR_TRANSFER);
    return true;
}

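/* Undo everything vfio_legacy_cpr_register_container set up. */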
void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
    migrate_del_blocker(&container->cpr.blocker);
    vmstate_unregister(NULL, &vfio_container_vmstate, container);
    migration_remove_notifier(&container->cpr.transfer_notifier);
}

/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mappings after
 * succeeding for others, leaving the latter without a vaddr.  Call this
 * to restore the vaddr for a section with a giommu.
 *
 * The giommu already exists.  Find it and replay it, which calls
 * vfio_legacy_cpr_dma_map further down the stack.
 */
void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
                           MemoryRegionSection *section)
{
    VFIOGuestIOMMU *giommu = NULL;
    hwaddr as_offset = section->offset_within_address_space;
    hwaddr iommu_offset = as_offset - section->offset_within_region;

    QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
        if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
            giommu->iommu_offset == iommu_offset) {
            break;
        }
    }
    g_assert(giommu);
    memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
}

/*
 * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mappings after
 * succeeding for others, leaving the latter without a vaddr.  Call this
 * to restore the vaddr for a section with a RamDiscardManager.
 *
 * The ram discard listener already exists.  Call its populate function
 * directly, which calls vfio_legacy_cpr_dma_map.
 */
bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl =
        vfio_find_ram_discard_listener(bcontainer, section);

    g_assert(vrdl);
    return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
}

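/*
 * Return a device fd for @name, reusing an fd preserved across CPR if
 * one was saved; otherwise open it via VFIO_GROUP_GET_DEVICE_FD on the
 * group fd @d and save it (keyed by name, id 0) for a future transfer.
 */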
int vfio_cpr_group_get_device_fd(int d, const char *name)
{
    const int id = 0;
    int fd = cpr_find_fd(name, id);

    if (fd < 0) {
        fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
        if (fd >= 0) {
            cpr_save_fd(name, id, fd);
        }
    }
    return fd;
}

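/* Two fds name the same device if fstat succeeds on both and st_dev matches. */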
static bool same_device(int fd1, int fd2)
{
    struct stat st1, st2;

    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
}

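/*
 * Return true if @fd refers to the same container as @container->fd,
 * closing and de-duplicating fds that arrived more than once over
 * SCM_RIGHTS.
 */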
bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
                              int fd)
{
    if (container->fd == fd) {
        return true;
    }
    if (!same_device(container->fd, fd)) {
        return false;
    }
    /*
     * Same device, different fd.  This occurs when the container fd is
     * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
     * produces duplicates.  De-dup it.
     */
    cpr_delete_fd("vfio_container_for_group", group->groupid);
    close(fd);
    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
    return true;
}
283