xref: /openbmc/qemu/hw/vfio/cpr-legacy.c (revision 565d591f719d05763544a5d929de3a40c903b3ea)
1 /*
2  * Copyright (c) 2021-2025 Oracle and/or its affiliates.
3  *
4  * SPDX-License-Identifier: GPL-2.0-or-later
5  */
6 
7 #include <sys/ioctl.h>
8 #include <linux/vfio.h>
9 #include "qemu/osdep.h"
10 #include "hw/vfio/vfio-container.h"
11 #include "hw/vfio/vfio-device.h"
12 #include "hw/vfio/vfio-listener.h"
13 #include "migration/blocker.h"
14 #include "migration/cpr.h"
15 #include "migration/migration.h"
16 #include "migration/vmstate.h"
17 #include "qapi/error.h"
18 #include "qemu/error-report.h"
19 
20 static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
21 {
22     struct vfio_iommu_type1_dma_unmap unmap = {
23         .argsz = sizeof(unmap),
24         .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
25         .iova = 0,
26         .size = 0,
27     };
28     if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
29         error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
30         return false;
31     }
32     container->cpr.vaddr_unmapped = true;
33     return true;
34 }
35 
36 /*
37  * Set the new @vaddr for any mappings registered during cpr load.
38  * The incoming state is cleared thereafter.
39  */
40 static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
41                                    hwaddr iova, ram_addr_t size, void *vaddr,
42                                    bool readonly, MemoryRegion *mr)
43 {
44     const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
45                                                   bcontainer);
46     struct vfio_iommu_type1_dma_map map = {
47         .argsz = sizeof(map),
48         .flags = VFIO_DMA_MAP_FLAG_VADDR,
49         .vaddr = (__u64)(uintptr_t)vaddr,
50         .iova = iova,
51         .size = size,
52     };
53 
54     g_assert(cpr_is_incoming());
55 
56     if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
57         return -errno;
58     }
59 
60     return 0;
61 }
62 
63 static void vfio_region_remap(MemoryListener *listener,
64                               MemoryRegionSection *section)
65 {
66     VFIOContainer *container = container_of(listener, VFIOContainer,
67                                             cpr.remap_listener);
68     vfio_container_region_add(&container->bcontainer, section, true);
69 }
70 
71 static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
72 {
73     if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
74         error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
75         return false;
76 
77     } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
78         error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
79         return false;
80 
81     } else {
82         return true;
83     }
84 }
85 
86 static int vfio_container_pre_save(void *opaque)
87 {
88     VFIOContainer *container = opaque;
89     Error *local_err = NULL;
90 
91     if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
92         error_report_err(local_err);
93         return -1;
94     }
95     return 0;
96 }
97 
98 static int vfio_container_post_load(void *opaque, int version_id)
99 {
100     VFIOContainer *container = opaque;
101     VFIOContainerBase *bcontainer = &container->bcontainer;
102     VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
103     dma_map_fn saved_dma_map = vioc->dma_map;
104     Error *local_err = NULL;
105 
106     /* During incoming CPR, divert calls to dma_map. */
107     vioc->dma_map = vfio_legacy_cpr_dma_map;
108 
109     if (!vfio_listener_register(bcontainer, &local_err)) {
110         error_report_err(local_err);
111         return -1;
112     }
113 
114     /* Restore original dma_map function */
115     vioc->dma_map = saved_dma_map;
116 
117     return 0;
118 }
119 
120 static const VMStateDescription vfio_container_vmstate = {
121     .name = "vfio-container",
122     .version_id = 0,
123     .minimum_version_id = 0,
124     .priority = MIG_PRI_LOW,  /* Must happen after devices and groups */
125     .pre_save = vfio_container_pre_save,
126     .post_load = vfio_container_post_load,
127     .needed = cpr_incoming_needed,
128     .fields = (VMStateField[]) {
129         VMSTATE_END_OF_LIST()
130     }
131 };
132 
133 static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
134                                   MigrationEvent *e, Error **errp)
135 {
136     VFIOContainer *container =
137         container_of(notifier, VFIOContainer, cpr.transfer_notifier);
138     VFIOContainerBase *bcontainer = &container->bcontainer;
139 
140     if (e->type != MIG_EVENT_PRECOPY_FAILED) {
141         return 0;
142     }
143 
144     if (container->cpr.vaddr_unmapped) {
145         /*
146          * Force a call to vfio_region_remap for each mapped section by
147          * temporarily registering a listener, and temporarily diverting
148          * dma_map to vfio_legacy_cpr_dma_map.  The latter restores vaddr.
149          */
150 
151         VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
152         dma_map_fn saved_dma_map = vioc->dma_map;
153         vioc->dma_map = vfio_legacy_cpr_dma_map;
154 
155         container->cpr.remap_listener = (MemoryListener) {
156             .name = "vfio cpr recover",
157             .region_add = vfio_region_remap
158         };
159         memory_listener_register(&container->cpr.remap_listener,
160                                  bcontainer->space->as);
161         memory_listener_unregister(&container->cpr.remap_listener);
162         container->cpr.vaddr_unmapped = false;
163         vioc->dma_map = saved_dma_map;
164     }
165     return 0;
166 }
167 
168 bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
169 {
170     VFIOContainerBase *bcontainer = &container->bcontainer;
171     Error **cpr_blocker = &container->cpr.blocker;
172 
173     migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
174                                 vfio_cpr_reboot_notifier,
175                                 MIG_MODE_CPR_REBOOT);
176 
177     if (!vfio_cpr_supported(container, cpr_blocker)) {
178         return migrate_add_blocker_modes(cpr_blocker, errp,
179                                          MIG_MODE_CPR_TRANSFER, -1) == 0;
180     }
181 
182     vfio_cpr_add_kvm_notifier();
183 
184     vmstate_register(NULL, -1, &vfio_container_vmstate, container);
185 
186     migration_add_notifier_mode(&container->cpr.transfer_notifier,
187                                 vfio_cpr_fail_notifier,
188                                 MIG_MODE_CPR_TRANSFER);
189     return true;
190 }
191 
192 void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
193 {
194     VFIOContainerBase *bcontainer = &container->bcontainer;
195 
196     migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
197     migrate_del_blocker(&container->cpr.blocker);
198     vmstate_unregister(NULL, &vfio_container_vmstate, container);
199     migration_remove_notifier(&container->cpr.transfer_notifier);
200 }
201 
202 /*
203  * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
204  * succeeding for others, so the latter have lost their vaddr.  Call this
205  * to restore vaddr for a section with a giommu.
206  *
207  * The giommu already exists.  Find it and replay it, which calls
208  * vfio_legacy_cpr_dma_map further down the stack.
209  */
210 void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
211                            MemoryRegionSection *section)
212 {
213     VFIOGuestIOMMU *giommu = NULL;
214     hwaddr as_offset = section->offset_within_address_space;
215     hwaddr iommu_offset = as_offset - section->offset_within_region;
216 
217     QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
218         if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
219             giommu->iommu_offset == iommu_offset) {
220             break;
221         }
222     }
223     g_assert(giommu);
224     memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
225 }
226 
227 /*
228  * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
229  * succeeding for others, so the latter have lost their vaddr.  Call this
230  * to restore vaddr for a section with a RamDiscardManager.
231  *
232  * The ram discard listener already exists.  Call its populate function
233  * directly, which calls vfio_legacy_cpr_dma_map.
234  */
235 bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
236                                             MemoryRegionSection *section)
237 {
238     VFIORamDiscardListener *vrdl =
239         vfio_find_ram_discard_listener(bcontainer, section);
240 
241     g_assert(vrdl);
242     return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
243 }
244 
245 int vfio_cpr_group_get_device_fd(int d, const char *name)
246 {
247     const int id = 0;
248     int fd = cpr_find_fd(name, id);
249 
250     if (fd < 0) {
251         fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
252         if (fd >= 0) {
253             cpr_save_fd(name, id, fd);
254         }
255     }
256     return fd;
257 }
258 
/*
 * Return true if fstat() succeeds on both @fd1 and @fd2 and reports the
 * same st_dev for them.  Any fstat() failure yields false.
 */
static bool same_device(int fd1, int fd2)
{
    struct stat sb1, sb2;

    if (fstat(fd1, &sb1) != 0) {
        return false;
    }
    if (fstat(fd2, &sb2) != 0) {
        return false;
    }

    return sb1.st_dev == sb2.st_dev;
}
265 
266 bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
267                               int fd)
268 {
269     if (container->fd == fd) {
270         return true;
271     }
272     if (!same_device(container->fd, fd)) {
273         return false;
274     }
275     /*
276      * Same device, different fd.  This occurs when the container fd is
277      * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
278      * produces duplicates.  De-dup it.
279      */
280     cpr_delete_fd("vfio_container_for_group", group->groupid);
281     close(fd);
282     cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
283     return true;
284 }
285