xref: /openbmc/qemu/hw/vfio/migration-multifd.c (revision 6380b0a02fbdac253b8a98b300398319ab655237)
1 /*
2  * Multifd VFIO migration
3  *
4  * Copyright (C) 2024,2025 Oracle and/or its affiliates.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  * SPDX-License-Identifier: GPL-2.0-or-later
10  */
11 
12 #include "qemu/osdep.h"
13 #include "hw/vfio/vfio-device.h"
14 #include "migration/misc.h"
15 #include "qapi/error.h"
16 #include "qemu/bswap.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/main-loop.h"
20 #include "qemu/thread.h"
21 #include "io/channel-buffer.h"
22 #include "migration/qemu-file.h"
23 #include "migration-multifd.h"
24 #include "vfio-migration-internal.h"
25 #include "trace.h"
26 #include "vfio-helpers.h"
27 
28 #define VFIO_DEVICE_STATE_CONFIG_STATE (1)
29 
30 #define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)
31 
32 typedef struct VFIODeviceStatePacket {
33     uint32_t version;
34     uint32_t idx;
35     uint32_t flags;
36     uint8_t data[0];
37 } QEMU_PACKED VFIODeviceStatePacket;
38 
39 bool vfio_load_config_after_iter(VFIODevice *vbasedev)
40 {
41     if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) {
42         return true;
43     } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) {
44         return false;
45     }
46 
47     assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO);
48     return vfio_arch_wants_loading_config_after_iter();
49 }
50 
/* Thin wrapper around GArray for type safety of the VFIOStateBuffer elements */
typedef struct VFIOStateBuffers {
    GArray *array;
} VFIOStateBuffers;
55 
/* A single received device state buffer, stored inside VFIOStateBuffers */
typedef struct VFIOStateBuffer {
    bool is_present; /* true once the payload for this index has arrived */
    char *data;      /* heap copy of the packet payload, g_free()d on clear */
    size_t len;      /* payload length in bytes */
} VFIOStateBuffer;
61 
/*
 * Per-device state of the multifd device state transfer, allocated by
 * vfio_multifd_new(). Fields are protected by load_bufs_mutex.
 */
typedef struct VFIOMultifd {
    bool load_bufs_thread_running;   /* load thread is alive */
    bool load_bufs_thread_want_exit; /* request the load thread to quit */

    bool load_bufs_iter_done;          /* DEV_CONFIG_LOAD_READY was received */
    QemuCond load_bufs_iter_done_cond; /* signalled when the flag above is set */

    VFIOStateBuffers load_bufs;              /* received device state buffers */
    QemuCond load_bufs_buffer_ready_cond;    /* signalled on buffer arrival */
    QemuCond load_bufs_thread_finished_cond; /* signalled on load thread exit */
    QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
    uint32_t load_buf_idx;      /* index of the next buffer to load */
    uint32_t load_buf_idx_last; /* index of the final (config state) buffer */
} VFIOMultifd;
76 
77 static void vfio_state_buffer_clear(gpointer data)
78 {
79     VFIOStateBuffer *lb = data;
80 
81     if (!lb->is_present) {
82         return;
83     }
84 
85     g_clear_pointer(&lb->data, g_free);
86     lb->is_present = false;
87 }
88 
89 static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
90 {
91     bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));
92     g_array_set_clear_func(bufs->array, vfio_state_buffer_clear);
93 }
94 
/* Drop the backing array; the clear func releases each element's payload */
static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
{
    g_clear_pointer(&bufs->array, g_array_unref);
}
99 
/* Sanity check that the buffers array has been initialized */
static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
{
    assert(bufs->array);
}
104 
/* Number of buffer slots currently allocated (not necessarily all filled) */
static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
{
    return bufs->array->len;
}
109 
/* Resize the slot array; new slots are zeroed (array created with clear_=TRUE) */
static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
                                        unsigned int size)
{
    g_array_set_size(bufs->array, size);
}
115 
/* Pointer to the buffer slot at @idx; @idx must be below the array size */
static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
                                              unsigned int idx)
{
    return &g_array_index(bufs->array, VFIOStateBuffer, idx);
}
121 
122 /* called with load_bufs_mutex locked */
123 static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
124                                           VFIODeviceStatePacket *packet,
125                                           size_t packet_total_size,
126                                           Error **errp)
127 {
128     VFIOMigration *migration = vbasedev->migration;
129     VFIOMultifd *multifd = migration->multifd;
130     VFIOStateBuffer *lb;
131 
132     vfio_state_buffers_assert_init(&multifd->load_bufs);
133     if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
134         vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
135     }
136 
137     lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
138     if (lb->is_present) {
139         error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
140                    vbasedev->name, packet->idx);
141         return false;
142     }
143 
144     assert(packet->idx >= multifd->load_buf_idx);
145 
146     lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
147     lb->len = packet_total_size - sizeof(*packet);
148     lb->is_present = true;
149 
150     return true;
151 }
152 
/*
 * Receive handler for a multifd device state packet: validate the
 * header, store the payload for the load thread and wake it up.
 *
 * Called outside BQL (asserted below).
 * Returns true on success, false (with *errp set) on failure.
 */
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
                                    Error **errp)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        error_setg(errp,
                   "%s: got device state packet but not doing multifd transfer",
                   vbasedev->name);
        return false;
    }

    assert(multifd);

    /* The packet must at least contain the full fixed-size header */
    if (data_size < sizeof(*packet)) {
        error_setg(errp, "%s: packet too short at %zu (min is %zu)",
                   vbasedev->name, data_size, sizeof(*packet));
        return false;
    }

    /* Header fields arrive big-endian; convert them in place */
    packet->version = be32_to_cpu(packet->version);
    if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
        error_setg(errp, "%s: packet has unknown version %" PRIu32,
                   vbasedev->name, packet->version);
        return false;
    }

    packet->idx = be32_to_cpu(packet->idx);
    packet->flags = be32_to_cpu(packet->flags);

    /* UINT32_MAX is reserved as the "last index unknown" sentinel */
    if (packet->idx == UINT32_MAX) {
        error_setg(errp, "%s: packet index is invalid", vbasedev->name);
        return false;
    }

    trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);

    /*
     * Holding BQL here would violate the lock order and can cause
     * a deadlock once we attempt to lock load_bufs_mutex below.
     */
    assert(!bql_locked());

    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        /* config state packet should be the last one in the stream */
        if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
            multifd->load_buf_idx_last = packet->idx;
        }

        if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
                                           errp)) {
            return false;
        }

        /* Wake the load thread in case it is waiting for this buffer */
        qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
    }

    return true;
}
215 
/*
 * Load the device config state stored in the final buffer (at
 * load_buf_idx == load_buf_idx_last). The raw data is replayed through
 * an in-memory QEMUFile so the regular stream parser
 * vfio_load_device_config_state() can consume it.
 *
 * Called from the load thread with load_bufs_mutex held.
 * Returns true on success, false (with *errp set) on failure.
 */
static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
                                              Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIOStateBuffer *lb;
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
    uint64_t mig_header;
    int ret;

    /* Only called once all other buffers have been loaded */
    assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
    lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
    assert(lb->is_present);

    /* Copy the raw config data into a memory-backed channel... */
    bioc = qio_channel_buffer_new(lb->len);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");

    f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
    qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);

    ret = qemu_fflush(f_out);
    if (ret) {
        error_setg(errp, "%s: load config state flush failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    /* ...then rewind and re-read it as an input stream */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    f_in = qemu_file_new_input(QIO_CHANNEL(bioc));

    /* The stream must start with the device config state marker */
    mig_header = qemu_get_be64(f_in);
    if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
        error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
                   vbasedev->name, mig_header);
        return false;
    }

    /*
     * Loading the config state needs BQL; taking it here is fine since
     * the lock order is load_bufs_mutex -> BQL.
     */
    bql_lock();
    ret = vfio_load_device_config_state(f_in, vbasedev);
    bql_unlock();

    if (ret < 0) {
        error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    return true;
}
266 
267 static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
268 {
269     VFIOStateBuffer *lb;
270     unsigned int bufs_len;
271 
272     bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
273     if (multifd->load_buf_idx >= bufs_len) {
274         assert(multifd->load_buf_idx == bufs_len);
275         return NULL;
276     }
277 
278     lb = vfio_state_buffers_at(&multifd->load_bufs,
279                                multifd->load_buf_idx);
280     if (!lb->is_present) {
281         return NULL;
282     }
283 
284     return lb;
285 }
286 
/*
 * Write the payload of state buffer @lb to the device's migration
 * data_fd, taking ownership of the payload.
 *
 * Called with load_bufs_mutex held; the lock is dropped around each
 * write() since loading data into the device can take a while.
 *
 * Returns true on success, false (with *errp set) on write failure.
 */
static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
                                         VFIOStateBuffer *lb,
                                         Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    g_autofree char *buf = NULL;
    char *buf_cur;
    size_t buf_len;

    /* Nothing to write for an empty payload */
    if (!lb->len) {
        return true;
    }

    trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
                                                   multifd->load_buf_idx);

    /* lb might become re-allocated when we drop the lock */
    buf = g_steal_pointer(&lb->data);
    buf_cur = buf;
    buf_len = lb->len;
    /* Loop until the whole payload is written, handling partial writes */
    while (buf_len > 0) {
        ssize_t wr_ret;
        int errno_save;

        /*
         * Loading data to the device takes a while,
         * drop the lock during this process.
         */
        qemu_mutex_unlock(&multifd->load_bufs_mutex);
        wr_ret = write(migration->data_fd, buf_cur, buf_len);
        errno_save = errno; /* save before later calls can clobber errno */
        qemu_mutex_lock(&multifd->load_bufs_mutex);

        if (wr_ret < 0) {
            error_setg(errp,
                       "%s: writing state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, multifd->load_buf_idx, errno_save);
            return false;
        }

        assert(wr_ret <= buf_len);
        buf_len -= wr_ret;
        buf_cur += wr_ret;
    }

    trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
                                                 multifd->load_buf_idx);

    return true;
}
338 
339 static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
340                                             bool *should_quit)
341 {
342     return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
343 }
344 
/*
 * This thread is spawned by vfio_multifd_switchover_start() which gets
 * called upon encountering the switchover point marker in main migration
 * stream.
 *
 * It exits after either:
 * * completing loading the remaining device state and device config, OR:
 * * encountering some error while doing the above, OR:
 * * being forcefully aborted by the migration core by it setting should_quit
 *   or by vfio_load_cleanup_load_bufs_thread() setting
 *   multifd->load_bufs_thread_want_exit.
 */
static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    bool ret = false;

    trace_vfio_load_bufs_thread_start(vbasedev->name);

    assert(multifd);
    /* Held for the whole thread; dropped only inside waits and writes */
    QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);

    assert(multifd->load_bufs_thread_running);

    while (true) {
        VFIOStateBuffer *lb;

        /*
         * Always check cancellation first after the buffer_ready wait below in
         * case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
         */
        if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);

        /* Wait until the next buffer in sequence has been received */
        lb = vfio_load_state_buffer_get(multifd);
        if (!lb) {
            trace_vfio_load_state_device_buffer_starved(vbasedev->name,
                                                        multifd->load_buf_idx);
            qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
                           &multifd->load_bufs_mutex);
            continue;
        }

        /* The last buffer holds the config state, loaded after this loop */
        if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
            break;
        }

        if (multifd->load_buf_idx == 0) {
            trace_vfio_load_state_device_buffer_start(vbasedev->name);
        }

        if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
            goto thread_exit;
        }

        if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
            trace_vfio_load_state_device_buffer_end(vbasedev->name);
        }

        multifd->load_buf_idx++;
    }

    /* If required, wait for DEV_CONFIG_LOAD_READY before the config load */
    if (vfio_load_config_after_iter(vbasedev)) {
        while (!multifd->load_bufs_iter_done) {
            qemu_cond_wait(&multifd->load_bufs_iter_done_cond,
                           &multifd->load_bufs_mutex);

            /*
             * Need to re-check cancellation immediately after wait in case
             * cond was signalled by vfio_load_cleanup_load_bufs_thread().
             */
            if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
                error_setg(errp, "operation cancelled");
                goto thread_exit;
            }
        }
    }

    if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
        goto thread_exit;
    }

    ret = true;

thread_exit:
    /*
     * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
     * this thread is exiting.
     */
    multifd->load_bufs_thread_running = false;
    qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);

    trace_vfio_load_bufs_thread_end(vbasedev->name);

    return ret;
}
447 
/*
 * Handler for the DEV_CONFIG_LOAD_READY message in the main migration
 * stream: lets the load thread proceed with loading the device config
 * state (only valid when vfio_load_config_after_iter() is in effect).
 *
 * Called with BQL held.
 * Returns 0 on success, -EINVAL on a protocol violation.
 */
int vfio_load_state_config_load_ready(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    int ret = 0;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer",
                     vbasedev->name);
        return -EINVAL;
    }

    if (!vfio_load_config_after_iter(vbasedev)) {
        error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled",
                     vbasedev->name);
        return -EINVAL;
    }

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        if (multifd->load_bufs_iter_done) {
            /* Can't print error here as we're outside BQL */
            ret = -EINVAL;
            break;
        }

        multifd->load_bufs_iter_done = true;
        /* Wake the load thread waiting on this condition */
        qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
    }
    bql_lock();

    if (ret) {
        error_report("%s: duplicate DEV_CONFIG_LOAD_READY",
                     vbasedev->name);
    }

    return ret;
}
489 
490 static VFIOMultifd *vfio_multifd_new(void)
491 {
492     VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
493 
494     vfio_state_buffers_init(&multifd->load_bufs);
495 
496     qemu_mutex_init(&multifd->load_bufs_mutex);
497 
498     multifd->load_buf_idx = 0;
499     multifd->load_buf_idx_last = UINT32_MAX;
500     qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
501 
502     multifd->load_bufs_iter_done = false;
503     qemu_cond_init(&multifd->load_bufs_iter_done_cond);
504 
505     multifd->load_bufs_thread_running = false;
506     multifd->load_bufs_thread_want_exit = false;
507     qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
508 
509     return multifd;
510 }
511 
/*
 * Terminates vfio_load_bufs_thread by setting
 * multifd->load_bufs_thread_want_exit and signalling all the conditions
 * the thread could be blocked on.
 *
 * Waits for the thread to signal that it had finished.
 */
static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
{
    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        while (multifd->load_bufs_thread_running) {
            multifd->load_bufs_thread_want_exit = true;

            /* Wake the thread from whichever cond it may be waiting on */
            qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
            qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
            qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
                           &multifd->load_bufs_mutex);
        }
    }
    bql_lock();
}
535 
/* Free a VFIOMultifd: stop the load thread first, then release resources */
static void vfio_multifd_free(VFIOMultifd *multifd)
{
    /* The load thread must be gone before destroying its sync objects */
    vfio_load_cleanup_load_bufs_thread(multifd);

    qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
    qemu_cond_destroy(&multifd->load_bufs_iter_done_cond);
    vfio_state_buffers_destroy(&multifd->load_bufs);
    qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
    qemu_mutex_destroy(&multifd->load_bufs_mutex);

    g_free(multifd);
}
548 
549 void vfio_multifd_cleanup(VFIODevice *vbasedev)
550 {
551     VFIOMigration *migration = vbasedev->migration;
552 
553     g_clear_pointer(&migration->multifd, vfio_multifd_free);
554 }
555 
/* Multifd transfer needs both core device state support and switchover-start */
bool vfio_multifd_transfer_supported(void)
{
    if (!multifd_device_state_supported()) {
        return false;
    }

    return migrate_send_switchover_start();
}
561 
562 bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
563 {
564     VFIOMigration *migration = vbasedev->migration;
565 
566     return migration->multifd_transfer;
567 }
568 
569 bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
570 {
571     VFIOMigration *migration = vbasedev->migration;
572 
573     /*
574      * Make a copy of this setting at the start in case it is changed
575      * mid-migration.
576      */
577     if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
578         migration->multifd_transfer = vfio_multifd_transfer_supported();
579     } else {
580         migration->multifd_transfer =
581             vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
582     }
583 
584     if (!vfio_multifd_transfer_enabled(vbasedev)) {
585         /* Nothing further to check or do */
586         return true;
587     }
588 
589     if (!vfio_multifd_transfer_supported()) {
590         error_setg(errp,
591                    "%s: Multifd device transfer requested but unsupported in the current config",
592                    vbasedev->name);
593         return false;
594     }
595 
596     if (alloc_multifd) {
597         assert(!migration->multifd);
598         migration->multifd = vfio_multifd_new();
599     }
600 
601     return true;
602 }
603 
/*
 * Write an END_OF_STATE marker on the main migration channel @f for a
 * device whose real state goes over the multifd channels.
 */
void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
{
    assert(vfio_multifd_transfer_enabled(vbasedev));

    /*
     * Emit dummy NOP data on the main migration channel since the actual
     * device state transfer is done via multifd channels.
     */
    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
}
614 
/*
 * Serialize the device config state into a memory buffer and queue it
 * on the multifd channels as the stream-terminating packet (flags
 * VFIO_DEVICE_STATE_CONFIG_STATE) at buffer index @idx.
 *
 * Returns true on success, false (with *errp set) on failure.
 */
static bool
vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
                                               char *idstr,
                                               uint32_t instance_id,
                                               uint32_t idx,
                                               Error **errp)
{
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f = NULL;
    int ret;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    size_t packet_len;

    /* Capture the config state into a memory-backed QEMUFile */
    bioc = qio_channel_buffer_new(0);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");

    f = qemu_file_new_output(QIO_CHANNEL(bioc));

    if (vfio_save_device_config_state(f, vbasedev, errp)) {
        return false;
    }

    ret = qemu_fflush(f);
    if (ret) {
        error_setg(errp, "%s: save config state flush failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    /* Wrap the serialized config data in a device state packet */
    packet_len = sizeof(*packet) + bioc->usage;
    packet = g_malloc0(packet_len);
    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
    packet->idx = cpu_to_be32(idx);
    packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE);
    memcpy(&packet->data, bioc->data, bioc->usage);

    if (!multifd_queue_device_state(idstr, instance_id,
                                    (char *)packet, packet_len)) {
        error_setg(errp, "%s: multifd config data queuing failed",
                   vbasedev->name);
        return false;
    }

    vfio_migration_add_bytes_transferred(packet_len);

    return true;
}
662 
663 /*
664  * This thread is spawned by the migration core directly via
665  * .save_complete_precopy_thread SaveVMHandler.
666  *
667  * It exits after either:
668  * * completing saving the remaining device state and device config, OR:
669  * * encountering some error while doing the above, OR:
670  * * being forcefully aborted by the migration core by
671  *   multifd_device_state_save_thread_should_exit() returning true.
672  */
/*
 * Save the remaining device state in data_buffer_size sized chunks as
 * multifd device state packets, then queue the config state packet last.
 *
 * Returns true on success (or when multifd transfer is not in use),
 * false (with *errp set) otherwise.
 */
bool
vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d,
                                          Error **errp)
{
    VFIODevice *vbasedev = d->handler_opaque;
    VFIOMigration *migration = vbasedev->migration;
    bool ret = false;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    uint32_t idx;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
        return true;
    }

    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
                                                  d->idstr, d->instance_id);

    /* We reach here with device state STOP or STOP_COPY only */
    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                 VFIO_DEVICE_STATE_STOP, errp)) {
        goto thread_exit;
    }

    /* One packet is reused: only idx and the payload change per iteration */
    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);

    for (idx = 0; ; idx++) {
        ssize_t data_size;
        size_t packet_size;

        if (multifd_device_state_save_thread_should_exit()) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        /* Read the next chunk of device state; 0 means no data left */
        data_size = read(migration->data_fd, &packet->data,
                         migration->data_buffer_size);
        if (data_size < 0) {
            error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, idx, errno);
            goto thread_exit;
        } else if (data_size == 0) {
            break;
        }

        packet->idx = cpu_to_be32(idx);
        packet_size = sizeof(*packet) + data_size;

        if (!multifd_queue_device_state(d->idstr, d->instance_id,
                                        (char *)packet, packet_size)) {
            error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
            goto thread_exit;
        }

        vfio_migration_add_bytes_transferred(packet_size);
    }

    /* The config state packet goes last, at the next free index */
    if (!vfio_save_complete_precopy_thread_config_state(vbasedev,
                                                        d->idstr,
                                                        d->instance_id,
                                                        idx, errp)) {
        goto thread_exit;
    }

    ret = true;

thread_exit:
    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);

    return ret;
}
745 
/*
 * Start the buffer load thread upon encountering the switchover point
 * marker in the main migration stream.
 *
 * Called with BQL held. Always returns 0.
 */
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        /* Mark the thread running before it is actually spawned below */
        assert(!multifd->load_bufs_thread_running);
        multifd->load_bufs_thread_running = true;
    }
    bql_lock();

    qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);

    return 0;
}
765