/*
 * Multifd VFIO migration
 *
 * Copyright (C) 2024,2025 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
#include "qapi/error.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/thread.h"
#include "io/channel-buffer.h"
#include "migration/qemu-file.h"
#include "migration-multifd.h"
#include "vfio-migration-internal.h"
#include "trace.h"
#include "vfio-helpers.h"

#define VFIO_DEVICE_STATE_CONFIG_STATE (1)

#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)

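/*
 * A device state packet as carried over a multifd channel: a small
 * big-endian header followed by a variable-sized chunk of device state.
 * The receiver uses "idx" to reassemble the chunks in order.
 */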
typedef struct VFIODeviceStatePacket {
    uint32_t version;
    uint32_t idx;
    uint32_t flags;
    uint8_t data[0];
} QEMU_PACKED VFIODeviceStatePacket;

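/*
 * Returns whether the destination should delay loading the device config
 * state until after all iterable device state has been loaded.
 *
 * Honors the device's migration_load_config_after_iter setting and falls
 * back to the architecture default when it is left on AUTO.
 */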
bool vfio_load_config_after_iter(VFIODevice *vbasedev)
{
    if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) {
        return true;
    } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) {
        return false;
    }

    assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO);
    return vfio_arch_wants_loading_config_after_iter();
}

/* type safety */
typedef struct VFIOStateBuffers {
    GArray *array;
} VFIOStateBuffers;

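/* A single queued chunk of received device state, indexed by packet idx */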
typedef struct VFIOStateBuffer {
    bool is_present;
    char *data;
    size_t len;
} VFIOStateBuffer;

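/*
 * Per-device destination-side state for multifd device state transfer:
 * the queue of received-but-not-yet-loaded buffers, bookkeeping for the
 * buffer loading thread, and the condition variables used to coordinate
 * with the main migration stream.
 */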
typedef struct VFIOMultifd {
    bool load_bufs_thread_running;
    bool load_bufs_thread_want_exit;

    bool load_bufs_iter_done;
    QemuCond load_bufs_iter_done_cond;

    VFIOStateBuffers load_bufs;
    QemuCond load_bufs_buffer_ready_cond;
    QemuCond load_bufs_thread_finished_cond;
    QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
    uint32_t load_buf_idx;
    uint32_t load_buf_idx_last;
    size_t load_buf_queued_pending_buffers_size;
} VFIOMultifd;

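/*
 * GArray-backed container of VFIOStateBuffer entries.  The clear function
 * below frees an element's data when the array is resized or destroyed.
 */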
static void vfio_state_buffer_clear(gpointer data)
{
    VFIOStateBuffer *lb = data;

    if (!lb->is_present) {
        return;
    }

    g_clear_pointer(&lb->data, g_free);
    lb->is_present = false;
}

static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
{
    bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));
    g_array_set_clear_func(bufs->array, vfio_state_buffer_clear);
}

static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
{
    g_clear_pointer(&bufs->array, g_array_unref);
}

static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
{
    assert(bufs->array);
}

static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
{
    return bufs->array->len;
}

static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
                                        unsigned int size)
{
    g_array_set_size(bufs->array, size);
}

static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
                                              unsigned int idx)
{
    return &g_array_index(bufs->array, VFIOStateBuffer, idx);
}

/* called with load_bufs_mutex locked */
static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
                                          VFIODeviceStatePacket *packet,
                                          size_t packet_total_size,
                                          Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIOStateBuffer *lb;
    size_t data_size = packet_total_size - sizeof(*packet);

    vfio_state_buffers_assert_init(&multifd->load_bufs);
    if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
        vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
    }

    lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
    if (lb->is_present) {
        error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
                   vbasedev->name, packet->idx);
        return false;
    }

    assert(packet->idx >= multifd->load_buf_idx);

    multifd->load_buf_queued_pending_buffers_size += data_size;
    if (multifd->load_buf_queued_pending_buffers_size >
        vbasedev->migration_max_queued_buffers_size) {
        error_setg(errp,
                   "%s: queuing state buffer %" PRIu32
                   " would exceed the size max of %" PRIu64,
                   vbasedev->name, packet->idx,
                   vbasedev->migration_max_queued_buffers_size);
        return false;
    }

    lb->data = g_memdup2(&packet->data, data_size);
    lb->len = data_size;
    lb->is_present = true;

    return true;
}

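/*
 * Entry point for each received device state packet.
 *
 * Validates the packet header (version, index), converts the header fields
 * from big endian and queues the payload under load_bufs_mutex so that the
 * buffer loading thread can write it to the device in order.
 */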
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
                                    Error **errp)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        error_setg(errp,
                   "%s: got device state packet but not doing multifd transfer",
                   vbasedev->name);
        return false;
    }

    assert(multifd);

    if (data_size < sizeof(*packet)) {
        error_setg(errp, "%s: packet too short at %zu (min is %zu)",
                   vbasedev->name, data_size, sizeof(*packet));
        return false;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
        error_setg(errp, "%s: packet has unknown version %" PRIu32,
                   vbasedev->name, packet->version);
        return false;
    }

    packet->idx = be32_to_cpu(packet->idx);
    packet->flags = be32_to_cpu(packet->flags);

    if (packet->idx == UINT32_MAX) {
        error_setg(errp, "%s: packet index is invalid", vbasedev->name);
        return false;
    }

    trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);

    /*
     * Holding BQL here would violate the lock order and can cause
     * a deadlock once we attempt to lock load_bufs_mutex below.
     */
    assert(!bql_locked());

    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        /* config state packet should be the last one in the stream */
        if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
            multifd->load_buf_idx_last = packet->idx;
        }

        if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
                                           errp)) {
            return false;
        }

        qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
    }

    return true;
}

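/*
 * Loads the device config state from the final queued buffer.
 *
 * The buffer is replayed through an in-memory QIOChannelBuffer/QEMUFile pair
 * so that vfio_load_device_config_state() can parse it like a regular
 * migration stream; the actual load is done under the BQL.
 *
 * Called from the buffer loading thread with load_bufs_mutex held.
 */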
static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
                                              Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIOStateBuffer *lb;
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
    uint64_t mig_header;
    int ret;

    assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
    lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
    assert(lb->is_present);

    bioc = qio_channel_buffer_new(lb->len);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");

    f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
    qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);

    ret = qemu_fflush(f_out);
    if (ret) {
        error_setg(errp, "%s: load config state flush failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    f_in = qemu_file_new_input(QIO_CHANNEL(bioc));

    mig_header = qemu_get_be64(f_in);
    if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
        error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
                   vbasedev->name, mig_header);
        return false;
    }

    bql_lock();
    ret = vfio_load_device_config_state(f_in, vbasedev);
    bql_unlock();

    if (ret < 0) {
        error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    return true;
}

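/*
 * Returns the next in-order state buffer if it has already been received,
 * or NULL if the buffer loading thread has to wait for more data.
 *
 * Called with load_bufs_mutex held.
 */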
static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
{
    VFIOStateBuffer *lb;
    unsigned int bufs_len;

    bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
    if (multifd->load_buf_idx >= bufs_len) {
        assert(multifd->load_buf_idx == bufs_len);
        return NULL;
    }

    lb = vfio_state_buffers_at(&multifd->load_bufs,
                               multifd->load_buf_idx);
    if (!lb->is_present) {
        return NULL;
    }

    return lb;
}

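/*
 * Writes a single queued state buffer to the device via its migration
 * data_fd, dropping load_bufs_mutex for the duration of each write() call.
 *
 * Called with load_bufs_mutex held; the buffer data is stolen from the
 * queue entry first since that entry may be reallocated while the lock is
 * dropped.
 */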
static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
                                         VFIOStateBuffer *lb,
                                         Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    g_autofree char *buf = NULL;
    char *buf_cur;
    size_t buf_len;

    if (!lb->len) {
        return true;
    }

    trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
                                                   multifd->load_buf_idx);

    /* lb might become re-allocated when we drop the lock */
    buf = g_steal_pointer(&lb->data);
    buf_cur = buf;
    buf_len = lb->len;
    while (buf_len > 0) {
        ssize_t wr_ret;
        int errno_save;

        /*
         * Loading data to the device takes a while,
         * drop the lock during this process.
         */
        qemu_mutex_unlock(&multifd->load_bufs_mutex);
        wr_ret = write(migration->data_fd, buf_cur, buf_len);
        errno_save = errno;
        qemu_mutex_lock(&multifd->load_bufs_mutex);

        if (wr_ret < 0) {
            error_setg(errp,
                       "%s: writing state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, multifd->load_buf_idx, errno_save);
            return false;
        }

        assert(wr_ret <= buf_len);
        buf_len -= wr_ret;
        buf_cur += wr_ret;

        assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
        multifd->load_buf_queued_pending_buffers_size -= wr_ret;
    }

    trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
                                                 multifd->load_buf_idx);

    return true;
}

static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
                                            bool *should_quit)
{
    return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
}

/*
 * This thread is spawned by vfio_multifd_switchover_start() which gets
 * called upon encountering the switchover point marker in the main
 * migration stream.
 *
 * It exits after either:
 * * completing loading the remaining device state and device config, OR:
 * * encountering some error while doing the above, OR:
 * * being forcefully aborted by the migration core by it setting should_quit
 *   or by vfio_load_cleanup_load_bufs_thread() setting
 *   multifd->load_bufs_thread_want_exit.
 */
static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    bool ret = false;

    trace_vfio_load_bufs_thread_start(vbasedev->name);

    assert(multifd);
    QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);

    assert(multifd->load_bufs_thread_running);

    while (true) {
        VFIOStateBuffer *lb;

        /*
         * Always check cancellation first after the buffer_ready wait below in
         * case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
         */
        if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);

        lb = vfio_load_state_buffer_get(multifd);
        if (!lb) {
            trace_vfio_load_state_device_buffer_starved(vbasedev->name,
                                                        multifd->load_buf_idx);
            qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
                           &multifd->load_bufs_mutex);
            continue;
        }

        if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
            break;
        }

        if (multifd->load_buf_idx == 0) {
            trace_vfio_load_state_device_buffer_start(vbasedev->name);
        }

        if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
            goto thread_exit;
        }

        if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
            trace_vfio_load_state_device_buffer_end(vbasedev->name);
        }

        multifd->load_buf_idx++;
    }

    if (vfio_load_config_after_iter(vbasedev)) {
        while (!multifd->load_bufs_iter_done) {
            qemu_cond_wait(&multifd->load_bufs_iter_done_cond,
                           &multifd->load_bufs_mutex);

            /*
             * Need to re-check cancellation immediately after wait in case
             * cond was signalled by vfio_load_cleanup_load_bufs_thread().
             */
            if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
                error_setg(errp, "operation cancelled");
                goto thread_exit;
            }
        }
    }

    if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
        goto thread_exit;
    }

    ret = true;

thread_exit:
    /*
     * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
     * this thread is exiting.
     */
    multifd->load_bufs_thread_running = false;
    qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);

    trace_vfio_load_bufs_thread_end(vbasedev->name);

    return ret;
}

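/*
 * Handles the DEV_CONFIG_LOAD_READY marker received on the main migration
 * stream: signals the buffer loading thread (which may be waiting on
 * load_bufs_iter_done_cond when config loading is deferred) that it can now
 * load the device config state.
 *
 * Called with the BQL held.
 */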
int vfio_load_state_config_load_ready(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    int ret = 0;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer",
                     vbasedev->name);
        return -EINVAL;
    }

    if (!vfio_load_config_after_iter(vbasedev)) {
        error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled",
                     vbasedev->name);
        return -EINVAL;
    }

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        if (multifd->load_bufs_iter_done) {
            /* Can't print error here as we're outside BQL */
            ret = -EINVAL;
            break;
        }

        multifd->load_bufs_iter_done = true;
        qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
    }
    bql_lock();

    if (ret) {
        error_report("%s: duplicate DEV_CONFIG_LOAD_READY",
                     vbasedev->name);
    }

    return ret;
}

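/* Allocates and initializes the destination-side multifd state */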
static VFIOMultifd *vfio_multifd_new(void)
{
    VFIOMultifd *multifd = g_new(VFIOMultifd, 1);

    vfio_state_buffers_init(&multifd->load_bufs);

    qemu_mutex_init(&multifd->load_bufs_mutex);

    multifd->load_buf_idx = 0;
    multifd->load_buf_idx_last = UINT32_MAX;
    multifd->load_buf_queued_pending_buffers_size = 0;
    qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);

    multifd->load_bufs_iter_done = false;
    qemu_cond_init(&multifd->load_bufs_iter_done_cond);

    multifd->load_bufs_thread_running = false;
    multifd->load_bufs_thread_want_exit = false;
    qemu_cond_init(&multifd->load_bufs_thread_finished_cond);

    return multifd;
}

/*
 * Terminates vfio_load_bufs_thread by setting
 * multifd->load_bufs_thread_want_exit and signalling all the conditions
 * the thread could be blocked on.
 *
 * Waits for the thread to signal that it has finished.
 */
static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
{
    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        while (multifd->load_bufs_thread_running) {
            multifd->load_bufs_thread_want_exit = true;

            qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
            qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
            qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
                           &multifd->load_bufs_mutex);
        }
    }
    bql_lock();
}

static void vfio_multifd_free(VFIOMultifd *multifd)
{
    vfio_load_cleanup_load_bufs_thread(multifd);

    qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
    qemu_cond_destroy(&multifd->load_bufs_iter_done_cond);
    vfio_state_buffers_destroy(&multifd->load_bufs);
    qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
    qemu_mutex_destroy(&multifd->load_bufs_mutex);

    g_free(multifd);
}

void vfio_multifd_cleanup(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;

    g_clear_pointer(&migration->multifd, vfio_multifd_free);
}

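/*
 * Returns whether multifd device state transfer can be used with the
 * current migration configuration: it requires both multifd device state
 * support and the switchover-start notification.
 */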
bool vfio_multifd_transfer_supported(void)
{
    return multifd_device_state_supported() &&
        migrate_send_switchover_start();
}

bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;

    return migration->multifd_transfer;
}

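/*
 * Resolves the per-device multifd transfer setting for this migration and,
 * when alloc_multifd is set, allocates the VFIOMultifd state used by the
 * load path.
 *
 * Returns false (with errp set) if multifd transfer was explicitly requested
 * but is not supported in the current configuration.
 */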
bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;

    /*
     * Make a copy of this setting at the start in case it is changed
     * mid-migration.
     */
    if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
        migration->multifd_transfer = vfio_multifd_transfer_supported();
    } else {
        migration->multifd_transfer =
            vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
    }

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        /* Nothing further to check or do */
        return true;
    }

    if (!vfio_multifd_transfer_supported()) {
        error_setg(errp,
                   "%s: Multifd device transfer requested but unsupported in the current config",
                   vbasedev->name);
        return false;
    }

    if (alloc_multifd) {
        assert(!migration->multifd);
        migration->multifd = vfio_multifd_new();
    }

    return true;
}

void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
{
    assert(vfio_multifd_transfer_enabled(vbasedev));

    /*
     * Emit dummy NOP data on the main migration channel since the actual
     * device state transfer is done via multifd channels.
     */
    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
}

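/*
 * Serializes the device config state into an in-memory QEMUFile and queues
 * it as the final packet, flagged with VFIO_DEVICE_STATE_CONFIG_STATE, for
 * transfer over a multifd channel.
 */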
static bool
vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
                                               char *idstr,
                                               uint32_t instance_id,
                                               uint32_t idx,
                                               Error **errp)
{
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f = NULL;
    int ret;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    size_t packet_len;

    bioc = qio_channel_buffer_new(0);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");

    f = qemu_file_new_output(QIO_CHANNEL(bioc));

    if (vfio_save_device_config_state(f, vbasedev, errp)) {
        return false;
    }

    ret = qemu_fflush(f);
    if (ret) {
        error_setg(errp, "%s: save config state flush failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    packet_len = sizeof(*packet) + bioc->usage;
    packet = g_malloc0(packet_len);
    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
    packet->idx = cpu_to_be32(idx);
    packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE);
    memcpy(&packet->data, bioc->data, bioc->usage);

    if (!multifd_queue_device_state(idstr, instance_id,
                                    (char *)packet, packet_len)) {
        error_setg(errp, "%s: multifd config data queuing failed",
                   vbasedev->name);
        return false;
    }

    vfio_migration_add_bytes_transferred(packet_len);

    return true;
}

/*
 * This thread is spawned by the migration core directly via
 * .save_complete_precopy_thread SaveVMHandler.
 *
 * It exits after either:
 * * completing saving the remaining device state and device config, OR:
 * * encountering some error while doing the above, OR:
 * * being forcefully aborted by the migration core by
 *   multifd_device_state_save_thread_should_exit() returning true.
 */
bool
vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d,
                                          Error **errp)
{
    VFIODevice *vbasedev = d->handler_opaque;
    VFIOMigration *migration = vbasedev->migration;
    bool ret = false;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    uint32_t idx;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
        return true;
    }

    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
                                                  d->idstr, d->instance_id);

    /* We reach here with device state STOP or STOP_COPY only */
    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                 VFIO_DEVICE_STATE_STOP, errp)) {
        goto thread_exit;
    }

    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);

    for (idx = 0; ; idx++) {
        ssize_t data_size;
        size_t packet_size;

        if (multifd_device_state_save_thread_should_exit()) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        data_size = read(migration->data_fd, &packet->data,
                         migration->data_buffer_size);
        if (data_size < 0) {
            error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, idx, errno);
            goto thread_exit;
        } else if (data_size == 0) {
            break;
        }

        packet->idx = cpu_to_be32(idx);
        packet_size = sizeof(*packet) + data_size;

        if (!multifd_queue_device_state(d->idstr, d->instance_id,
                                        (char *)packet, packet_size)) {
            error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
            goto thread_exit;
        }

        vfio_migration_add_bytes_transferred(packet_size);
    }

    if (!vfio_save_complete_precopy_thread_config_state(vbasedev,
                                                        d->idstr,
                                                        d->instance_id,
                                                        idx, errp)) {
        goto thread_exit;
    }

    ret = true;

thread_exit:
    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);

    return ret;
}

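/*
 * Called when the switchover point marker is reached in the main migration
 * stream: marks the buffer loading thread as running and asks the migration
 * core to spawn it via qemu_loadvm_start_load_thread().
 */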
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        assert(!multifd->load_bufs_thread_running);
        multifd->load_bufs_thread_running = true;
    }
    bql_lock();

    qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);

    return 0;
}