1 /*
2 * Multifd VFIO migration
3 *
4 * Copyright (C) 2024,2025 Oracle and/or its affiliates.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12 #include "qemu/osdep.h"
13 #include "hw/vfio/vfio-device.h"
14 #include "migration/misc.h"
15 #include "qapi/error.h"
16 #include "qemu/error-report.h"
17 #include "qemu/lockable.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/thread.h"
20 #include "io/channel-buffer.h"
21 #include "migration/qemu-file.h"
22 #include "migration-multifd.h"
23 #include "vfio-migration-internal.h"
24 #include "trace.h"
25 #include "vfio-helpers.h"
26
/* Packet flag: this packet carries the device config state (last in stream) */
#define VFIO_DEVICE_STATE_CONFIG_STATE (1)

/* Wire format version of VFIODeviceStatePacket */
#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)
30
31 typedef struct VFIODeviceStatePacket {
32 uint32_t version;
33 uint32_t idx;
34 uint32_t flags;
35 uint8_t data[0];
36 } QEMU_PACKED VFIODeviceStatePacket;
37
/*
 * Whether loading the device config state must be deferred until the main
 * migration stream finished loading its iterable data, per the device's
 * migration_load_config_after_iter property (AUTO defers to the arch).
 */
bool vfio_load_config_after_iter(VFIODevice *vbasedev)
{
    switch (vbasedev->migration_load_config_after_iter) {
    case ON_OFF_AUTO_ON:
        return true;
    case ON_OFF_AUTO_OFF:
        return false;
    default:
        assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO);
        return vfio_arch_wants_loading_config_after_iter();
    }
}
49
/* Type-safety wrapper around the GArray holding VFIOStateBuffer elements */
typedef struct VFIOStateBuffers {
    GArray *array;
} VFIOStateBuffers;
54
/* A single queued device state buffer received over a multifd channel */
typedef struct VFIOStateBuffer {
    bool is_present; /* true once the data for this index has arrived */
    char *data;      /* payload; ownership is stolen when written to the device */
    size_t len;      /* payload length in bytes */
} VFIOStateBuffer;
60
typedef struct VFIOMultifd {
    /* True while vfio_load_bufs_thread() is executing */
    bool load_bufs_thread_running;
    /* Set to ask vfio_load_bufs_thread() to terminate early */
    bool load_bufs_thread_want_exit;

    /* Set once DEV_CONFIG_LOAD_READY arrived on the main migration channel */
    bool load_bufs_iter_done;
    QemuCond load_bufs_iter_done_cond;

    /* Received device state buffers, indexed by packet idx */
    VFIOStateBuffers load_bufs;
    /* Signalled whenever a new buffer is queued into load_bufs */
    QemuCond load_bufs_buffer_ready_cond;
    /* Signalled by the load thread when it exits */
    QemuCond load_bufs_thread_finished_cond;
    QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
    /* Index of the next buffer to be loaded into the device */
    uint32_t load_buf_idx;
    /* Index of the final (config state) buffer; UINT32_MAX until known */
    uint32_t load_buf_idx_last;
    /* Total bytes queued but not yet written to the device */
    size_t load_buf_queued_pending_buffers_size;
} VFIOMultifd;
76
/*
 * GArray clear func: release a slot's payload if one has been stored.
 * Slots that were never filled (is_present == false) are left untouched.
 */
static void vfio_state_buffer_clear(gpointer data)
{
    VFIOStateBuffer *sb = data;

    if (sb->is_present) {
        g_clear_pointer(&sb->data, g_free);
        sb->is_present = false;
    }
}
88
/* Allocate the slot array (zero-initialized elements, cleared on removal) */
static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
{
    bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));
    g_array_set_clear_func(bufs->array, vfio_state_buffer_clear);
}
94
/* Free the slot array; the clear func releases any payloads still present */
static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
{
    g_clear_pointer(&bufs->array, g_array_unref);
}
99
/* Sanity check that vfio_state_buffers_init() has been called */
static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
{
    assert(bufs->array);
}
104
/* Number of allocated slots (not necessarily all filled) */
static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
{
    return bufs->array->len;
}
109
/* Resize the slot array; newly added slots come back zero-initialized */
static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
                                        unsigned int size)
{
    g_array_set_size(bufs->array, size);
}
115
/* Pointer to slot @idx; only valid until the array is next resized */
static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
                                              unsigned int idx)
{
    return &g_array_index(bufs->array, VFIOStateBuffer, idx);
}
121
/*
 * Queue a received device state packet's payload into the load_bufs array
 * at the slot given by the packet's idx.
 *
 * Called with load_bufs_mutex locked.
 */
static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
                                          VFIODeviceStatePacket *packet,
                                          size_t packet_total_size,
                                          Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIOStateBuffer *lb;
    /* Payload length is whatever follows the fixed packet header */
    size_t data_size = packet_total_size - sizeof(*packet);

    vfio_state_buffers_assert_init(&multifd->load_bufs);
    /* Grow the idx-ordered slot array on demand */
    if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
        vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
    }

    lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
    if (lb->is_present) {
        error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
                   vbasedev->name, packet->idx);
        return false;
    }

    /* Buffers already consumed by the load thread must not arrive again */
    assert(packet->idx >= multifd->load_buf_idx);

    /*
     * Enforce the configured cap on memory consumed by queued buffers.
     * NOTE(review): the counter stays incremented on this error path;
     * presumably harmless since the failure aborts the migration — confirm.
     */
    multifd->load_buf_queued_pending_buffers_size += data_size;
    if (multifd->load_buf_queued_pending_buffers_size >
        vbasedev->migration_max_queued_buffers_size) {
        error_setg(errp,
                   "%s: queuing state buffer %" PRIu32
                   " would exceed the size max of %" PRIu64,
                   vbasedev->name, packet->idx,
                   vbasedev->migration_max_queued_buffers_size);
        return false;
    }

    lb->data = g_memdup2(&packet->data, data_size);
    lb->len = data_size;
    lb->is_present = true;

    return true;
}
164
/*
 * Receive one device state packet from a multifd channel, validate it and
 * queue it for the load thread. Runs in multifd receive context, without
 * the BQL held.
 */
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
                                    Error **errp)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        error_setg(errp,
                   "%s: got device state packet but not doing multifd transfer",
                   vbasedev->name);
        return false;
    }

    assert(multifd);

    /* The packet must at least contain the fixed-size header */
    if (data_size < sizeof(*packet)) {
        error_setg(errp, "%s: packet too short at %zu (min is %zu)",
                   vbasedev->name, data_size, sizeof(*packet));
        return false;
    }

    /* Header fields are big-endian on the wire; convert them in place */
    packet->version = be32_to_cpu(packet->version);
    if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
        error_setg(errp, "%s: packet has unknown version %" PRIu32,
                   vbasedev->name, packet->version);
        return false;
    }

    packet->idx = be32_to_cpu(packet->idx);
    packet->flags = be32_to_cpu(packet->flags);

    /* UINT32_MAX is reserved as the "last index unknown" sentinel */
    if (packet->idx == UINT32_MAX) {
        error_setg(errp, "%s: packet index is invalid", vbasedev->name);
        return false;
    }

    trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);

    /*
     * Holding BQL here would violate the lock order and can cause
     * a deadlock once we attempt to lock load_bufs_mutex below.
     */
    assert(!bql_locked());

    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        /* config state packet should be the last one in the stream */
        if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
            multifd->load_buf_idx_last = packet->idx;
        }

        if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
                                           errp)) {
            return false;
        }

        /* Wake the load thread in case it is waiting for this buffer */
        qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
    }

    return true;
}
227
/*
 * Load the device config state from the final ("last") state buffer.
 *
 * The buffer holds the stream produced by vfio_save_device_config_state();
 * replay it through an in-memory QEMUFile so that
 * vfio_load_device_config_state() can consume it.
 *
 * Called from the load thread with load_bufs_mutex held.
 */
static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
                                              Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    VFIOStateBuffer *lb;
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
    uint64_t mig_header;
    int ret;

    /* The config state buffer is always the last one in the stream */
    assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
    lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
    assert(lb->is_present);

    /* Copy the raw buffer into a seekable in-memory channel */
    bioc = qio_channel_buffer_new(lb->len);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");

    f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
    qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);

    ret = qemu_fflush(f_out);
    if (ret) {
        error_setg(errp, "%s: load config state flush failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    /* Rewind the channel and re-open it for reading */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    f_in = qemu_file_new_input(QIO_CHANNEL(bioc));

    mig_header = qemu_get_be64(f_in);
    if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
        error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
                   vbasedev->name, mig_header);
        return false;
    }

    /*
     * Loading config state needs the BQL; taking it while holding
     * load_bufs_mutex matches the documented lock order.
     */
    bql_lock();
    ret = vfio_load_device_config_state(f_in, vbasedev);
    bql_unlock();

    if (ret < 0) {
        error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    return true;
}
278
vfio_load_state_buffer_get(VFIOMultifd * multifd)279 static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
280 {
281 VFIOStateBuffer *lb;
282 unsigned int bufs_len;
283
284 bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
285 if (multifd->load_buf_idx >= bufs_len) {
286 assert(multifd->load_buf_idx == bufs_len);
287 return NULL;
288 }
289
290 lb = vfio_state_buffers_at(&multifd->load_bufs,
291 multifd->load_buf_idx);
292 if (!lb->is_present) {
293 return NULL;
294 }
295
296 return lb;
297 }
298
/*
 * Write one queued state buffer to the device via migration->data_fd.
 *
 * Called with load_bufs_mutex held; the lock is dropped around each
 * write() since loading data into the device can take a while.
 */
static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
                                         VFIOStateBuffer *lb,
                                         Error **errp)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    g_autofree char *buf = NULL;
    char *buf_cur;
    size_t buf_len;

    /* Nothing to write for an empty buffer */
    if (!lb->len) {
        return true;
    }

    trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
                                                   multifd->load_buf_idx);

    /* lb might become re-allocated when we drop the lock */
    buf = g_steal_pointer(&lb->data);
    buf_cur = buf;
    buf_len = lb->len;
    while (buf_len > 0) {
        ssize_t wr_ret;
        int errno_save;

        /*
         * Loading data to the device takes a while,
         * drop the lock during this process.
         */
        qemu_mutex_unlock(&multifd->load_bufs_mutex);
        wr_ret = write(migration->data_fd, buf_cur, buf_len);
        /* Save errno before re-taking the mutex can clobber it */
        errno_save = errno;
        qemu_mutex_lock(&multifd->load_bufs_mutex);

        if (wr_ret < 0) {
            error_setg(errp,
                       "%s: writing state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, multifd->load_buf_idx, errno_save);
            return false;
        }

        /* Handle short writes by advancing through the buffer */
        assert(wr_ret <= buf_len);
        buf_len -= wr_ret;
        buf_cur += wr_ret;

        /* Written bytes no longer count against the queued-size cap */
        assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
        multifd->load_buf_queued_pending_buffers_size -= wr_ret;
    }

    trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
                                                 multifd->load_buf_idx);

    return true;
}
353
/*
 * Whether the load thread should terminate: requested either by the
 * migration core (should_quit) or by vfio_load_cleanup_load_bufs_thread().
 */
static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
                                            bool *should_quit)
{
    return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
}
359
/*
 * This thread is spawned by vfio_multifd_switchover_start() which gets
 * called upon encountering the switchover point marker in main migration
 * stream.
 *
 * It exits after either:
 * * completing loading the remaining device state and device config, OR:
 * * encountering some error while doing the above, OR:
 * * being forcefully aborted by the migration core by it setting should_quit
 *   or by vfio_load_cleanup_load_bufs_thread() setting
 *   multifd->load_bufs_thread_want_exit.
 */
static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    bool ret = false;

    trace_vfio_load_bufs_thread_start(vbasedev->name);

    assert(multifd);
    QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);

    assert(multifd->load_bufs_thread_running);

    /* Write buffers to the device in index order as they become available */
    while (true) {
        VFIOStateBuffer *lb;

        /*
         * Always check cancellation first after the buffer_ready wait below in
         * case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
         */
        if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);

        lb = vfio_load_state_buffer_get(multifd);
        if (!lb) {
            /* The next in-order buffer hasn't arrived yet; wait for it */
            trace_vfio_load_state_device_buffer_starved(vbasedev->name,
                                                        multifd->load_buf_idx);
            qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
                           &multifd->load_bufs_mutex);
            continue;
        }

        /* The last buffer holds the config state, which is loaded below */
        if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
            break;
        }

        if (multifd->load_buf_idx == 0) {
            trace_vfio_load_state_device_buffer_start(vbasedev->name);
        }

        /* Note: drops and re-takes load_bufs_mutex internally */
        if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
            goto thread_exit;
        }

        if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
            trace_vfio_load_state_device_buffer_end(vbasedev->name);
        }

        multifd->load_buf_idx++;
    }

    /*
     * If configured so, wait for DEV_CONFIG_LOAD_READY from the main
     * migration stream before loading the config state.
     */
    if (vfio_load_config_after_iter(vbasedev)) {
        while (!multifd->load_bufs_iter_done) {
            qemu_cond_wait(&multifd->load_bufs_iter_done_cond,
                           &multifd->load_bufs_mutex);

            /*
             * Need to re-check cancellation immediately after wait in case
             * cond was signalled by vfio_load_cleanup_load_bufs_thread().
             */
            if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
                error_setg(errp, "operation cancelled");
                goto thread_exit;
            }
        }
    }

    if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
        goto thread_exit;
    }

    ret = true;

thread_exit:
    /*
     * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
     * this thread is exiting.
     */
    multifd->load_bufs_thread_running = false;
    qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);

    trace_vfio_load_bufs_thread_end(vbasedev->name);

    return ret;
}
462
/*
 * Handle a DEV_CONFIG_LOAD_READY marker from the main migration stream:
 * unblock the load thread which may be waiting to load the config state.
 *
 * Called with BQL held; returns 0 on success or a negative errno value.
 */
int vfio_load_state_config_load_ready(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;
    int ret = 0;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer",
                     vbasedev->name);
        return -EINVAL;
    }

    if (!vfio_load_config_after_iter(vbasedev)) {
        error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled",
                     vbasedev->name);
        return -EINVAL;
    }

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        if (multifd->load_bufs_iter_done) {
            /* Can't print error here as we're outside BQL */
            ret = -EINVAL;
            /* break leaves the lock guard scope, releasing the mutex */
            break;
        }

        multifd->load_bufs_iter_done = true;
        qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
    }
    bql_lock();

    if (ret) {
        error_report("%s: duplicate DEV_CONFIG_LOAD_READY",
                     vbasedev->name);
    }

    return ret;
}
504
vfio_multifd_new(void)505 static VFIOMultifd *vfio_multifd_new(void)
506 {
507 VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
508
509 vfio_state_buffers_init(&multifd->load_bufs);
510
511 qemu_mutex_init(&multifd->load_bufs_mutex);
512
513 multifd->load_buf_idx = 0;
514 multifd->load_buf_idx_last = UINT32_MAX;
515 multifd->load_buf_queued_pending_buffers_size = 0;
516 qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
517
518 multifd->load_bufs_iter_done = false;
519 qemu_cond_init(&multifd->load_bufs_iter_done_cond);
520
521 multifd->load_bufs_thread_running = false;
522 multifd->load_bufs_thread_want_exit = false;
523 qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
524
525 return multifd;
526 }
527
/*
 * Terminates vfio_load_bufs_thread by setting
 * multifd->load_bufs_thread_want_exit and signalling all the conditions
 * the thread could be blocked on.
 *
 * Waits for the thread to signal that it had finished.
 */
static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
{
    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        /* Loop (rather than a single pass) in case of spurious wakeups */
        while (multifd->load_bufs_thread_running) {
            multifd->load_bufs_thread_want_exit = true;

            /* Wake the thread from whichever cond it might be waiting on */
            qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
            qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
            qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
                           &multifd->load_bufs_mutex);
        }
    }
    bql_lock();
}
551
/* Free a VFIOMultifd, first terminating its load thread if still running */
static void vfio_multifd_free(VFIOMultifd *multifd)
{
    vfio_load_cleanup_load_bufs_thread(multifd);

    qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
    qemu_cond_destroy(&multifd->load_bufs_iter_done_cond);
    vfio_state_buffers_destroy(&multifd->load_bufs);
    qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
    qemu_mutex_destroy(&multifd->load_bufs_mutex);

    g_free(multifd);
}
564
/* Tear down the device's multifd state, if any was set up */
void vfio_multifd_cleanup(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;

    if (migration->multifd) {
        vfio_multifd_free(migration->multifd);
        migration->multifd = NULL;
    }
}
571
/*
 * Multifd device state transfer needs both the migration core's multifd
 * device state support and the switchover-start notification enabled.
 */
bool vfio_multifd_transfer_supported(void)
{
    if (!multifd_device_state_supported()) {
        return false;
    }

    return migrate_send_switchover_start();
}
577
/* Whether multifd transfer was latched on for this device's migration */
bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
{
    return vbasedev->migration->multifd_transfer;
}
584
vfio_multifd_setup(VFIODevice * vbasedev,bool alloc_multifd,Error ** errp)585 bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
586 {
587 VFIOMigration *migration = vbasedev->migration;
588
589 /*
590 * Make a copy of this setting at the start in case it is changed
591 * mid-migration.
592 */
593 if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
594 migration->multifd_transfer = vfio_multifd_transfer_supported();
595 } else {
596 migration->multifd_transfer =
597 vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
598 }
599
600 if (!vfio_multifd_transfer_enabled(vbasedev)) {
601 /* Nothing further to check or do */
602 return true;
603 }
604
605 if (!vfio_multifd_transfer_supported()) {
606 error_setg(errp,
607 "%s: Multifd device transfer requested but unsupported in the current config",
608 vbasedev->name);
609 return false;
610 }
611
612 if (alloc_multifd) {
613 assert(!migration->multifd);
614 migration->multifd = vfio_multifd_new();
615 }
616
617 return true;
618 }
619
/*
 * Write an END_OF_STATE marker to the main migration stream for devices
 * whose real state travels over multifd channels instead.
 */
void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
{
    assert(vfio_multifd_transfer_enabled(vbasedev));

    /*
     * Emit dummy NOP data on the main migration channel since the actual
     * device state transfer is done via multifd channels.
     */
    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
}
630
/*
 * Marshal the device config state and queue it as the final multifd
 * packet, flagged VFIO_DEVICE_STATE_CONFIG_STATE and carrying index @idx.
 */
static bool
vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
                                               char *idstr,
                                               uint32_t instance_id,
                                               uint32_t idx,
                                               Error **errp)
{
    g_autoptr(QIOChannelBuffer) bioc = NULL;
    g_autoptr(QEMUFile) f = NULL;
    int ret;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    size_t packet_len;

    /* Capture the config state into an in-memory channel */
    bioc = qio_channel_buffer_new(0);
    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");

    f = qemu_file_new_output(QIO_CHANNEL(bioc));

    if (vfio_save_device_config_state(f, vbasedev, errp)) {
        return false;
    }

    ret = qemu_fflush(f);
    if (ret) {
        error_setg(errp, "%s: save config state flush failed: %d",
                   vbasedev->name, ret);
        return false;
    }

    /* Wrap the captured bytes in a device state packet */
    packet_len = sizeof(*packet) + bioc->usage;
    packet = g_malloc0(packet_len);
    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
    packet->idx = cpu_to_be32(idx);
    packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE);
    memcpy(&packet->data, bioc->data, bioc->usage);

    if (!multifd_queue_device_state(idstr, instance_id,
                                    (char *)packet, packet_len)) {
        error_setg(errp, "%s: multifd config data queuing failed",
                   vbasedev->name);
        return false;
    }

    vfio_migration_add_bytes_transferred(packet_len);

    return true;
}
678
/*
 * This thread is spawned by the migration core directly via
 * .save_complete_precopy_thread SaveVMHandler.
 *
 * It exits after either:
 * * completing saving the remaining device state and device config, OR:
 * * encountering some error while doing the above, OR:
 * * being forcefully aborted by the migration core by
 *   multifd_device_state_save_thread_should_exit() returning true.
 */
bool
vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d,
                                          Error **errp)
{
    VFIODevice *vbasedev = d->handler_opaque;
    VFIOMigration *migration = vbasedev->migration;
    bool ret = false;
    g_autofree VFIODeviceStatePacket *packet = NULL;
    uint32_t idx;

    if (!vfio_multifd_transfer_enabled(vbasedev)) {
        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
        return true;
    }

    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
                                                  d->idstr, d->instance_id);

    /* We reach here with device state STOP or STOP_COPY only */
    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                 VFIO_DEVICE_STATE_STOP, errp)) {
        goto thread_exit;
    }

    /* Reusable packet buffer for all the data chunks */
    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
    packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);

    /* Read device state chunks from data_fd until EOF, queuing each one */
    for (idx = 0; ; idx++) {
        ssize_t data_size;
        size_t packet_size;

        if (multifd_device_state_save_thread_should_exit()) {
            error_setg(errp, "operation cancelled");
            goto thread_exit;
        }

        data_size = read(migration->data_fd, &packet->data,
                         migration->data_buffer_size);
        if (data_size < 0) {
            error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
                       vbasedev->name, idx, errno);
            goto thread_exit;
        } else if (data_size == 0) {
            /* EOF: all device state has been read */
            break;
        }

        packet->idx = cpu_to_be32(idx);
        packet_size = sizeof(*packet) + data_size;

        if (!multifd_queue_device_state(d->idstr, d->instance_id,
                                        (char *)packet, packet_size)) {
            error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
            goto thread_exit;
        }

        vfio_migration_add_bytes_transferred(packet_size);
    }

    /* The config state goes last, using the next free index */
    if (!vfio_save_complete_precopy_thread_config_state(vbasedev,
                                                        d->idstr,
                                                        d->instance_id,
                                                        idx, errp)) {
        goto thread_exit;
    }

    ret = true;

thread_exit:
    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);

    return ret;
}
761
/*
 * Called on the destination when the switchover point marker is reached
 * in the main migration stream: spawn the buffer load thread.
 *
 * Called with BQL held; always returns 0.
 */
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIOMultifd *multifd = migration->multifd;

    assert(multifd);

    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
    bql_unlock();
    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
        assert(!multifd->load_bufs_thread_running);
        multifd->load_bufs_thread_running = true;
    }
    bql_lock();

    qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);

    return 0;
}
781