xref: /openbmc/qemu/migration/migration.c (revision 21063bce)
1 /*
2  * QEMU live migration
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qemu/main-loop.h"
20 #include "migration/blocker.h"
21 #include "exec.h"
22 #include "fd.h"
23 #include "socket.h"
24 #include "sysemu/runstate.h"
25 #include "sysemu/sysemu.h"
26 #include "sysemu/cpu-throttle.h"
27 #include "rdma.h"
28 #include "ram.h"
29 #include "migration/global_state.h"
30 #include "migration/misc.h"
31 #include "migration.h"
32 #include "savevm.h"
33 #include "qemu-file.h"
34 #include "channel.h"
35 #include "migration/vmstate.h"
36 #include "block/block.h"
37 #include "qapi/error.h"
38 #include "qapi/clone-visitor.h"
39 #include "qapi/qapi-visit-migration.h"
40 #include "qapi/qapi-visit-sockets.h"
41 #include "qapi/qapi-commands-migration.h"
42 #include "qapi/qapi-events-migration.h"
43 #include "qapi/qmp/qerror.h"
44 #include "qapi/qmp/qnull.h"
45 #include "qemu/rcu.h"
46 #include "block.h"
47 #include "postcopy-ram.h"
48 #include "qemu/thread.h"
49 #include "trace.h"
50 #include "exec/target_page.h"
51 #include "io/channel-buffer.h"
52 #include "io/channel-tls.h"
53 #include "migration/colo.h"
54 #include "hw/boards.h"
55 #include "hw/qdev-properties.h"
56 #include "hw/qdev-properties-system.h"
57 #include "monitor/monitor.h"
58 #include "net/announce.h"
59 #include "qemu/queue.h"
60 #include "multifd.h"
61 #include "threadinfo.h"
62 #include "qemu/yank.h"
63 #include "sysemu/cpus.h"
64 #include "yank_functions.h"
65 #include "sysemu/qtest.h"
66 #include "ui/qemu-spice.h"
67 
/*
 * Compile-time limits and default values used by the migration code in
 * this file.  The DEFAULT_MIGRATE_* names are presumably the initial
 * values of the corresponding migration parameters - the code that
 * consumes them is outside this excerpt.
 */

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/*
 * Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data.  XFER_LIMIT_RATIO is how many such chunks fit into 1000 units
 * (NOTE(review): presumably milliseconds, i.e. chunks per second -
 * confirm against the users of these macros).
 */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/*
 * Time in milliseconds we are allowed to stop the source,
 * for sending the last part
 */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/*
 * Maximum migrate downtime set to 2000 seconds.  MAX_MIGRATE_DOWNTIME is
 * the same limit multiplied by 1000 (presumably milliseconds, matching
 * DEFAULT_MIGRATE_SET_DOWNTIME above).
 */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/*
 * Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.
 */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means no compression, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size (64 * 1024 * 1024 bytes) */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/*
 * The delay time (in ms) between two COLO checkpoints; the expression
 * below evaluates to 20000.
 */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
/* Defaults for the multifd channel count and multifd compression method */
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means no compression, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means no compression, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/*
 * Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
121 
/*
 * File-local notifier list, statically initialised to the empty list.
 * NOTE(review): presumably notified on migration state changes; the code
 * that registers on and fires this list is outside this excerpt.
 */
static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
124 
/*
 * Types of the messages that travel on the return path, i.e. from the
 * destination back to the source.  The numeric values are spelled out
 * because they are what gets written to the stream.
 */
enum mig_rp_message_type {
    /* Must be 0 */
    MIG_RP_MSG_INVALID      = 0,
    /* sibling will not send any more RP messages */
    MIG_RP_MSG_SHUT         = 1,
    /* Response to a PING; data (seq: be32 ) */
    MIG_RP_MSG_PONG         = 2,

    /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES_ID = 3,
    /* data (start: be64, len: be32) */
    MIG_RP_MSG_REQ_PAGES    = 4,
    /* send recved_bitmap back to source */
    MIG_RP_MSG_RECV_BITMAP  = 5,
    /* tell source that we are ready to resume */
    MIG_RP_MSG_RESUME_ACK   = 6,

    /* Number of message types; not a message itself */
    MIG_RP_MSG_MAX          = 7
};
138 
139 /* Migration capabilities set */
140 struct MigrateCapsSet {
141     int size;                       /* Capability set size */
142     MigrationCapability caps[];     /* Variadic array of capabilities */
143 };
144 typedef struct MigrateCapsSet MigrateCapsSet;
145 
146 /* Define and initialize MigrateCapsSet */
147 #define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
148     MigrateCapsSet _name = {    \
149         .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
150         .caps = { __VA_ARGS__ } \
151     }
152 
/*
 * Background-snapshot compatibility check list.
 *
 * Built with INITIALIZE_MIGRATE_CAPS_SET, so .size is the number of
 * capabilities listed below and .caps holds them in this order.
 * NOTE(review): presumably these are the capabilities that cannot be
 * combined with background-snapshot; the code that walks this set is
 * outside this excerpt - confirm there.
 */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID,
    MIGRATION_CAPABILITY_ZERO_COPY_SEND);
171 
/*
 * When we add fault tolerance, we could have several migrations at
 * once.  For now there is exactly one outgoing and one incoming state
 * object, so they are kept in file-scope pointers instead of being
 * created dynamically.  Both are set up by migration_object_init().
 */

/* The single outgoing migration state; see migrate_get_current() */
static MigrationState *current_migration;
/* The single incoming state; see migration_incoming_get_current() */
static MigrationIncomingState *current_incoming;

/*
 * List head for migration blockers.  NOTE(review): the element type and
 * the code that adds/removes entries are outside this excerpt.
 */
static GSList *migration_blockers;

/* Forward declarations of helpers defined further down in this file */
static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);
186 
/*
 * Returns true when either migrate_use_multifd() or
 * migrate_postcopy_preempt() reports true, false otherwise.
 */
static bool migration_needs_multiple_sockets(void)
{
    if (migrate_use_multifd()) {
        return true;
    }

    return migrate_postcopy_preempt();
}
191 
/*
 * Returns true when @uri starts with one of the prefixes "tcp:",
 * "unix:" or "vsock:", false for anything else.
 */
static bool uri_supports_multi_channels(const char *uri)
{
    if (strstart(uri, "tcp:", NULL)) {
        return true;
    }
    if (strstart(uri, "unix:", NULL)) {
        return true;
    }
    if (strstart(uri, "vsock:", NULL)) {
        return true;
    }

    return false;
}
197 
198 static bool
199 migration_channels_and_uri_compatible(const char *uri, Error **errp)
200 {
201     if (migration_needs_multiple_sockets() &&
202         !uri_supports_multi_channels(uri)) {
203         error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
204         return false;
205     }
206 
207     return true;
208 }
209 
210 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
211 {
212     uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
213 
214     return (a > b) - (a < b);
215 }
216 
217 void migration_object_init(void)
218 {
219     /* This can only be called once. */
220     assert(!current_migration);
221     current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
222 
223     /*
224      * Init the migrate incoming object as well no matter whether
225      * we'll use it or not.
226      */
227     assert(!current_incoming);
228     current_incoming = g_new0(MigrationIncomingState, 1);
229     current_incoming->state = MIGRATION_STATUS_NONE;
230     current_incoming->postcopy_remote_fds =
231         g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
232     qemu_mutex_init(&current_incoming->rp_mutex);
233     qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
234     qemu_event_init(&current_incoming->main_thread_load_event, false);
235     qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
236     qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
237     qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
238     qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
239 
240     qemu_mutex_init(&current_incoming->page_request_mutex);
241     current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
242 
243     migration_object_check(current_migration, &error_fatal);
244 
245     blk_mig_init();
246     ram_mig_init();
247     dirty_bitmap_mig_init();
248 }
249 
250 void migration_cancel(const Error *error)
251 {
252     if (error) {
253         migrate_set_error(current_migration, error);
254     }
255     migrate_fd_cancel(current_migration);
256 }
257 
258 void migration_shutdown(void)
259 {
260     /*
261      * When the QEMU main thread exit, the COLO thread
262      * may wait a semaphore. So, we should wakeup the
263      * COLO thread before migration shutdown.
264      */
265     colo_shutdown();
266     /*
267      * Cancel the current migration - that will (eventually)
268      * stop the migration using this structure
269      */
270     migration_cancel(NULL);
271     object_unref(OBJECT(current_migration));
272 
273     /*
274      * Cancel outgoing migration of dirty bitmaps. It should
275      * at least unref used block nodes.
276      */
277     dirty_bitmap_mig_cancel_outgoing();
278 
279     /*
280      * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
281      * are non-critical data, and their loss never considered as
282      * something serious.
283      */
284     dirty_bitmap_mig_cancel_incoming();
285 }
286 
287 /* For outgoing */
288 MigrationState *migrate_get_current(void)
289 {
290     /* This can only be called after the object created. */
291     assert(current_migration);
292     return current_migration;
293 }
294 
295 MigrationIncomingState *migration_incoming_get_current(void)
296 {
297     assert(current_incoming);
298     return current_incoming;
299 }
300 
301 void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
302 {
303     if (mis->socket_address_list) {
304         qapi_free_SocketAddressList(mis->socket_address_list);
305         mis->socket_address_list = NULL;
306     }
307 
308     if (mis->transport_cleanup) {
309         mis->transport_cleanup(mis->transport_data);
310         mis->transport_data = mis->transport_cleanup = NULL;
311     }
312 }
313 
314 void migration_incoming_state_destroy(void)
315 {
316     struct MigrationIncomingState *mis = migration_incoming_get_current();
317 
318     multifd_load_cleanup();
319 
320     if (mis->to_src_file) {
321         /* Tell source that we are done */
322         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
323         qemu_fclose(mis->to_src_file);
324         mis->to_src_file = NULL;
325     }
326 
327     if (mis->from_src_file) {
328         migration_ioc_unregister_yank_from_file(mis->from_src_file);
329         qemu_fclose(mis->from_src_file);
330         mis->from_src_file = NULL;
331     }
332     if (mis->postcopy_remote_fds) {
333         g_array_free(mis->postcopy_remote_fds, TRUE);
334         mis->postcopy_remote_fds = NULL;
335     }
336 
337     migration_incoming_transport_cleanup(mis);
338     qemu_event_reset(&mis->main_thread_load_event);
339 
340     if (mis->page_requested) {
341         g_tree_destroy(mis->page_requested);
342         mis->page_requested = NULL;
343     }
344 
345     if (mis->postcopy_qemufile_dst) {
346         migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
347         qemu_fclose(mis->postcopy_qemufile_dst);
348         mis->postcopy_qemufile_dst = NULL;
349     }
350 
351     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
352 }
353 
/*
 * Emit a QAPI migration event carrying @new_state, but only when
 * migrate_use_events() says events are in use.
 */
static void migrate_generate_event(int new_state)
{
    if (!migrate_use_events()) {
        return;
    }

    qapi_event_send_migration(new_state);
}
360 
361 static bool migrate_late_block_activate(void)
362 {
363     MigrationState *s;
364 
365     s = migrate_get_current();
366 
367     return s->enabled_capabilities[
368         MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
369 }
370 
371 /*
372  * Send a message on the return channel back to the source
373  * of the migration.
374  */
375 static int migrate_send_rp_message(MigrationIncomingState *mis,
376                                    enum mig_rp_message_type message_type,
377                                    uint16_t len, void *data)
378 {
379     int ret = 0;
380 
381     trace_migrate_send_rp_message((int)message_type, len);
382     QEMU_LOCK_GUARD(&mis->rp_mutex);
383 
384     /*
385      * It's possible that the file handle got lost due to network
386      * failures.
387      */
388     if (!mis->to_src_file) {
389         ret = -EIO;
390         return ret;
391     }
392 
393     qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
394     qemu_put_be16(mis->to_src_file, len);
395     qemu_put_buffer(mis->to_src_file, data, len);
396     qemu_fflush(mis->to_src_file);
397 
398     /* It's possible that qemu file got error during sending */
399     ret = qemu_file_get_error(mis->to_src_file);
400 
401     return ret;
402 }
403 
404 /* Request one page from the source VM at the given start address.
405  *   rb: the RAMBlock to request the page in
406  *   Start: Address offset within the RB
407  *   Len: Length in bytes required - must be a multiple of pagesize
408  */
409 int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
410                                       RAMBlock *rb, ram_addr_t start)
411 {
412     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
413     size_t msglen = 12; /* start + len */
414     size_t len = qemu_ram_pagesize(rb);
415     enum mig_rp_message_type msg_type;
416     const char *rbname;
417     int rbname_len;
418 
419     *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
420     *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
421 
422     /*
423      * We maintain the last ramblock that we requested for page.  Note that we
424      * don't need locking because this function will only be called within the
425      * postcopy ram fault thread.
426      */
427     if (rb != mis->last_rb) {
428         mis->last_rb = rb;
429 
430         rbname = qemu_ram_get_idstr(rb);
431         rbname_len = strlen(rbname);
432 
433         assert(rbname_len < 256);
434 
435         bufc[msglen++] = rbname_len;
436         memcpy(bufc + msglen, rbname, rbname_len);
437         msglen += rbname_len;
438         msg_type = MIG_RP_MSG_REQ_PAGES_ID;
439     } else {
440         msg_type = MIG_RP_MSG_REQ_PAGES;
441     }
442 
443     return migrate_send_rp_message(mis, msg_type, msglen, bufc);
444 }
445 
446 int migrate_send_rp_req_pages(MigrationIncomingState *mis,
447                               RAMBlock *rb, ram_addr_t start, uint64_t haddr)
448 {
449     void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
450     bool received = false;
451 
452     WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
453         received = ramblock_recv_bitmap_test_byte_offset(rb, start);
454         if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
455             /*
456              * The page has not been received, and it's not yet in the page
457              * request list.  Queue it.  Set the value of element to 1, so that
458              * things like g_tree_lookup() will return TRUE (1) when found.
459              */
460             g_tree_insert(mis->page_requested, aligned, (gpointer)1);
461             mis->page_requested_count++;
462             trace_postcopy_page_req_add(aligned, mis->page_requested_count);
463         }
464     }
465 
466     /*
467      * If the page is there, skip sending the message.  We don't even need the
468      * lock because as long as the page arrived, it'll be there forever.
469      */
470     if (received) {
471         return 0;
472     }
473 
474     return migrate_send_rp_message_req_pages(mis, rb, start);
475 }
476 
/*
 * COLO state of the incoming side: set by
 * migration_incoming_enable_colo(), cleared by
 * migration_incoming_disable_colo().
 */
static bool migration_colo_enabled;

/* Returns the current value of the incoming-side COLO flag. */
bool migration_incoming_colo_enabled(void)
{
    const bool enabled = migration_colo_enabled;

    return enabled;
}
482 
483 void migration_incoming_disable_colo(void)
484 {
485     ram_block_discard_disable(false);
486     migration_colo_enabled = false;
487 }
488 
489 int migration_incoming_enable_colo(void)
490 {
491     if (ram_block_discard_disable(true)) {
492         error_report("COLO: cannot disable RAM discard");
493         return -EBUSY;
494     }
495     migration_colo_enabled = true;
496     return 0;
497 }
498 
499 void migrate_add_address(SocketAddress *address)
500 {
501     MigrationIncomingState *mis = migration_incoming_get_current();
502 
503     QAPI_LIST_PREPEND(mis->socket_address_list,
504                       QAPI_CLONE(SocketAddress, address));
505 }
506 
507 static void qemu_start_incoming_migration(const char *uri, Error **errp)
508 {
509     const char *p = NULL;
510 
511     /* URI is not suitable for migration? */
512     if (!migration_channels_and_uri_compatible(uri, errp)) {
513         return;
514     }
515 
516     qapi_event_send_migration(MIGRATION_STATUS_SETUP);
517     if (strstart(uri, "tcp:", &p) ||
518         strstart(uri, "unix:", NULL) ||
519         strstart(uri, "vsock:", NULL)) {
520         socket_start_incoming_migration(p ? p : uri, errp);
521 #ifdef CONFIG_RDMA
522     } else if (strstart(uri, "rdma:", &p)) {
523         rdma_start_incoming_migration(p, errp);
524 #endif
525     } else if (strstart(uri, "exec:", &p)) {
526         exec_start_incoming_migration(p, errp);
527     } else if (strstart(uri, "fd:", &p)) {
528         fd_start_incoming_migration(p, errp);
529     } else {
530         error_setg(errp, "unknown migration protocol: %s", uri);
531     }
532 }
533 
/*
 * Bottom half that finishes a successfully loaded incoming migration.
 * It is created and scheduled by process_incoming_migration_co().
 *
 * @opaque: the MigrationIncomingState
 *
 * In order: activate block devices if appropriate, announce ourselves
 * on the network, shut down multifd loading, let dirty-bitmap migration
 * prepare for VM start, put the VM into its final run state, move the
 * incoming state from ACTIVE to COMPLETED, and finally delete the
 * bottom half and destroy the incoming state.
 */
static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            /* error_report_err() consumes the error; clear the pointer */
            error_report_err(local_err);
            local_err = NULL;
            /* Makes the run-state decision below pick PAUSED */
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    multifd_load_shutdown();

    dirty_bitmap_mig_before_vm_start();

    /*
     * Decide the final run state: with no global state received, or a
     * received run state of RUNNING, start the VM if autostart is still
     * set and pause it otherwise.  Failing that, a COLO-enabled incoming
     * side leaves COLO and starts the VM; anything else takes over the
     * run state received in the global state.
     */
    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    /* This bottom half is one-shot: delete it, then free the state */
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}
591 
/*
 * Coroutine that loads the incoming migration stream from
 * mis->from_src_file.
 *
 * @opaque: unused; the incoming state is fetched with
 *          migration_incoming_get_current()
 *
 * Outcomes:
 *  - load succeeded: process_incoming_migration_bh() is scheduled to
 *    finish the job;
 *  - postcopy was started (state beyond ADVISE) and the load did not
 *    fail: return right away, cleanup is left to the postcopy thread;
 *  - load or block activation failed: the state is set to FAILED and
 *    the process exits with EXIT_FAILURE.
 */
static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    /* The main channel must have been set up before we get here */
    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    /* The load may have moved the postcopy state; look at where it ended */
    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        /* Suspend here until something re-enters this coroutine */
        qemu_coroutine_yield();

        /* Drop the iothread lock while blocked in the join below */
        qemu_mutex_unlock_iothread();
        /* Wait checkpoint incoming thread exit before free resource */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    /* Success: the remaining work is done by the bottom half */
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    /* Any error was already reported (and freed) before jumping here */
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);

    multifd_load_cleanup();

    /* A failed incoming migration terminates the process */
    exit(EXIT_FAILURE);
}
669 
670 /**
671  * migration_incoming_setup: Setup incoming migration
672  * @f: file for main migration channel
673  * @errp: where to put errors
674  *
675  * Returns: %true on success, %false on error.
676  */
677 static bool migration_incoming_setup(QEMUFile *f, Error **errp)
678 {
679     MigrationIncomingState *mis = migration_incoming_get_current();
680 
681     if (!mis->from_src_file) {
682         mis->from_src_file = f;
683     }
684     qemu_file_set_blocking(f, false);
685     return true;
686 }
687 
688 void migration_incoming_process(void)
689 {
690     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
691     qemu_coroutine_enter(co);
692 }
693 
694 /* Returns true if recovered from a paused migration, otherwise false */
695 static bool postcopy_try_recover(void)
696 {
697     MigrationIncomingState *mis = migration_incoming_get_current();
698 
699     if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
700         /* Resumed from a paused postcopy migration */
701 
702         /* This should be set already in migration_incoming_setup() */
703         assert(mis->from_src_file);
704         /* Postcopy has standalone thread to do vm load */
705         qemu_file_set_blocking(mis->from_src_file, true);
706 
707         /* Re-configure the return path */
708         mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
709 
710         migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
711                           MIGRATION_STATUS_POSTCOPY_RECOVER);
712 
713         /*
714          * Here, we only wake up the main loading thread (while the
715          * rest threads will still be waiting), so that we can receive
716          * commands from source now, and answer it if needed. The
717          * rest threads will be woken up afterwards until we are sure
718          * that source is ready to reply to page requests.
719          */
720         qemu_sem_post(&mis->postcopy_pause_sem_dst);
721         return true;
722     }
723 
724     return false;
725 }
726 
727 void migration_fd_process_incoming(QEMUFile *f, Error **errp)
728 {
729     if (!migration_incoming_setup(f, errp)) {
730         return;
731     }
732     if (postcopy_try_recover()) {
733         return;
734     }
735     migration_incoming_process();
736 }
737 
/*
 * Decide whether the channel that was just set up should trigger the
 * start of a new incoming migration process.
 *
 * @main_channel: whether that channel is the main channel
 *
 * Returns true when the incoming process should be started, false
 * otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_use_multifd()) {
        return migration_has_all_channels();
    }

    if (!migrate_postcopy_preempt()) {
        /*
         * Every other type of migration only has a main channel, so
         * that is the one being created here, and it always proceeds.
         */
        assert(main_channel);
        return true;
    }

    /* Preempt channel only starts when the main channel is created */
    return main_channel;
}
762 
763 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
764 {
765     MigrationIncomingState *mis = migration_incoming_get_current();
766     Error *local_err = NULL;
767     QEMUFile *f;
768     bool default_channel = true;
769     uint32_t channel_magic = 0;
770     int ret = 0;
771 
772     if (migrate_use_multifd() && !migrate_postcopy_ram() &&
773         qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
774         /*
775          * With multiple channels, it is possible that we receive channels
776          * out of order on destination side, causing incorrect mapping of
777          * source channels on destination side. Check channel MAGIC to
778          * decide type of channel. Please note this is best effort, postcopy
779          * preempt channel does not send any magic number so avoid it for
780          * postcopy live migration. Also tls live migration already does
781          * tls handshake while initializing main channel so with tls this
782          * issue is not possible.
783          */
784         ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
785                                           sizeof(channel_magic), &local_err);
786 
787         if (ret != 0) {
788             error_propagate(errp, local_err);
789             return;
790         }
791 
792         default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
793     } else {
794         default_channel = !mis->from_src_file;
795     }
796 
797     if (multifd_load_setup(errp) != 0) {
798         error_setg(errp, "Failed to setup multifd channels");
799         return;
800     }
801 
802     if (default_channel) {
803         f = qemu_file_new_input(ioc);
804 
805         if (!migration_incoming_setup(f, errp)) {
806             return;
807         }
808     } else {
809         /* Multiple connections */
810         assert(migration_needs_multiple_sockets());
811         if (migrate_use_multifd()) {
812             multifd_recv_new_channel(ioc, &local_err);
813         } else {
814             assert(migrate_postcopy_preempt());
815             f = qemu_file_new_input(ioc);
816             postcopy_preempt_new_channel(mis, f);
817         }
818         if (local_err) {
819             error_propagate(errp, local_err);
820             return;
821         }
822     }
823 
824     if (migration_should_start_incoming(default_channel)) {
825         /* If it's a recovery, we're done */
826         if (postcopy_try_recover()) {
827             return;
828         }
829         migration_incoming_process();
830     }
831 }
832 
833 /**
834  * @migration_has_all_channels: We have received all channels that we need
835  *
836  * Returns true when we have got connections to all the channels that
837  * we need for migration.
838  */
839 bool migration_has_all_channels(void)
840 {
841     MigrationIncomingState *mis = migration_incoming_get_current();
842 
843     if (!mis->from_src_file) {
844         return false;
845     }
846 
847     if (migrate_use_multifd()) {
848         return multifd_recv_all_channels_created();
849     }
850 
851     if (migrate_postcopy_preempt()) {
852         return mis->postcopy_qemufile_dst != NULL;
853     }
854 
855     return true;
856 }
857 
858 /*
859  * Send a 'SHUT' message on the return channel with the given value
860  * to indicate that we've finished with the RP.  Non-0 value indicates
861  * error.
862  */
863 void migrate_send_rp_shut(MigrationIncomingState *mis,
864                           uint32_t value)
865 {
866     uint32_t buf;
867 
868     buf = cpu_to_be32(value);
869     migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
870 }
871 
872 /*
873  * Send a 'PONG' message on the return channel with the given value
874  * (normally in response to a 'PING')
875  */
876 void migrate_send_rp_pong(MigrationIncomingState *mis,
877                           uint32_t value)
878 {
879     uint32_t buf;
880 
881     buf = cpu_to_be32(value);
882     migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
883 }
884 
885 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
886                                  char *block_name)
887 {
888     char buf[512];
889     int len;
890     int64_t res;
891 
892     /*
893      * First, we send the header part. It contains only the len of
894      * idstr, and the idstr itself.
895      */
896     len = strlen(block_name);
897     buf[0] = len;
898     memcpy(buf + 1, block_name, len);
899 
900     if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
901         error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
902                      __func__);
903         return;
904     }
905 
906     migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);
907 
908     /*
909      * Next, we dump the received bitmap to the stream.
910      *
911      * TODO: currently we are safe since we are the only one that is
912      * using the to_src_file handle (fault thread is still paused),
913      * and it's ok even not taking the mutex. However the best way is
914      * to take the lock before sending the message header, and release
915      * the lock after sending the bitmap.
916      */
917     qemu_mutex_lock(&mis->rp_mutex);
918     res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
919     qemu_mutex_unlock(&mis->rp_mutex);
920 
921     trace_migrate_send_rp_recv_bitmap(block_name, res);
922 }
923 
924 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
925 {
926     uint32_t buf;
927 
928     buf = cpu_to_be32(value);
929     migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
930 }
931 
932 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
933 {
934     MigrationCapabilityStatusList *head = NULL, **tail = &head;
935     MigrationCapabilityStatus *caps;
936     MigrationState *s = migrate_get_current();
937     int i;
938 
939     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
940 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
941         if (i == MIGRATION_CAPABILITY_BLOCK) {
942             continue;
943         }
944 #endif
945         caps = g_malloc0(sizeof(*caps));
946         caps->capability = i;
947         caps->state = s->enabled_capabilities[i];
948         QAPI_LIST_APPEND(tail, caps);
949     }
950 
951     return head;
952 }
953 
954 MigrationParameters *qmp_query_migrate_parameters(Error **errp)
955 {
956     MigrationParameters *params;
957     MigrationState *s = migrate_get_current();
958 
959     /* TODO use QAPI_CLONE() instead of duplicating it inline */
960     params = g_malloc0(sizeof(*params));
961     params->has_compress_level = true;
962     params->compress_level = s->parameters.compress_level;
963     params->has_compress_threads = true;
964     params->compress_threads = s->parameters.compress_threads;
965     params->has_compress_wait_thread = true;
966     params->compress_wait_thread = s->parameters.compress_wait_thread;
967     params->has_decompress_threads = true;
968     params->decompress_threads = s->parameters.decompress_threads;
969     params->has_throttle_trigger_threshold = true;
970     params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
971     params->has_cpu_throttle_initial = true;
972     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
973     params->has_cpu_throttle_increment = true;
974     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
975     params->has_cpu_throttle_tailslow = true;
976     params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
977     params->tls_creds = g_strdup(s->parameters.tls_creds);
978     params->tls_hostname = g_strdup(s->parameters.tls_hostname);
979     params->tls_authz = g_strdup(s->parameters.tls_authz ?
980                                  s->parameters.tls_authz : "");
981     params->has_max_bandwidth = true;
982     params->max_bandwidth = s->parameters.max_bandwidth;
983     params->has_downtime_limit = true;
984     params->downtime_limit = s->parameters.downtime_limit;
985     params->has_x_checkpoint_delay = true;
986     params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
987     params->has_block_incremental = true;
988     params->block_incremental = s->parameters.block_incremental;
989     params->has_multifd_channels = true;
990     params->multifd_channels = s->parameters.multifd_channels;
991     params->has_multifd_compression = true;
992     params->multifd_compression = s->parameters.multifd_compression;
993     params->has_multifd_zlib_level = true;
994     params->multifd_zlib_level = s->parameters.multifd_zlib_level;
995     params->has_multifd_zstd_level = true;
996     params->multifd_zstd_level = s->parameters.multifd_zstd_level;
997     params->has_xbzrle_cache_size = true;
998     params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
999     params->has_max_postcopy_bandwidth = true;
1000     params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
1001     params->has_max_cpu_throttle = true;
1002     params->max_cpu_throttle = s->parameters.max_cpu_throttle;
1003     params->has_announce_initial = true;
1004     params->announce_initial = s->parameters.announce_initial;
1005     params->has_announce_max = true;
1006     params->announce_max = s->parameters.announce_max;
1007     params->has_announce_rounds = true;
1008     params->announce_rounds = s->parameters.announce_rounds;
1009     params->has_announce_step = true;
1010     params->announce_step = s->parameters.announce_step;
1011 
1012     if (s->parameters.has_block_bitmap_mapping) {
1013         params->has_block_bitmap_mapping = true;
1014         params->block_bitmap_mapping =
1015             QAPI_CLONE(BitmapMigrationNodeAliasList,
1016                        s->parameters.block_bitmap_mapping);
1017     }
1018 
1019     return params;
1020 }
1021 
1022 void qmp_client_migrate_info(const char *protocol, const char *hostname,
1023                              bool has_port, int64_t port,
1024                              bool has_tls_port, int64_t tls_port,
1025                              const char *cert_subject,
1026                              Error **errp)
1027 {
1028     if (strcmp(protocol, "spice") == 0) {
1029         if (!qemu_using_spice(errp)) {
1030             return;
1031         }
1032 
1033         if (!has_port && !has_tls_port) {
1034             error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
1035             return;
1036         }
1037 
1038         if (qemu_spice.migrate_info(hostname,
1039                                     has_port ? port : -1,
1040                                     has_tls_port ? tls_port : -1,
1041                                     cert_subject)) {
1042             error_setg(errp, "Could not set up display for migration");
1043             return;
1044         }
1045         return;
1046     }
1047 
1048     error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
1049 }
1050 
1051 AnnounceParameters *migrate_announce_params(void)
1052 {
1053     static AnnounceParameters ap;
1054 
1055     MigrationState *s = migrate_get_current();
1056 
1057     ap.initial = s->parameters.announce_initial;
1058     ap.max = s->parameters.announce_max;
1059     ap.rounds = s->parameters.announce_rounds;
1060     ap.step = s->parameters.announce_step;
1061 
1062     return &ap;
1063 }
1064 
1065 /*
1066  * Return true if we're already in the middle of a migration
1067  * (i.e. any of the active or setup states)
1068  */
1069 bool migration_is_setup_or_active(int state)
1070 {
1071     switch (state) {
1072     case MIGRATION_STATUS_ACTIVE:
1073     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1074     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1075     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1076     case MIGRATION_STATUS_SETUP:
1077     case MIGRATION_STATUS_PRE_SWITCHOVER:
1078     case MIGRATION_STATUS_DEVICE:
1079     case MIGRATION_STATUS_WAIT_UNPLUG:
1080     case MIGRATION_STATUS_COLO:
1081         return true;
1082 
1083     default:
1084         return false;
1085 
1086     }
1087 }
1088 
1089 bool migration_is_running(int state)
1090 {
1091     switch (state) {
1092     case MIGRATION_STATUS_ACTIVE:
1093     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1094     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1095     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1096     case MIGRATION_STATUS_SETUP:
1097     case MIGRATION_STATUS_PRE_SWITCHOVER:
1098     case MIGRATION_STATUS_DEVICE:
1099     case MIGRATION_STATUS_WAIT_UNPLUG:
1100     case MIGRATION_STATUS_CANCELLING:
1101         return true;
1102 
1103     default:
1104         return false;
1105 
1106     }
1107 }
1108 
1109 static bool migrate_show_downtime(MigrationState *s)
1110 {
1111     return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
1112 }
1113 
1114 static void populate_time_info(MigrationInfo *info, MigrationState *s)
1115 {
1116     info->has_status = true;
1117     info->has_setup_time = true;
1118     info->setup_time = s->setup_time;
1119 
1120     if (s->state == MIGRATION_STATUS_COMPLETED) {
1121         info->has_total_time = true;
1122         info->total_time = s->total_time;
1123     } else {
1124         info->has_total_time = true;
1125         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
1126                            s->start_time;
1127     }
1128 
1129     if (migrate_show_downtime(s)) {
1130         info->has_downtime = true;
1131         info->downtime = s->downtime;
1132     } else {
1133         info->has_expected_downtime = true;
1134         info->expected_downtime = s->expected_downtime;
1135     }
1136 }
1137 
1138 static void populate_ram_info(MigrationInfo *info, MigrationState *s)
1139 {
1140     size_t page_size = qemu_target_page_size();
1141 
1142     info->ram = g_malloc0(sizeof(*info->ram));
1143     info->ram->transferred = stat64_get(&ram_atomic_counters.transferred);
1144     info->ram->total = ram_bytes_total();
1145     info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate);
1146     /* legacy value.  It is not used anymore */
1147     info->ram->skipped = 0;
1148     info->ram->normal = stat64_get(&ram_atomic_counters.normal);
1149     info->ram->normal_bytes = info->ram->normal * page_size;
1150     info->ram->mbps = s->mbps;
1151     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
1152     info->ram->dirty_sync_missed_zero_copy =
1153             ram_counters.dirty_sync_missed_zero_copy;
1154     info->ram->postcopy_requests = ram_counters.postcopy_requests;
1155     info->ram->page_size = page_size;
1156     info->ram->multifd_bytes = ram_counters.multifd_bytes;
1157     info->ram->pages_per_second = s->pages_per_second;
1158     info->ram->precopy_bytes = ram_counters.precopy_bytes;
1159     info->ram->downtime_bytes = ram_counters.downtime_bytes;
1160     info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);
1161 
1162     if (migrate_use_xbzrle()) {
1163         info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
1164         info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
1165         info->xbzrle_cache->bytes = xbzrle_counters.bytes;
1166         info->xbzrle_cache->pages = xbzrle_counters.pages;
1167         info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
1168         info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
1169         info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
1170         info->xbzrle_cache->overflow = xbzrle_counters.overflow;
1171     }
1172 
1173     if (migrate_use_compression()) {
1174         info->compression = g_malloc0(sizeof(*info->compression));
1175         info->compression->pages = compression_counters.pages;
1176         info->compression->busy = compression_counters.busy;
1177         info->compression->busy_rate = compression_counters.busy_rate;
1178         info->compression->compressed_size =
1179                                     compression_counters.compressed_size;
1180         info->compression->compression_rate =
1181                                     compression_counters.compression_rate;
1182     }
1183 
1184     if (cpu_throttle_active()) {
1185         info->has_cpu_throttle_percentage = true;
1186         info->cpu_throttle_percentage = cpu_throttle_get_percentage();
1187     }
1188 
1189     if (s->state != MIGRATION_STATUS_COMPLETED) {
1190         info->ram->remaining = ram_bytes_remaining();
1191         info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
1192     }
1193 }
1194 
1195 static void populate_disk_info(MigrationInfo *info)
1196 {
1197     if (blk_mig_active()) {
1198         info->disk = g_malloc0(sizeof(*info->disk));
1199         info->disk->transferred = blk_mig_bytes_transferred();
1200         info->disk->remaining = blk_mig_bytes_remaining();
1201         info->disk->total = blk_mig_bytes_total();
1202     }
1203 }
1204 
1205 static void fill_source_migration_info(MigrationInfo *info)
1206 {
1207     MigrationState *s = migrate_get_current();
1208     int state = qatomic_read(&s->state);
1209     GSList *cur_blocker = migration_blockers;
1210 
1211     info->blocked_reasons = NULL;
1212 
1213     /*
1214      * There are two types of reasons a migration might be blocked;
1215      * a) devices marked in VMState as non-migratable, and
1216      * b) Explicit migration blockers
1217      * We need to add both of them here.
1218      */
1219     qemu_savevm_non_migratable_list(&info->blocked_reasons);
1220 
1221     while (cur_blocker) {
1222         QAPI_LIST_PREPEND(info->blocked_reasons,
1223                           g_strdup(error_get_pretty(cur_blocker->data)));
1224         cur_blocker = g_slist_next(cur_blocker);
1225     }
1226     info->has_blocked_reasons = info->blocked_reasons != NULL;
1227 
1228     switch (state) {
1229     case MIGRATION_STATUS_NONE:
1230         /* no migration has happened ever */
1231         /* do not overwrite destination migration status */
1232         return;
1233     case MIGRATION_STATUS_SETUP:
1234         info->has_status = true;
1235         info->has_total_time = false;
1236         break;
1237     case MIGRATION_STATUS_ACTIVE:
1238     case MIGRATION_STATUS_CANCELLING:
1239     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1240     case MIGRATION_STATUS_PRE_SWITCHOVER:
1241     case MIGRATION_STATUS_DEVICE:
1242     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1243     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1244         /* TODO add some postcopy stats */
1245         populate_time_info(info, s);
1246         populate_ram_info(info, s);
1247         populate_disk_info(info);
1248         populate_vfio_info(info);
1249         break;
1250     case MIGRATION_STATUS_COLO:
1251         info->has_status = true;
1252         /* TODO: display COLO specific information (checkpoint info etc.) */
1253         break;
1254     case MIGRATION_STATUS_COMPLETED:
1255         populate_time_info(info, s);
1256         populate_ram_info(info, s);
1257         populate_vfio_info(info);
1258         break;
1259     case MIGRATION_STATUS_FAILED:
1260         info->has_status = true;
1261         if (s->error) {
1262             info->error_desc = g_strdup(error_get_pretty(s->error));
1263         }
1264         break;
1265     case MIGRATION_STATUS_CANCELLED:
1266         info->has_status = true;
1267         break;
1268     case MIGRATION_STATUS_WAIT_UNPLUG:
1269         info->has_status = true;
1270         break;
1271     }
1272     info->status = state;
1273 }
1274 
/*
 * Level of write-tracking support.  The values are ordered from least
 * to most capable because users compare them with '<'.
 */
typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT = 1,
    WT_SUPPORT_AVAILABLE = 2,
    WT_SUPPORT_COMPATIBLE = 3
} WriteTrackingSupport;
1281 
1282 static
1283 WriteTrackingSupport migrate_query_write_tracking(void)
1284 {
1285     /* Check if kernel supports required UFFD features */
1286     if (!ram_write_tracking_available()) {
1287         return WT_SUPPORT_ABSENT;
1288     }
1289     /*
1290      * Check if current memory configuration is
1291      * compatible with required UFFD features.
1292      */
1293     if (!ram_write_tracking_compatible()) {
1294         return WT_SUPPORT_AVAILABLE;
1295     }
1296 
1297     return WT_SUPPORT_COMPATIBLE;
1298 }
1299 
1300 /**
1301  * @migration_caps_check - check capability validity
1302  *
1303  * @cap_list: old capability list, array of bool
1304  * @params: new capabilities to be applied soon
1305  * @errp: set *errp if the check failed, with reason
1306  *
1307  * Returns true if check passed, otherwise false.
1308  */
1309 static bool migrate_caps_check(bool *cap_list,
1310                                MigrationCapabilityStatusList *params,
1311                                Error **errp)
1312 {
1313     MigrationCapabilityStatusList *cap;
1314     bool old_postcopy_cap;
1315     MigrationIncomingState *mis = migration_incoming_get_current();
1316 
1317     old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
1318 
1319     for (cap = params; cap; cap = cap->next) {
1320         cap_list[cap->value->capability] = cap->value->state;
1321     }
1322 
1323 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
1324     if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
1325         error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
1326                    "block migration");
1327         error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
1328         return false;
1329     }
1330 #endif
1331 
1332 #ifndef CONFIG_REPLICATION
1333     if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
1334         error_setg(errp, "QEMU compiled without replication module"
1335                    " can't enable COLO");
1336         error_append_hint(errp, "Please enable replication before COLO.\n");
1337         return false;
1338     }
1339 #endif
1340 
1341     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
1342         /* This check is reasonably expensive, so only when it's being
1343          * set the first time, also it's only the destination that needs
1344          * special support.
1345          */
1346         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
1347             !postcopy_ram_supported_by_host(mis)) {
1348             /* postcopy_ram_supported_by_host will have emitted a more
1349              * detailed message
1350              */
1351             error_setg(errp, "Postcopy is not supported");
1352             return false;
1353         }
1354 
1355         if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
1356             error_setg(errp, "Postcopy is not compatible with ignore-shared");
1357             return false;
1358         }
1359     }
1360 
1361     if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
1362         WriteTrackingSupport wt_support;
1363         int idx;
1364         /*
1365          * Check if 'background-snapshot' capability is supported by
1366          * host kernel and compatible with guest memory configuration.
1367          */
1368         wt_support = migrate_query_write_tracking();
1369         if (wt_support < WT_SUPPORT_AVAILABLE) {
1370             error_setg(errp, "Background-snapshot is not supported by host kernel");
1371             return false;
1372         }
1373         if (wt_support < WT_SUPPORT_COMPATIBLE) {
1374             error_setg(errp, "Background-snapshot is not compatible "
1375                     "with guest memory configuration");
1376             return false;
1377         }
1378 
1379         /*
1380          * Check if there are any migration capabilities
1381          * incompatible with 'background-snapshot'.
1382          */
1383         for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
1384             int incomp_cap = check_caps_background_snapshot.caps[idx];
1385             if (cap_list[incomp_cap]) {
1386                 error_setg(errp,
1387                         "Background-snapshot is not compatible with %s",
1388                         MigrationCapability_str(incomp_cap));
1389                 return false;
1390             }
1391         }
1392     }
1393 
1394 #ifdef CONFIG_LINUX
1395     if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
1396         (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
1397          cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
1398          cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
1399          migrate_multifd_compression() ||
1400          migrate_use_tls())) {
1401         error_setg(errp,
1402                    "Zero copy only available for non-compressed non-TLS multifd migration");
1403         return false;
1404     }
1405 #else
1406     if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
1407         error_setg(errp,
1408                    "Zero copy currently only available on Linux");
1409         return false;
1410     }
1411 #endif
1412 
1413     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
1414         if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
1415             error_setg(errp, "Postcopy preempt requires postcopy-ram");
1416             return false;
1417         }
1418 
1419         /*
1420          * Preempt mode requires urgent pages to be sent in separate
1421          * channel, OTOH compression logic will disorder all pages into
1422          * different compression channels, which is not compatible with the
1423          * preempt assumptions on channel assignments.
1424          */
1425         if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
1426             error_setg(errp, "Postcopy preempt not compatible with compress");
1427             return false;
1428         }
1429     }
1430 
1431     if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
1432         if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
1433             error_setg(errp, "Multifd is not compatible with compress");
1434             return false;
1435         }
1436     }
1437 
1438     return true;
1439 }
1440 
1441 static void fill_destination_migration_info(MigrationInfo *info)
1442 {
1443     MigrationIncomingState *mis = migration_incoming_get_current();
1444 
1445     if (mis->socket_address_list) {
1446         info->has_socket_address = true;
1447         info->socket_address =
1448             QAPI_CLONE(SocketAddressList, mis->socket_address_list);
1449     }
1450 
1451     switch (mis->state) {
1452     case MIGRATION_STATUS_NONE:
1453         return;
1454     case MIGRATION_STATUS_SETUP:
1455     case MIGRATION_STATUS_CANCELLING:
1456     case MIGRATION_STATUS_CANCELLED:
1457     case MIGRATION_STATUS_ACTIVE:
1458     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1459     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1460     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1461     case MIGRATION_STATUS_FAILED:
1462     case MIGRATION_STATUS_COLO:
1463         info->has_status = true;
1464         break;
1465     case MIGRATION_STATUS_COMPLETED:
1466         info->has_status = true;
1467         fill_destination_postcopy_migration_info(info);
1468         break;
1469     }
1470     info->status = mis->state;
1471 }
1472 
1473 MigrationInfo *qmp_query_migrate(Error **errp)
1474 {
1475     MigrationInfo *info = g_malloc0(sizeof(*info));
1476 
1477     fill_destination_migration_info(info);
1478     fill_source_migration_info(info);
1479 
1480     return info;
1481 }
1482 
1483 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
1484                                   Error **errp)
1485 {
1486     MigrationState *s = migrate_get_current();
1487     MigrationCapabilityStatusList *cap;
1488     bool cap_list[MIGRATION_CAPABILITY__MAX];
1489 
1490     if (migration_is_running(s->state)) {
1491         error_setg(errp, QERR_MIGRATION_ACTIVE);
1492         return;
1493     }
1494 
1495     memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
1496     if (!migrate_caps_check(cap_list, params, errp)) {
1497         return;
1498     }
1499 
1500     for (cap = params; cap; cap = cap->next) {
1501         s->enabled_capabilities[cap->value->capability] = cap->value->state;
1502     }
1503 }
1504 
1505 /*
1506  * Check whether the parameters are valid. Error will be put into errp
1507  * (if provided). Return true if valid, otherwise false.
1508  */
1509 static bool migrate_params_check(MigrationParameters *params, Error **errp)
1510 {
1511     if (params->has_compress_level &&
1512         (params->compress_level > 9)) {
1513         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
1514                    "a value between 0 and 9");
1515         return false;
1516     }
1517 
1518     if (params->has_compress_threads && (params->compress_threads < 1)) {
1519         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1520                    "compress_threads",
1521                    "a value between 1 and 255");
1522         return false;
1523     }
1524 
1525     if (params->has_decompress_threads && (params->decompress_threads < 1)) {
1526         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1527                    "decompress_threads",
1528                    "a value between 1 and 255");
1529         return false;
1530     }
1531 
1532     if (params->has_throttle_trigger_threshold &&
1533         (params->throttle_trigger_threshold < 1 ||
1534          params->throttle_trigger_threshold > 100)) {
1535         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1536                    "throttle_trigger_threshold",
1537                    "an integer in the range of 1 to 100");
1538         return false;
1539     }
1540 
1541     if (params->has_cpu_throttle_initial &&
1542         (params->cpu_throttle_initial < 1 ||
1543          params->cpu_throttle_initial > 99)) {
1544         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1545                    "cpu_throttle_initial",
1546                    "an integer in the range of 1 to 99");
1547         return false;
1548     }
1549 
1550     if (params->has_cpu_throttle_increment &&
1551         (params->cpu_throttle_increment < 1 ||
1552          params->cpu_throttle_increment > 99)) {
1553         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1554                    "cpu_throttle_increment",
1555                    "an integer in the range of 1 to 99");
1556         return false;
1557     }
1558 
1559     if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
1560         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1561                    "max_bandwidth",
1562                    "an integer in the range of 0 to "stringify(SIZE_MAX)
1563                    " bytes/second");
1564         return false;
1565     }
1566 
1567     if (params->has_downtime_limit &&
1568         (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
1569         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1570                    "downtime_limit",
1571                    "an integer in the range of 0 to "
1572                     stringify(MAX_MIGRATE_DOWNTIME)" ms");
1573         return false;
1574     }
1575 
1576     /* x_checkpoint_delay is now always positive */
1577 
1578     if (params->has_multifd_channels && (params->multifd_channels < 1)) {
1579         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1580                    "multifd_channels",
1581                    "a value between 1 and 255");
1582         return false;
1583     }
1584 
1585     if (params->has_multifd_zlib_level &&
1586         (params->multifd_zlib_level > 9)) {
1587         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
1588                    "a value between 0 and 9");
1589         return false;
1590     }
1591 
1592     if (params->has_multifd_zstd_level &&
1593         (params->multifd_zstd_level > 20)) {
1594         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
1595                    "a value between 0 and 20");
1596         return false;
1597     }
1598 
1599     if (params->has_xbzrle_cache_size &&
1600         (params->xbzrle_cache_size < qemu_target_page_size() ||
1601          !is_power_of_2(params->xbzrle_cache_size))) {
1602         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1603                    "xbzrle_cache_size",
1604                    "a power of two no less than the target page size");
1605         return false;
1606     }
1607 
1608     if (params->has_max_cpu_throttle &&
1609         (params->max_cpu_throttle < params->cpu_throttle_initial ||
1610          params->max_cpu_throttle > 99)) {
1611         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1612                    "max_cpu_throttle",
1613                    "an integer in the range of cpu_throttle_initial to 99");
1614         return false;
1615     }
1616 
1617     if (params->has_announce_initial &&
1618         params->announce_initial > 100000) {
1619         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1620                    "announce_initial",
1621                    "a value between 0 and 100000");
1622         return false;
1623     }
1624     if (params->has_announce_max &&
1625         params->announce_max > 100000) {
1626         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1627                    "announce_max",
1628                    "a value between 0 and 100000");
1629        return false;
1630     }
1631     if (params->has_announce_rounds &&
1632         params->announce_rounds > 1000) {
1633         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1634                    "announce_rounds",
1635                    "a value between 0 and 1000");
1636        return false;
1637     }
1638     if (params->has_announce_step &&
1639         (params->announce_step < 1 ||
1640         params->announce_step > 10000)) {
1641         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1642                    "announce_step",
1643                    "a value between 0 and 10000");
1644        return false;
1645     }
1646 
1647     if (params->has_block_bitmap_mapping &&
1648         !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
1649         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
1650         return false;
1651     }
1652 
1653 #ifdef CONFIG_LINUX
1654     if (migrate_use_zero_copy_send() &&
1655         ((params->has_multifd_compression && params->multifd_compression) ||
1656          (params->tls_creds && *params->tls_creds))) {
1657         error_setg(errp,
1658                    "Zero copy only available for non-compressed non-TLS multifd migration");
1659         return false;
1660     }
1661 #endif
1662 
1663     return true;
1664 }
1665 
1666 static void migrate_params_test_apply(MigrateSetParameters *params,
1667                                       MigrationParameters *dest)
1668 {
1669     *dest = migrate_get_current()->parameters;
1670 
1671     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1672 
1673     if (params->has_compress_level) {
1674         dest->compress_level = params->compress_level;
1675     }
1676 
1677     if (params->has_compress_threads) {
1678         dest->compress_threads = params->compress_threads;
1679     }
1680 
1681     if (params->has_compress_wait_thread) {
1682         dest->compress_wait_thread = params->compress_wait_thread;
1683     }
1684 
1685     if (params->has_decompress_threads) {
1686         dest->decompress_threads = params->decompress_threads;
1687     }
1688 
1689     if (params->has_throttle_trigger_threshold) {
1690         dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
1691     }
1692 
1693     if (params->has_cpu_throttle_initial) {
1694         dest->cpu_throttle_initial = params->cpu_throttle_initial;
1695     }
1696 
1697     if (params->has_cpu_throttle_increment) {
1698         dest->cpu_throttle_increment = params->cpu_throttle_increment;
1699     }
1700 
1701     if (params->has_cpu_throttle_tailslow) {
1702         dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
1703     }
1704 
1705     if (params->tls_creds) {
1706         assert(params->tls_creds->type == QTYPE_QSTRING);
1707         dest->tls_creds = params->tls_creds->u.s;
1708     }
1709 
1710     if (params->tls_hostname) {
1711         assert(params->tls_hostname->type == QTYPE_QSTRING);
1712         dest->tls_hostname = params->tls_hostname->u.s;
1713     }
1714 
1715     if (params->has_max_bandwidth) {
1716         dest->max_bandwidth = params->max_bandwidth;
1717     }
1718 
1719     if (params->has_downtime_limit) {
1720         dest->downtime_limit = params->downtime_limit;
1721     }
1722 
1723     if (params->has_x_checkpoint_delay) {
1724         dest->x_checkpoint_delay = params->x_checkpoint_delay;
1725     }
1726 
1727     if (params->has_block_incremental) {
1728         dest->block_incremental = params->block_incremental;
1729     }
1730     if (params->has_multifd_channels) {
1731         dest->multifd_channels = params->multifd_channels;
1732     }
1733     if (params->has_multifd_compression) {
1734         dest->multifd_compression = params->multifd_compression;
1735     }
1736     if (params->has_xbzrle_cache_size) {
1737         dest->xbzrle_cache_size = params->xbzrle_cache_size;
1738     }
1739     if (params->has_max_postcopy_bandwidth) {
1740         dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1741     }
1742     if (params->has_max_cpu_throttle) {
1743         dest->max_cpu_throttle = params->max_cpu_throttle;
1744     }
1745     if (params->has_announce_initial) {
1746         dest->announce_initial = params->announce_initial;
1747     }
1748     if (params->has_announce_max) {
1749         dest->announce_max = params->announce_max;
1750     }
1751     if (params->has_announce_rounds) {
1752         dest->announce_rounds = params->announce_rounds;
1753     }
1754     if (params->has_announce_step) {
1755         dest->announce_step = params->announce_step;
1756     }
1757 
1758     if (params->has_block_bitmap_mapping) {
1759         dest->has_block_bitmap_mapping = true;
1760         dest->block_bitmap_mapping = params->block_bitmap_mapping;
1761     }
1762 }
1763 
1764 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
1765 {
1766     MigrationState *s = migrate_get_current();
1767 
1768     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1769 
1770     if (params->has_compress_level) {
1771         s->parameters.compress_level = params->compress_level;
1772     }
1773 
1774     if (params->has_compress_threads) {
1775         s->parameters.compress_threads = params->compress_threads;
1776     }
1777 
1778     if (params->has_compress_wait_thread) {
1779         s->parameters.compress_wait_thread = params->compress_wait_thread;
1780     }
1781 
1782     if (params->has_decompress_threads) {
1783         s->parameters.decompress_threads = params->decompress_threads;
1784     }
1785 
1786     if (params->has_throttle_trigger_threshold) {
1787         s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
1788     }
1789 
1790     if (params->has_cpu_throttle_initial) {
1791         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
1792     }
1793 
1794     if (params->has_cpu_throttle_increment) {
1795         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
1796     }
1797 
1798     if (params->has_cpu_throttle_tailslow) {
1799         s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
1800     }
1801 
1802     if (params->tls_creds) {
1803         g_free(s->parameters.tls_creds);
1804         assert(params->tls_creds->type == QTYPE_QSTRING);
1805         s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
1806     }
1807 
1808     if (params->tls_hostname) {
1809         g_free(s->parameters.tls_hostname);
1810         assert(params->tls_hostname->type == QTYPE_QSTRING);
1811         s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
1812     }
1813 
1814     if (params->tls_authz) {
1815         g_free(s->parameters.tls_authz);
1816         assert(params->tls_authz->type == QTYPE_QSTRING);
1817         s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
1818     }
1819 
1820     if (params->has_max_bandwidth) {
1821         s->parameters.max_bandwidth = params->max_bandwidth;
1822         if (s->to_dst_file && !migration_in_postcopy()) {
1823             qemu_file_set_rate_limit(s->to_dst_file,
1824                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
1825         }
1826     }
1827 
1828     if (params->has_downtime_limit) {
1829         s->parameters.downtime_limit = params->downtime_limit;
1830     }
1831 
1832     if (params->has_x_checkpoint_delay) {
1833         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
1834         if (migration_in_colo_state()) {
1835             colo_checkpoint_notify(s);
1836         }
1837     }
1838 
1839     if (params->has_block_incremental) {
1840         s->parameters.block_incremental = params->block_incremental;
1841     }
1842     if (params->has_multifd_channels) {
1843         s->parameters.multifd_channels = params->multifd_channels;
1844     }
1845     if (params->has_multifd_compression) {
1846         s->parameters.multifd_compression = params->multifd_compression;
1847     }
1848     if (params->has_xbzrle_cache_size) {
1849         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
1850         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
1851     }
1852     if (params->has_max_postcopy_bandwidth) {
1853         s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1854         if (s->to_dst_file && migration_in_postcopy()) {
1855             qemu_file_set_rate_limit(s->to_dst_file,
1856                     s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
1857         }
1858     }
1859     if (params->has_max_cpu_throttle) {
1860         s->parameters.max_cpu_throttle = params->max_cpu_throttle;
1861     }
1862     if (params->has_announce_initial) {
1863         s->parameters.announce_initial = params->announce_initial;
1864     }
1865     if (params->has_announce_max) {
1866         s->parameters.announce_max = params->announce_max;
1867     }
1868     if (params->has_announce_rounds) {
1869         s->parameters.announce_rounds = params->announce_rounds;
1870     }
1871     if (params->has_announce_step) {
1872         s->parameters.announce_step = params->announce_step;
1873     }
1874 
1875     if (params->has_block_bitmap_mapping) {
1876         qapi_free_BitmapMigrationNodeAliasList(
1877             s->parameters.block_bitmap_mapping);
1878 
1879         s->parameters.has_block_bitmap_mapping = true;
1880         s->parameters.block_bitmap_mapping =
1881             QAPI_CLONE(BitmapMigrationNodeAliasList,
1882                        params->block_bitmap_mapping);
1883     }
1884 }
1885 
1886 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1887 {
1888     MigrationParameters tmp;
1889 
1890     /* TODO Rewrite "" to null instead */
1891     if (params->tls_creds
1892         && params->tls_creds->type == QTYPE_QNULL) {
1893         qobject_unref(params->tls_creds->u.n);
1894         params->tls_creds->type = QTYPE_QSTRING;
1895         params->tls_creds->u.s = strdup("");
1896     }
1897     /* TODO Rewrite "" to null instead */
1898     if (params->tls_hostname
1899         && params->tls_hostname->type == QTYPE_QNULL) {
1900         qobject_unref(params->tls_hostname->u.n);
1901         params->tls_hostname->type = QTYPE_QSTRING;
1902         params->tls_hostname->u.s = strdup("");
1903     }
1904 
1905     migrate_params_test_apply(params, &tmp);
1906 
1907     if (!migrate_params_check(&tmp, errp)) {
1908         /* Invalid parameter */
1909         return;
1910     }
1911 
1912     migrate_params_apply(params, errp);
1913 }
1914 
1915 
1916 void qmp_migrate_start_postcopy(Error **errp)
1917 {
1918     MigrationState *s = migrate_get_current();
1919 
1920     if (!migrate_postcopy()) {
1921         error_setg(errp, "Enable postcopy with migrate_set_capability before"
1922                          " the start of migration");
1923         return;
1924     }
1925 
1926     if (s->state == MIGRATION_STATUS_NONE) {
1927         error_setg(errp, "Postcopy must be started after migration has been"
1928                          " started");
1929         return;
1930     }
1931     /*
1932      * we don't error if migration has finished since that would be racy
1933      * with issuing this command.
1934      */
1935     qatomic_set(&s->start_postcopy, true);
1936 }
1937 
1938 /* shared migration helpers */
1939 
1940 void migrate_set_state(int *state, int old_state, int new_state)
1941 {
1942     assert(new_state < MIGRATION_STATUS__MAX);
1943     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
1944         trace_migrate_set_state(MigrationStatus_str(new_state));
1945         migrate_generate_event(new_state);
1946     }
1947 }
1948 
1949 static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
1950                                                   bool state)
1951 {
1952     MigrationCapabilityStatus *cap;
1953 
1954     cap = g_new0(MigrationCapabilityStatus, 1);
1955     cap->capability = index;
1956     cap->state = state;
1957 
1958     return cap;
1959 }
1960 
1961 void migrate_set_block_enabled(bool value, Error **errp)
1962 {
1963     MigrationCapabilityStatusList *cap = NULL;
1964 
1965     QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
1966     qmp_migrate_set_capabilities(cap, errp);
1967     qapi_free_MigrationCapabilityStatusList(cap);
1968 }
1969 
1970 static void migrate_set_block_incremental(MigrationState *s, bool value)
1971 {
1972     s->parameters.block_incremental = value;
1973 }
1974 
1975 static void block_cleanup_parameters(MigrationState *s)
1976 {
1977     if (s->must_remove_block_options) {
1978         /* setting to false can never fail */
1979         migrate_set_block_enabled(false, &error_abort);
1980         migrate_set_block_incremental(s, false);
1981         s->must_remove_block_options = false;
1982     }
1983 }
1984 
1985 static void migrate_fd_cleanup(MigrationState *s)
1986 {
1987     qemu_bh_delete(s->cleanup_bh);
1988     s->cleanup_bh = NULL;
1989 
1990     g_free(s->hostname);
1991     s->hostname = NULL;
1992     json_writer_free(s->vmdesc);
1993     s->vmdesc = NULL;
1994 
1995     qemu_savevm_state_cleanup();
1996 
1997     if (s->to_dst_file) {
1998         QEMUFile *tmp;
1999 
2000         trace_migrate_fd_cleanup();
2001         qemu_mutex_unlock_iothread();
2002         if (s->migration_thread_running) {
2003             qemu_thread_join(&s->thread);
2004             s->migration_thread_running = false;
2005         }
2006         qemu_mutex_lock_iothread();
2007 
2008         multifd_save_cleanup();
2009         qemu_mutex_lock(&s->qemu_file_lock);
2010         tmp = s->to_dst_file;
2011         s->to_dst_file = NULL;
2012         qemu_mutex_unlock(&s->qemu_file_lock);
2013         /*
2014          * Close the file handle without the lock to make sure the
2015          * critical section won't block for long.
2016          */
2017         migration_ioc_unregister_yank_from_file(tmp);
2018         qemu_fclose(tmp);
2019     }
2020 
2021     if (s->postcopy_qemufile_src) {
2022         migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
2023         qemu_fclose(s->postcopy_qemufile_src);
2024         s->postcopy_qemufile_src = NULL;
2025     }
2026 
2027     assert(!migration_is_active(s));
2028 
2029     if (s->state == MIGRATION_STATUS_CANCELLING) {
2030         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
2031                           MIGRATION_STATUS_CANCELLED);
2032     }
2033 
2034     if (s->error) {
2035         /* It is used on info migrate.  We can't free it */
2036         error_report_err(error_copy(s->error));
2037     }
2038     notifier_list_notify(&migration_state_notifiers, s);
2039     block_cleanup_parameters(s);
2040     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2041 }
2042 
2043 static void migrate_fd_cleanup_schedule(MigrationState *s)
2044 {
2045     /*
2046      * Ref the state for bh, because it may be called when
2047      * there're already no other refs
2048      */
2049     object_ref(OBJECT(s));
2050     qemu_bh_schedule(s->cleanup_bh);
2051 }
2052 
2053 static void migrate_fd_cleanup_bh(void *opaque)
2054 {
2055     MigrationState *s = opaque;
2056     migrate_fd_cleanup(s);
2057     object_unref(OBJECT(s));
2058 }
2059 
2060 void migrate_set_error(MigrationState *s, const Error *error)
2061 {
2062     QEMU_LOCK_GUARD(&s->error_mutex);
2063     if (!s->error) {
2064         s->error = error_copy(error);
2065     }
2066 }
2067 
2068 static void migrate_error_free(MigrationState *s)
2069 {
2070     QEMU_LOCK_GUARD(&s->error_mutex);
2071     if (s->error) {
2072         error_free(s->error);
2073         s->error = NULL;
2074     }
2075 }
2076 
2077 void migrate_fd_error(MigrationState *s, const Error *error)
2078 {
2079     trace_migrate_fd_error(error_get_pretty(error));
2080     assert(s->to_dst_file == NULL);
2081     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2082                       MIGRATION_STATUS_FAILED);
2083     migrate_set_error(s, error);
2084 }
2085 
2086 static void migrate_fd_cancel(MigrationState *s)
2087 {
2088     int old_state ;
2089     QEMUFile *f = migrate_get_current()->to_dst_file;
2090     trace_migrate_fd_cancel();
2091 
2092     WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
2093         if (s->rp_state.from_dst_file) {
2094             /* shutdown the rp socket, so causing the rp thread to shutdown */
2095             qemu_file_shutdown(s->rp_state.from_dst_file);
2096         }
2097     }
2098 
2099     do {
2100         old_state = s->state;
2101         if (!migration_is_running(old_state)) {
2102             break;
2103         }
2104         /* If the migration is paused, kick it out of the pause */
2105         if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
2106             qemu_sem_post(&s->pause_sem);
2107         }
2108         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
2109     } while (s->state != MIGRATION_STATUS_CANCELLING);
2110 
2111     /*
2112      * If we're unlucky the migration code might be stuck somewhere in a
2113      * send/write while the network has failed and is waiting to timeout;
2114      * if we've got shutdown(2) available then we can force it to quit.
2115      * The outgoing qemu file gets closed in migrate_fd_cleanup that is
2116      * called in a bh, so there is no race against this cancel.
2117      */
2118     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
2119         qemu_file_shutdown(f);
2120     }
2121     if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
2122         Error *local_err = NULL;
2123 
2124         bdrv_activate_all(&local_err);
2125         if (local_err) {
2126             error_report_err(local_err);
2127         } else {
2128             s->block_inactive = false;
2129         }
2130     }
2131 }
2132 
2133 void add_migration_state_change_notifier(Notifier *notify)
2134 {
2135     notifier_list_add(&migration_state_notifiers, notify);
2136 }
2137 
2138 void remove_migration_state_change_notifier(Notifier *notify)
2139 {
2140     notifier_remove(notify);
2141 }
2142 
2143 bool migration_in_setup(MigrationState *s)
2144 {
2145     return s->state == MIGRATION_STATUS_SETUP;
2146 }
2147 
2148 bool migration_has_finished(MigrationState *s)
2149 {
2150     return s->state == MIGRATION_STATUS_COMPLETED;
2151 }
2152 
2153 bool migration_has_failed(MigrationState *s)
2154 {
2155     return (s->state == MIGRATION_STATUS_CANCELLED ||
2156             s->state == MIGRATION_STATUS_FAILED);
2157 }
2158 
2159 bool migration_in_postcopy(void)
2160 {
2161     MigrationState *s = migrate_get_current();
2162 
2163     switch (s->state) {
2164     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
2165     case MIGRATION_STATUS_POSTCOPY_PAUSED:
2166     case MIGRATION_STATUS_POSTCOPY_RECOVER:
2167         return true;
2168     default:
2169         return false;
2170     }
2171 }
2172 
2173 bool migration_in_postcopy_after_devices(MigrationState *s)
2174 {
2175     return migration_in_postcopy() && s->postcopy_after_devices;
2176 }
2177 
2178 bool migration_in_incoming_postcopy(void)
2179 {
2180     PostcopyState ps = postcopy_state_get();
2181 
2182     return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
2183 }
2184 
2185 bool migration_incoming_postcopy_advised(void)
2186 {
2187     PostcopyState ps = postcopy_state_get();
2188 
2189     return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2190 }
2191 
2192 bool migration_in_bg_snapshot(void)
2193 {
2194     MigrationState *s = migrate_get_current();
2195 
2196     return migrate_background_snapshot() &&
2197             migration_is_setup_or_active(s->state);
2198 }
2199 
2200 bool migration_is_idle(void)
2201 {
2202     MigrationState *s = current_migration;
2203 
2204     if (!s) {
2205         return true;
2206     }
2207 
2208     switch (s->state) {
2209     case MIGRATION_STATUS_NONE:
2210     case MIGRATION_STATUS_CANCELLED:
2211     case MIGRATION_STATUS_COMPLETED:
2212     case MIGRATION_STATUS_FAILED:
2213         return true;
2214     case MIGRATION_STATUS_SETUP:
2215     case MIGRATION_STATUS_CANCELLING:
2216     case MIGRATION_STATUS_ACTIVE:
2217     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
2218     case MIGRATION_STATUS_COLO:
2219     case MIGRATION_STATUS_PRE_SWITCHOVER:
2220     case MIGRATION_STATUS_DEVICE:
2221     case MIGRATION_STATUS_WAIT_UNPLUG:
2222         return false;
2223     case MIGRATION_STATUS__MAX:
2224         g_assert_not_reached();
2225     }
2226 
2227     return false;
2228 }
2229 
2230 bool migration_is_active(MigrationState *s)
2231 {
2232     return (s->state == MIGRATION_STATUS_ACTIVE ||
2233             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
2234 }
2235 
2236 void migrate_init(MigrationState *s)
2237 {
2238     /*
2239      * Reinitialise all migration state, except
2240      * parameters/capabilities that the user set, and
2241      * locks.
2242      */
2243     s->cleanup_bh = 0;
2244     s->vm_start_bh = 0;
2245     s->to_dst_file = NULL;
2246     s->state = MIGRATION_STATUS_NONE;
2247     s->rp_state.from_dst_file = NULL;
2248     s->rp_state.error = false;
2249     s->mbps = 0.0;
2250     s->pages_per_second = 0.0;
2251     s->downtime = 0;
2252     s->expected_downtime = 0;
2253     s->setup_time = 0;
2254     s->start_postcopy = false;
2255     s->postcopy_after_devices = false;
2256     s->migration_thread_running = false;
2257     error_free(s->error);
2258     s->error = NULL;
2259     s->hostname = NULL;
2260 
2261     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
2262 
2263     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2264     s->total_time = 0;
2265     s->vm_was_running = false;
2266     s->iteration_initial_bytes = 0;
2267     s->threshold_size = 0;
2268 }
2269 
2270 int migrate_add_blocker_internal(Error *reason, Error **errp)
2271 {
2272     /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
2273     if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
2274         error_propagate_prepend(errp, error_copy(reason),
2275                                 "disallowing migration blocker "
2276                                 "(migration/snapshot in progress) for: ");
2277         return -EBUSY;
2278     }
2279 
2280     migration_blockers = g_slist_prepend(migration_blockers, reason);
2281     return 0;
2282 }
2283 
2284 int migrate_add_blocker(Error *reason, Error **errp)
2285 {
2286     if (only_migratable) {
2287         error_propagate_prepend(errp, error_copy(reason),
2288                                 "disallowing migration blocker "
2289                                 "(--only-migratable) for: ");
2290         return -EACCES;
2291     }
2292 
2293     return migrate_add_blocker_internal(reason, errp);
2294 }
2295 
2296 void migrate_del_blocker(Error *reason)
2297 {
2298     migration_blockers = g_slist_remove(migration_blockers, reason);
2299 }
2300 
2301 void qmp_migrate_incoming(const char *uri, Error **errp)
2302 {
2303     Error *local_err = NULL;
2304     static bool once = true;
2305 
2306     if (!once) {
2307         error_setg(errp, "The incoming migration has already been started");
2308         return;
2309     }
2310     if (!runstate_check(RUN_STATE_INMIGRATE)) {
2311         error_setg(errp, "'-incoming' was not specified on the command line");
2312         return;
2313     }
2314 
2315     if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2316         return;
2317     }
2318 
2319     qemu_start_incoming_migration(uri, &local_err);
2320 
2321     if (local_err) {
2322         yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2323         error_propagate(errp, local_err);
2324         return;
2325     }
2326 
2327     once = false;
2328 }
2329 
2330 void qmp_migrate_recover(const char *uri, Error **errp)
2331 {
2332     MigrationIncomingState *mis = migration_incoming_get_current();
2333 
2334     /*
2335      * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
2336      * callers (no one should ignore a recover failure); if there is, it's a
2337      * programming error.
2338      */
2339     assert(errp);
2340 
2341     if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2342         error_setg(errp, "Migrate recover can only be run "
2343                    "when postcopy is paused.");
2344         return;
2345     }
2346 
2347     /* If there's an existing transport, release it */
2348     migration_incoming_transport_cleanup(mis);
2349 
2350     /*
2351      * Note that this call will never start a real migration; it will
2352      * only re-setup the migration stream and poke existing migration
2353      * to continue using that newly established channel.
2354      */
2355     qemu_start_incoming_migration(uri, errp);
2356 }
2357 
2358 void qmp_migrate_pause(Error **errp)
2359 {
2360     MigrationState *ms = migrate_get_current();
2361     MigrationIncomingState *mis = migration_incoming_get_current();
2362     int ret;
2363 
2364     if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2365         /* Source side, during postcopy */
2366         qemu_mutex_lock(&ms->qemu_file_lock);
2367         ret = qemu_file_shutdown(ms->to_dst_file);
2368         qemu_mutex_unlock(&ms->qemu_file_lock);
2369         if (ret) {
2370             error_setg(errp, "Failed to pause source migration");
2371         }
2372         return;
2373     }
2374 
2375     if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2376         ret = qemu_file_shutdown(mis->from_src_file);
2377         if (ret) {
2378             error_setg(errp, "Failed to pause destination migration");
2379         }
2380         return;
2381     }
2382 
2383     error_setg(errp, "migrate-pause is currently only supported "
2384                "during postcopy-active state");
2385 }
2386 
2387 bool migration_is_blocked(Error **errp)
2388 {
2389     if (qemu_savevm_state_blocked(errp)) {
2390         return true;
2391     }
2392 
2393     if (migration_blockers) {
2394         error_propagate(errp, error_copy(migration_blockers->data));
2395         return true;
2396     }
2397 
2398     return false;
2399 }
2400 
2401 /* Returns true if continue to migrate, or false if error detected */
2402 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
2403                             bool resume, Error **errp)
2404 {
2405     Error *local_err = NULL;
2406 
2407     if (resume) {
2408         if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
2409             error_setg(errp, "Cannot resume if there is no "
2410                        "paused migration");
2411             return false;
2412         }
2413 
2414         /*
2415          * Postcopy recovery won't work well with release-ram
2416          * capability since release-ram will drop the page buffer as
2417          * long as the page is put into the send buffer.  So if there
2418          * is a network failure happened, any page buffers that have
2419          * not yet reached the destination VM but have already been
2420          * sent from the source VM will be lost forever.  Let's refuse
2421          * the client from resuming such a postcopy migration.
2422          * Luckily release-ram was designed to only be used when src
2423          * and destination VMs are on the same host, so it should be
2424          * fine.
2425          */
2426         if (migrate_release_ram()) {
2427             error_setg(errp, "Postcopy recovery cannot work "
2428                        "when release-ram capability is set");
2429             return false;
2430         }
2431 
2432         /* This is a resume, skip init status */
2433         return true;
2434     }
2435 
2436     if (migration_is_running(s->state)) {
2437         error_setg(errp, QERR_MIGRATION_ACTIVE);
2438         return false;
2439     }
2440 
2441     if (runstate_check(RUN_STATE_INMIGRATE)) {
2442         error_setg(errp, "Guest is waiting for an incoming migration");
2443         return false;
2444     }
2445 
2446     if (runstate_check(RUN_STATE_POSTMIGRATE)) {
2447         error_setg(errp, "Can't migrate the vm that was paused due to "
2448                    "previous migration");
2449         return false;
2450     }
2451 
2452     if (migration_is_blocked(errp)) {
2453         return false;
2454     }
2455 
2456     if (blk || blk_inc) {
2457         if (migrate_colo_enabled()) {
2458             error_setg(errp, "No disk migration is required in COLO mode");
2459             return false;
2460         }
2461         if (migrate_use_block() || migrate_use_block_incremental()) {
2462             error_setg(errp, "Command options are incompatible with "
2463                        "current migration capabilities");
2464             return false;
2465         }
2466         migrate_set_block_enabled(true, &local_err);
2467         if (local_err) {
2468             error_propagate(errp, local_err);
2469             return false;
2470         }
2471         s->must_remove_block_options = true;
2472     }
2473 
2474     if (blk_inc) {
2475         migrate_set_block_incremental(s, true);
2476     }
2477 
2478     migrate_init(s);
2479     /*
2480      * set ram_counters compression_counters memory to zero for a
2481      * new migration
2482      */
2483     memset(&ram_counters, 0, sizeof(ram_counters));
2484     memset(&compression_counters, 0, sizeof(compression_counters));
2485 
2486     return true;
2487 }
2488 
2489 void qmp_migrate(const char *uri, bool has_blk, bool blk,
2490                  bool has_inc, bool inc, bool has_detach, bool detach,
2491                  bool has_resume, bool resume, Error **errp)
2492 {
2493     Error *local_err = NULL;
2494     MigrationState *s = migrate_get_current();
2495     const char *p = NULL;
2496 
2497     /* URI is not suitable for migration? */
2498     if (!migration_channels_and_uri_compatible(uri, errp)) {
2499         return;
2500     }
2501 
2502     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
2503                          has_resume && resume, errp)) {
2504         /* Error detected, put into errp */
2505         return;
2506     }
2507 
2508     if (!(has_resume && resume)) {
2509         if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
2510             return;
2511         }
2512     }
2513 
2514     if (strstart(uri, "tcp:", &p) ||
2515         strstart(uri, "unix:", NULL) ||
2516         strstart(uri, "vsock:", NULL)) {
2517         socket_start_outgoing_migration(s, p ? p : uri, &local_err);
2518 #ifdef CONFIG_RDMA
2519     } else if (strstart(uri, "rdma:", &p)) {
2520         rdma_start_outgoing_migration(s, p, &local_err);
2521 #endif
2522     } else if (strstart(uri, "exec:", &p)) {
2523         exec_start_outgoing_migration(s, p, &local_err);
2524     } else if (strstart(uri, "fd:", &p)) {
2525         fd_start_outgoing_migration(s, p, &local_err);
2526     } else {
2527         if (!(has_resume && resume)) {
2528             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2529         }
2530         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
2531                    "a valid migration protocol");
2532         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2533                           MIGRATION_STATUS_FAILED);
2534         block_cleanup_parameters(s);
2535         return;
2536     }
2537 
2538     if (local_err) {
2539         if (!(has_resume && resume)) {
2540             yank_unregister_instance(MIGRATION_YANK_INSTANCE);
2541         }
2542         migrate_fd_error(s, local_err);
2543         error_propagate(errp, local_err);
2544         return;
2545     }
2546 }
2547 
2548 void qmp_migrate_cancel(Error **errp)
2549 {
2550     migration_cancel(NULL);
2551 }
2552 
2553 void qmp_migrate_continue(MigrationStatus state, Error **errp)
2554 {
2555     MigrationState *s = migrate_get_current();
2556     if (s->state != state) {
2557         error_setg(errp,  "Migration not in expected state: %s",
2558                    MigrationStatus_str(s->state));
2559         return;
2560     }
2561     qemu_sem_post(&s->pause_sem);
2562 }
2563 
2564 bool migrate_release_ram(void)
2565 {
2566     MigrationState *s;
2567 
2568     s = migrate_get_current();
2569 
2570     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
2571 }
2572 
2573 bool migrate_postcopy_ram(void)
2574 {
2575     MigrationState *s;
2576 
2577     s = migrate_get_current();
2578 
2579     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
2580 }
2581 
2582 bool migrate_postcopy(void)
2583 {
2584     return migrate_postcopy_ram() || migrate_dirty_bitmaps();
2585 }
2586 
2587 bool migrate_auto_converge(void)
2588 {
2589     MigrationState *s;
2590 
2591     s = migrate_get_current();
2592 
2593     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
2594 }
2595 
2596 bool migrate_zero_blocks(void)
2597 {
2598     MigrationState *s;
2599 
2600     s = migrate_get_current();
2601 
2602     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
2603 }
2604 
2605 bool migrate_postcopy_blocktime(void)
2606 {
2607     MigrationState *s;
2608 
2609     s = migrate_get_current();
2610 
2611     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
2612 }
2613 
2614 bool migrate_use_compression(void)
2615 {
2616     MigrationState *s;
2617 
2618     s = migrate_get_current();
2619 
2620     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
2621 }
2622 
2623 int migrate_compress_level(void)
2624 {
2625     MigrationState *s;
2626 
2627     s = migrate_get_current();
2628 
2629     return s->parameters.compress_level;
2630 }
2631 
2632 int migrate_compress_threads(void)
2633 {
2634     MigrationState *s;
2635 
2636     s = migrate_get_current();
2637 
2638     return s->parameters.compress_threads;
2639 }
2640 
2641 int migrate_compress_wait_thread(void)
2642 {
2643     MigrationState *s;
2644 
2645     s = migrate_get_current();
2646 
2647     return s->parameters.compress_wait_thread;
2648 }
2649 
2650 int migrate_decompress_threads(void)
2651 {
2652     MigrationState *s;
2653 
2654     s = migrate_get_current();
2655 
2656     return s->parameters.decompress_threads;
2657 }
2658 
2659 bool migrate_dirty_bitmaps(void)
2660 {
2661     MigrationState *s;
2662 
2663     s = migrate_get_current();
2664 
2665     return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
2666 }
2667 
2668 bool migrate_ignore_shared(void)
2669 {
2670     MigrationState *s;
2671 
2672     s = migrate_get_current();
2673 
2674     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
2675 }
2676 
2677 bool migrate_validate_uuid(void)
2678 {
2679     MigrationState *s;
2680 
2681     s = migrate_get_current();
2682 
2683     return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
2684 }
2685 
2686 bool migrate_use_events(void)
2687 {
2688     MigrationState *s;
2689 
2690     s = migrate_get_current();
2691 
2692     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
2693 }
2694 
2695 bool migrate_use_multifd(void)
2696 {
2697     MigrationState *s;
2698 
2699     s = migrate_get_current();
2700 
2701     return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
2702 }
2703 
2704 bool migrate_pause_before_switchover(void)
2705 {
2706     MigrationState *s;
2707 
2708     s = migrate_get_current();
2709 
2710     return s->enabled_capabilities[
2711         MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
2712 }
2713 
2714 int migrate_multifd_channels(void)
2715 {
2716     MigrationState *s;
2717 
2718     s = migrate_get_current();
2719 
2720     return s->parameters.multifd_channels;
2721 }
2722 
2723 MultiFDCompression migrate_multifd_compression(void)
2724 {
2725     MigrationState *s;
2726 
2727     s = migrate_get_current();
2728 
2729     assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
2730     return s->parameters.multifd_compression;
2731 }
2732 
2733 int migrate_multifd_zlib_level(void)
2734 {
2735     MigrationState *s;
2736 
2737     s = migrate_get_current();
2738 
2739     return s->parameters.multifd_zlib_level;
2740 }
2741 
2742 int migrate_multifd_zstd_level(void)
2743 {
2744     MigrationState *s;
2745 
2746     s = migrate_get_current();
2747 
2748     return s->parameters.multifd_zstd_level;
2749 }
2750 
#ifdef CONFIG_LINUX
/*
 * Report whether MIGRATION_CAPABILITY_ZERO_COPY_SEND is enabled.
 * Only compiled when CONFIG_LINUX is defined.
 */
bool migrate_use_zero_copy_send(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
}
#endif
2761 
2762 int migrate_use_tls(void)
2763 {
2764     MigrationState *s;
2765 
2766     s = migrate_get_current();
2767 
2768     return s->parameters.tls_creds && *s->parameters.tls_creds;
2769 }
2770 
2771 int migrate_use_xbzrle(void)
2772 {
2773     MigrationState *s;
2774 
2775     s = migrate_get_current();
2776 
2777     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
2778 }
2779 
2780 uint64_t migrate_xbzrle_cache_size(void)
2781 {
2782     MigrationState *s;
2783 
2784     s = migrate_get_current();
2785 
2786     return s->parameters.xbzrle_cache_size;
2787 }
2788 
2789 static int64_t migrate_max_postcopy_bandwidth(void)
2790 {
2791     MigrationState *s;
2792 
2793     s = migrate_get_current();
2794 
2795     return s->parameters.max_postcopy_bandwidth;
2796 }
2797 
2798 bool migrate_use_block(void)
2799 {
2800     MigrationState *s;
2801 
2802     s = migrate_get_current();
2803 
2804     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
2805 }
2806 
2807 bool migrate_use_return_path(void)
2808 {
2809     MigrationState *s;
2810 
2811     s = migrate_get_current();
2812 
2813     return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
2814 }
2815 
2816 bool migrate_use_block_incremental(void)
2817 {
2818     MigrationState *s;
2819 
2820     s = migrate_get_current();
2821 
2822     return s->parameters.block_incremental;
2823 }
2824 
2825 bool migrate_background_snapshot(void)
2826 {
2827     MigrationState *s;
2828 
2829     s = migrate_get_current();
2830 
2831     return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
2832 }
2833 
2834 bool migrate_postcopy_preempt(void)
2835 {
2836     MigrationState *s;
2837 
2838     s = migrate_get_current();
2839 
2840     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
2841 }
2842 
2843 /* migration thread support */
2844 /*
2845  * Something bad happened to the RP stream, mark an error
2846  * The caller shall print or trace something to indicate why
2847  */
2848 static void mark_source_rp_bad(MigrationState *s)
2849 {
2850     s->rp_state.error = true;
2851 }
2852 
2853 static struct rp_cmd_args {
2854     ssize_t     len; /* -1 = variable */
2855     const char *name;
2856 } rp_cmd_args[] = {
2857     [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
2858     [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
2859     [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
2860     [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
2861     [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
2862     [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
2863     [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
2864     [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
2865 };
2866 
2867 /*
2868  * Process a request for pages received on the return path,
2869  * We're allowed to send more than requested (e.g. to round to our page size)
2870  * and we don't need to send pages that have already been sent.
2871  */
2872 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
2873                                        ram_addr_t start, size_t len)
2874 {
2875     long our_host_ps = qemu_real_host_page_size();
2876 
2877     trace_migrate_handle_rp_req_pages(rbname, start, len);
2878 
2879     /*
2880      * Since we currently insist on matching page sizes, just sanity check
2881      * we're being asked for whole host pages.
2882      */
2883     if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
2884         !QEMU_IS_ALIGNED(len, our_host_ps)) {
2885         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
2886                      " len: %zd", __func__, start, len);
2887         mark_source_rp_bad(ms);
2888         return;
2889     }
2890 
2891     if (ram_save_queue_pages(rbname, start, len)) {
2892         mark_source_rp_bad(ms);
2893     }
2894 }
2895 
2896 /* Return true to retry, false to quit */
2897 static bool postcopy_pause_return_path_thread(MigrationState *s)
2898 {
2899     trace_postcopy_pause_return_path();
2900 
2901     qemu_sem_wait(&s->postcopy_pause_rp_sem);
2902 
2903     trace_postcopy_pause_return_path_continued();
2904 
2905     return true;
2906 }
2907 
2908 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
2909 {
2910     RAMBlock *block = qemu_ram_block_by_name(block_name);
2911 
2912     if (!block) {
2913         error_report("%s: invalid block name '%s'", __func__, block_name);
2914         return -EINVAL;
2915     }
2916 
2917     /* Fetch the received bitmap and refresh the dirty bitmap */
2918     return ram_dirty_bitmap_reload(s, block);
2919 }
2920 
2921 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
2922 {
2923     trace_source_return_path_thread_resume_ack(value);
2924 
2925     if (value != MIGRATION_RESUME_ACK_VALUE) {
2926         error_report("%s: illegal resume_ack value %"PRIu32,
2927                      __func__, value);
2928         return -1;
2929     }
2930 
2931     /* Now both sides are active. */
2932     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2933                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2934 
2935     /* Notify send thread that time to continue send pages */
2936     qemu_sem_post(&s->rp_state.rp_sem);
2937 
2938     return 0;
2939 }
2940 
2941 /*
2942  * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if
2943  * existed) in a safe way.
2944  */
2945 static void migration_release_dst_files(MigrationState *ms)
2946 {
2947     QEMUFile *file;
2948 
2949     WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
2950         /*
2951          * Reset the from_dst_file pointer first before releasing it, as we
2952          * can't block within lock section
2953          */
2954         file = ms->rp_state.from_dst_file;
2955         ms->rp_state.from_dst_file = NULL;
2956     }
2957 
2958     /*
2959      * Do the same to postcopy fast path socket too if there is.  No
2960      * locking needed because this qemufile should only be managed by
2961      * return path thread.
2962      */
2963     if (ms->postcopy_qemufile_src) {
2964         migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
2965         qemu_file_shutdown(ms->postcopy_qemufile_src);
2966         qemu_fclose(ms->postcopy_qemufile_src);
2967         ms->postcopy_qemufile_src = NULL;
2968     }
2969 
2970     qemu_fclose(file);
2971 }
2972 
2973 /*
2974  * Handles messages sent on the return path towards the source VM
2975  *
2976  */
2977 static void *source_return_path_thread(void *opaque)
2978 {
2979     MigrationState *ms = opaque;
2980     QEMUFile *rp = ms->rp_state.from_dst_file;
2981     uint16_t header_len, header_type;
2982     uint8_t buf[512];
2983     uint32_t tmp32, sibling_error;
2984     ram_addr_t start = 0; /* =0 to silence warning */
2985     size_t  len = 0, expected_len;
2986     int res;
2987 
2988     trace_source_return_path_thread_entry();
2989     rcu_register_thread();
2990 
2991 retry:
2992     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
2993            migration_is_setup_or_active(ms->state)) {
2994         trace_source_return_path_thread_loop_top();
2995         header_type = qemu_get_be16(rp);
2996         header_len = qemu_get_be16(rp);
2997 
2998         if (qemu_file_get_error(rp)) {
2999             mark_source_rp_bad(ms);
3000             goto out;
3001         }
3002 
3003         if (header_type >= MIG_RP_MSG_MAX ||
3004             header_type == MIG_RP_MSG_INVALID) {
3005             error_report("RP: Received invalid message 0x%04x length 0x%04x",
3006                          header_type, header_len);
3007             mark_source_rp_bad(ms);
3008             goto out;
3009         }
3010 
3011         if ((rp_cmd_args[header_type].len != -1 &&
3012             header_len != rp_cmd_args[header_type].len) ||
3013             header_len > sizeof(buf)) {
3014             error_report("RP: Received '%s' message (0x%04x) with"
3015                          "incorrect length %d expecting %zu",
3016                          rp_cmd_args[header_type].name, header_type, header_len,
3017                          (size_t)rp_cmd_args[header_type].len);
3018             mark_source_rp_bad(ms);
3019             goto out;
3020         }
3021 
3022         /* We know we've got a valid header by this point */
3023         res = qemu_get_buffer(rp, buf, header_len);
3024         if (res != header_len) {
3025             error_report("RP: Failed reading data for message 0x%04x"
3026                          " read %d expected %d",
3027                          header_type, res, header_len);
3028             mark_source_rp_bad(ms);
3029             goto out;
3030         }
3031 
3032         /* OK, we have the message and the data */
3033         switch (header_type) {
3034         case MIG_RP_MSG_SHUT:
3035             sibling_error = ldl_be_p(buf);
3036             trace_source_return_path_thread_shut(sibling_error);
3037             if (sibling_error) {
3038                 error_report("RP: Sibling indicated error %d", sibling_error);
3039                 mark_source_rp_bad(ms);
3040             }
3041             /*
3042              * We'll let the main thread deal with closing the RP
3043              * we could do a shutdown(2) on it, but we're the only user
3044              * anyway, so there's nothing gained.
3045              */
3046             goto out;
3047 
3048         case MIG_RP_MSG_PONG:
3049             tmp32 = ldl_be_p(buf);
3050             trace_source_return_path_thread_pong(tmp32);
3051             qemu_sem_post(&ms->rp_state.rp_pong_acks);
3052             break;
3053 
3054         case MIG_RP_MSG_REQ_PAGES:
3055             start = ldq_be_p(buf);
3056             len = ldl_be_p(buf + 8);
3057             migrate_handle_rp_req_pages(ms, NULL, start, len);
3058             break;
3059 
3060         case MIG_RP_MSG_REQ_PAGES_ID:
3061             expected_len = 12 + 1; /* header + termination */
3062 
3063             if (header_len >= expected_len) {
3064                 start = ldq_be_p(buf);
3065                 len = ldl_be_p(buf + 8);
3066                 /* Now we expect an idstr */
3067                 tmp32 = buf[12]; /* Length of the following idstr */
3068                 buf[13 + tmp32] = '\0';
3069                 expected_len += tmp32;
3070             }
3071             if (header_len != expected_len) {
3072                 error_report("RP: Req_Page_id with length %d expecting %zd",
3073                              header_len, expected_len);
3074                 mark_source_rp_bad(ms);
3075                 goto out;
3076             }
3077             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
3078             break;
3079 
3080         case MIG_RP_MSG_RECV_BITMAP:
3081             if (header_len < 1) {
3082                 error_report("%s: missing block name", __func__);
3083                 mark_source_rp_bad(ms);
3084                 goto out;
3085             }
3086             /* Format: len (1B) + idstr (<255B). This ends the idstr. */
3087             buf[buf[0] + 1] = '\0';
3088             if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
3089                 mark_source_rp_bad(ms);
3090                 goto out;
3091             }
3092             break;
3093 
3094         case MIG_RP_MSG_RESUME_ACK:
3095             tmp32 = ldl_be_p(buf);
3096             if (migrate_handle_rp_resume_ack(ms, tmp32)) {
3097                 mark_source_rp_bad(ms);
3098                 goto out;
3099             }
3100             break;
3101 
3102         default:
3103             break;
3104         }
3105     }
3106 
3107 out:
3108     res = qemu_file_get_error(rp);
3109     if (res) {
3110         if (res && migration_in_postcopy()) {
3111             /*
3112              * Maybe there is something we can do: it looks like a
3113              * network down issue, and we pause for a recovery.
3114              */
3115             migration_release_dst_files(ms);
3116             rp = NULL;
3117             if (postcopy_pause_return_path_thread(ms)) {
3118                 /*
3119                  * Reload rp, reset the rest.  Referencing it is safe since
3120                  * it's reset only by us above, or when migration completes
3121                  */
3122                 rp = ms->rp_state.from_dst_file;
3123                 ms->rp_state.error = false;
3124                 goto retry;
3125             }
3126         }
3127 
3128         trace_source_return_path_thread_bad_end();
3129         mark_source_rp_bad(ms);
3130     }
3131 
3132     trace_source_return_path_thread_end();
3133     migration_release_dst_files(ms);
3134     rcu_unregister_thread();
3135     return NULL;
3136 }
3137 
3138 static int open_return_path_on_source(MigrationState *ms,
3139                                       bool create_thread)
3140 {
3141     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
3142     if (!ms->rp_state.from_dst_file) {
3143         return -1;
3144     }
3145 
3146     trace_open_return_path_on_source();
3147 
3148     if (!create_thread) {
3149         /* We're done */
3150         return 0;
3151     }
3152 
3153     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
3154                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
3155     ms->rp_state.rp_thread_created = true;
3156 
3157     trace_open_return_path_on_source_continue();
3158 
3159     return 0;
3160 }
3161 
3162 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
3163 static int await_return_path_close_on_source(MigrationState *ms)
3164 {
3165     /*
3166      * If this is a normal exit then the destination will send a SHUT and the
3167      * rp_thread will exit, however if there's an error we need to cause
3168      * it to exit.
3169      */
3170     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
3171         /*
3172          * shutdown(2), if we have it, will cause it to unblock if it's stuck
3173          * waiting for the destination.
3174          */
3175         qemu_file_shutdown(ms->rp_state.from_dst_file);
3176         mark_source_rp_bad(ms);
3177     }
3178     trace_await_return_path_close_on_source_joining();
3179     qemu_thread_join(&ms->rp_state.rp_thread);
3180     ms->rp_state.rp_thread_created = false;
3181     trace_await_return_path_close_on_source_close();
3182     return ms->rp_state.error;
3183 }
3184 
3185 static inline void
3186 migration_wait_main_channel(MigrationState *ms)
3187 {
3188     /* Wait until one PONG message received */
3189     qemu_sem_wait(&ms->rp_state.rp_pong_acks);
3190 }
3191 
/*
 * Switch from normal iteration to postcopy
 *
 * Stops the VM, sends the remaining precopy data, then wraps the device
 * state plus the postcopy LISTEN/RUN commands into one package and sends
 * it on ms->to_dst_file.  The iothread lock is taken inside this function
 * and released again on every path that acquired it.
 *
 * Returns non-0 on error
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;

    if (migrate_postcopy_preempt()) {
        /* Wait for one PONG before setting up the preempt channel */
        migration_wait_main_channel(ms);
        if (postcopy_preempt_establish_channel(ms)) {
            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
            return -1;
        }
    }

    /*
     * With pause-before-switchover the state transition is performed by
     * migration_maybe_pause() below instead.
     */
    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    /* From here on a failure has to re-activate the block devices */
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        ram_postcopy_send_discard_bitmap(ms);
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    /* Drop our own reference to bioc; it is still used via fb below */
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /* Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    /* Past this point a failure no longer re-activates the block devices */
    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /* Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    /* Downtime is measured from function entry up to this point */
    ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
    }

    trace_postcopy_preempt_enabled(migrate_postcopy_preempt());

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    /* Both failure labels are reached with the iothread lock held */
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /* A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
3376 
/**
 * migration_maybe_pause: Pause if required to by
 * migrate_pause_before_switchover called with the iothread locked
 *
 * @s: Current migration state
 * @current_active_state: in/out; the state to leave when pausing, updated
 *   to @new_state once the pause has been left
 * @new_state: state to enter after the pause
 *
 * The iothread lock is dropped while waiting on pause_sem and re-taken
 * before returning.
 *
 * Returns: 0 on success, -EINVAL if the state is not @new_state afterwards
 */
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state)
{
    if (!migrate_pause_before_switchover()) {
        return 0;
    }

    /* Since leaving this state is not atomic with posting the semaphore
     * it's possible that someone could have issued multiple migrate_continue
     * and the semaphore is incorrectly positive at this point;
     * the docs say it's undefined to reinit a semaphore that's already
     * init'd, so use timedwait to eat up any existing posts.
     */
    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
        /* This block intentionally left blank */
    }

    /*
     * If the migration is cancelled when it is in the completion phase,
     * the migration state is set to MIGRATION_STATUS_CANCELLING.
     * So we don't need to wait a semaphore, otherwise we would always
     * wait for the 'pause_sem' semaphore.
     */
    if (s->state != MIGRATION_STATUS_CANCELLING) {
        qemu_mutex_unlock_iothread();
        migrate_set_state(&s->state, *current_active_state,
                          MIGRATION_STATUS_PRE_SWITCHOVER);
        qemu_sem_wait(&s->pause_sem);
        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                          new_state);
        *current_active_state = new_state;
        qemu_mutex_lock_iothread();
    }

    /* Any state other than new_state at this point is reported as failure */
    return s->state == new_state ? 0 : -EINVAL;
}
3419 
/**
 * migration_completion: Used by migration_thread when there's not much left.
 *   The caller 'breaks' the loop when this returns.
 *
 * Finishes either a precopy migration (state ACTIVE) or a postcopy one
 * (state POSTCOPY_ACTIVE); any other state is treated as a failure.  On
 * success the state becomes COMPLETED (or COLO), otherwise FAILED.
 *
 * @s: Current migration state
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
        s->vm_was_running = runstate_is_running();
        ret = global_state_store();

        if (!ret) {
            /* Block devices are not inactivated when COLO is enabled */
            bool inactivate = !migrate_colo_enabled();
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            trace_migration_completion_vm_stop(ret);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                /* Lift the rate limit for the final transfer */
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         inactivate);
            }
            if (inactivate && ret >= 0) {
                s->block_inactive = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_mutex_lock_iothread();
        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        qemu_mutex_unlock_iothread();

        /* Shutdown the postcopy fast path thread */
        if (migrate_postcopy_preempt()) {
            postcopy_preempt_shutdown_file(s);
        }

        trace_migration_completion_postcopy_end_after_complete();
    } else {
        goto fail;
    }

    /*
     * If rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.rp_thread_created) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
        /* COLO does not support postcopy */
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_COLO);
    } else {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /* If not doing postcopy, vm_start() will be called: let's regain
     * control on images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_DEVICE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            /* Only clear the flag when re-activation reported no error */
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}
3531 
3532 /**
3533  * bg_migration_completion: Used by bg_migration_thread when after all the
3534  *   RAM has been saved. The caller 'breaks' the loop when this returns.
3535  *
3536  * @s: Current migration state
3537  */
3538 static void bg_migration_completion(MigrationState *s)
3539 {
3540     int current_active_state = s->state;
3541 
3542     /*
3543      * Stop tracking RAM writes - un-protect memory, un-register UFFD
3544      * memory ranges, flush kernel wait queues and wake up threads
3545      * waiting for write fault to be resolved.
3546      */
3547     ram_write_tracking_stop();
3548 
3549     if (s->state == MIGRATION_STATUS_ACTIVE) {
3550         /*
3551          * By this moment we have RAM content saved into the migration stream.
3552          * The next step is to flush the non-RAM content (device state)
3553          * right after the ram content. The device state has been stored into
3554          * the temporary buffer before RAM saving started.
3555          */
3556         qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
3557         qemu_fflush(s->to_dst_file);
3558     } else if (s->state == MIGRATION_STATUS_CANCELLING) {
3559         goto fail;
3560     }
3561 
3562     if (qemu_file_get_error(s->to_dst_file)) {
3563         trace_migration_completion_file_err();
3564         goto fail;
3565     }
3566 
3567     migrate_set_state(&s->state, current_active_state,
3568                       MIGRATION_STATUS_COMPLETED);
3569     return;
3570 
3571 fail:
3572     migrate_set_state(&s->state, current_active_state,
3573                       MIGRATION_STATUS_FAILED);
3574 }
3575 
3576 bool migrate_colo_enabled(void)
3577 {
3578     MigrationState *s = migrate_get_current();
3579     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
3580 }
3581 
/* Error classification values for the migration thread. */
typedef enum MigThrError {
    MIG_THR_ERR_NONE = 0,       /* No error detected */
    MIG_THR_ERR_RECOVERED = 1,  /* Detected error, but resumed successfully */
    MIG_THR_ERR_FATAL = 2,      /* Detected fatal error, need to exit */
} MigThrError;
3590 
3591 static int postcopy_resume_handshake(MigrationState *s)
3592 {
3593     qemu_savevm_send_postcopy_resume(s->to_dst_file);
3594 
3595     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3596         qemu_sem_wait(&s->rp_state.rp_sem);
3597     }
3598 
3599     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3600         return 0;
3601     }
3602 
3603     return -1;
3604 }
3605 
3606 /* Return zero if success, or <0 for error */
3607 static int postcopy_do_resume(MigrationState *s)
3608 {
3609     int ret;
3610 
3611     /*
3612      * Call all the resume_prepare() hooks, so that modules can be
3613      * ready for the migration resume.
3614      */
3615     ret = qemu_savevm_state_resume_prepare(s);
3616     if (ret) {
3617         error_report("%s: resume_prepare() failure detected: %d",
3618                      __func__, ret);
3619         return ret;
3620     }
3621 
3622     /*
3623      * If preempt is enabled, re-establish the preempt channel.  Note that
3624      * we do it after resume prepare to make sure the main channel will be
3625      * created before the preempt channel.  E.g. with weak network, the
3626      * dest QEMU may get messed up with the preempt and main channels on
3627      * the order of connection setup.  This guarantees the correct order.
3628      */
3629     ret = postcopy_preempt_establish_channel(s);
3630     if (ret) {
3631         error_report("%s: postcopy_preempt_establish_channel(): %d",
3632                      __func__, ret);
3633         return ret;
3634     }
3635 
3636     /*
3637      * Last handshake with destination on the resume (destination will
3638      * switch to postcopy-active afterwards)
3639      */
3640     ret = postcopy_resume_handshake(s);
3641     if (ret) {
3642         error_report("%s: handshake failed: %d", __func__, ret);
3643         return ret;
3644     }
3645 
3646     return 0;
3647 }
3648 
3649 /*
3650  * We don't return until we are in a safe state to continue current
3651  * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
3652  * MIG_THR_ERR_FATAL if unrecovery failure happened.
3653  */
3654 static MigThrError postcopy_pause(MigrationState *s)
3655 {
3656     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
3657 
3658     while (true) {
3659         QEMUFile *file;
3660 
3661         /*
3662          * Current channel is possibly broken. Release it.  Note that this is
3663          * guaranteed even without lock because to_dst_file should only be
3664          * modified by the migration thread.  That also guarantees that the
3665          * unregister of yank is safe too without the lock.  It should be safe
3666          * even to be within the qemu_file_lock, but we didn't do that to avoid
3667          * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
3668          * the qemu_file_lock critical section as small as possible.
3669          */
3670         assert(s->to_dst_file);
3671         migration_ioc_unregister_yank_from_file(s->to_dst_file);
3672         qemu_mutex_lock(&s->qemu_file_lock);
3673         file = s->to_dst_file;
3674         s->to_dst_file = NULL;
3675         qemu_mutex_unlock(&s->qemu_file_lock);
3676 
3677         qemu_file_shutdown(file);
3678         qemu_fclose(file);
3679 
3680         migrate_set_state(&s->state, s->state,
3681                           MIGRATION_STATUS_POSTCOPY_PAUSED);
3682 
3683         error_report("Detected IO failure for postcopy. "
3684                      "Migration paused.");
3685 
3686         /*
3687          * We wait until things fixed up. Then someone will setup the
3688          * status back for us.
3689          */
3690         while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
3691             qemu_sem_wait(&s->postcopy_pause_sem);
3692         }
3693 
3694         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3695             /* Woken up by a recover procedure. Give it a shot */
3696 
3697             /*
3698              * Firstly, let's wake up the return path now, with a new
3699              * return path channel.
3700              */
3701             qemu_sem_post(&s->postcopy_pause_rp_sem);
3702 
3703             /* Do the resume logic */
3704             if (postcopy_do_resume(s) == 0) {
3705                 /* Let's continue! */
3706                 trace_postcopy_pause_continued();
3707                 return MIG_THR_ERR_RECOVERED;
3708             } else {
3709                 /*
3710                  * Something wrong happened during the recovery, let's
3711                  * pause again. Pause is always better than throwing
3712                  * data away.
3713                  */
3714                 continue;
3715             }
3716         } else {
3717             /* This is not right... Time to quit. */
3718             return MIG_THR_ERR_FATAL;
3719         }
3720     }
3721 }
3722 
3723 static MigThrError migration_detect_error(MigrationState *s)
3724 {
3725     int ret;
3726     int state = s->state;
3727     Error *local_error = NULL;
3728 
3729     if (state == MIGRATION_STATUS_CANCELLING ||
3730         state == MIGRATION_STATUS_CANCELLED) {
3731         /* End the migration, but don't set the state to failed */
3732         return MIG_THR_ERR_FATAL;
3733     }
3734 
3735     /*
3736      * Try to detect any file errors.  Note that postcopy_qemufile_src will
3737      * be NULL when postcopy preempt is not enabled.
3738      */
3739     ret = qemu_file_get_error_obj_any(s->to_dst_file,
3740                                       s->postcopy_qemufile_src,
3741                                       &local_error);
3742     if (!ret) {
3743         /* Everything is fine */
3744         assert(!local_error);
3745         return MIG_THR_ERR_NONE;
3746     }
3747 
3748     if (local_error) {
3749         migrate_set_error(s, local_error);
3750         error_free(local_error);
3751     }
3752 
3753     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
3754         /*
3755          * For postcopy, we allow the network to be down for a
3756          * while. After that, it can be continued by a
3757          * recovery phase.
3758          */
3759         return postcopy_pause(s);
3760     } else {
3761         /*
3762          * For precopy (or postcopy with error outside IO), we fail
3763          * with no time.
3764          */
3765         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
3766         trace_migration_thread_file_err();
3767 
3768         /* Time to stop the migration, now. */
3769         return MIG_THR_ERR_FATAL;
3770     }
3771 }
3772 
3773 /* How many bytes have we transferred since the beginning of the migration */
3774 static uint64_t migration_total_bytes(MigrationState *s)
3775 {
3776     return qemu_file_total_transferred(s->to_dst_file) +
3777         ram_counters.multifd_bytes;
3778 }
3779 
3780 static void migration_calculate_complete(MigrationState *s)
3781 {
3782     uint64_t bytes = migration_total_bytes(s);
3783     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3784     int64_t transfer_time;
3785 
3786     s->total_time = end_time - s->start_time;
3787     if (!s->downtime) {
3788         /*
3789          * It's still not set, so we are precopy migration.  For
3790          * postcopy, downtime is calculated during postcopy_start().
3791          */
3792         s->downtime = end_time - s->downtime_start;
3793     }
3794 
3795     transfer_time = s->total_time - s->setup_time;
3796     if (transfer_time) {
3797         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3798     }
3799 }
3800 
/*
 * Start a new measurement interval: record the current realtime clock,
 * the total bytes transferred so far and the total pages transferred so
 * far as the baseline for the next rate calculation.
 */
static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields together; a mismatch between them would
     * lead to a wrong speed calculation.
     */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->iteration_initial_bytes = migration_total_bytes(s);
    s->iteration_initial_pages = ram_get_total_transferred_pages();
}
3811 
3812 static void migration_update_counters(MigrationState *s,
3813                                       int64_t current_time)
3814 {
3815     uint64_t transferred, transferred_pages, time_spent;
3816     uint64_t current_bytes; /* bytes transferred since the beginning */
3817     double bandwidth;
3818 
3819     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3820         return;
3821     }
3822 
3823     current_bytes = migration_total_bytes(s);
3824     transferred = current_bytes - s->iteration_initial_bytes;
3825     time_spent = current_time - s->iteration_start_time;
3826     bandwidth = (double)transferred / time_spent;
3827     s->threshold_size = bandwidth * s->parameters.downtime_limit;
3828 
3829     s->mbps = (((double) transferred * 8.0) /
3830                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3831 
3832     transferred_pages = ram_get_total_transferred_pages() -
3833                             s->iteration_initial_pages;
3834     s->pages_per_second = (double) transferred_pages /
3835                              (((double) time_spent / 1000.0));
3836 
3837     /*
3838      * if we haven't sent anything, we don't want to
3839      * recalculate. 10000 is a small enough number for our purposes
3840      */
3841     if (ram_counters.dirty_pages_rate && transferred > 10000) {
3842         s->expected_downtime = ram_counters.remaining / bandwidth;
3843     }
3844 
3845     qemu_file_reset_rate_limit(s->to_dst_file);
3846 
3847     update_iteration_initial_status(s);
3848 
3849     trace_migrate_transferred(transferred, time_spent,
3850                               bandwidth, s->threshold_size);
3851 }
3852 
/* What the migration thread loop should do after one iteration attempt */
typedef enum {
    /* Resume current iteration */
    MIG_ITERATE_RESUME = 0,
    /* Skip current iteration */
    MIG_ITERATE_SKIP = 1,
    /* Break the loop */
    MIG_ITERATE_BREAK = 2,
} MigIterateState;
3859 
3860 /*
3861  * Return true if continue to the next iteration directly, false
3862  * otherwise.
3863  */
3864 static MigIterateState migration_iteration_run(MigrationState *s)
3865 {
3866     uint64_t must_precopy, can_postcopy;
3867     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3868 
3869     qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
3870     uint64_t pending_size = must_precopy + can_postcopy;
3871 
3872     trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
3873 
3874     if (must_precopy <= s->threshold_size) {
3875         qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
3876         pending_size = must_precopy + can_postcopy;
3877         trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
3878     }
3879 
3880     if (!pending_size || pending_size < s->threshold_size) {
3881         trace_migration_thread_low_pending(pending_size);
3882         migration_completion(s);
3883         return MIG_ITERATE_BREAK;
3884     }
3885 
3886     /* Still a significant amount to transfer */
3887     if (!in_postcopy && must_precopy <= s->threshold_size &&
3888         qatomic_read(&s->start_postcopy)) {
3889         if (postcopy_start(s)) {
3890             error_report("%s: postcopy failed to start", __func__);
3891         }
3892         return MIG_ITERATE_SKIP;
3893     }
3894 
3895     /* Just another iteration step */
3896     qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
3897     return MIG_ITERATE_RESUME;
3898 }
3899 
3900 static void migration_iteration_finish(MigrationState *s)
3901 {
3902     /* If we enabled cpu throttling for auto-converge, turn it off. */
3903     cpu_throttle_stop();
3904 
3905     qemu_mutex_lock_iothread();
3906     switch (s->state) {
3907     case MIGRATION_STATUS_COMPLETED:
3908         migration_calculate_complete(s);
3909         runstate_set(RUN_STATE_POSTMIGRATE);
3910         break;
3911     case MIGRATION_STATUS_COLO:
3912         if (!migrate_colo_enabled()) {
3913             error_report("%s: critical error: calling COLO code without "
3914                          "COLO enabled", __func__);
3915         }
3916         migrate_start_colo_process(s);
3917         s->vm_was_running = true;
3918         /* Fallthrough */
3919     case MIGRATION_STATUS_FAILED:
3920     case MIGRATION_STATUS_CANCELLED:
3921     case MIGRATION_STATUS_CANCELLING:
3922         if (s->vm_was_running) {
3923             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
3924                 vm_start();
3925             }
3926         } else {
3927             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3928                 runstate_set(RUN_STATE_POSTMIGRATE);
3929             }
3930         }
3931         break;
3932 
3933     default:
3934         /* Should not reach here, but if so, forgive the VM. */
3935         error_report("%s: Unknown ending state %d", __func__, s->state);
3936         break;
3937     }
3938     migrate_fd_cleanup_schedule(s);
3939     qemu_mutex_unlock_iothread();
3940 }
3941 
3942 static void bg_migration_iteration_finish(MigrationState *s)
3943 {
3944     qemu_mutex_lock_iothread();
3945     switch (s->state) {
3946     case MIGRATION_STATUS_COMPLETED:
3947         migration_calculate_complete(s);
3948         break;
3949 
3950     case MIGRATION_STATUS_ACTIVE:
3951     case MIGRATION_STATUS_FAILED:
3952     case MIGRATION_STATUS_CANCELLED:
3953     case MIGRATION_STATUS_CANCELLING:
3954         break;
3955 
3956     default:
3957         /* Should not reach here, but if so, forgive the VM. */
3958         error_report("%s: Unknown ending state %d", __func__, s->state);
3959         break;
3960     }
3961 
3962     migrate_fd_cleanup_schedule(s);
3963     qemu_mutex_unlock_iothread();
3964 }
3965 
3966 /*
3967  * Return true if continue to the next iteration directly, false
3968  * otherwise.
3969  */
3970 static MigIterateState bg_migration_iteration_run(MigrationState *s)
3971 {
3972     int res;
3973 
3974     res = qemu_savevm_state_iterate(s->to_dst_file, false);
3975     if (res > 0) {
3976         bg_migration_completion(s);
3977         return MIG_ITERATE_BREAK;
3978     }
3979 
3980     return MIG_ITERATE_RESUME;
3981 }
3982 
3983 void migration_make_urgent_request(void)
3984 {
3985     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3986 }
3987 
3988 void migration_consume_urgent_request(void)
3989 {
3990     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3991 }
3992 
3993 /* Returns true if the rate limiting was broken by an urgent request */
3994 bool migration_rate_limit(void)
3995 {
3996     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3997     MigrationState *s = migrate_get_current();
3998 
3999     bool urgent = false;
4000     migration_update_counters(s, now);
4001     if (qemu_file_rate_limit(s->to_dst_file)) {
4002 
4003         if (qemu_file_get_error(s->to_dst_file)) {
4004             return false;
4005         }
4006         /*
4007          * Wait for a delay to do rate limiting OR
4008          * something urgent to post the semaphore.
4009          */
4010         int ms = s->iteration_start_time + BUFFER_DELAY - now;
4011         trace_migration_rate_limit_pre(ms);
4012         if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
4013             /*
4014              * We were woken by one or more urgent things but
4015              * the timedwait will have consumed one of them.
4016              * The service routine for the urgent wake will dec
4017              * the semaphore itself for each item it consumes,
4018              * so add this one we just eat back.
4019              */
4020             qemu_sem_post(&s->rate_limit_sem);
4021             urgent = true;
4022         }
4023         trace_migration_rate_limit_post(urgent);
4024     }
4025     return urgent;
4026 }
4027 
4028 /*
4029  * if failover devices are present, wait they are completely
4030  * unplugged
4031  */
4032 
4033 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
4034                                     int new_state)
4035 {
4036     if (qemu_savevm_state_guest_unplug_pending()) {
4037         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);
4038 
4039         while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
4040                qemu_savevm_state_guest_unplug_pending()) {
4041             qemu_sem_timedwait(&s->wait_unplug_sem, 250);
4042         }
4043         if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
4044             int timeout = 120; /* 30 seconds */
4045             /*
4046              * migration has been canceled
4047              * but as we have started an unplug we must wait the end
4048              * to be able to plug back the card
4049              */
4050             while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
4051                 qemu_sem_timedwait(&s->wait_unplug_sem, 250);
4052             }
4053             if (qemu_savevm_state_guest_unplug_pending() &&
4054                 !qtest_enabled()) {
4055                 warn_report("migration: partially unplugged device on "
4056                             "failure");
4057             }
4058         }
4059 
4060         migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
4061     } else {
4062         migrate_set_state(&s->state, old_state, new_state);
4063     }
4064 }
4065 
4066 /*
4067  * Master migration thread on the source VM.
4068  * It drives the migration and pumps the data down the outgoing channel.
4069  */
4070 static void *migration_thread(void *opaque)
4071 {
4072     MigrationState *s = opaque;
4073     MigrationThread *thread = NULL;
4074     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
4075     MigThrError thr_error;
4076     bool urgent = false;
4077 
4078     thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());
4079 
4080     rcu_register_thread();
4081 
4082     object_ref(OBJECT(s));
4083     update_iteration_initial_status(s);
4084 
4085     qemu_savevm_state_header(s->to_dst_file);
4086 
4087     /*
4088      * If we opened the return path, we need to make sure dst has it
4089      * opened as well.
4090      */
4091     if (s->rp_state.rp_thread_created) {
4092         /* Now tell the dest that it should open its end so it can reply */
4093         qemu_savevm_send_open_return_path(s->to_dst_file);
4094 
4095         /* And do a ping that will make stuff easier to debug */
4096         qemu_savevm_send_ping(s->to_dst_file, 1);
4097     }
4098 
4099     if (migrate_postcopy()) {
4100         /*
4101          * Tell the destination that we *might* want to do postcopy later;
4102          * if the other end can't do postcopy it should fail now, nice and
4103          * early.
4104          */
4105         qemu_savevm_send_postcopy_advise(s->to_dst_file);
4106     }
4107 
4108     if (migrate_colo_enabled()) {
4109         /* Notify migration destination that we enable COLO */
4110         qemu_savevm_send_colo_enable(s->to_dst_file);
4111     }
4112 
4113     qemu_savevm_state_setup(s->to_dst_file);
4114 
4115     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
4116                                MIGRATION_STATUS_ACTIVE);
4117 
4118     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
4119 
4120     trace_migration_thread_setup_complete();
4121 
4122     while (migration_is_active(s)) {
4123         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
4124             MigIterateState iter_state = migration_iteration_run(s);
4125             if (iter_state == MIG_ITERATE_SKIP) {
4126                 continue;
4127             } else if (iter_state == MIG_ITERATE_BREAK) {
4128                 break;
4129             }
4130         }
4131 
4132         /*
4133          * Try to detect any kind of failures, and see whether we
4134          * should stop the migration now.
4135          */
4136         thr_error = migration_detect_error(s);
4137         if (thr_error == MIG_THR_ERR_FATAL) {
4138             /* Stop migration */
4139             break;
4140         } else if (thr_error == MIG_THR_ERR_RECOVERED) {
4141             /*
4142              * Just recovered from a e.g. network failure, reset all
4143              * the local variables. This is important to avoid
4144              * breaking transferred_bytes and bandwidth calculation
4145              */
4146             update_iteration_initial_status(s);
4147         }
4148 
4149         urgent = migration_rate_limit();
4150     }
4151 
4152     trace_migration_thread_after_loop();
4153     migration_iteration_finish(s);
4154     object_unref(OBJECT(s));
4155     rcu_unregister_thread();
4156     MigrationThreadDel(thread);
4157     return NULL;
4158 }
4159 
4160 static void bg_migration_vm_start_bh(void *opaque)
4161 {
4162     MigrationState *s = opaque;
4163 
4164     qemu_bh_delete(s->vm_start_bh);
4165     s->vm_start_bh = NULL;
4166 
4167     vm_start();
4168     s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
4169 }
4170 
4171 /**
4172  * Background snapshot thread, based on live migration code.
4173  * This is an alternative implementation of live migration mechanism
4174  * introduced specifically to support background snapshots.
4175  *
4176  * It takes advantage of userfault_fd write protection mechanism introduced
4177  * in v5.7 kernel. Compared to existing dirty page logging migration much
4178  * lesser stream traffic is produced resulting in smaller snapshot images,
4179  * simply cause of no page duplicates can get into the stream.
4180  *
4181  * Another key point is that generated vmstate stream reflects machine state
4182  * 'frozen' at the beginning of snapshot creation compared to dirty page logging
4183  * mechanism, which effectively results in that saved snapshot is the state of VM
4184  * at the end of the process.
4185  */
4186 static void *bg_migration_thread(void *opaque)
4187 {
4188     MigrationState *s = opaque;
4189     int64_t setup_start;
4190     MigThrError thr_error;
4191     QEMUFile *fb;
4192     bool early_fail = true;
4193 
4194     rcu_register_thread();
4195     object_ref(OBJECT(s));
4196 
4197     qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
4198 
4199     setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
4200     /*
4201      * We want to save vmstate for the moment when migration has been
4202      * initiated but also we want to save RAM content while VM is running.
4203      * The RAM content should appear first in the vmstate. So, we first
4204      * stash the non-RAM part of the vmstate to the temporary buffer,
4205      * then write RAM part of the vmstate to the migration stream
4206      * with vCPUs running and, finally, write stashed non-RAM part of
4207      * the vmstate from the buffer to the migration stream.
4208      */
4209     s->bioc = qio_channel_buffer_new(512 * 1024);
4210     qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
4211     fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
4212     object_unref(OBJECT(s->bioc));
4213 
4214     update_iteration_initial_status(s);
4215 
4216     /*
4217      * Prepare for tracking memory writes with UFFD-WP - populate
4218      * RAM pages before protecting.
4219      */
4220 #ifdef __linux__
4221     ram_write_tracking_prepare();
4222 #endif
4223 
4224     qemu_savevm_state_header(s->to_dst_file);
4225     qemu_savevm_state_setup(s->to_dst_file);
4226 
4227     qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
4228                                MIGRATION_STATUS_ACTIVE);
4229 
4230     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
4231 
4232     trace_migration_thread_setup_complete();
4233     s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
4234 
4235     qemu_mutex_lock_iothread();
4236 
4237     /*
4238      * If VM is currently in suspended state, then, to make a valid runstate
4239      * transition in vm_stop_force_state() we need to wakeup it up.
4240      */
4241     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
4242     s->vm_was_running = runstate_is_running();
4243 
4244     if (global_state_store()) {
4245         goto fail;
4246     }
4247     /* Forcibly stop VM before saving state of vCPUs and devices */
4248     if (vm_stop_force_state(RUN_STATE_PAUSED)) {
4249         goto fail;
4250     }
4251     /*
4252      * Put vCPUs in sync with shadow context structures, then
4253      * save their state to channel-buffer along with devices.
4254      */
4255     cpu_synchronize_all_states();
4256     if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
4257         goto fail;
4258     }
4259     /*
4260      * Since we are going to get non-iterable state data directly
4261      * from s->bioc->data, explicit flush is needed here.
4262      */
4263     qemu_fflush(fb);
4264 
4265     /* Now initialize UFFD context and start tracking RAM writes */
4266     if (ram_write_tracking_start()) {
4267         goto fail;
4268     }
4269     early_fail = false;
4270 
4271     /*
4272      * Start VM from BH handler to avoid write-fault lock here.
4273      * UFFD-WP protection for the whole RAM is already enabled so
4274      * calling VM state change notifiers from vm_start() would initiate
4275      * writes to virtio VQs memory which is in write-protected region.
4276      */
4277     s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
4278     qemu_bh_schedule(s->vm_start_bh);
4279 
4280     qemu_mutex_unlock_iothread();
4281 
4282     while (migration_is_active(s)) {
4283         MigIterateState iter_state = bg_migration_iteration_run(s);
4284         if (iter_state == MIG_ITERATE_SKIP) {
4285             continue;
4286         } else if (iter_state == MIG_ITERATE_BREAK) {
4287             break;
4288         }
4289 
4290         /*
4291          * Try to detect any kind of failures, and see whether we
4292          * should stop the migration now.
4293          */
4294         thr_error = migration_detect_error(s);
4295         if (thr_error == MIG_THR_ERR_FATAL) {
4296             /* Stop migration */
4297             break;
4298         }
4299 
4300         migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
4301     }
4302 
4303     trace_migration_thread_after_loop();
4304 
4305 fail:
4306     if (early_fail) {
4307         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
4308                 MIGRATION_STATUS_FAILED);
4309         qemu_mutex_unlock_iothread();
4310     }
4311 
4312     bg_migration_iteration_finish(s);
4313 
4314     qemu_fclose(fb);
4315     object_unref(OBJECT(s));
4316     rcu_unregister_thread();
4317 
4318     return NULL;
4319 }
4320 
4321 void migrate_fd_connect(MigrationState *s, Error *error_in)
4322 {
4323     Error *local_err = NULL;
4324     int64_t rate_limit;
4325     bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
4326 
4327     /*
4328      * If there's a previous error, free it and prepare for another one.
4329      * Meanwhile if migration completes successfully, there won't have an error
4330      * dumped when calling migrate_fd_cleanup().
4331      */
4332     migrate_error_free(s);
4333 
4334     s->expected_downtime = s->parameters.downtime_limit;
4335     if (resume) {
4336         assert(s->cleanup_bh);
4337     } else {
4338         assert(!s->cleanup_bh);
4339         s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
4340     }
4341     if (error_in) {
4342         migrate_fd_error(s, error_in);
4343         if (resume) {
4344             /*
4345              * Don't do cleanup for resume if channel is invalid, but only dump
4346              * the error.  We wait for another channel connect from the user.
4347              * The error_report still gives HMP user a hint on what failed.
4348              * It's normally done in migrate_fd_cleanup(), but call it here
4349              * explicitly.
4350              */
4351             error_report_err(error_copy(s->error));
4352         } else {
4353             migrate_fd_cleanup(s);
4354         }
4355         return;
4356     }
4357 
4358     if (resume) {
4359         /* This is a resumed migration */
4360         rate_limit = s->parameters.max_postcopy_bandwidth /
4361             XFER_LIMIT_RATIO;
4362     } else {
4363         /* This is a fresh new migration */
4364         rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
4365 
4366         /* Notify before starting migration thread */
4367         notifier_list_notify(&migration_state_notifiers, s);
4368     }
4369 
4370     qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
4371     qemu_file_set_blocking(s->to_dst_file, true);
4372 
4373     /*
4374      * Open the return path. For postcopy, it is used exclusively. For
4375      * precopy, only if user specified "return-path" capability would
4376      * QEMU uses the return path.
4377      */
4378     if (migrate_postcopy_ram() || migrate_use_return_path()) {
4379         if (open_return_path_on_source(s, !resume)) {
4380             error_report("Unable to open return-path for postcopy");
4381             migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
4382             migrate_fd_cleanup(s);
4383             return;
4384         }
4385     }
4386 
4387     if (resume) {
4388         /* Wakeup the main migration thread to do the recovery */
4389         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
4390                           MIGRATION_STATUS_POSTCOPY_RECOVER);
4391         qemu_sem_post(&s->postcopy_pause_sem);
4392         return;
4393     }
4394 
4395     if (multifd_save_setup(&local_err) != 0) {
4396         error_report_err(local_err);
4397         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
4398                           MIGRATION_STATUS_FAILED);
4399         migrate_fd_cleanup(s);
4400         return;
4401     }
4402 
4403     if (migrate_background_snapshot()) {
4404         qemu_thread_create(&s->thread, "bg_snapshot",
4405                 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
4406     } else {
4407         qemu_thread_create(&s->thread, "live_migration",
4408                 migration_thread, s, QEMU_THREAD_JOINABLE);
4409     }
4410     s->migration_thread_running = true;
4411 }
4412 
4413 void migration_global_dump(Monitor *mon)
4414 {
4415     MigrationState *ms = migrate_get_current();
4416 
4417     monitor_printf(mon, "globals:\n");
4418     monitor_printf(mon, "store-global-state: %s\n",
4419                    ms->store_global_state ? "on" : "off");
4420     monitor_printf(mon, "only-migratable: %s\n",
4421                    only_migratable ? "on" : "off");
4422     monitor_printf(mon, "send-configuration: %s\n",
4423                    ms->send_configuration ? "on" : "off");
4424     monitor_printf(mon, "send-section-footer: %s\n",
4425                    ms->send_section_footer ? "on" : "off");
4426     monitor_printf(mon, "decompress-error-check: %s\n",
4427                    ms->decompress_error_check ? "on" : "off");
4428     monitor_printf(mon, "clear-bitmap-shift: %u\n",
4429                    ms->clear_bitmap_shift);
4430 }
4431 
/*
 * Declare a bool property called @name that is stored in
 * MigrationState.enabled_capabilities[@x]; false is passed as the last
 * DEFINE_PROP_BOOL argument.
 */
#define DEFINE_PROP_MIG_CAP(name, x) \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
4434 
4435 static Property migration_properties[] = {
4436     DEFINE_PROP_BOOL("store-global-state", MigrationState,
4437                      store_global_state, true),
4438     DEFINE_PROP_BOOL("send-configuration", MigrationState,
4439                      send_configuration, true),
4440     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
4441                      send_section_footer, true),
4442     DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
4443                       decompress_error_check, true),
4444     DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
4445                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
4446 
4447     /* Migration parameters */
4448     DEFINE_PROP_UINT8("x-compress-level", MigrationState,
4449                       parameters.compress_level,
4450                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
4451     DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
4452                       parameters.compress_threads,
4453                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
4454     DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
4455                       parameters.compress_wait_thread, true),
4456     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
4457                       parameters.decompress_threads,
4458                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
4459     DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
4460                       parameters.throttle_trigger_threshold,
4461                       DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
4462     DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
4463                       parameters.cpu_throttle_initial,
4464                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
4465     DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
4466                       parameters.cpu_throttle_increment,
4467                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
4468     DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
4469                       parameters.cpu_throttle_tailslow, false),
4470     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
4471                       parameters.max_bandwidth, MAX_THROTTLE),
4472     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
4473                       parameters.downtime_limit,
4474                       DEFAULT_MIGRATE_SET_DOWNTIME),
4475     DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
4476                       parameters.x_checkpoint_delay,
4477                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
4478     DEFINE_PROP_UINT8("multifd-channels", MigrationState,
4479                       parameters.multifd_channels,
4480                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
4481     DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
4482                       parameters.multifd_compression,
4483                       DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
4484     DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
4485                       parameters.multifd_zlib_level,
4486                       DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
4487     DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
4488                       parameters.multifd_zstd_level,
4489                       DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
4490     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
4491                       parameters.xbzrle_cache_size,
4492                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
4493     DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
4494                       parameters.max_postcopy_bandwidth,
4495                       DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
4496     DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
4497                       parameters.max_cpu_throttle,
4498                       DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
4499     DEFINE_PROP_SIZE("announce-initial", MigrationState,
4500                       parameters.announce_initial,
4501                       DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
4502     DEFINE_PROP_SIZE("announce-max", MigrationState,
4503                       parameters.announce_max,
4504                       DEFAULT_MIGRATE_ANNOUNCE_MAX),
4505     DEFINE_PROP_SIZE("announce-rounds", MigrationState,
4506                       parameters.announce_rounds,
4507                       DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
4508     DEFINE_PROP_SIZE("announce-step", MigrationState,
4509                       parameters.announce_step,
4510                       DEFAULT_MIGRATE_ANNOUNCE_STEP),
4511     DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
4512     DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
4513     DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
4514 
4515     /* Migration capabilities */
4516     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
4517     DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
4518     DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
4519     DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
4520     DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
4521     DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
4522     DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
4523     DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
4524                         MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
4525     DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
4526     DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
4527     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
4528     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
4529     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
4530     DEFINE_PROP_MIG_CAP("x-background-snapshot",
4531             MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
4532 #ifdef CONFIG_LINUX
4533     DEFINE_PROP_MIG_CAP("x-zero-copy-send",
4534             MIGRATION_CAPABILITY_ZERO_COPY_SEND),
4535 #endif
4536 
4537     DEFINE_PROP_END_OF_LIST(),
4538 };
4539 
4540 static void migration_class_init(ObjectClass *klass, void *data)
4541 {
4542     DeviceClass *dc = DEVICE_CLASS(klass);
4543 
4544     dc->user_creatable = false;
4545     device_class_set_props(dc, migration_properties);
4546 }
4547 
4548 static void migration_instance_finalize(Object *obj)
4549 {
4550     MigrationState *ms = MIGRATION_OBJ(obj);
4551 
4552     qemu_mutex_destroy(&ms->error_mutex);
4553     qemu_mutex_destroy(&ms->qemu_file_lock);
4554     qemu_sem_destroy(&ms->wait_unplug_sem);
4555     qemu_sem_destroy(&ms->rate_limit_sem);
4556     qemu_sem_destroy(&ms->pause_sem);
4557     qemu_sem_destroy(&ms->postcopy_pause_sem);
4558     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
4559     qemu_sem_destroy(&ms->rp_state.rp_sem);
4560     qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
4561     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
4562     error_free(ms->error);
4563 }
4564 
4565 static void migration_instance_init(Object *obj)
4566 {
4567     MigrationState *ms = MIGRATION_OBJ(obj);
4568     MigrationParameters *params = &ms->parameters;
4569 
4570     ms->state = MIGRATION_STATUS_NONE;
4571     ms->mbps = -1;
4572     ms->pages_per_second = -1;
4573     qemu_sem_init(&ms->pause_sem, 0);
4574     qemu_mutex_init(&ms->error_mutex);
4575 
4576     params->tls_hostname = g_strdup("");
4577     params->tls_creds = g_strdup("");
4578 
4579     /* Set has_* up only for parameter checks */
4580     params->has_compress_level = true;
4581     params->has_compress_threads = true;
4582     params->has_compress_wait_thread = true;
4583     params->has_decompress_threads = true;
4584     params->has_throttle_trigger_threshold = true;
4585     params->has_cpu_throttle_initial = true;
4586     params->has_cpu_throttle_increment = true;
4587     params->has_cpu_throttle_tailslow = true;
4588     params->has_max_bandwidth = true;
4589     params->has_downtime_limit = true;
4590     params->has_x_checkpoint_delay = true;
4591     params->has_block_incremental = true;
4592     params->has_multifd_channels = true;
4593     params->has_multifd_compression = true;
4594     params->has_multifd_zlib_level = true;
4595     params->has_multifd_zstd_level = true;
4596     params->has_xbzrle_cache_size = true;
4597     params->has_max_postcopy_bandwidth = true;
4598     params->has_max_cpu_throttle = true;
4599     params->has_announce_initial = true;
4600     params->has_announce_max = true;
4601     params->has_announce_rounds = true;
4602     params->has_announce_step = true;
4603 
4604     qemu_sem_init(&ms->postcopy_pause_sem, 0);
4605     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
4606     qemu_sem_init(&ms->rp_state.rp_sem, 0);
4607     qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
4608     qemu_sem_init(&ms->rate_limit_sem, 0);
4609     qemu_sem_init(&ms->wait_unplug_sem, 0);
4610     qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
4611     qemu_mutex_init(&ms->qemu_file_lock);
4612 }
4613 
4614 /*
4615  * Return true if check pass, false otherwise. Error will be put
4616  * inside errp if provided.
4617  */
4618 static bool migration_object_check(MigrationState *ms, Error **errp)
4619 {
4620     MigrationCapabilityStatusList *head = NULL;
4621     /* Assuming all off */
4622     bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
4623     int i;
4624 
4625     if (!migrate_params_check(&ms->parameters, errp)) {
4626         return false;
4627     }
4628 
4629     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
4630         if (ms->enabled_capabilities[i]) {
4631             QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
4632         }
4633     }
4634 
4635     ret = migrate_caps_check(cap_list, head, errp);
4636 
4637     /* It works with head == NULL */
4638     qapi_free_MigrationCapabilityStatusList(head);
4639 
4640     return ret;
4641 }
4642 
4643 static const TypeInfo migration_type = {
4644     .name = TYPE_MIGRATION,
4645     /*
4646      * NOTE: TYPE_MIGRATION is not really a device, as the object is
4647      * not created using qdev_new(), it is not attached to the qdev
4648      * device tree, and it is never realized.
4649      *
4650      * TODO: Make this TYPE_OBJECT once QOM provides something like
4651      * TYPE_DEVICE's "-global" properties.
4652      */
4653     .parent = TYPE_DEVICE,
4654     .class_init = migration_class_init,
4655     .class_size = sizeof(MigrationClass),
4656     .instance_size = sizeof(MigrationState),
4657     .instance_init = migration_instance_init,
4658     .instance_finalize = migration_instance_finalize,
4659 };
4660 
4661 static void register_migration_types(void)
4662 {
4663     type_register_static(&migration_type);
4664 }
4665 
4666 type_init(register_migration_types);
4667