/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "migration/migration.h"
#include "savevm.h"
#include "qemu-file-channel.h"
#include "migration/qemu-file.h"
#include "migration/vmstate.h"
#include "sysemu/sysemu.h"
#include "block/block.h"
#include "qapi/qmp/qerror.h"
#include "qapi/util.h"
#include "qemu/sockets.h"
#include "qemu/rcu.h"
#include "migration/block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
#include "qapi-event.h"
#include "qom/cpu.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"

#define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
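
/* For example: with the default MAX_THROTTLE of 32 MiB/s and
 * BUFFER_DELAY = 100 ms, XFER_LIMIT_RATIO is 10, so each buffered
 * chunk is limited to 32 MiB / 10 = ~3.2 MiB per 100 ms interval
 * (see the qemu_file_set_rate_limit() calls below).
 */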

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ..., 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints
 * Note: Please change this default value to 10000 when we support hybrid mode.
 */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

static bool deferred_incoming;

/* Once we add fault tolerance, we could have several
   migrations at once.  For now we don't need dynamic
   creation of migration state. */

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    static bool once;
    static MigrationState current_migration = {
        .state = MIGRATION_STATUS_NONE,
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
        .mbps = -1,
        .parameters = {
            .compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL,
            .compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
            .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
            .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL,
            .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
            .max_bandwidth = MAX_THROTTLE,
            .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME,
            .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY,
        },
    };

    if (!once) {
        current_migration.parameters.tls_creds = g_strdup("");
        current_migration.parameters.tls_hostname = g_strdup("");
        once = true;
    }
    return &current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    static bool once;
    static MigrationIncomingState mis_current;

    if (!once) {
        /* Zero the whole structure first, then set the initial state,
         * so the memset cannot clobber it. */
        memset(&mis_current, 0, sizeof(MigrationIncomingState));
        mis_current.state = MIGRATION_STATUS_NONE;
        QLIST_INIT(&mis_current.loadvm_handlers);
        qemu_mutex_init(&mis_current.rp_mutex);
        qemu_event_init(&mis_current.main_thread_load_event, false);
        once = true;
    }
    return &mis_current;
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    qemu_event_destroy(&mis->main_thread_load_event);
    loadvm_free_handlers(mis);
}


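/*
 * Run state sent to the destination in its own "globalstate" vmstate
 * section: 'runstate' holds the canonical RunState name (e.g. "running",
 * "paused") as a NUL-terminated string, and 'size' its length including
 * the terminator (set in global_state_pre_save() below).
 */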
typedef struct {
    bool optional;
    uint32_t size;
    uint8_t runstate[100];
    RunState state;
    bool received;
} GlobalState;

static GlobalState global_state;

int global_state_store(void)
{
    if (!runstate_store((char *)global_state.runstate,
                        sizeof(global_state.runstate))) {
        error_report("runstate name too big: %s", global_state.runstate);
        trace_migrate_state_too_big();
        return -EINVAL;
    }
    return 0;
}

void global_state_store_running(void)
{
    const char *state = RunState_lookup[RUN_STATE_RUNNING];
    strncpy((char *)global_state.runstate,
            state, sizeof(global_state.runstate));
}

static bool global_state_received(void)
{
    return global_state.received;
}

static RunState global_state_get_runstate(void)
{
    return global_state.state;
}

void global_state_set_optional(void)
{
    global_state.optional = true;
}

static bool global_state_needed(void *opaque)
{
    GlobalState *s = opaque;
    char *runstate = (char *)s->runstate;

    /* If it is not optional, it is mandatory */

    if (s->optional == false) {
        return true;
    }

    /* If state is running or paused, it is not needed */

    if (strcmp(runstate, "running") == 0 ||
        strcmp(runstate, "paused") == 0) {
        return false;
    }

    /* for any other state it is needed */
    return true;
}

static int global_state_post_load(void *opaque, int version_id)
{
    GlobalState *s = opaque;
    Error *local_err = NULL;
    int r;
    char *runstate = (char *)s->runstate;

    s->received = true;
    trace_migrate_global_state_post_load(runstate);

    r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE__MAX,
                        -1, &local_err);

    if (r == -1) {
        if (local_err) {
            error_report_err(local_err);
        }
        return -EINVAL;
    }
    s->state = r;

    return 0;
}

static void global_state_pre_save(void *opaque)
{
    GlobalState *s = opaque;

    trace_migrate_global_state_pre_save((char *)s->runstate);
    s->size = strlen((char *)s->runstate) + 1;
}

static const VMStateDescription vmstate_globalstate = {
    .name = "globalstate",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = global_state_post_load,
    .pre_save = global_state_pre_save,
    .needed = global_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(size, GlobalState),
        VMSTATE_BUFFER(runstate, GlobalState),
        VMSTATE_END_OF_LIST()
    },
};

void register_global_state(void)
{
    /* We use this state regardless of whether we receive it */
    strcpy((char *)global_state.runstate, "");
    global_state.received = false;
    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state, &error_abort);
    }
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

/* Request a range of pages from the source VM at the given
 * start address.
 *   rbname: Name of the RAMBlock to request the page in; if NULL it's the
 *           same as the last request (a name must have been given previously)
 *   start: Address offset within the RB
 *   len: Length in bytes required - must be a multiple of pagesize
 */
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
                               ram_addr_t start, size_t len)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), name len (1),
                                   rbname up to 255 bytes */
    size_t msglen = 12; /* start + len */

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    if (rbname) {
        int rbname_len = strlen(rbname);
        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
    } else {
        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
    }
}
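
/*
 * For illustration, the body of a REQ_PAGES_ID message (big-endian,
 * following the 2-byte type and 2-byte length header written by
 * migrate_send_rp_message()) is laid out as:
 *
 *   +---------+---------+---------+-----------------+
 *   |  start  |   len   | namelen |     rbname      |
 *   | 8 bytes | 4 bytes | 1 byte  |  namelen bytes  |
 *   +---------+---------+---------+-----------------+
 *
 * e.g. requesting 4 KiB at offset 0x2000 of block "pc.ram" sends
 * start = 0x2000, len = 0x1000, namelen = 6, then "pc.ram".
 */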

void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
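
/*
 * Example: a destination started with "-incoming tcp:0:4444" arrives
 * here with uri = "tcp:0:4444"; with "-incoming defer" we only record
 * the deferral, and a later migrate-incoming command supplies the
 * real URI.
 */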

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* Make sure all file formats flush their mutable metadata.
     * If we get an error here, just don't restart the VM yet. */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
        autostart = false;
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self();

    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else {
        runstate_set(global_state_get_runstate());
    }
    migrate_decompress_threads_join();
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void process_incoming_migration_co(void *opaque)
{
    QEMUFile *f = opaque;
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;

    mis->from_src_file = f;
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(f);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* We now have the COLO capability info and know if we are in COLO mode */
    if (!ret && migration_incoming_enable_colo()) {
        mis->migration_incoming_co = qemu_coroutine_self();
        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        /* Wait for the COLO incoming thread to exit before freeing
         * resources */
        qemu_thread_join(&mis->colo_incoming_thread);
    }

    if (ret < 0) {
        migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        error_report("load of migration failed: %s", strerror(-ret));
        migrate_decompress_threads_join();
        exit(EXIT_FAILURE);
    }

    qemu_fclose(f);
    free_xbzrle_decoded_buf();

    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
}

void migration_fd_process_incoming(QEMUFile *f)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f);

    migrate_decompress_threads_create();
    qemu_file_set_blocking(f, false);
    qemu_coroutine_enter(co);
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
void migrate_send_rp_message(MigrationIncomingState *mis,
                             enum mig_rp_message_type message_type,
                             uint16_t len, void *data)
{
    trace_migrate_send_rp_message((int)message_type, len);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);
    qemu_mutex_unlock(&mis->rp_mutex);
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        if (i == MIGRATION_CAPABILITY_X_COLO && !colo_supported()) {
            continue;
        }
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value =
            g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}

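/*
 * Example QMP exchange (values are the illustrative defaults from
 * above; QMP spells the members with dashes):
 *   -> { "execute": "query-migrate-parameters" }
 *   <- { "return": { "compress-level": 1, "compress-threads": 8,
 *                    "decompress-threads": 2, "max-bandwidth": 33554432,
 *                    "downtime-limit": 300, ... } }
 */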
MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_tls_creds = !!s->parameters.tls_creds;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = !!s->parameters.tls_hostname;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;

    return params;
}

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
static bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_SETUP:
        return true;

    default:
        return false;

    }
}

static void get_xbzrle_cache_stats(MigrationInfo *info)
{
    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
        info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
        info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
        info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
        info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_bytes_transferred();
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = dup_mig_pages_transferred();
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = norm_mig_pages_transferred();
    info->ram->normal_bytes = norm_mig_pages_transferred() *
        qemu_target_page_size();
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_dirty_sync_count();
    info->ram->postcopy_requests = ram_postcopy_requests();
    info->ram->page_size = qemu_target_page_size();

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_dirty_pages_rate();
    }
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);

        if (blk_mig_active()) {
            info->has_disk = true;
            info->disk = g_malloc0(sizeof(*info->disk));
            info->disk->transferred = blk_mig_bytes_transferred();
            info->disk->remaining = blk_mig_bytes_remaining();
            info->disk->total = blk_mig_bytes_total();
        }

        if (cpu_throttle_active()) {
            info->has_cpu_throttle_percentage = true;
            info->cpu_throttle_percentage = cpu_throttle_get_percentage();
        }

        get_xbzrle_cache_stats(info);
        break;
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
        /* Mostly the same as active; TODO add some postcopy stats */
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);

        if (blk_mig_active()) {
            info->has_disk = true;
            info->disk = g_malloc0(sizeof(*info->disk));
            info->disk->transferred = blk_mig_bytes_transferred();
            info->disk->remaining = blk_mig_bytes_remaining();
            info->disk->total = blk_mig_bytes_total();
        }

        get_xbzrle_cache_stats(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        get_xbzrle_cache_stats(info);

        info->has_status = true;
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;

    return info;
}
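
/*
 * A typical mid-migration QMP exchange built from the above
 * (values illustrative):
 *   -> { "execute": "query-migrate" }
 *   <- { "return": { "status": "active",
 *                    "total-time": 12345, "setup-time": 12,
 *                    "expected-downtime": 300,
 *                    "ram": { "transferred": 123456789,
 *                             "remaining": 123456,
 *                             "total": 1073741824, ... } } }
 */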

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap = migrate_postcopy_ram();

    if (migration_is_setup_or_active(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    for (cap = params; cap; cap = cap->next) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (cap->value->capability == MIGRATION_CAPABILITY_BLOCK
            && cap->value->state) {
            error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                       "block migration");
            error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
            continue;
        }
#endif
        if (cap->value->capability == MIGRATION_CAPABILITY_X_COLO) {
            if (!colo_supported()) {
                error_setg(errp, "COLO is not currently supported, please"
                             " configure with --enable-colo option in order to"
                             " support COLO feature");
                continue;
            }
        }
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }

    if (migrate_postcopy_ram()) {
        if (migrate_use_compression()) {
            /* The decompression threads asynchronously write into RAM
             * rather than use the atomic copies needed to avoid
             * userfaulting.  It should be possible to fix the decompression
             * threads for compatibility in future.
             */
            error_report("Postcopy is not currently compatible with "
                         "compression");
            s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
                false;
        }
        /* This check is reasonably expensive, so only when it's being
         * set the first time, also it's only the destination that needs
         * special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host()) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_report("Postcopy is not supported");
            s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
                false;
        }
    }
}

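/*
 * Example QMP usage (values illustrative): cap the bandwidth at
 * 100 MiB/s and allow 500 ms of downtime:
 *   -> { "execute": "migrate-set-parameters",
 *        "arguments": { "max-bandwidth": 104857600,
 *                       "downtime-limit": 500 } }
 *   <- { "return": {} }
 */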
void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (params->has_compress_level &&
        (params->compress_level < 0 || params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "an integer in the range of 0 to 9");
        return;
    }
    if (params->has_compress_threads &&
        (params->compress_threads < 1 || params->compress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "an integer in the range of 1 to 255");
        return;
    }
    if (params->has_decompress_threads &&
        (params->decompress_threads < 1 || params->decompress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "an integer in the range of 1 to 255");
        return;
    }
    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return;
    }
    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return;
    }
    if (params->has_max_bandwidth &&
        (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
                         " range of 0 to %zu bytes/second", SIZE_MAX);
        return;
    }
    if (params->has_downtime_limit &&
        (params->downtime_limit < 0 ||
         params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                         "the range of 0 to %d milliseconds",
                         MAX_MIGRATE_DOWNTIME);
        return;
    }
    if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "x_checkpoint_delay",
                    "a non-negative integer");
        /* Don't go on to apply any of the parameters after an error */
        return;
    }

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }
    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }
    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }
    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }
    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }
    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        s->parameters.tls_creds = g_strdup(params->tls_creds);
    }
    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname);
    }
    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }
    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
}


void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy_ram()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                         " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                         " started");
        return;
    }
    /*
     * We don't error if migration has finished, since that would be racy
     * with issuing this command.
     */
    atomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(new_state);
        migrate_generate_event(new_state);
    }
}
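
/*
 * The compare-and-swap above means a transition only fires (and only
 * emits a MIGRATION event) if the state is still what the caller saw:
 * e.g. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
 * MIGRATION_STATUS_ACTIVE) is a no-op if a concurrent cancel has
 * already moved the state to CANCELLING.
 */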

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap;

    cap = g_new0(MigrationCapabilityStatusList, 1);
    cap->value = g_new0(MigrationCapabilityStatus, 1);
    cap->value->capability = MIGRATION_CAPABILITY_BLOCK;
    cap->value->state = value;
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}

static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    migration_page_queue_free();

    if (s->to_dst_file) {
        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        migrate_compress_threads_join();
        qemu_fclose(s->to_dst_file);
        s->to_dst_file = NULL;
    }

    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    if (!s->error) {
        s->error = error_copy(error);
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}

static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    if (s->rp_state.from_dst_file) {
        /* shut down the rp socket, causing the rp thread to exit */
        qemu_file_shutdown(s->rp_state.from_dst_file);
    }

    do {
        old_state = s->state;
        if (!migration_is_setup_or_active(old_state)) {
            break;
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
    block_cleanup_parameters(s);
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}

bool migration_is_idle(void)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
        return false;
    case MIGRATION_STATUS__MAX:
        g_assert_not_reached();
    }

    return false;
}

MigrationState *migrate_init(void)
{
    MigrationState *s = migrate_get_current();

    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->bytes_xfer = 0;
    s->xfer_limit = 0;
    s->cleanup_bh = NULL;
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    return s;
}

static GSList *migration_blockers;

int migrate_add_blocker(Error *reason, Error **errp)
{
    if (only_migratable) {
        error_propagate(errp, error_copy(reason));
        error_prepend(errp, "disallowing migration blocker "
                          "(--only-migratable) for: ");
        return -EACCES;
    }

    if (migration_is_idle()) {
        migration_blockers = g_slist_prepend(migration_blockers, reason);
        return 0;
    }

    error_propagate(errp, error_copy(reason));
    error_prepend(errp, "disallowing migration blocker (migration in "
                      "progress) for: ");
    return -EBUSY;
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}
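
/*
 * Sketch of typical caller usage (names illustrative):
 *
 *   static Error *foo_mig_blocker;
 *
 *   error_setg(&foo_mig_blocker, "Device 'foo' does not support migration");
 *   if (migrate_add_blocker(foo_mig_blocker, errp) < 0) {
 *       // rejected: -only-migratable was given or migration is running
 *       error_free(foo_mig_blocker);
 *       foo_mig_blocker = NULL;
 *       return;
 *   }
 *   ...
 *   migrate_del_blocker(foo_mig_blocker);   // e.g. on unrealize
 *   error_free(foo_mig_blocker);
 */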

void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!deferred_incoming) {
        error_setg(errp, "For use with '-incoming defer'");
        return;
    }
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

bool migration_is_blocked(Error **errp)
{
    if (qemu_savevm_state_blocked(errp)) {
        return true;
    }

    if (migration_blockers) {
        *errp = error_copy(migration_blockers->data);
        return true;
    }

    return false;
}

void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    const char *p;

    if (migration_is_setup_or_active(s->state) ||
        s->state == MIGRATION_STATUS_CANCELLING ||
        s->state == MIGRATION_STATUS_COLO) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return;
    }

    if (migration_is_blocked(errp)) {
        return;
    }

    if ((has_blk && blk) || (has_inc && inc)) {
        if (migrate_use_block() || migrate_use_block_incremental()) {
            error_setg(errp, "Command options are incompatible with "
                       "current migration capabilities");
            return;
        }
        migrate_set_block_enabled(true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        s->must_remove_block_options = true;
    }

    if (has_inc && inc) {
        migrate_set_block_incremental(s, true);
    }

    s = migrate_init();

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
    } else {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        return;
    }

    if (local_err) {
        migrate_fd_error(s, local_err);
        error_propagate(errp, local_err);
        return;
    }
}

void qmp_migrate_cancel(Error **errp)
{
    migrate_fd_cancel(migrate_get_current());
}

void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();
    int64_t new_size;

    /* Check for truncation */
    if (value != (size_t)value) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return;
    }

    /* Cache should not be larger than guest ram size */
    if (value > ram_bytes_total()) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeds guest ram size");
        return;
    }

    new_size = xbzrle_cache_resize(value);
    if (new_size < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "is smaller than page size");
        return;
    }

    s->xbzrle_cache_size = new_size;
}

int64_t qmp_query_migrate_cache_size(Error **errp)
{
    return migrate_xbzrle_cache_size();
}

void qmp_migrate_set_speed(int64_t value, Error **errp)
{
    MigrationParameters p = {
        .has_max_bandwidth = true,
        .max_bandwidth = value,
    };

    qmp_migrate_set_parameters(&p, errp);
}

void qmp_migrate_set_downtime(double value, Error **errp)
{
    if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                         "the range of 0 to %d seconds",
                         MAX_MIGRATE_DOWNTIME_SECONDS);
        return;
    }

    value *= 1000; /* Convert to milliseconds */
    value = MAX(0, MIN(INT64_MAX, value));

    MigrationParameters p = {
        .has_downtime_limit = true,
        .downtime_limit = value,
    };

    qmp_migrate_set_parameters(&p, errp);
}
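
/*
 * e.g. the legacy HMP command "migrate_set_downtime 0.5" lands here
 * with value = 0.5 and becomes downtime_limit = 500 ms, routed
 * through the same validation as migrate-set-parameters.
 */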

bool migrate_release_ram(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
}

bool migrate_postcopy_ram(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}

bool migrate_auto_converge(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

bool migrate_zero_blocks(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

bool migrate_use_compression(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_level;
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_threads;
}

int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.decompress_threads;
}

bool migrate_use_events(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

int migrate_use_xbzrle(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

int64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->xbzrle_cache_size;
}

bool migrate_use_block(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
}

bool migrate_use_block_incremental(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.block_incremental;
}

/* migration thread support */
/*
 * Something bad happened to the RP stream, mark an error
 * The caller shall print or trace something to indicate why
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}

static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};
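
/*
 * The fixed lengths above match the senders earlier in this file:
 * SHUT and PONG carry a single 4-byte big-endian value, REQ_PAGES
 * carries the 8-byte start + 4-byte len built in
 * migrate_send_rp_req_pages(), and REQ_PAGES_ID is variable-length
 * because of the trailing RAMBlock name.
 */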

/*
 * Process a request for pages received on the return path,
 * We're allowed to send more than requested (e.g. to round to our page size)
 * and we don't need to send pages that have already been sent.
 */
static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname,
                                        ram_addr_t start, size_t len)
{
    long our_host_ps = getpagesize();

    trace_migrate_handle_rp_req_pages(rbname, start, len);

    /*
     * Since we currently insist on matching page sizes, just sanity check
     * we're being asked for whole host pages.
     */
    if (start & (our_host_ps - 1) ||
        (len & (our_host_ps - 1))) {
        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
                     " len: %zd", __func__, start, len);
        mark_source_rp_bad(ms);
        return;
    }

    if (ram_save_queue_pages(rbname, start, len)) {
        mark_source_rp_bad(ms);
    }
}

/*
 * Handles messages sent on the return path towards the source VM
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t  len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                    header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
            header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with"
                    " incorrect length %d expecting %zu",
                    rp_cmd_args[header_type].name, header_type, header_len,
                    (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            tmp32 = ldl_be_p(buf);
            trace_source_return_path_thread_pong(tmp32);
            break;

        case MIG_RP_MSG_REQ_PAGES:
            start = ldq_be_p(buf);
            len = ldl_be_p(buf + 8);
            migrate_handle_rp_req_pages(ms, NULL, start, len);
            break;

        case MIG_RP_MSG_REQ_PAGES_ID:
            expected_len = 12 + 1; /* header + termination */

            if (header_len >= expected_len) {
                start = ldq_be_p(buf);
                len = ldl_be_p(buf + 8);
                /* Now we expect an idstr */
                tmp32 = buf[12]; /* Length of the following idstr */
                buf[13 + tmp32] = '\0';
                expected_len += tmp32;
            }
            if (header_len != expected_len) {
                error_report("RP: Req_Page_id with length %d expecting %zd",
                        header_len, expected_len);
                mark_source_rp_bad(ms);
                goto out;
            }
            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
            break;

        default:
            break;
        }
    }
    if (qemu_file_get_error(rp)) {
        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
out:
    ms->rp_state.from_dst_file = NULL;
    qemu_fclose(rp);
    return NULL;
}
1605 
1606 static int open_return_path_on_source(MigrationState *ms)
1607 {
1608 
1609     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
1610     if (!ms->rp_state.from_dst_file) {
1611         return -1;
1612     }
1613 
1614     trace_open_return_path_on_source();
1615     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
1616                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
1617 
1618     trace_open_return_path_on_source_continue();
1619 
1620     return 0;
1621 }
1622 
1623 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
1624 static int await_return_path_close_on_source(MigrationState *ms)
1625 {
1626     /*
1627      * If this is a normal exit then the destination will send a SHUT and the
1628      * rp_thread will exit, however if there's an error we need to cause
1629      * it to exit.
1630      */
1631     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
1632         /*
1633          * shutdown(2), if we have it, will cause it to unblock if it's stuck
1634          * waiting for the destination.
1635          */
1636         qemu_file_shutdown(ms->rp_state.from_dst_file);
1637         mark_source_rp_bad(ms);
1638     }
1639     trace_await_return_path_close_on_source_joining();
1640     qemu_thread_join(&ms->rp_state.rp_thread);
1641     trace_await_return_path_close_on_source_close();
1642     return ms->rp_state.error;
1643 }
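
/*
 * The unblocking trick above in isolation: shutdown(2) makes a thread
 * blocked in read()/recv() on the same socket return immediately,
 * while keeping the fd valid until it is close()d after the join.  A
 * plain POSIX sketch (the helper name is illustrative, and it assumes
 * <sys/socket.h> is available via osdep):
 */
static void unblock_blocked_reader(int sockfd)
{
    /* Wake any thread stuck reading sockfd; unlike close(), this
     * cannot race against the fd number being recycled. */
    shutdown(sockfd, SHUT_RDWR);
}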
1644 
1645 /*
1646  * Switch from normal iteration to postcopy
1647  * Returns non-0 on error
1648  */
1649 static int postcopy_start(MigrationState *ms, bool *old_vm_running)
1650 {
1651     int ret;
1652     QIOChannelBuffer *bioc;
1653     QEMUFile *fb;
1654     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1655     bool restart_block = false;
1656     migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
1657                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
1658 
1659     trace_postcopy_start();
1660     qemu_mutex_lock_iothread();
1661     trace_postcopy_start_set_run();
1662 
1663     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1664     *old_vm_running = runstate_is_running();
1665     global_state_store();
1666     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1667     if (ret < 0) {
1668         goto fail;
1669     }
1670 
1671     ret = bdrv_inactivate_all();
1672     if (ret < 0) {
1673         goto fail;
1674     }
1675     restart_block = true;
1676 
1677     /*
1678      * Cause any non-postcopiable, but iterative devices to
1679      * send out their final data.
1680      */
1681     qemu_savevm_state_complete_precopy(ms->to_dst_file, true);
1682 
1683     /*
1684      * In the "finish migrate" run state, and with the io-lock held,
1685      * everything should be quiet; but we've potentially still got dirty
1686      * pages, and we need to tell the destination to throw away any pages
1687      * it has already received that are now dirty.
1688      */
1689     if (ram_postcopy_send_discard_bitmap(ms)) {
1690         error_report("postcopy send discard bitmap failed");
1691         goto fail;
1692     }
1693 
1694     /*
1695      * Send the rest of the state; note that devices doing postcopy
1696      * will notice we're in POSTCOPY_ACTIVE and will not actually
1697      * wrap their state up here.
1698      */
1699     qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
1700     /* Ping just for debugging, helps line traces up */
1701     qemu_savevm_send_ping(ms->to_dst_file, 2);
1702 
1703     /*
1704      * While loading the device state we may trigger page transfer
1705      * requests and the fd must be free to process those, and thus
1706      * the destination must read the whole device state off the fd before
1707      * it starts processing it.  Unfortunately the ad-hoc migration format
1708      * doesn't allow the destination to know the size to read without fully
1709      * parsing it through each device's load-state code (especially the
1710      * open-coded devices that use get/put).
1711      * So we wrap the device state up in a package with a length at the
1712      * start, held in a buffered channel; see the sketch after this function.
1713      */
1714     bioc = qio_channel_buffer_new(4096);
1715     qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
1716     fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
1717     object_unref(OBJECT(bioc));
1718 
1719     /*
1720      * Make sure the receiver can get incoming pages before we send the rest
1721      * of the state
1722      */
1723     qemu_savevm_send_postcopy_listen(fb);
1724 
1725     qemu_savevm_state_complete_precopy(fb, false);
1726     qemu_savevm_send_ping(fb, 3);
1727 
1728     qemu_savevm_send_postcopy_run(fb);
1729 
1730     /* End of the data going into the package */
1731 
1732     /* Last point of recovery; as soon as we send the package the destination
1733      * can open devices and potentially start running.
1734      * Let's just check again that we've not got any errors.
1735      */
1736     ret = qemu_file_get_error(ms->to_dst_file);
1737     if (ret) {
1738         error_report("postcopy_start: Migration stream errored (pre package)");
1739         goto fail_closefb;
1740     }
1741 
1742     restart_block = false;
1743 
1744     /* Now send that blob */
1745     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
1746         goto fail_closefb;
1747     }
1748     qemu_fclose(fb);
1749 
1750     /* Send a notification to give anything that needs to happen at the
1751      * transition to postcopy, after the device state, a chance to run;
1752      * in particular SPICE needs to trigger a transition now.
1753      */
1754     ms->postcopy_after_devices = true;
1755     notifier_list_notify(&migration_state_notifiers, ms);
1756 
1757     ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
1758 
1759     qemu_mutex_unlock_iothread();
1760 
1761     /*
1762      * Although this ping is just for debug, it could potentially be
1763      * used for getting a better measurement of downtime at the source.
1764      */
1765     qemu_savevm_send_ping(ms->to_dst_file, 4);
1766 
1767     if (migrate_release_ram()) {
1768         ram_postcopy_migrated_memory_release(ms);
1769     }
1770 
1771     ret = qemu_file_get_error(ms->to_dst_file);
1772     if (ret) {
1773         error_report("postcopy_start: Migration stream errored");
1774         migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1775                               MIGRATION_STATUS_FAILED);
1776     }
1777 
1778     return ret;
1779 
1780 fail_closefb:
1781     qemu_fclose(fb);
1782 fail:
1783     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1784                           MIGRATION_STATUS_FAILED);
1785     if (restart_block) {
1786         /* A failure happened early enough that we know the destination hasn't
1787          * accessed block devices, so we're safe to recover.
1788          */
1789         Error *local_err = NULL;
1790 
1791         bdrv_invalidate_cache_all(&local_err);
1792         if (local_err) {
1793             error_report_err(local_err);
1794         }
1795     }
1796     qemu_mutex_unlock_iothread();
1797     return -1;
1798 }
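
/*
 * The packaging trick used above, reduced to its essentials: because
 * the stream format carries no overall size field, the device state is
 * sent as a length-prefixed blob so the destination can pull it all
 * off the fd before parsing any of it.  A hedged sketch of that
 * framing using the existing QEMUFile helpers; the helper name and
 * field width here are illustrative, and qemu_savevm_send_packaged()
 * defines the real command header and layout.
 */
static void send_length_prefixed_blob(QEMUFile *f,
                                      const uint8_t *data, uint32_t len)
{
    qemu_put_be32(f, len);          /* reader learns the size up front */
    qemu_put_buffer(f, data, len);  /* then the opaque device state */
}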
1799 
1800 /**
1801  * migration_completion: Used by migration_thread when there's not much left.
1802  *   The caller 'breaks' the loop when this returns.
1803  *
1804  * @s: Current migration state
1805  * @current_active_state: The migration state we expect to be in
1806  * @old_vm_running: Pointer to old_vm_running flag
1807  * @start_time: Pointer to time to update
1808  */
1809 static void migration_completion(MigrationState *s, int current_active_state,
1810                                  bool *old_vm_running,
1811                                  int64_t *start_time)
1812 {
1813     int ret;
1814 
1815     if (s->state == MIGRATION_STATUS_ACTIVE) {
1816         qemu_mutex_lock_iothread();
1817         *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1818         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1819         *old_vm_running = runstate_is_running();
1820         ret = global_state_store();
1821 
1822         if (!ret) {
1823             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1824             /*
1825              * Don't mark the image with the BDRV_O_INACTIVE flag if
1826              * we will go into the COLO stage later.
1827              */
1828             if (ret >= 0 && !migrate_colo_enabled()) {
1829                 ret = bdrv_inactivate_all();
1830             }
1831             if (ret >= 0) {
1832                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
1833                 qemu_savevm_state_complete_precopy(s->to_dst_file, false);
1834                 s->block_inactive = true;
1835             }
1836         }
1837         qemu_mutex_unlock_iothread();
1838 
1839         if (ret < 0) {
1840             goto fail;
1841         }
1842     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1843         trace_migration_completion_postcopy_end();
1844 
1845         qemu_savevm_state_complete_postcopy(s->to_dst_file);
1846         trace_migration_completion_postcopy_end_after_complete();
1847     }
1848 
1849     /*
1850      * If rp was opened we must clean up the thread before
1851      * cleaning everything else up (since if there are no failures
1852      * it will wait for the destination to send its status in
1853      * a SHUT command).
1854      * Postcopy opens rp if enabled (even if it's not activated)
1855      */
1856     if (migrate_postcopy_ram()) {
1857         int rp_error;
1858         trace_migration_completion_postcopy_end_before_rp();
1859         rp_error = await_return_path_close_on_source(s);
1860         trace_migration_completion_postcopy_end_after_rp(rp_error);
1861         if (rp_error) {
1862             goto fail_invalidate;
1863         }
1864     }
1865 
1866     if (qemu_file_get_error(s->to_dst_file)) {
1867         trace_migration_completion_file_err();
1868         goto fail_invalidate;
1869     }
1870 
1871     if (!migrate_colo_enabled()) {
1872         migrate_set_state(&s->state, current_active_state,
1873                           MIGRATION_STATUS_COMPLETED);
1874     }
1875 
1876     return;
1877 
1878 fail_invalidate:
1879     /* If not doing postcopy, vm_start() will be called: let's regain
1880      * control of the images.
1881      */
1882     if (s->state == MIGRATION_STATUS_ACTIVE) {
1883         Error *local_err = NULL;
1884 
1885         qemu_mutex_lock_iothread();
1886         bdrv_invalidate_cache_all(&local_err);
1887         if (local_err) {
1888             error_report_err(local_err);
1889         } else {
1890             s->block_inactive = false;
1891         }
1892         qemu_mutex_unlock_iothread();
1893     }
1894 
1895 fail:
1896     migrate_set_state(&s->state, current_active_state,
1897                       MIGRATION_STATUS_FAILED);
1898 }
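
/*
 * Summary of the transitions driven above: ACTIVE or POSTCOPY_ACTIVE
 * moves to COMPLETED on success (unless COLO is enabled and takes
 * over), and to FAILED via the fail/fail_invalidate paths; images are
 * re-activated on failure only in the precopy (ACTIVE) case, since in
 * postcopy the destination may already own them.
 */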
1899 
1900 bool migrate_colo_enabled(void)
1901 {
1902     MigrationState *s = migrate_get_current();
1903     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
1904 }
1905 
1906 /*
1907  * Master migration thread on the source VM.
1908  * It drives the migration and pumps the data down the outgoing channel.
1909  */
1910 static void *migration_thread(void *opaque)
1911 {
1912     MigrationState *s = opaque;
1913     /* Used by the bandwidth calcs, updated later */
1914     int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1915     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
1916     int64_t initial_bytes = 0;
1917     /*
1918      * The final stage happens when the remaining data is smaller than
1919      * this threshold; it's calculated from the requested downtime and
1920      * measured bandwidth
1921      */
1922     int64_t threshold_size = 0;
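    /*
     * Worked example (numbers illustrative): the loop below measures
     * bandwidth in bytes per millisecond and downtime_limit is in
     * milliseconds, so at ~1.25e6 bytes/ms (roughly 10Gb/s) a 300ms
     * downtime limit yields a threshold of about 375MB of outstanding
     * data before completion is attempted.
     */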
1923     int64_t start_time = initial_time;
1924     int64_t end_time;
1925     bool old_vm_running = false;
1926     bool entered_postcopy = false;
1927     /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
1928     enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
1929     bool enable_colo = migrate_colo_enabled();
1930 
1931     rcu_register_thread();
1932 
1933     qemu_savevm_state_header(s->to_dst_file);
1934 
1935     if (migrate_postcopy_ram()) {
1936         /* Now tell the dest that it should open its end so it can reply */
1937         qemu_savevm_send_open_return_path(s->to_dst_file);
1938 
1939         /* And do a ping that will make stuff easier to debug */
1940         qemu_savevm_send_ping(s->to_dst_file, 1);
1941 
1942         /*
1943          * Tell the destination that we *might* want to do postcopy later;
1944          * if the other end can't do postcopy it should fail now, nice and
1945          * early.
1946          */
1947         qemu_savevm_send_postcopy_advise(s->to_dst_file);
1948     }
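    /*
     * Wire order for a postcopy-enabled run, combining this block with
     * postcopy_start() above: OPEN_RETURN_PATH, PING(1),
     * POSTCOPY_ADVISE, the iterative precopy stream, PING(2), then the
     * packaged LISTEN + device state + PING(3) + RUN blob, and PING(4)
     * once the package has been sent.
     */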
1949 
1950     qemu_savevm_state_begin(s->to_dst_file);
1951 
1952     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
1953     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1954                       MIGRATION_STATUS_ACTIVE);
1955 
1956     trace_migration_thread_setup_complete();
1957 
1958     while (s->state == MIGRATION_STATUS_ACTIVE ||
1959            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1960         int64_t current_time;
1961         uint64_t pending_size;
1962 
1963         if (!qemu_file_rate_limit(s->to_dst_file)) {
1964             uint64_t pend_post, pend_nonpost;
1965 
1966             qemu_savevm_state_pending(s->to_dst_file, threshold_size,
1967                                       &pend_nonpost, &pend_post);
1968             pending_size = pend_nonpost + pend_post;
1969             trace_migrate_pending(pending_size, threshold_size,
1970                                   pend_post, pend_nonpost);
1971             if (pending_size && pending_size >= threshold_size) {
1972                 /* Still a significant amount to transfer */
1973 
1974                 if (migrate_postcopy_ram() &&
1975                     s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
1976                     pend_nonpost <= threshold_size &&
1977                     atomic_read(&s->start_postcopy)) {
1978 
1979                     if (!postcopy_start(s, &old_vm_running)) {
1980                         current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
1981                         entered_postcopy = true;
1982                     }
1983 
1984                     continue;
1985                 }
1986                 /* Just another iteration step */
1987                 qemu_savevm_state_iterate(s->to_dst_file, entered_postcopy);
1988             } else {
1989                 trace_migration_thread_low_pending(pending_size);
1990                 migration_completion(s, current_active_state,
1991                                      &old_vm_running, &start_time);
1992                 break;
1993             }
1994         }
1995 
1996         if (qemu_file_get_error(s->to_dst_file)) {
1997             migrate_set_state(&s->state, current_active_state,
1998                               MIGRATION_STATUS_FAILED);
1999             trace_migration_thread_file_err();
2000             break;
2001         }
2002         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2003         if (current_time >= initial_time + BUFFER_DELAY) {
2004             uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
2005                                          initial_bytes;
2006             uint64_t time_spent = current_time - initial_time;
2007             double bandwidth = (double)transferred_bytes / time_spent;
2008             threshold_size = bandwidth * s->parameters.downtime_limit;
2009 
2010             s->mbps = (((double) transferred_bytes * 8.0) /
2011                     ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
2012 
2013             trace_migrate_transferred(transferred_bytes, time_spent,
2014                                       bandwidth, threshold_size);
2015             /* If we haven't sent anything, we don't want to recalculate;
2016                10000 is a small enough number for our purposes */
2017             if (ram_dirty_pages_rate() && transferred_bytes > 10000) {
2018                 s->expected_downtime = ram_dirty_pages_rate() *
2019                     qemu_target_page_size() / bandwidth;
2020             }
2021 
2022             qemu_file_reset_rate_limit(s->to_dst_file);
2023             initial_time = current_time;
2024             initial_bytes = qemu_ftell(s->to_dst_file);
2025         }
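        /*
         * Worked example for the block above (numbers illustrative):
         * 12.5MB moved in a 100ms window gives a bandwidth of 1.25e5
         * bytes/ms, and (12.5e6 * 8) / 0.1s / 1e6 = 1000 mbps.
         */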
2026         if (qemu_file_rate_limit(s->to_dst_file)) {
2027             /* usleep expects microseconds */
2028             g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
2029         }
2030     }
2031 
2032     trace_migration_thread_after_loop();
2033     /* If we enabled cpu throttling for auto-converge, turn it off. */
2034     cpu_throttle_stop();
2035     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2036 
2037     qemu_mutex_lock_iothread();
2038     /*
2039      * The resources allocated by migration will be reused in the COLO
2040      * process, so don't release them.
2041      */
2042     if (!enable_colo) {
2043         qemu_savevm_state_cleanup();
2044     }
2045     if (s->state == MIGRATION_STATUS_COMPLETED) {
2046         uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
2047         s->total_time = end_time - s->total_time;
2048         if (!entered_postcopy) {
2049             s->downtime = end_time - start_time;
2050         }
2051         if (s->total_time) {
2052             s->mbps = (((double) transferred_bytes * 8.0) /
2053                        ((double) s->total_time)) / 1000;
2054         }
2055         runstate_set(RUN_STATE_POSTMIGRATE);
2056     } else {
2057         if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
2058             migrate_start_colo_process(s);
2059             qemu_savevm_state_cleanup();
2060             /*
2061              * FIXME: we will run the VM in COLO no matter what its old
2062              * running state was.  After exiting COLO, we will keep running.
2063              */
2064             old_vm_running = true;
2065         }
2066         if (old_vm_running && !entered_postcopy) {
2067             vm_start();
2068         } else {
2069             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
2070                 runstate_set(RUN_STATE_POSTMIGRATE);
2071             }
2072         }
2073     }
2074     qemu_bh_schedule(s->cleanup_bh);
2075     qemu_mutex_unlock_iothread();
2076 
2077     rcu_unregister_thread();
2078     return NULL;
2079 }
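
/*
 * Teardown is split on purpose: the migration thread above only
 * schedules s->cleanup_bh and exits; migrate_fd_cleanup() then runs in
 * the main loop, where joining this thread and releasing the file
 * objects is safe.
 */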
2080 
2081 void migrate_fd_connect(MigrationState *s)
2082 {
2083     s->expected_downtime = s->parameters.downtime_limit;
2084     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
2085 
2086     qemu_file_set_blocking(s->to_dst_file, true);
2087     qemu_file_set_rate_limit(s->to_dst_file,
2088                              s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
2089 
2090     /* Notify before starting migration thread */
2091     notifier_list_notify(&migration_state_notifiers, s);
2092 
2093     /*
2094      * Open the return path; currently for postcopy but other things might
2095      * also want it.
2096      */
2097     if (migrate_postcopy_ram()) {
2098         if (open_return_path_on_source(s)) {
2099             error_report("Unable to open return-path for postcopy");
2100             migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2101                               MIGRATION_STATUS_FAILED);
2102             migrate_fd_cleanup(s);
2103             return;
2104         }
2105     }
2106 
2107     migrate_compress_threads_create();
2108     qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
2109                        QEMU_THREAD_JOINABLE);
2110     s->migration_thread_running = true;
2111 }
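
/*
 * Worked example for the rate limit above (numbers illustrative):
 * with the 100ms BUFFER_DELAY pacing window, XFER_LIMIT_RATIO works
 * out to 10, so a max_bandwidth of 320MB/s becomes a budget of 32MB
 * per window; migration_thread() sleeps off the remainder of each
 * window once that budget is spent.
 */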
2112 
2113