xref: /openbmc/qemu/migration/migration.c (revision 64175afc695c0672876fbbfc31b299c86d562cb4)
1 /*
2  * QEMU live migration
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qemu/main-loop.h"
20 #include "migration/blocker.h"
21 #include "exec.h"
22 #include "fd.h"
23 #include "socket.h"
24 #include "rdma.h"
25 #include "ram.h"
26 #include "migration/migration.h"
27 #include "savevm.h"
28 #include "qemu-file-channel.h"
29 #include "qemu-file.h"
30 #include "migration/vmstate.h"
31 #include "sysemu/sysemu.h"
32 #include "block/block.h"
33 #include "qapi/qmp/qerror.h"
34 #include "qapi/util.h"
35 #include "qemu/sockets.h"
36 #include "qemu/rcu.h"
37 #include "block.h"
38 #include "postcopy-ram.h"
39 #include "qemu/thread.h"
40 #include "qmp-commands.h"
41 #include "trace.h"
42 #include "qapi-event.h"
43 #include "qom/cpu.h"
44 #include "exec/memory.h"
45 #include "exec/address-spaces.h"
46 #include "exec/target_page.h"
47 #include "io/channel-buffer.h"
48 #include "migration/colo.h"
49 
50 #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
51 
52 /* Amount of time to allocate to each "chunk" of bandwidth-throttled
53  * data. */
54 #define BUFFER_DELAY     100
/* Divisor applied to the max_bandwidth parameter before it is handed to
 * qemu_file_set_rate_limit(). */
55 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
56 
57 /* Time in milliseconds we are allowed to stop the source,
58  * for sending the last part */
59 #define DEFAULT_MIGRATE_SET_DOWNTIME 300
60 
61 /* Maximum migrate downtime set to 2000 seconds */
62 #define MAX_MIGRATE_DOWNTIME_SECONDS 2000
/* The same limit in milliseconds; downtime_limit is validated against it */
63 #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
64 
65 /* Default compression thread count */
66 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
67 /* Default decompression thread count, usually decompression is at
68  * least 4 times as fast as compression.*/
69 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
70 /* 0: no compression, 1: best speed, ... 9: best compression ratio */
71 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
72 /* Define default autoconverge cpu throttle migration parameters */
73 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
74 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
75 
76 /* Migration XBZRLE default cache size */
77 #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
78 
79 /* The delay time (in ms) between two COLO checkpoints
80  * Note: Please change this default value to 10000 when we support hybrid mode.
81  */
82 #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
83 
/* Notifier list for migration state changes */
84 static NotifierList migration_state_notifiers =
85     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
86 
/* Set once a "defer" incoming URI has been seen
 * (see deferred_incoming_migration) */
87 static bool deferred_incoming;
88 
89 /* When we add fault tolerance, we could have several
90    migrations at once.  For now we don't need to add
91    dynamic creation of migration */
92 
93 /* For outgoing */
94 MigrationState *migrate_get_current(void)
95 {
96     static bool once;
97     static MigrationState current_migration = {
98         .state = MIGRATION_STATUS_NONE,
99         .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
100         .mbps = -1,
101         .parameters = {
102             .compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL,
103             .compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
104             .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
105             .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL,
106             .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
107             .max_bandwidth = MAX_THROTTLE,
108             .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME,
109             .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY,
110         },
111     };
112 
113     if (!once) {
114         current_migration.parameters.tls_creds = g_strdup("");
115         current_migration.parameters.tls_hostname = g_strdup("");
116         once = true;
117     }
118     return &current_migration;
119 }
120 
121 MigrationIncomingState *migration_incoming_get_current(void)
122 {
123     static bool once;
124     static MigrationIncomingState mis_current;
125 
126     if (!once) {
127         mis_current.state = MIGRATION_STATUS_NONE;
128         memset(&mis_current, 0, sizeof(MigrationIncomingState));
129         qemu_mutex_init(&mis_current.rp_mutex);
130         qemu_event_init(&mis_current.main_thread_load_event, false);
131         once = true;
132     }
133     return &mis_current;
134 }
135 
136 void migration_incoming_state_destroy(void)
137 {
138     struct MigrationIncomingState *mis = migration_incoming_get_current();
139 
140     if (mis->to_src_file) {
141         /* Tell source that we are done */
142         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
143         qemu_fclose(mis->to_src_file);
144         mis->to_src_file = NULL;
145     }
146 
147     if (mis->from_src_file) {
148         qemu_fclose(mis->from_src_file);
149         mis->from_src_file = NULL;
150     }
151 
152     qemu_event_destroy(&mis->main_thread_load_event);
153 }
154 
155 
/*
 * Backing data for the "globalstate" migration section, which carries the
 * name of the VM run state.
 */
156 typedef struct {
    /* When false, global_state_needed() always requests the section */
157     bool optional;
    /* strlen(runstate) + 1; filled in by global_state_pre_save() */
158     uint32_t size;
    /* Run state name, used as a C string */
159     uint8_t runstate[100];
    /* Parsed value of runstate; filled in by global_state_post_load() */
160     RunState state;
    /* Set to true by global_state_post_load() */
161     bool received;
162 } GlobalState;
163 
/* The single instance registered by register_global_state() */
164 static GlobalState global_state;
165 
166 int global_state_store(void)
167 {
168     if (!runstate_store((char *)global_state.runstate,
169                         sizeof(global_state.runstate))) {
170         error_report("runstate name too big: %s", global_state.runstate);
171         trace_migrate_state_too_big();
172         return -EINVAL;
173     }
174     return 0;
175 }
176 
177 void global_state_store_running(void)
178 {
179     const char *state = RunState_lookup[RUN_STATE_RUNNING];
180     strncpy((char *)global_state.runstate,
181            state, sizeof(global_state.runstate));
182 }
183 
184 static bool global_state_received(void)
185 {
186     return global_state.received;
187 }
188 
189 static RunState global_state_get_runstate(void)
190 {
191     return global_state.state;
192 }
193 
/*
 * Mark the global state section as optional, so that
 * global_state_needed() may decide to leave it out.
 */
194 void global_state_set_optional(void)
195 {
196     global_state.optional = true;
197 }
198 
199 static bool global_state_needed(void *opaque)
200 {
201     GlobalState *s = opaque;
202     char *runstate = (char *)s->runstate;
203 
204     /* If it is not optional, it is mandatory */
205 
206     if (s->optional == false) {
207         return true;
208     }
209 
210     /* If state is running or paused, it is not needed */
211 
212     if (strcmp(runstate, "running") == 0 ||
213         strcmp(runstate, "paused") == 0) {
214         return false;
215     }
216 
217     /* for any other state it is needed */
218     return true;
219 }
220 
221 static int global_state_post_load(void *opaque, int version_id)
222 {
223     GlobalState *s = opaque;
224     Error *local_err = NULL;
225     int r;
226     char *runstate = (char *)s->runstate;
227 
228     s->received = true;
229     trace_migrate_global_state_post_load(runstate);
230 
231     r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE__MAX,
232                                 -1, &local_err);
233 
234     if (r == -1) {
235         if (local_err) {
236             error_report_err(local_err);
237         }
238         return -EINVAL;
239     }
240     s->state = r;
241 
242     return 0;
243 }
244 
245 static void global_state_pre_save(void *opaque)
246 {
247     GlobalState *s = opaque;
248 
249     trace_migrate_global_state_pre_save((char *)s->runstate);
250     s->size = strlen((char *)s->runstate) + 1;
251 }
252 
/*
 * Migration description of GlobalState: the size field followed by the
 * whole runstate buffer.  The callbacks fill in size before saving,
 * parse the run state name after loading, and decide whether the section
 * is sent at all.
 */
253 static const VMStateDescription vmstate_globalstate = {
254     .name = "globalstate",
255     .version_id = 1,
256     .minimum_version_id = 1,
257     .post_load = global_state_post_load,
258     .pre_save = global_state_pre_save,
259     .needed = global_state_needed,
260     .fields = (VMStateField[]) {
261         VMSTATE_UINT32(size, GlobalState),
262         VMSTATE_BUFFER(runstate, GlobalState),
263         VMSTATE_END_OF_LIST()
264     },
265 };
266 
267 void register_global_state(void)
268 {
269     /* We would use it independently that we receive it */
270     strcpy((char *)&global_state.runstate, "");
271     global_state.received = false;
272     vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
273 }
274 
275 static void migrate_generate_event(int new_state)
276 {
277     if (migrate_use_events()) {
278         qapi_event_send_migration(new_state, &error_abort);
279     }
280 }
281 
282 /*
283  * Called on -incoming with a defer: uri.
284  * The migration can be started later after any parameters have been
285  * changed.
286  */
287 static void deferred_incoming_migration(Error **errp)
288 {
289     if (deferred_incoming) {
290         error_setg(errp, "Incoming migration already deferred");
291     }
292     deferred_incoming = true;
293 }
294 
295 /* Request a range of pages from the source VM at the given
296  * start address.
297  *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
298  *           as the last request (a name must have been given previously)
299  *   Start: Address offset within the RB
300  *   Len: Length in bytes required - must be a multiple of pagesize
301  */
302 void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
303                                ram_addr_t start, size_t len)
304 {
305     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
306     size_t msglen = 12; /* start + len */
307 
308     *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
309     *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
310 
311     if (rbname) {
312         int rbname_len = strlen(rbname);
313         assert(rbname_len < 256);
314 
315         bufc[msglen++] = rbname_len;
316         memcpy(bufc + msglen, rbname, rbname_len);
317         msglen += rbname_len;
318         migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
319     } else {
320         migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
321     }
322 }
323 
324 void qemu_start_incoming_migration(const char *uri, Error **errp)
325 {
326     const char *p;
327 
328     qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
329     if (!strcmp(uri, "defer")) {
330         deferred_incoming_migration(errp);
331     } else if (strstart(uri, "tcp:", &p)) {
332         tcp_start_incoming_migration(p, errp);
333 #ifdef CONFIG_RDMA
334     } else if (strstart(uri, "rdma:", &p)) {
335         rdma_start_incoming_migration(p, errp);
336 #endif
337     } else if (strstart(uri, "exec:", &p)) {
338         exec_start_incoming_migration(p, errp);
339     } else if (strstart(uri, "unix:", &p)) {
340         unix_start_incoming_migration(p, errp);
341     } else if (strstart(uri, "fd:", &p)) {
342         fd_start_incoming_migration(p, errp);
343     } else {
344         error_setg(errp, "unknown migration protocol: %s", uri);
345     }
346 }
347 
/*
 * Bottom half scheduled by process_incoming_migration_co() once the VM
 * state has been loaded.  It brings the VM into its final run state,
 * marks the incoming migration COMPLETED, then deletes itself and
 * destroys the incoming migration state.
 *
 *   opaque: the MigrationIncomingState
 */
348 static void process_incoming_migration_bh(void *opaque)
349 {
350     Error *local_err = NULL;
351     MigrationIncomingState *mis = opaque;
352 
353     /* Make sure all file formats flush their mutable metadata.
354      * If we get an error here, just don't restart the VM yet. */
355     bdrv_invalidate_cache_all(&local_err);
356     if (local_err) {
357         error_report_err(local_err);
358         local_err = NULL;
359         autostart = false;
360     }
361 
362     /*
363      * This must happen after all error conditions are dealt with and
364      * we're sure the VM is going to be running on this host.
365      */
366     qemu_announce_self();
367 
368     /* If global state section was not received or we are in running
369        state, we need to obey autostart. Any other state is set with
370        runstate_set. */
371 
372     if (!global_state_received() ||
373         global_state_get_runstate() == RUN_STATE_RUNNING) {
374         if (autostart) {
375             vm_start();
376         } else {
377             runstate_set(RUN_STATE_PAUSED);
378         }
379     } else {
380         runstate_set(global_state_get_runstate());
381     }
382     migrate_decompress_threads_join();
383     /*
384      * This must happen after any state changes since as soon as an external
385      * observer sees this event they might start to prod at the VM assuming
386      * it's ready to use.
387      */
388     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
389                       MIGRATION_STATUS_COMPLETED);
    /* This bottom half runs once only: drop it before tearing down mis */
390     qemu_bh_delete(mis->bh);
391     migration_incoming_state_destroy();
392 }
393 
/*
 * Coroutine entry point for an incoming migration.
 *
 * Loads the VM state from the QEMUFile passed as opaque, then:
 *  - returns early when postcopy has taken over (it does its own cleanup);
 *  - in COLO mode, starts the COLO incoming thread, yields, and joins the
 *    thread once the coroutine is resumed;
 *  - on load failure, marks the migration FAILED and exits the process;
 *  - otherwise schedules process_incoming_migration_bh() to finish up.
 */
394 static void process_incoming_migration_co(void *opaque)
395 {
396     QEMUFile *f = opaque;
397     MigrationIncomingState *mis = migration_incoming_get_current();
398     PostcopyState ps;
399     int ret;
400 
401     mis->from_src_file = f;
402     mis->largest_page_size = qemu_ram_pagesize_largest();
403     postcopy_state_set(POSTCOPY_INCOMING_NONE);
404     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
405                       MIGRATION_STATUS_ACTIVE);
406     ret = qemu_loadvm_state(f);
407 
408     ps = postcopy_state_get();
409     trace_process_incoming_migration_co_end(ret, ps);
410     if (ps != POSTCOPY_INCOMING_NONE) {
411         if (ps == POSTCOPY_INCOMING_ADVISE) {
412             /*
413              * Where a migration had postcopy enabled (and thus went to advise)
414              * but managed to complete within the precopy period, we can use
415              * the normal exit.
416              */
417             postcopy_ram_incoming_cleanup(mis);
418         } else if (ret >= 0) {
419             /*
420              * Postcopy was started, cleanup should happen at the end of the
421              * postcopy thread.
422              */
423             trace_process_incoming_migration_co_postcopy_end_main();
424             return;
425         }
426         /* Else if something went wrong then just fall out of the normal exit */
427     }
428 
429     /* we get COLO info, and know if we are in COLO mode */
430     if (!ret && migration_incoming_enable_colo()) {
431         mis->migration_incoming_co = qemu_coroutine_self();
432         qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
433              colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
434         mis->have_colo_incoming_thread = true;
435         qemu_coroutine_yield();
436 
437         /* Wait checkpoint incoming thread exit before free resource */
438         qemu_thread_join(&mis->colo_incoming_thread);
439     }
440 
441     if (ret < 0) {
442         migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
443                           MIGRATION_STATUS_FAILED);
444         error_report("load of migration failed: %s", strerror(-ret));
445         migrate_decompress_threads_join();
446         exit(EXIT_FAILURE);
447     }
448 
449     free_xbzrle_decoded_buf();
450 
    /* The remaining completion work is done in a bottom half */
451     mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
452     qemu_bh_schedule(mis->bh);
453 }
454 
455 void migration_fd_process_incoming(QEMUFile *f)
456 {
457     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f);
458 
459     migrate_decompress_threads_create();
460     qemu_file_set_blocking(f, false);
461     qemu_coroutine_enter(co);
462 }
463 
464 /*
465  * Send a message on the return channel back to the source
466  * of the migration.
 *
 *   message_type: written as a big-endian 16-bit value
 *   len:          number of payload bytes, also written as big-endian 16 bit
 *   data:         the payload, len bytes
 *
 * The header, payload and flush all happen with mis->rp_mutex held.
467  */
468 void migrate_send_rp_message(MigrationIncomingState *mis,
469                              enum mig_rp_message_type message_type,
470                              uint16_t len, void *data)
471 {
472     trace_migrate_send_rp_message((int)message_type, len);
473     qemu_mutex_lock(&mis->rp_mutex);
474     qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
475     qemu_put_be16(mis->to_src_file, len);
476     qemu_put_buffer(mis->to_src_file, data, len);
477     qemu_fflush(mis->to_src_file);
478     qemu_mutex_unlock(&mis->rp_mutex);
479 }
480 
481 /*
482  * Send a 'SHUT' message on the return channel with the given value
483  * to indicate that we've finished with the RP.  Non-0 value indicates
484  * error.
485  */
486 void migrate_send_rp_shut(MigrationIncomingState *mis,
487                           uint32_t value)
488 {
489     uint32_t buf;
490 
491     buf = cpu_to_be32(value);
492     migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
493 }
494 
495 /*
496  * Send a 'PONG' message on the return channel with the given value
497  * (normally in response to a 'PING')
498  */
499 void migrate_send_rp_pong(MigrationIncomingState *mis,
500                           uint32_t value)
501 {
502     uint32_t buf;
503 
504     buf = cpu_to_be32(value);
505     migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
506 }
507 
508 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
509 {
510     MigrationCapabilityStatusList *head = NULL;
511     MigrationCapabilityStatusList *caps;
512     MigrationState *s = migrate_get_current();
513     int i;
514 
515     caps = NULL; /* silence compiler warning */
516     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
517 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
518         if (i == MIGRATION_CAPABILITY_BLOCK) {
519             continue;
520         }
521 #endif
522         if (i == MIGRATION_CAPABILITY_X_COLO && !colo_supported()) {
523             continue;
524         }
525         if (head == NULL) {
526             head = g_malloc0(sizeof(*caps));
527             caps = head;
528         } else {
529             caps->next = g_malloc0(sizeof(*caps));
530             caps = caps->next;
531         }
532         caps->value =
533             g_malloc(sizeof(*caps->value));
534         caps->value->capability = i;
535         caps->value->state = s->enabled_capabilities[i];
536     }
537 
538     return head;
539 }
540 
541 MigrationParameters *qmp_query_migrate_parameters(Error **errp)
542 {
543     MigrationParameters *params;
544     MigrationState *s = migrate_get_current();
545 
546     params = g_malloc0(sizeof(*params));
547     params->has_compress_level = true;
548     params->compress_level = s->parameters.compress_level;
549     params->has_compress_threads = true;
550     params->compress_threads = s->parameters.compress_threads;
551     params->has_decompress_threads = true;
552     params->decompress_threads = s->parameters.decompress_threads;
553     params->has_cpu_throttle_initial = true;
554     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
555     params->has_cpu_throttle_increment = true;
556     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
557     params->has_tls_creds = !!s->parameters.tls_creds;
558     params->tls_creds = g_strdup(s->parameters.tls_creds);
559     params->has_tls_hostname = !!s->parameters.tls_hostname;
560     params->tls_hostname = g_strdup(s->parameters.tls_hostname);
561     params->has_max_bandwidth = true;
562     params->max_bandwidth = s->parameters.max_bandwidth;
563     params->has_downtime_limit = true;
564     params->downtime_limit = s->parameters.downtime_limit;
565     params->has_x_checkpoint_delay = true;
566     params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
567     params->has_block_incremental = true;
568     params->block_incremental = s->parameters.block_incremental;
569 
570     return params;
571 }
572 
573 /*
574  * Return true if we're already in the middle of a migration
575  * (i.e. any of the active or setup states)
576  */
577 static bool migration_is_setup_or_active(int state)
578 {
579     switch (state) {
580     case MIGRATION_STATUS_ACTIVE:
581     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
582     case MIGRATION_STATUS_SETUP:
583         return true;
584 
585     default:
586         return false;
587 
588     }
589 }
590 
/*
 * Fill in the RAM section of a query-migrate reply.
 *
 * Allocates info->ram and copies the RAM counters into it.  The XBZRLE
 * cache statistics are added when XBZRLE is in use, and the CPU throttle
 * percentage when throttling is active.  The remaining-bytes and
 * dirty-page-rate fields are filled in only while the migration has not
 * completed.
 */
591 static void populate_ram_info(MigrationInfo *info, MigrationState *s)
592 {
593     info->has_ram = true;
594     info->ram = g_malloc0(sizeof(*info->ram));
595     info->ram->transferred = ram_counters.transferred;
596     info->ram->total = ram_bytes_total();
597     info->ram->duplicate = ram_counters.duplicate;
598     /* legacy value.  It is not used anymore */
599     info->ram->skipped = 0;
600     info->ram->normal = ram_counters.normal;
    /* ram_counters.normal is scaled by the target page size here */
601     info->ram->normal_bytes = ram_counters.normal *
602         qemu_target_page_size();
603     info->ram->mbps = s->mbps;
604     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
605     info->ram->postcopy_requests = ram_counters.postcopy_requests;
606     info->ram->page_size = qemu_target_page_size();
607 
608     if (migrate_use_xbzrle()) {
609         info->has_xbzrle_cache = true;
610         info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
611         info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
612         info->xbzrle_cache->bytes = xbzrle_counters.bytes;
613         info->xbzrle_cache->pages = xbzrle_counters.pages;
614         info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
615         info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
616         info->xbzrle_cache->overflow = xbzrle_counters.overflow;
617     }
618 
619     if (cpu_throttle_active()) {
620         info->has_cpu_throttle_percentage = true;
621         info->cpu_throttle_percentage = cpu_throttle_get_percentage();
622     }
623 
    /* After completion these two stay at the zero from g_malloc0() */
624     if (s->state != MIGRATION_STATUS_COMPLETED) {
625         info->ram->remaining = ram_bytes_remaining();
626         info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
627     }
628 }
629 
630 MigrationInfo *qmp_query_migrate(Error **errp)
631 {
632     MigrationInfo *info = g_malloc0(sizeof(*info));
633     MigrationState *s = migrate_get_current();
634 
635     switch (s->state) {
636     case MIGRATION_STATUS_NONE:
637         /* no migration has happened ever */
638         break;
639     case MIGRATION_STATUS_SETUP:
640         info->has_status = true;
641         info->has_total_time = false;
642         break;
643     case MIGRATION_STATUS_ACTIVE:
644     case MIGRATION_STATUS_CANCELLING:
645         info->has_status = true;
646         info->has_total_time = true;
647         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
648             - s->total_time;
649         info->has_expected_downtime = true;
650         info->expected_downtime = s->expected_downtime;
651         info->has_setup_time = true;
652         info->setup_time = s->setup_time;
653 
654         populate_ram_info(info, s);
655 
656         if (blk_mig_active()) {
657             info->has_disk = true;
658             info->disk = g_malloc0(sizeof(*info->disk));
659             info->disk->transferred = blk_mig_bytes_transferred();
660             info->disk->remaining = blk_mig_bytes_remaining();
661             info->disk->total = blk_mig_bytes_total();
662         }
663 
664         break;
665     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
666         /* Mostly the same as active; TODO add some postcopy stats */
667         info->has_status = true;
668         info->has_total_time = true;
669         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
670             - s->total_time;
671         info->has_expected_downtime = true;
672         info->expected_downtime = s->expected_downtime;
673         info->has_setup_time = true;
674         info->setup_time = s->setup_time;
675 
676         populate_ram_info(info, s);
677 
678         if (blk_mig_active()) {
679             info->has_disk = true;
680             info->disk = g_malloc0(sizeof(*info->disk));
681             info->disk->transferred = blk_mig_bytes_transferred();
682             info->disk->remaining = blk_mig_bytes_remaining();
683             info->disk->total = blk_mig_bytes_total();
684         }
685 
686         break;
687     case MIGRATION_STATUS_COLO:
688         info->has_status = true;
689         /* TODO: display COLO specific information (checkpoint info etc.) */
690         break;
691     case MIGRATION_STATUS_COMPLETED:
692         info->has_status = true;
693         info->has_total_time = true;
694         info->total_time = s->total_time;
695         info->has_downtime = true;
696         info->downtime = s->downtime;
697         info->has_setup_time = true;
698         info->setup_time = s->setup_time;
699 
700         populate_ram_info(info, s);
701         break;
702     case MIGRATION_STATUS_FAILED:
703         info->has_status = true;
704         if (s->error) {
705             info->has_error_desc = true;
706             info->error_desc = g_strdup(error_get_pretty(s->error));
707         }
708         break;
709     case MIGRATION_STATUS_CANCELLED:
710         info->has_status = true;
711         break;
712     }
713     info->status = s->state;
714 
715     return info;
716 }
717 
718 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
719                                   Error **errp)
720 {
721     MigrationState *s = migrate_get_current();
722     MigrationCapabilityStatusList *cap;
723     bool old_postcopy_cap = migrate_postcopy_ram();
724 
725     if (migration_is_setup_or_active(s->state)) {
726         error_setg(errp, QERR_MIGRATION_ACTIVE);
727         return;
728     }
729 
730     for (cap = params; cap; cap = cap->next) {
731 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
732         if (cap->value->capability == MIGRATION_CAPABILITY_BLOCK
733             && cap->value->state) {
734             error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
735                        "block migration");
736             error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
737             continue;
738         }
739 #endif
740         if (cap->value->capability == MIGRATION_CAPABILITY_X_COLO) {
741             if (!colo_supported()) {
742                 error_setg(errp, "COLO is not currently supported, please"
743                              " configure with --enable-colo option in order to"
744                              " support COLO feature");
745                 continue;
746             }
747         }
748         s->enabled_capabilities[cap->value->capability] = cap->value->state;
749     }
750 
751     if (migrate_postcopy_ram()) {
752         if (migrate_use_compression()) {
753             /* The decompression threads asynchronously write into RAM
754              * rather than use the atomic copies needed to avoid
755              * userfaulting.  It should be possible to fix the decompression
756              * threads for compatibility in future.
757              */
758             error_report("Postcopy is not currently compatible with "
759                          "compression");
760             s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
761                 false;
762         }
763         /* This check is reasonably expensive, so only when it's being
764          * set the first time, also it's only the destination that needs
765          * special support.
766          */
767         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
768             !postcopy_ram_supported_by_host()) {
769             /* postcopy_ram_supported_by_host will have emitted a more
770              * detailed message
771              */
772             error_report("Postcopy is not supported");
773             s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
774                 false;
775         }
776     }
777 }
778 
779 void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
780 {
781     MigrationState *s = migrate_get_current();
782 
783     if (params->has_compress_level &&
784         (params->compress_level < 0 || params->compress_level > 9)) {
785         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
786                    "is invalid, it should be in the range of 0 to 9");
787         return;
788     }
789     if (params->has_compress_threads &&
790         (params->compress_threads < 1 || params->compress_threads > 255)) {
791         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
792                    "compress_threads",
793                    "is invalid, it should be in the range of 1 to 255");
794         return;
795     }
796     if (params->has_decompress_threads &&
797         (params->decompress_threads < 1 || params->decompress_threads > 255)) {
798         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
799                    "decompress_threads",
800                    "is invalid, it should be in the range of 1 to 255");
801         return;
802     }
803     if (params->has_cpu_throttle_initial &&
804         (params->cpu_throttle_initial < 1 ||
805          params->cpu_throttle_initial > 99)) {
806         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
807                    "cpu_throttle_initial",
808                    "an integer in the range of 1 to 99");
809         return;
810     }
811     if (params->has_cpu_throttle_increment &&
812         (params->cpu_throttle_increment < 1 ||
813          params->cpu_throttle_increment > 99)) {
814         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
815                    "cpu_throttle_increment",
816                    "an integer in the range of 1 to 99");
817         return;
818     }
819     if (params->has_max_bandwidth &&
820         (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) {
821         error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
822                          " range of 0 to %zu bytes/second", SIZE_MAX);
823         return;
824     }
825     if (params->has_downtime_limit &&
826         (params->downtime_limit < 0 ||
827          params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
828         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
829                          "the range of 0 to %d milliseconds",
830                          MAX_MIGRATE_DOWNTIME);
831         return;
832     }
833     if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
834         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
835                     "x_checkpoint_delay",
836                     "is invalid, it should be positive");
837     }
838 
839     if (params->has_compress_level) {
840         s->parameters.compress_level = params->compress_level;
841     }
842     if (params->has_compress_threads) {
843         s->parameters.compress_threads = params->compress_threads;
844     }
845     if (params->has_decompress_threads) {
846         s->parameters.decompress_threads = params->decompress_threads;
847     }
848     if (params->has_cpu_throttle_initial) {
849         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
850     }
851     if (params->has_cpu_throttle_increment) {
852         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
853     }
854     if (params->has_tls_creds) {
855         g_free(s->parameters.tls_creds);
856         s->parameters.tls_creds = g_strdup(params->tls_creds);
857     }
858     if (params->has_tls_hostname) {
859         g_free(s->parameters.tls_hostname);
860         s->parameters.tls_hostname = g_strdup(params->tls_hostname);
861     }
862     if (params->has_max_bandwidth) {
863         s->parameters.max_bandwidth = params->max_bandwidth;
864         if (s->to_dst_file) {
865             qemu_file_set_rate_limit(s->to_dst_file,
866                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
867         }
868     }
869     if (params->has_downtime_limit) {
870         s->parameters.downtime_limit = params->downtime_limit;
871     }
872 
873     if (params->has_x_checkpoint_delay) {
874         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
875         if (migration_in_colo_state()) {
876             colo_checkpoint_notify(s);
877         }
878     }
879     if (params->has_block_incremental) {
880         s->parameters.block_incremental = params->block_incremental;
881     }
882 }
883 
884 
885 void qmp_migrate_start_postcopy(Error **errp)
886 {
887     MigrationState *s = migrate_get_current();
888 
889     if (!migrate_postcopy_ram()) {
890         error_setg(errp, "Enable postcopy with migrate_set_capability before"
891                          " the start of migration");
892         return;
893     }
894 
895     if (s->state == MIGRATION_STATUS_NONE) {
896         error_setg(errp, "Postcopy must be started after migration has been"
897                          " started");
898         return;
899     }
900     /*
901      * we don't error if migration has finished since that would be racy
902      * with issuing this command.
903      */
904     atomic_set(&s->start_postcopy, true);
905 }
906 
907 /* shared migration helpers */
908 
/*
 * Atomically move *@state from @old_state to @new_state.
 *
 * The trace point and the state-change event are only emitted when the
 * compare-and-exchange actually performed the transition; if *@state did
 * not hold @old_state nothing happens.
 */
void migrate_set_state(int *state, int old_state, int new_state)
{
    if (atomic_cmpxchg(state, old_state, new_state) != old_state) {
        return;
    }

    trace_migrate_set_state(new_state);
    migrate_generate_event(new_state);
}
916 
917 void migrate_set_block_enabled(bool value, Error **errp)
918 {
919     MigrationCapabilityStatusList *cap;
920 
921     cap = g_new0(MigrationCapabilityStatusList, 1);
922     cap->value = g_new0(MigrationCapabilityStatus, 1);
923     cap->value->capability = MIGRATION_CAPABILITY_BLOCK;
924     cap->value->state = value;
925     qmp_migrate_set_capabilities(cap, errp);
926     qapi_free_MigrationCapabilityStatusList(cap);
927 }
928 
929 static void migrate_set_block_incremental(MigrationState *s, bool value)
930 {
931     s->parameters.block_incremental = value;
932 }
933 
934 static void block_cleanup_parameters(MigrationState *s)
935 {
936     if (s->must_remove_block_options) {
937         /* setting to false can never fail */
938         migrate_set_block_enabled(false, &error_abort);
939         migrate_set_block_incremental(s, false);
940         s->must_remove_block_options = false;
941     }
942 }
943 
944 static void migrate_fd_cleanup(void *opaque)
945 {
946     MigrationState *s = opaque;
947 
948     qemu_bh_delete(s->cleanup_bh);
949     s->cleanup_bh = NULL;
950 
951     if (s->to_dst_file) {
952         trace_migrate_fd_cleanup();
953         qemu_mutex_unlock_iothread();
954         if (s->migration_thread_running) {
955             qemu_thread_join(&s->thread);
956             s->migration_thread_running = false;
957         }
958         qemu_mutex_lock_iothread();
959 
960         migrate_compress_threads_join();
961         qemu_fclose(s->to_dst_file);
962         s->to_dst_file = NULL;
963     }
964 
965     assert((s->state != MIGRATION_STATUS_ACTIVE) &&
966            (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
967 
968     if (s->state == MIGRATION_STATUS_CANCELLING) {
969         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
970                           MIGRATION_STATUS_CANCELLED);
971     }
972 
973     notifier_list_notify(&migration_state_notifiers, s);
974     block_cleanup_parameters(s);
975 }
976 
977 void migrate_fd_error(MigrationState *s, const Error *error)
978 {
979     trace_migrate_fd_error(error_get_pretty(error));
980     assert(s->to_dst_file == NULL);
981     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
982                       MIGRATION_STATUS_FAILED);
983     if (!s->error) {
984         s->error = error_copy(error);
985     }
986     notifier_list_notify(&migration_state_notifiers, s);
987     block_cleanup_parameters(s);
988 }
989 
990 static void migrate_fd_cancel(MigrationState *s)
991 {
992     int old_state ;
993     QEMUFile *f = migrate_get_current()->to_dst_file;
994     trace_migrate_fd_cancel();
995 
996     if (s->rp_state.from_dst_file) {
997         /* shutdown the rp socket, so causing the rp thread to shutdown */
998         qemu_file_shutdown(s->rp_state.from_dst_file);
999     }
1000 
1001     do {
1002         old_state = s->state;
1003         if (!migration_is_setup_or_active(old_state)) {
1004             break;
1005         }
1006         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
1007     } while (s->state != MIGRATION_STATUS_CANCELLING);
1008 
1009     /*
1010      * If we're unlucky the migration code might be stuck somewhere in a
1011      * send/write while the network has failed and is waiting to timeout;
1012      * if we've got shutdown(2) available then we can force it to quit.
1013      * The outgoing qemu file gets closed in migrate_fd_cleanup that is
1014      * called in a bh, so there is no race against this cancel.
1015      */
1016     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
1017         qemu_file_shutdown(f);
1018     }
1019     if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
1020         Error *local_err = NULL;
1021 
1022         bdrv_invalidate_cache_all(&local_err);
1023         if (local_err) {
1024             error_report_err(local_err);
1025         } else {
1026             s->block_inactive = false;
1027         }
1028     }
1029     block_cleanup_parameters(s);
1030 }
1031 
1032 void add_migration_state_change_notifier(Notifier *notify)
1033 {
1034     notifier_list_add(&migration_state_notifiers, notify);
1035 }
1036 
1037 void remove_migration_state_change_notifier(Notifier *notify)
1038 {
1039     notifier_remove(notify);
1040 }
1041 
1042 bool migration_in_setup(MigrationState *s)
1043 {
1044     return s->state == MIGRATION_STATUS_SETUP;
1045 }
1046 
1047 bool migration_has_finished(MigrationState *s)
1048 {
1049     return s->state == MIGRATION_STATUS_COMPLETED;
1050 }
1051 
1052 bool migration_has_failed(MigrationState *s)
1053 {
1054     return (s->state == MIGRATION_STATUS_CANCELLED ||
1055             s->state == MIGRATION_STATUS_FAILED);
1056 }
1057 
1058 bool migration_in_postcopy(void)
1059 {
1060     MigrationState *s = migrate_get_current();
1061 
1062     return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1063 }
1064 
1065 bool migration_in_postcopy_after_devices(MigrationState *s)
1066 {
1067     return migration_in_postcopy() && s->postcopy_after_devices;
1068 }
1069 
1070 bool migration_is_idle(void)
1071 {
1072     MigrationState *s = migrate_get_current();
1073 
1074     switch (s->state) {
1075     case MIGRATION_STATUS_NONE:
1076     case MIGRATION_STATUS_CANCELLED:
1077     case MIGRATION_STATUS_COMPLETED:
1078     case MIGRATION_STATUS_FAILED:
1079         return true;
1080     case MIGRATION_STATUS_SETUP:
1081     case MIGRATION_STATUS_CANCELLING:
1082     case MIGRATION_STATUS_ACTIVE:
1083     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1084     case MIGRATION_STATUS_COLO:
1085         return false;
1086     case MIGRATION_STATUS__MAX:
1087         g_assert_not_reached();
1088     }
1089 
1090     return false;
1091 }
1092 
1093 MigrationState *migrate_init(void)
1094 {
1095     MigrationState *s = migrate_get_current();
1096 
1097     /*
1098      * Reinitialise all migration state, except
1099      * parameters/capabilities that the user set, and
1100      * locks.
1101      */
1102     s->bytes_xfer = 0;
1103     s->xfer_limit = 0;
1104     s->cleanup_bh = 0;
1105     s->to_dst_file = NULL;
1106     s->state = MIGRATION_STATUS_NONE;
1107     s->rp_state.from_dst_file = NULL;
1108     s->rp_state.error = false;
1109     s->mbps = 0.0;
1110     s->downtime = 0;
1111     s->expected_downtime = 0;
1112     s->setup_time = 0;
1113     s->start_postcopy = false;
1114     s->postcopy_after_devices = false;
1115     s->migration_thread_running = false;
1116     error_free(s->error);
1117     s->error = NULL;
1118 
1119     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1120 
1121     s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1122     return s;
1123 }
1124 
1125 static GSList *migration_blockers;
1126 
1127 int migrate_add_blocker(Error *reason, Error **errp)
1128 {
1129     if (only_migratable) {
1130         error_propagate(errp, error_copy(reason));
1131         error_prepend(errp, "disallowing migration blocker "
1132                           "(--only_migratable) for: ");
1133         return -EACCES;
1134     }
1135 
1136     if (migration_is_idle()) {
1137         migration_blockers = g_slist_prepend(migration_blockers, reason);
1138         return 0;
1139     }
1140 
1141     error_propagate(errp, error_copy(reason));
1142     error_prepend(errp, "disallowing migration blocker (migration in "
1143                       "progress) for: ");
1144     return -EBUSY;
1145 }
1146 
1147 void migrate_del_blocker(Error *reason)
1148 {
1149     migration_blockers = g_slist_remove(migration_blockers, reason);
1150 }
1151 
1152 void qmp_migrate_incoming(const char *uri, Error **errp)
1153 {
1154     Error *local_err = NULL;
1155     static bool once = true;
1156 
1157     if (!deferred_incoming) {
1158         error_setg(errp, "For use with '-incoming defer'");
1159         return;
1160     }
1161     if (!once) {
1162         error_setg(errp, "The incoming migration has already been started");
1163     }
1164 
1165     qemu_start_incoming_migration(uri, &local_err);
1166 
1167     if (local_err) {
1168         error_propagate(errp, local_err);
1169         return;
1170     }
1171 
1172     once = false;
1173 }
1174 
1175 bool migration_is_blocked(Error **errp)
1176 {
1177     if (qemu_savevm_state_blocked(errp)) {
1178         return true;
1179     }
1180 
1181     if (migration_blockers) {
1182         *errp = error_copy(migration_blockers->data);
1183         return true;
1184     }
1185 
1186     return false;
1187 }
1188 
1189 void qmp_migrate(const char *uri, bool has_blk, bool blk,
1190                  bool has_inc, bool inc, bool has_detach, bool detach,
1191                  Error **errp)
1192 {
1193     Error *local_err = NULL;
1194     MigrationState *s = migrate_get_current();
1195     const char *p;
1196 
1197     if (migration_is_setup_or_active(s->state) ||
1198         s->state == MIGRATION_STATUS_CANCELLING ||
1199         s->state == MIGRATION_STATUS_COLO) {
1200         error_setg(errp, QERR_MIGRATION_ACTIVE);
1201         return;
1202     }
1203     if (runstate_check(RUN_STATE_INMIGRATE)) {
1204         error_setg(errp, "Guest is waiting for an incoming migration");
1205         return;
1206     }
1207 
1208     if (migration_is_blocked(errp)) {
1209         return;
1210     }
1211 
1212     if ((has_blk && blk) || (has_inc && inc)) {
1213         if (migrate_use_block() || migrate_use_block_incremental()) {
1214             error_setg(errp, "Command options are incompatible with "
1215                        "current migration capabilities");
1216             return;
1217         }
1218         migrate_set_block_enabled(true, &local_err);
1219         if (local_err) {
1220             error_propagate(errp, local_err);
1221             return;
1222         }
1223         s->must_remove_block_options = true;
1224     }
1225 
1226     if (has_inc && inc) {
1227         migrate_set_block_incremental(s, true);
1228     }
1229 
1230     s = migrate_init();
1231 
1232     if (strstart(uri, "tcp:", &p)) {
1233         tcp_start_outgoing_migration(s, p, &local_err);
1234 #ifdef CONFIG_RDMA
1235     } else if (strstart(uri, "rdma:", &p)) {
1236         rdma_start_outgoing_migration(s, p, &local_err);
1237 #endif
1238     } else if (strstart(uri, "exec:", &p)) {
1239         exec_start_outgoing_migration(s, p, &local_err);
1240     } else if (strstart(uri, "unix:", &p)) {
1241         unix_start_outgoing_migration(s, p, &local_err);
1242     } else if (strstart(uri, "fd:", &p)) {
1243         fd_start_outgoing_migration(s, p, &local_err);
1244     } else {
1245         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
1246                    "a valid migration protocol");
1247         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1248                           MIGRATION_STATUS_FAILED);
1249         return;
1250     }
1251 
1252     if (local_err) {
1253         migrate_fd_error(s, local_err);
1254         error_propagate(errp, local_err);
1255         return;
1256     }
1257 }
1258 
1259 void qmp_migrate_cancel(Error **errp)
1260 {
1261     migrate_fd_cancel(migrate_get_current());
1262 }
1263 
1264 void qmp_migrate_set_cache_size(int64_t value, Error **errp)
1265 {
1266     MigrationState *s = migrate_get_current();
1267     int64_t new_size;
1268 
1269     /* Check for truncation */
1270     if (value != (size_t)value) {
1271         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1272                    "exceeding address space");
1273         return;
1274     }
1275 
1276     /* Cache should not be larger than guest ram size */
1277     if (value > ram_bytes_total()) {
1278         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1279                    "exceeds guest ram size ");
1280         return;
1281     }
1282 
1283     new_size = xbzrle_cache_resize(value);
1284     if (new_size < 0) {
1285         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1286                    "is smaller than page size");
1287         return;
1288     }
1289 
1290     s->xbzrle_cache_size = new_size;
1291 }
1292 
1293 int64_t qmp_query_migrate_cache_size(Error **errp)
1294 {
1295     return migrate_xbzrle_cache_size();
1296 }
1297 
1298 void qmp_migrate_set_speed(int64_t value, Error **errp)
1299 {
1300     MigrationParameters p = {
1301         .has_max_bandwidth = true,
1302         .max_bandwidth = value,
1303     };
1304 
1305     qmp_migrate_set_parameters(&p, errp);
1306 }
1307 
1308 void qmp_migrate_set_downtime(double value, Error **errp)
1309 {
1310     if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
1311         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
1312                          "the range of 0 to %d seconds",
1313                          MAX_MIGRATE_DOWNTIME_SECONDS);
1314         return;
1315     }
1316 
1317     value *= 1000; /* Convert to milliseconds */
1318     value = MAX(0, MIN(INT64_MAX, value));
1319 
1320     MigrationParameters p = {
1321         .has_downtime_limit = true,
1322         .downtime_limit = value,
1323     };
1324 
1325     qmp_migrate_set_parameters(&p, errp);
1326 }
1327 
1328 bool migrate_release_ram(void)
1329 {
1330     MigrationState *s;
1331 
1332     s = migrate_get_current();
1333 
1334     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
1335 }
1336 
1337 bool migrate_postcopy_ram(void)
1338 {
1339     MigrationState *s;
1340 
1341     s = migrate_get_current();
1342 
1343     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
1344 }
1345 
1346 bool migrate_auto_converge(void)
1347 {
1348     MigrationState *s;
1349 
1350     s = migrate_get_current();
1351 
1352     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
1353 }
1354 
1355 bool migrate_zero_blocks(void)
1356 {
1357     MigrationState *s;
1358 
1359     s = migrate_get_current();
1360 
1361     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
1362 }
1363 
1364 bool migrate_use_compression(void)
1365 {
1366     MigrationState *s;
1367 
1368     s = migrate_get_current();
1369 
1370     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
1371 }
1372 
1373 int migrate_compress_level(void)
1374 {
1375     MigrationState *s;
1376 
1377     s = migrate_get_current();
1378 
1379     return s->parameters.compress_level;
1380 }
1381 
1382 int migrate_compress_threads(void)
1383 {
1384     MigrationState *s;
1385 
1386     s = migrate_get_current();
1387 
1388     return s->parameters.compress_threads;
1389 }
1390 
1391 int migrate_decompress_threads(void)
1392 {
1393     MigrationState *s;
1394 
1395     s = migrate_get_current();
1396 
1397     return s->parameters.decompress_threads;
1398 }
1399 
1400 bool migrate_use_events(void)
1401 {
1402     MigrationState *s;
1403 
1404     s = migrate_get_current();
1405 
1406     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
1407 }
1408 
1409 int migrate_use_xbzrle(void)
1410 {
1411     MigrationState *s;
1412 
1413     s = migrate_get_current();
1414 
1415     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
1416 }
1417 
1418 int64_t migrate_xbzrle_cache_size(void)
1419 {
1420     MigrationState *s;
1421 
1422     s = migrate_get_current();
1423 
1424     return s->xbzrle_cache_size;
1425 }
1426 
1427 bool migrate_use_block(void)
1428 {
1429     MigrationState *s;
1430 
1431     s = migrate_get_current();
1432 
1433     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
1434 }
1435 
1436 bool migrate_use_block_incremental(void)
1437 {
1438     MigrationState *s;
1439 
1440     s = migrate_get_current();
1441 
1442     return s->parameters.block_incremental;
1443 }
1444 
1445 /* migration thread support */
1446 /*
1447  * Something bad happened to the RP stream, mark an error
1448  * The caller shall print or trace something to indicate why
1449  */
1450 static void mark_source_rp_bad(MigrationState *s)
1451 {
1452     s->rp_state.error = true;
1453 }
1454 
1455 static struct rp_cmd_args {
1456     ssize_t     len; /* -1 = variable */
1457     const char *name;
1458 } rp_cmd_args[] = {
1459     [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
1460     [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
1461     [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
1462     [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
1463     [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
1464     [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
1465 };
1466 
1467 /*
1468  * Process a request for pages received on the return path,
1469  * We're allowed to send more than requested (e.g. to round to our page size)
1470  * and we don't need to send pages that have already been sent.
1471  */
1472 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
1473                                        ram_addr_t start, size_t len)
1474 {
1475     long our_host_ps = getpagesize();
1476 
1477     trace_migrate_handle_rp_req_pages(rbname, start, len);
1478 
1479     /*
1480      * Since we currently insist on matching page sizes, just sanity check
1481      * we're being asked for whole host pages.
1482      */
1483     if (start & (our_host_ps-1) ||
1484        (len & (our_host_ps-1))) {
1485         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
1486                      " len: %zd", __func__, start, len);
1487         mark_source_rp_bad(ms);
1488         return;
1489     }
1490 
1491     if (ram_save_queue_pages(rbname, start, len)) {
1492         mark_source_rp_bad(ms);
1493     }
1494 }
1495 
1496 /*
1497  * Handles messages sent on the return path towards the source VM
1498  *
1499  */
1500 static void *source_return_path_thread(void *opaque)
1501 {
1502     MigrationState *ms = opaque;
1503     QEMUFile *rp = ms->rp_state.from_dst_file;
1504     uint16_t header_len, header_type;
1505     uint8_t buf[512];
1506     uint32_t tmp32, sibling_error;
1507     ram_addr_t start = 0; /* =0 to silence warning */
1508     size_t  len = 0, expected_len;
1509     int res;
1510 
1511     trace_source_return_path_thread_entry();
1512     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
1513            migration_is_setup_or_active(ms->state)) {
1514         trace_source_return_path_thread_loop_top();
1515         header_type = qemu_get_be16(rp);
1516         header_len = qemu_get_be16(rp);
1517 
1518         if (header_type >= MIG_RP_MSG_MAX ||
1519             header_type == MIG_RP_MSG_INVALID) {
1520             error_report("RP: Received invalid message 0x%04x length 0x%04x",
1521                     header_type, header_len);
1522             mark_source_rp_bad(ms);
1523             goto out;
1524         }
1525 
1526         if ((rp_cmd_args[header_type].len != -1 &&
1527             header_len != rp_cmd_args[header_type].len) ||
1528             header_len > sizeof(buf)) {
1529             error_report("RP: Received '%s' message (0x%04x) with"
1530                     "incorrect length %d expecting %zu",
1531                     rp_cmd_args[header_type].name, header_type, header_len,
1532                     (size_t)rp_cmd_args[header_type].len);
1533             mark_source_rp_bad(ms);
1534             goto out;
1535         }
1536 
1537         /* We know we've got a valid header by this point */
1538         res = qemu_get_buffer(rp, buf, header_len);
1539         if (res != header_len) {
1540             error_report("RP: Failed reading data for message 0x%04x"
1541                          " read %d expected %d",
1542                          header_type, res, header_len);
1543             mark_source_rp_bad(ms);
1544             goto out;
1545         }
1546 
1547         /* OK, we have the message and the data */
1548         switch (header_type) {
1549         case MIG_RP_MSG_SHUT:
1550             sibling_error = ldl_be_p(buf);
1551             trace_source_return_path_thread_shut(sibling_error);
1552             if (sibling_error) {
1553                 error_report("RP: Sibling indicated error %d", sibling_error);
1554                 mark_source_rp_bad(ms);
1555             }
1556             /*
1557              * We'll let the main thread deal with closing the RP
1558              * we could do a shutdown(2) on it, but we're the only user
1559              * anyway, so there's nothing gained.
1560              */
1561             goto out;
1562 
1563         case MIG_RP_MSG_PONG:
1564             tmp32 = ldl_be_p(buf);
1565             trace_source_return_path_thread_pong(tmp32);
1566             break;
1567 
1568         case MIG_RP_MSG_REQ_PAGES:
1569             start = ldq_be_p(buf);
1570             len = ldl_be_p(buf + 8);
1571             migrate_handle_rp_req_pages(ms, NULL, start, len);
1572             break;
1573 
1574         case MIG_RP_MSG_REQ_PAGES_ID:
1575             expected_len = 12 + 1; /* header + termination */
1576 
1577             if (header_len >= expected_len) {
1578                 start = ldq_be_p(buf);
1579                 len = ldl_be_p(buf + 8);
1580                 /* Now we expect an idstr */
1581                 tmp32 = buf[12]; /* Length of the following idstr */
1582                 buf[13 + tmp32] = '\0';
1583                 expected_len += tmp32;
1584             }
1585             if (header_len != expected_len) {
1586                 error_report("RP: Req_Page_id with length %d expecting %zd",
1587                         header_len, expected_len);
1588                 mark_source_rp_bad(ms);
1589                 goto out;
1590             }
1591             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
1592             break;
1593 
1594         default:
1595             break;
1596         }
1597     }
1598     if (qemu_file_get_error(rp)) {
1599         trace_source_return_path_thread_bad_end();
1600         mark_source_rp_bad(ms);
1601     }
1602 
1603     trace_source_return_path_thread_end();
1604 out:
1605     ms->rp_state.from_dst_file = NULL;
1606     qemu_fclose(rp);
1607     return NULL;
1608 }
1609 
1610 static int open_return_path_on_source(MigrationState *ms)
1611 {
1612 
1613     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
1614     if (!ms->rp_state.from_dst_file) {
1615         return -1;
1616     }
1617 
1618     trace_open_return_path_on_source();
1619     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
1620                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
1621 
1622     trace_open_return_path_on_source_continue();
1623 
1624     return 0;
1625 }
1626 
1627 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
1628 static int await_return_path_close_on_source(MigrationState *ms)
1629 {
1630     /*
1631      * If this is a normal exit then the destination will send a SHUT and the
1632      * rp_thread will exit, however if there's an error we need to cause
1633      * it to exit.
1634      */
1635     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
1636         /*
1637          * shutdown(2), if we have it, will cause it to unblock if it's stuck
1638          * waiting for the destination.
1639          */
1640         qemu_file_shutdown(ms->rp_state.from_dst_file);
1641         mark_source_rp_bad(ms);
1642     }
1643     trace_await_return_path_close_on_source_joining();
1644     qemu_thread_join(&ms->rp_state.rp_thread);
1645     trace_await_return_path_close_on_source_close();
1646     return ms->rp_state.error;
1647 }
1648 
1649 /*
1650  * Switch from normal iteration to postcopy
1651  * Returns non-0 on error
1652  */
1653 static int postcopy_start(MigrationState *ms, bool *old_vm_running)
1654 {
1655     int ret;
1656     QIOChannelBuffer *bioc;
1657     QEMUFile *fb;
1658     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1659     bool restart_block = false;
1660     migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
1661                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
1662 
1663     trace_postcopy_start();
1664     qemu_mutex_lock_iothread();
1665     trace_postcopy_start_set_run();
1666 
1667     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1668     *old_vm_running = runstate_is_running();
1669     global_state_store();
1670     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1671     if (ret < 0) {
1672         goto fail;
1673     }
1674 
1675     ret = bdrv_inactivate_all();
1676     if (ret < 0) {
1677         goto fail;
1678     }
1679     restart_block = true;
1680 
1681     /*
1682      * Cause any non-postcopiable, but iterative devices to
1683      * send out their final data.
1684      */
1685     qemu_savevm_state_complete_precopy(ms->to_dst_file, true);
1686 
1687     /*
1688      * in Finish migrate and with the io-lock held everything should
1689      * be quiet, but we've potentially still got dirty pages and we
1690      * need to tell the destination to throw any pages it's already received
1691      * that are dirty
1692      */
1693     if (ram_postcopy_send_discard_bitmap(ms)) {
1694         error_report("postcopy send discard bitmap failed");
1695         goto fail;
1696     }
1697 
1698     /*
1699      * send rest of state - note things that are doing postcopy
1700      * will notice we're in POSTCOPY_ACTIVE and not actually
1701      * wrap their state up here
1702      */
1703     qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
1704     /* Ping just for debugging, helps line traces up */
1705     qemu_savevm_send_ping(ms->to_dst_file, 2);
1706 
1707     /*
1708      * While loading the device state we may trigger page transfer
1709      * requests and the fd must be free to process those, and thus
1710      * the destination must read the whole device state off the fd before
1711      * it starts processing it.  Unfortunately the ad-hoc migration format
1712      * doesn't allow the destination to know the size to read without fully
1713      * parsing it through each devices load-state code (especially the open
1714      * coded devices that use get/put).
1715      * So we wrap the device state up in a package with a length at the start;
1716      * to do this we use a qemu_buf to hold the whole of the device state.
1717      */
1718     bioc = qio_channel_buffer_new(4096);
1719     qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
1720     fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
1721     object_unref(OBJECT(bioc));
1722 
1723     /*
1724      * Make sure the receiver can get incoming pages before we send the rest
1725      * of the state
1726      */
1727     qemu_savevm_send_postcopy_listen(fb);
1728 
1729     qemu_savevm_state_complete_precopy(fb, false);
1730     qemu_savevm_send_ping(fb, 3);
1731 
1732     qemu_savevm_send_postcopy_run(fb);
1733 
1734     /* <><> end of stuff going into the package */
1735 
1736     /* Last point of recovery; as soon as we send the package the destination
1737      * can open devices and potentially start running.
1738      * Lets just check again we've not got any errors.
1739      */
1740     ret = qemu_file_get_error(ms->to_dst_file);
1741     if (ret) {
1742         error_report("postcopy_start: Migration stream errored (pre package)");
1743         goto fail_closefb;
1744     }
1745 
1746     restart_block = false;
1747 
1748     /* Now send that blob */
1749     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
1750         goto fail_closefb;
1751     }
1752     qemu_fclose(fb);
1753 
1754     /* Send a notify to give a chance for anything that needs to happen
1755      * at the transition to postcopy and after the device state; in particular
1756      * spice needs to trigger a transition now
1757      */
1758     ms->postcopy_after_devices = true;
1759     notifier_list_notify(&migration_state_notifiers, ms);
1760 
1761     ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
1762 
1763     qemu_mutex_unlock_iothread();
1764 
1765     /*
1766      * Although this ping is just for debug, it could potentially be
1767      * used for getting a better measurement of downtime at the source.
1768      */
1769     qemu_savevm_send_ping(ms->to_dst_file, 4);
1770 
1771     if (migrate_release_ram()) {
1772         ram_postcopy_migrated_memory_release(ms);
1773     }
1774 
1775     ret = qemu_file_get_error(ms->to_dst_file);
1776     if (ret) {
1777         error_report("postcopy_start: Migration stream errored");
1778         migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1779                               MIGRATION_STATUS_FAILED);
1780     }
1781 
1782     return ret;
1783 
1784 fail_closefb:
1785     qemu_fclose(fb);
1786 fail:
1787     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1788                           MIGRATION_STATUS_FAILED);
1789     if (restart_block) {
1790         /* A failure happened early enough that we know the destination hasn't
1791          * accessed block devices, so we're safe to recover.
1792          */
1793         Error *local_err = NULL;
1794 
1795         bdrv_invalidate_cache_all(&local_err);
1796         if (local_err) {
1797             error_report_err(local_err);
1798         }
1799     }
1800     qemu_mutex_unlock_iothread();
1801     return -1;
1802 }
1803 
1804 /**
1805  * migration_completion: Used by migration_thread when there's not much left.
1806  *   The caller 'breaks' the loop when this returns.
1807  *
1808  * @s: Current migration state
1809  * @current_active_state: The migration state we expect to be in
1810  * @*old_vm_running: Pointer to old_vm_running flag
1811  * @*start_time: Pointer to time to update
1812  */
1813 static void migration_completion(MigrationState *s, int current_active_state,
1814                                  bool *old_vm_running,
1815                                  int64_t *start_time)
1816 {
1817     int ret;
1818 
1819     if (s->state == MIGRATION_STATUS_ACTIVE) {
1820         qemu_mutex_lock_iothread();
1821         *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1822         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1823         *old_vm_running = runstate_is_running();
1824         ret = global_state_store();
1825 
1826         if (!ret) {
1827             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1828             /*
1829              * Don't mark the image with BDRV_O_INACTIVE flag if
1830              * we will go into COLO stage later.
1831              */
1832             if (ret >= 0 && !migrate_colo_enabled()) {
1833                 ret = bdrv_inactivate_all();
1834             }
1835             if (ret >= 0) {
1836                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
1837                 qemu_savevm_state_complete_precopy(s->to_dst_file, false);
1838                 s->block_inactive = true;
1839             }
1840         }
1841         qemu_mutex_unlock_iothread();
1842 
1843         if (ret < 0) {
1844             goto fail;
1845         }
1846     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1847         trace_migration_completion_postcopy_end();
1848 
1849         qemu_savevm_state_complete_postcopy(s->to_dst_file);
1850         trace_migration_completion_postcopy_end_after_complete();
1851     }
1852 
1853     /*
1854      * If rp was opened we must clean up the thread before
1855      * cleaning everything else up (since if there are no failures
1856      * it will wait for the destination to send it's status in
1857      * a SHUT command).
1858      * Postcopy opens rp if enabled (even if it's not avtivated)
1859      */
1860     if (migrate_postcopy_ram()) {
1861         int rp_error;
1862         trace_migration_completion_postcopy_end_before_rp();
1863         rp_error = await_return_path_close_on_source(s);
1864         trace_migration_completion_postcopy_end_after_rp(rp_error);
1865         if (rp_error) {
1866             goto fail_invalidate;
1867         }
1868     }
1869 
1870     if (qemu_file_get_error(s->to_dst_file)) {
1871         trace_migration_completion_file_err();
1872         goto fail_invalidate;
1873     }
1874 
1875     if (!migrate_colo_enabled()) {
1876         migrate_set_state(&s->state, current_active_state,
1877                           MIGRATION_STATUS_COMPLETED);
1878     }
1879 
1880     return;
1881 
1882 fail_invalidate:
1883     /* If not doing postcopy, vm_start() will be called: let's regain
1884      * control on images.
1885      */
1886     if (s->state == MIGRATION_STATUS_ACTIVE) {
1887         Error *local_err = NULL;
1888 
1889         qemu_mutex_lock_iothread();
1890         bdrv_invalidate_cache_all(&local_err);
1891         if (local_err) {
1892             error_report_err(local_err);
1893         } else {
1894             s->block_inactive = false;
1895         }
1896         qemu_mutex_unlock_iothread();
1897     }
1898 
1899 fail:
1900     migrate_set_state(&s->state, current_active_state,
1901                       MIGRATION_STATUS_FAILED);
1902 }
1903 
1904 bool migrate_colo_enabled(void)
1905 {
1906     MigrationState *s = migrate_get_current();
1907     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
1908 }
1909 
1910 /*
1911  * Master migration thread on the source VM.
1912  * It drives the migration and pumps the data down the outgoing channel.
1913  */
1914 static void *migration_thread(void *opaque)
1915 {
1916     MigrationState *s = opaque;
1917     /* Used by the bandwidth calcs, updated later */
1918     int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1919     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
1920     int64_t initial_bytes = 0;
1921     /*
1922      * The final stage happens when the remaining data is smaller than
1923      * this threshold; it's calculated from the requested downtime and
1924      * measured bandwidth
1925      */
1926     int64_t threshold_size = 0;
1927     int64_t start_time = initial_time;
1928     int64_t end_time;
1929     bool old_vm_running = false;
1930     bool entered_postcopy = false;
1931     /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
1932     enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
1933     bool enable_colo = migrate_colo_enabled();
1934 
1935     rcu_register_thread();
1936 
1937     qemu_savevm_state_header(s->to_dst_file);
1938 
1939     if (migrate_postcopy_ram()) {
1940         /* Now tell the dest that it should open its end so it can reply */
1941         qemu_savevm_send_open_return_path(s->to_dst_file);
1942 
1943         /* And do a ping that will make stuff easier to debug */
1944         qemu_savevm_send_ping(s->to_dst_file, 1);
1945 
1946         /*
1947          * Tell the destination that we *might* want to do postcopy later;
1948          * if the other end can't do postcopy it should fail now, nice and
1949          * early.
1950          */
1951         qemu_savevm_send_postcopy_advise(s->to_dst_file);
1952     }
1953 
1954     qemu_savevm_state_begin(s->to_dst_file);
1955 
1956     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
1957     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1958                       MIGRATION_STATUS_ACTIVE);
1959 
1960     trace_migration_thread_setup_complete();
1961 
1962     while (s->state == MIGRATION_STATUS_ACTIVE ||
1963            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1964         int64_t current_time;
1965         uint64_t pending_size;
1966 
1967         if (!qemu_file_rate_limit(s->to_dst_file)) {
1968             uint64_t pend_post, pend_nonpost;
1969 
1970             qemu_savevm_state_pending(s->to_dst_file, threshold_size,
1971                                       &pend_nonpost, &pend_post);
1972             pending_size = pend_nonpost + pend_post;
1973             trace_migrate_pending(pending_size, threshold_size,
1974                                   pend_post, pend_nonpost);
1975             if (pending_size && pending_size >= threshold_size) {
1976                 /* Still a significant amount to transfer */
1977 
1978                 if (migrate_postcopy_ram() &&
1979                     s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
1980                     pend_nonpost <= threshold_size &&
1981                     atomic_read(&s->start_postcopy)) {
1982 
1983                     if (!postcopy_start(s, &old_vm_running)) {
1984                         current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
1985                         entered_postcopy = true;
1986                     }
1987 
1988                     continue;
1989                 }
1990                 /* Just another iteration step */
1991                 qemu_savevm_state_iterate(s->to_dst_file, entered_postcopy);
1992             } else {
1993                 trace_migration_thread_low_pending(pending_size);
1994                 migration_completion(s, current_active_state,
1995                                      &old_vm_running, &start_time);
1996                 break;
1997             }
1998         }
1999 
2000         if (qemu_file_get_error(s->to_dst_file)) {
2001             migrate_set_state(&s->state, current_active_state,
2002                               MIGRATION_STATUS_FAILED);
2003             trace_migration_thread_file_err();
2004             break;
2005         }
2006         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2007         if (current_time >= initial_time + BUFFER_DELAY) {
2008             uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
2009                                          initial_bytes;
2010             uint64_t time_spent = current_time - initial_time;
2011             double bandwidth = (double)transferred_bytes / time_spent;
2012             threshold_size = bandwidth * s->parameters.downtime_limit;
2013 
2014             s->mbps = (((double) transferred_bytes * 8.0) /
2015                     ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
2016 
2017             trace_migrate_transferred(transferred_bytes, time_spent,
2018                                       bandwidth, threshold_size);
2019             /* if we haven't sent anything, we don't want to recalculate
2020                10000 is a small enough number for our purposes */
2021             if (ram_counters.dirty_pages_rate && transferred_bytes > 10000) {
2022                 s->expected_downtime = ram_counters.dirty_pages_rate *
2023                     qemu_target_page_size() / bandwidth;
2024             }
2025 
2026             qemu_file_reset_rate_limit(s->to_dst_file);
2027             initial_time = current_time;
2028             initial_bytes = qemu_ftell(s->to_dst_file);
2029         }
2030         if (qemu_file_rate_limit(s->to_dst_file)) {
2031             /* usleep expects microseconds */
2032             g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
2033         }
2034     }
2035 
2036     trace_migration_thread_after_loop();
2037     /* If we enabled cpu throttling for auto-converge, turn it off. */
2038     cpu_throttle_stop();
2039     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2040 
2041     qemu_mutex_lock_iothread();
2042     /*
2043      * The resource has been allocated by migration will be reused in COLO
2044      * process, so don't release them.
2045      */
2046     if (!enable_colo) {
2047         qemu_savevm_state_cleanup();
2048     }
2049     if (s->state == MIGRATION_STATUS_COMPLETED) {
2050         uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
2051         s->total_time = end_time - s->total_time;
2052         if (!entered_postcopy) {
2053             s->downtime = end_time - start_time;
2054         }
2055         if (s->total_time) {
2056             s->mbps = (((double) transferred_bytes * 8.0) /
2057                        ((double) s->total_time)) / 1000;
2058         }
2059         runstate_set(RUN_STATE_POSTMIGRATE);
2060     } else {
2061         if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
2062             migrate_start_colo_process(s);
2063             qemu_savevm_state_cleanup();
2064             /*
2065             * Fixme: we will run VM in COLO no matter its old running state.
2066             * After exited COLO, we will keep running.
2067             */
2068             old_vm_running = true;
2069         }
2070         if (old_vm_running && !entered_postcopy) {
2071             vm_start();
2072         } else {
2073             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
2074                 runstate_set(RUN_STATE_POSTMIGRATE);
2075             }
2076         }
2077     }
2078     qemu_bh_schedule(s->cleanup_bh);
2079     qemu_mutex_unlock_iothread();
2080 
2081     rcu_unregister_thread();
2082     return NULL;
2083 }
2084 
2085 void migrate_fd_connect(MigrationState *s)
2086 {
2087     s->expected_downtime = s->parameters.downtime_limit;
2088     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
2089 
2090     qemu_file_set_blocking(s->to_dst_file, true);
2091     qemu_file_set_rate_limit(s->to_dst_file,
2092                              s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
2093 
2094     /* Notify before starting migration thread */
2095     notifier_list_notify(&migration_state_notifiers, s);
2096 
2097     /*
2098      * Open the return path; currently for postcopy but other things might
2099      * also want it.
2100      */
2101     if (migrate_postcopy_ram()) {
2102         if (open_return_path_on_source(s)) {
2103             error_report("Unable to open return-path for postcopy");
2104             migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2105                               MIGRATION_STATUS_FAILED);
2106             migrate_fd_cleanup(s);
2107             return;
2108         }
2109     }
2110 
2111     migrate_compress_threads_create();
2112     qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
2113                        QEMU_THREAD_JOINABLE);
2114     s->migration_thread_running = true;
2115 }
2116 
2117