xref: /openbmc/qemu/migration/migration.c (revision c39f95dc)
1 /*
2  * QEMU live migration
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "migration/blocker.h"
20 #include "exec.h"
21 #include "fd.h"
22 #include "socket.h"
23 #include "rdma.h"
24 #include "ram.h"
25 #include "migration/global_state.h"
26 #include "migration/misc.h"
27 #include "migration.h"
28 #include "savevm.h"
29 #include "qemu-file-channel.h"
30 #include "qemu-file.h"
31 #include "migration/vmstate.h"
32 #include "block/block.h"
33 #include "qapi/qmp/qerror.h"
34 #include "qemu/rcu.h"
35 #include "block.h"
36 #include "postcopy-ram.h"
37 #include "qemu/thread.h"
38 #include "qmp-commands.h"
39 #include "trace.h"
40 #include "qapi-event.h"
41 #include "exec/target_page.h"
42 #include "io/channel-buffer.h"
43 #include "migration/colo.h"
44 #include "hw/boards.h"
45 #include "monitor/monitor.h"
46 
/* Migration transfer speed throttling */
#define MAX_THROTTLE  (32 << 20)

/*
 * Amount of time to allocate to each "chunk" of bandwidth-throttled data.
 * XFER_LIMIT_RATIO is how many such chunks fit into 1000 time units.
 */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/*
 * Time in milliseconds we are allowed to stop the source
 * for sending the last part.
 */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime: 2000 seconds, also expressed in milliseconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/*
 * Default decompression thread count; decompression is usually at
 * least 4 times as fast as compression.
 */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Default auto-converge CPU throttle migration parameters */
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

/*
 * The delay time (in ms) between two COLO checkpoints.
 * Note: Please change this default value to 10000 when we support hybrid mode.
 */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
82 
83 static NotifierList migration_state_notifiers =
84     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
85 
86 static bool deferred_incoming;
87 
/*
 * Message types sent on the return path, i.e. from the destination
 * back to the source.  The numeric values are spelled out explicitly;
 * they are written to the return channel as the message type.
 */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID      = 0, /* Must be 0 */
    MIG_RP_MSG_SHUT         = 1, /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG         = 2, /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID = 3, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES    = 4, /* data (start: be64, len: be32) */

    MIG_RP_MSG_MAX               /* one past the last message type */
};
99 
100 /* When we add fault tolerance, we could have several
101    migrations at once.  For now we don't need to add
102    dynamic creation of migration */
103 
104 static MigrationState *current_migration;
105 
106 static bool migration_object_check(MigrationState *ms, Error **errp);
107 
108 void migration_object_init(void)
109 {
110     MachineState *ms = MACHINE(qdev_get_machine());
111     Error *err = NULL;
112 
113     /* This can only be called once. */
114     assert(!current_migration);
115     current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
116 
117     if (!migration_object_check(current_migration, &err)) {
118         error_report_err(err);
119         exit(1);
120     }
121 
122     /*
123      * We cannot really do this in migration_instance_init() since at
124      * that time global properties are not yet applied, then this
125      * value will be definitely replaced by something else.
126      */
127     if (ms->enforce_config_section) {
128         current_migration->send_configuration = true;
129     }
130 }
131 
132 /* For outgoing */
133 MigrationState *migrate_get_current(void)
134 {
135     /* This can only be called after the object created. */
136     assert(current_migration);
137     return current_migration;
138 }
139 
140 MigrationIncomingState *migration_incoming_get_current(void)
141 {
142     static bool once;
143     static MigrationIncomingState mis_current;
144 
145     if (!once) {
146         mis_current.state = MIGRATION_STATUS_NONE;
147         memset(&mis_current, 0, sizeof(MigrationIncomingState));
148         qemu_mutex_init(&mis_current.rp_mutex);
149         qemu_event_init(&mis_current.main_thread_load_event, false);
150         once = true;
151     }
152     return &mis_current;
153 }
154 
155 void migration_incoming_state_destroy(void)
156 {
157     struct MigrationIncomingState *mis = migration_incoming_get_current();
158 
159     if (mis->to_src_file) {
160         /* Tell source that we are done */
161         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
162         qemu_fclose(mis->to_src_file);
163         mis->to_src_file = NULL;
164     }
165 
166     if (mis->from_src_file) {
167         qemu_fclose(mis->from_src_file);
168         mis->from_src_file = NULL;
169     }
170 
171     qemu_event_reset(&mis->main_thread_load_event);
172 }
173 
174 static void migrate_generate_event(int new_state)
175 {
176     if (migrate_use_events()) {
177         qapi_event_send_migration(new_state, &error_abort);
178     }
179 }
180 
181 /*
182  * Called on -incoming with a defer: uri.
183  * The migration can be started later after any parameters have been
184  * changed.
185  */
186 static void deferred_incoming_migration(Error **errp)
187 {
188     if (deferred_incoming) {
189         error_setg(errp, "Incoming migration already deferred");
190     }
191     deferred_incoming = true;
192 }
193 
194 /*
195  * Send a message on the return channel back to the source
196  * of the migration.
197  */
198 static void migrate_send_rp_message(MigrationIncomingState *mis,
199                                     enum mig_rp_message_type message_type,
200                                     uint16_t len, void *data)
201 {
202     trace_migrate_send_rp_message((int)message_type, len);
203     qemu_mutex_lock(&mis->rp_mutex);
204     qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
205     qemu_put_be16(mis->to_src_file, len);
206     qemu_put_buffer(mis->to_src_file, data, len);
207     qemu_fflush(mis->to_src_file);
208     qemu_mutex_unlock(&mis->rp_mutex);
209 }
210 
211 /* Request a range of pages from the source VM at the given
212  * start address.
213  *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
214  *           as the last request (a name must have been given previously)
215  *   Start: Address offset within the RB
216  *   Len: Length in bytes required - must be a multiple of pagesize
217  */
218 void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
219                                ram_addr_t start, size_t len)
220 {
221     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
222     size_t msglen = 12; /* start + len */
223 
224     *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
225     *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
226 
227     if (rbname) {
228         int rbname_len = strlen(rbname);
229         assert(rbname_len < 256);
230 
231         bufc[msglen++] = rbname_len;
232         memcpy(bufc + msglen, rbname, rbname_len);
233         msglen += rbname_len;
234         migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
235     } else {
236         migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
237     }
238 }
239 
240 void qemu_start_incoming_migration(const char *uri, Error **errp)
241 {
242     const char *p;
243 
244     qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
245     if (!strcmp(uri, "defer")) {
246         deferred_incoming_migration(errp);
247     } else if (strstart(uri, "tcp:", &p)) {
248         tcp_start_incoming_migration(p, errp);
249 #ifdef CONFIG_RDMA
250     } else if (strstart(uri, "rdma:", &p)) {
251         rdma_start_incoming_migration(p, errp);
252 #endif
253     } else if (strstart(uri, "exec:", &p)) {
254         exec_start_incoming_migration(p, errp);
255     } else if (strstart(uri, "unix:", &p)) {
256         unix_start_incoming_migration(p, errp);
257     } else if (strstart(uri, "fd:", &p)) {
258         fd_start_incoming_migration(p, errp);
259     } else {
260         error_setg(errp, "unknown migration protocol: %s", uri);
261     }
262 }
263 
264 static void process_incoming_migration_bh(void *opaque)
265 {
266     Error *local_err = NULL;
267     MigrationIncomingState *mis = opaque;
268 
269     /* Make sure all file formats flush their mutable metadata.
270      * If we get an error here, just don't restart the VM yet. */
271     bdrv_invalidate_cache_all(&local_err);
272     if (local_err) {
273         error_report_err(local_err);
274         local_err = NULL;
275         autostart = false;
276     }
277 
278     /*
279      * This must happen after all error conditions are dealt with and
280      * we're sure the VM is going to be running on this host.
281      */
282     qemu_announce_self();
283 
284     if (multifd_load_cleanup(&local_err) != 0) {
285         error_report_err(local_err);
286         autostart = false;
287     }
288     /* If global state section was not received or we are in running
289        state, we need to obey autostart. Any other state is set with
290        runstate_set. */
291 
292     if (!global_state_received() ||
293         global_state_get_runstate() == RUN_STATE_RUNNING) {
294         if (autostart) {
295             vm_start();
296         } else {
297             runstate_set(RUN_STATE_PAUSED);
298         }
299     } else {
300         runstate_set(global_state_get_runstate());
301     }
302     /*
303      * This must happen after any state changes since as soon as an external
304      * observer sees this event they might start to prod at the VM assuming
305      * it's ready to use.
306      */
307     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
308                       MIGRATION_STATUS_COMPLETED);
309     qemu_bh_delete(mis->bh);
310     migration_incoming_state_destroy();
311 }
312 
313 static void process_incoming_migration_co(void *opaque)
314 {
315     MigrationIncomingState *mis = migration_incoming_get_current();
316     PostcopyState ps;
317     int ret;
318 
319     assert(mis->from_src_file);
320     mis->largest_page_size = qemu_ram_pagesize_largest();
321     postcopy_state_set(POSTCOPY_INCOMING_NONE);
322     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
323                       MIGRATION_STATUS_ACTIVE);
324     ret = qemu_loadvm_state(mis->from_src_file);
325 
326     ps = postcopy_state_get();
327     trace_process_incoming_migration_co_end(ret, ps);
328     if (ps != POSTCOPY_INCOMING_NONE) {
329         if (ps == POSTCOPY_INCOMING_ADVISE) {
330             /*
331              * Where a migration had postcopy enabled (and thus went to advise)
332              * but managed to complete within the precopy period, we can use
333              * the normal exit.
334              */
335             postcopy_ram_incoming_cleanup(mis);
336         } else if (ret >= 0) {
337             /*
338              * Postcopy was started, cleanup should happen at the end of the
339              * postcopy thread.
340              */
341             trace_process_incoming_migration_co_postcopy_end_main();
342             return;
343         }
344         /* Else if something went wrong then just fall out of the normal exit */
345     }
346 
347     /* we get COLO info, and know if we are in COLO mode */
348     if (!ret && migration_incoming_enable_colo()) {
349         mis->migration_incoming_co = qemu_coroutine_self();
350         qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
351              colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
352         mis->have_colo_incoming_thread = true;
353         qemu_coroutine_yield();
354 
355         /* Wait checkpoint incoming thread exit before free resource */
356         qemu_thread_join(&mis->colo_incoming_thread);
357     }
358 
359     if (ret < 0) {
360         Error *local_err = NULL;
361 
362         migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
363                           MIGRATION_STATUS_FAILED);
364         error_report("load of migration failed: %s", strerror(-ret));
365         qemu_fclose(mis->from_src_file);
366         if (multifd_load_cleanup(&local_err) != 0) {
367             error_report_err(local_err);
368         }
369         exit(EXIT_FAILURE);
370     }
371     mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
372     qemu_bh_schedule(mis->bh);
373 }
374 
375 static void migration_incoming_setup(QEMUFile *f)
376 {
377     MigrationIncomingState *mis = migration_incoming_get_current();
378 
379     if (multifd_load_setup() != 0) {
380         /* We haven't been able to create multifd threads
381            nothing better to do */
382         exit(EXIT_FAILURE);
383     }
384 
385     if (!mis->from_src_file) {
386         mis->from_src_file = f;
387     }
388     qemu_file_set_blocking(f, false);
389 }
390 
391 static void migration_incoming_process(void)
392 {
393     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
394     qemu_coroutine_enter(co);
395 }
396 
397 void migration_fd_process_incoming(QEMUFile *f)
398 {
399     migration_incoming_setup(f);
400     migration_incoming_process();
401 }
402 
403 void migration_ioc_process_incoming(QIOChannel *ioc)
404 {
405     MigrationIncomingState *mis = migration_incoming_get_current();
406 
407     if (!mis->from_src_file) {
408         QEMUFile *f = qemu_fopen_channel_input(ioc);
409         migration_fd_process_incoming(f);
410     }
411     /* We still only have a single channel.  Nothing to do here yet */
412 }
413 
/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.  This implementation reports true
 * unconditionally.
 */
bool migration_has_all_channels(void)
{
    const bool all_channels_ready = true;

    return all_channels_ready;
}
424 
425 /*
426  * Send a 'SHUT' message on the return channel with the given value
427  * to indicate that we've finished with the RP.  Non-0 value indicates
428  * error.
429  */
430 void migrate_send_rp_shut(MigrationIncomingState *mis,
431                           uint32_t value)
432 {
433     uint32_t buf;
434 
435     buf = cpu_to_be32(value);
436     migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
437 }
438 
439 /*
440  * Send a 'PONG' message on the return channel with the given value
441  * (normally in response to a 'PING')
442  */
443 void migrate_send_rp_pong(MigrationIncomingState *mis,
444                           uint32_t value)
445 {
446     uint32_t buf;
447 
448     buf = cpu_to_be32(value);
449     migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
450 }
451 
452 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
453 {
454     MigrationCapabilityStatusList *head = NULL;
455     MigrationCapabilityStatusList *caps;
456     MigrationState *s = migrate_get_current();
457     int i;
458 
459     caps = NULL; /* silence compiler warning */
460     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
461 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
462         if (i == MIGRATION_CAPABILITY_BLOCK) {
463             continue;
464         }
465 #endif
466         if (head == NULL) {
467             head = g_malloc0(sizeof(*caps));
468             caps = head;
469         } else {
470             caps->next = g_malloc0(sizeof(*caps));
471             caps = caps->next;
472         }
473         caps->value =
474             g_malloc(sizeof(*caps->value));
475         caps->value->capability = i;
476         caps->value->state = s->enabled_capabilities[i];
477     }
478 
479     return head;
480 }
481 
482 MigrationParameters *qmp_query_migrate_parameters(Error **errp)
483 {
484     MigrationParameters *params;
485     MigrationState *s = migrate_get_current();
486 
487     /* TODO use QAPI_CLONE() instead of duplicating it inline */
488     params = g_malloc0(sizeof(*params));
489     params->has_compress_level = true;
490     params->compress_level = s->parameters.compress_level;
491     params->has_compress_threads = true;
492     params->compress_threads = s->parameters.compress_threads;
493     params->has_decompress_threads = true;
494     params->decompress_threads = s->parameters.decompress_threads;
495     params->has_cpu_throttle_initial = true;
496     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
497     params->has_cpu_throttle_increment = true;
498     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
499     params->has_tls_creds = true;
500     params->tls_creds = g_strdup(s->parameters.tls_creds);
501     params->has_tls_hostname = true;
502     params->tls_hostname = g_strdup(s->parameters.tls_hostname);
503     params->has_max_bandwidth = true;
504     params->max_bandwidth = s->parameters.max_bandwidth;
505     params->has_downtime_limit = true;
506     params->downtime_limit = s->parameters.downtime_limit;
507     params->has_x_checkpoint_delay = true;
508     params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
509     params->has_block_incremental = true;
510     params->block_incremental = s->parameters.block_incremental;
511     params->has_x_multifd_channels = true;
512     params->x_multifd_channels = s->parameters.x_multifd_channels;
513     params->has_x_multifd_page_count = true;
514     params->x_multifd_page_count = s->parameters.x_multifd_page_count;
515 
516     return params;
517 }
518 
519 /*
520  * Return true if we're already in the middle of a migration
521  * (i.e. any of the active or setup states)
522  */
523 static bool migration_is_setup_or_active(int state)
524 {
525     switch (state) {
526     case MIGRATION_STATUS_ACTIVE:
527     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
528     case MIGRATION_STATUS_SETUP:
529         return true;
530 
531     default:
532         return false;
533 
534     }
535 }
536 
537 static void populate_ram_info(MigrationInfo *info, MigrationState *s)
538 {
539     info->has_ram = true;
540     info->ram = g_malloc0(sizeof(*info->ram));
541     info->ram->transferred = ram_counters.transferred;
542     info->ram->total = ram_bytes_total();
543     info->ram->duplicate = ram_counters.duplicate;
544     /* legacy value.  It is not used anymore */
545     info->ram->skipped = 0;
546     info->ram->normal = ram_counters.normal;
547     info->ram->normal_bytes = ram_counters.normal *
548         qemu_target_page_size();
549     info->ram->mbps = s->mbps;
550     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
551     info->ram->postcopy_requests = ram_counters.postcopy_requests;
552     info->ram->page_size = qemu_target_page_size();
553 
554     if (migrate_use_xbzrle()) {
555         info->has_xbzrle_cache = true;
556         info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
557         info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
558         info->xbzrle_cache->bytes = xbzrle_counters.bytes;
559         info->xbzrle_cache->pages = xbzrle_counters.pages;
560         info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
561         info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
562         info->xbzrle_cache->overflow = xbzrle_counters.overflow;
563     }
564 
565     if (cpu_throttle_active()) {
566         info->has_cpu_throttle_percentage = true;
567         info->cpu_throttle_percentage = cpu_throttle_get_percentage();
568     }
569 
570     if (s->state != MIGRATION_STATUS_COMPLETED) {
571         info->ram->remaining = ram_bytes_remaining();
572         info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
573     }
574 }
575 
576 static void populate_disk_info(MigrationInfo *info)
577 {
578     if (blk_mig_active()) {
579         info->has_disk = true;
580         info->disk = g_malloc0(sizeof(*info->disk));
581         info->disk->transferred = blk_mig_bytes_transferred();
582         info->disk->remaining = blk_mig_bytes_remaining();
583         info->disk->total = blk_mig_bytes_total();
584     }
585 }
586 
587 MigrationInfo *qmp_query_migrate(Error **errp)
588 {
589     MigrationInfo *info = g_malloc0(sizeof(*info));
590     MigrationState *s = migrate_get_current();
591 
592     switch (s->state) {
593     case MIGRATION_STATUS_NONE:
594         /* no migration has happened ever */
595         break;
596     case MIGRATION_STATUS_SETUP:
597         info->has_status = true;
598         info->has_total_time = false;
599         break;
600     case MIGRATION_STATUS_ACTIVE:
601     case MIGRATION_STATUS_CANCELLING:
602     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
603          /* TODO add some postcopy stats */
604         info->has_status = true;
605         info->has_total_time = true;
606         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
607             - s->total_time;
608         info->has_expected_downtime = true;
609         info->expected_downtime = s->expected_downtime;
610         info->has_setup_time = true;
611         info->setup_time = s->setup_time;
612 
613         populate_ram_info(info, s);
614         populate_disk_info(info);
615         break;
616     case MIGRATION_STATUS_COLO:
617         info->has_status = true;
618         /* TODO: display COLO specific information (checkpoint info etc.) */
619         break;
620     case MIGRATION_STATUS_COMPLETED:
621         info->has_status = true;
622         info->has_total_time = true;
623         info->total_time = s->total_time;
624         info->has_downtime = true;
625         info->downtime = s->downtime;
626         info->has_setup_time = true;
627         info->setup_time = s->setup_time;
628 
629         populate_ram_info(info, s);
630         break;
631     case MIGRATION_STATUS_FAILED:
632         info->has_status = true;
633         if (s->error) {
634             info->has_error_desc = true;
635             info->error_desc = g_strdup(error_get_pretty(s->error));
636         }
637         break;
638     case MIGRATION_STATUS_CANCELLED:
639         info->has_status = true;
640         break;
641     }
642     info->status = s->state;
643 
644     return info;
645 }
646 
647 /**
648  * @migration_caps_check - check capability validity
649  *
650  * @cap_list: old capability list, array of bool
651  * @params: new capabilities to be applied soon
652  * @errp: set *errp if the check failed, with reason
653  *
654  * Returns true if check passed, otherwise false.
655  */
656 static bool migrate_caps_check(bool *cap_list,
657                                MigrationCapabilityStatusList *params,
658                                Error **errp)
659 {
660     MigrationCapabilityStatusList *cap;
661     bool old_postcopy_cap;
662     MigrationIncomingState *mis = migration_incoming_get_current();
663 
664     old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
665 
666     for (cap = params; cap; cap = cap->next) {
667         cap_list[cap->value->capability] = cap->value->state;
668     }
669 
670 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
671     if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
672         error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
673                    "block migration");
674         error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
675         return false;
676     }
677 #endif
678 
679     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
680         if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
681             /* The decompression threads asynchronously write into RAM
682              * rather than use the atomic copies needed to avoid
683              * userfaulting.  It should be possible to fix the decompression
684              * threads for compatibility in future.
685              */
686             error_setg(errp, "Postcopy is not currently compatible "
687                        "with compression");
688             return false;
689         }
690 
691         /* This check is reasonably expensive, so only when it's being
692          * set the first time, also it's only the destination that needs
693          * special support.
694          */
695         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
696             !postcopy_ram_supported_by_host(mis)) {
697             /* postcopy_ram_supported_by_host will have emitted a more
698              * detailed message
699              */
700             error_setg(errp, "Postcopy is not supported");
701             return false;
702         }
703     }
704 
705     return true;
706 }
707 
708 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
709                                   Error **errp)
710 {
711     MigrationState *s = migrate_get_current();
712     MigrationCapabilityStatusList *cap;
713 
714     if (migration_is_setup_or_active(s->state)) {
715         error_setg(errp, QERR_MIGRATION_ACTIVE);
716         return;
717     }
718 
719     if (!migrate_caps_check(s->enabled_capabilities, params, errp)) {
720         return;
721     }
722 
723     for (cap = params; cap; cap = cap->next) {
724         s->enabled_capabilities[cap->value->capability] = cap->value->state;
725     }
726 }
727 
728 /*
729  * Check whether the parameters are valid. Error will be put into errp
730  * (if provided). Return true if valid, otherwise false.
731  */
732 static bool migrate_params_check(MigrationParameters *params, Error **errp)
733 {
734     if (params->has_compress_level &&
735         (params->compress_level < 0 || params->compress_level > 9)) {
736         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
737                    "is invalid, it should be in the range of 0 to 9");
738         return false;
739     }
740 
741     if (params->has_compress_threads &&
742         (params->compress_threads < 1 || params->compress_threads > 255)) {
743         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
744                    "compress_threads",
745                    "is invalid, it should be in the range of 1 to 255");
746         return false;
747     }
748 
749     if (params->has_decompress_threads &&
750         (params->decompress_threads < 1 || params->decompress_threads > 255)) {
751         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
752                    "decompress_threads",
753                    "is invalid, it should be in the range of 1 to 255");
754         return false;
755     }
756 
757     if (params->has_cpu_throttle_initial &&
758         (params->cpu_throttle_initial < 1 ||
759          params->cpu_throttle_initial > 99)) {
760         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
761                    "cpu_throttle_initial",
762                    "an integer in the range of 1 to 99");
763         return false;
764     }
765 
766     if (params->has_cpu_throttle_increment &&
767         (params->cpu_throttle_increment < 1 ||
768          params->cpu_throttle_increment > 99)) {
769         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
770                    "cpu_throttle_increment",
771                    "an integer in the range of 1 to 99");
772         return false;
773     }
774 
775     if (params->has_max_bandwidth &&
776         (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) {
777         error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
778                          " range of 0 to %zu bytes/second", SIZE_MAX);
779         return false;
780     }
781 
782     if (params->has_downtime_limit &&
783         (params->downtime_limit < 0 ||
784          params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
785         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
786                          "the range of 0 to %d milliseconds",
787                          MAX_MIGRATE_DOWNTIME);
788         return false;
789     }
790 
791     if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
792         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
793                     "x_checkpoint_delay",
794                     "is invalid, it should be positive");
795         return false;
796     }
797     if (params->has_x_multifd_channels &&
798         (params->x_multifd_channels < 1 || params->x_multifd_channels > 255)) {
799         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
800                    "multifd_channels",
801                    "is invalid, it should be in the range of 1 to 255");
802         return false;
803     }
804     if (params->has_x_multifd_page_count &&
805             (params->x_multifd_page_count < 1 ||
806              params->x_multifd_page_count > 10000)) {
807         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
808                    "multifd_page_count",
809                    "is invalid, it should be in the range of 1 to 10000");
810         return false;
811     }
812 
813     return true;
814 }
815 
816 static void migrate_params_test_apply(MigrateSetParameters *params,
817                                       MigrationParameters *dest)
818 {
819     *dest = migrate_get_current()->parameters;
820 
821     /* TODO use QAPI_CLONE() instead of duplicating it inline */
822 
823     if (params->has_compress_level) {
824         dest->compress_level = params->compress_level;
825     }
826 
827     if (params->has_compress_threads) {
828         dest->compress_threads = params->compress_threads;
829     }
830 
831     if (params->has_decompress_threads) {
832         dest->decompress_threads = params->decompress_threads;
833     }
834 
835     if (params->has_cpu_throttle_initial) {
836         dest->cpu_throttle_initial = params->cpu_throttle_initial;
837     }
838 
839     if (params->has_cpu_throttle_increment) {
840         dest->cpu_throttle_increment = params->cpu_throttle_increment;
841     }
842 
843     if (params->has_tls_creds) {
844         assert(params->tls_creds->type == QTYPE_QSTRING);
845         dest->tls_creds = g_strdup(params->tls_creds->u.s);
846     }
847 
848     if (params->has_tls_hostname) {
849         assert(params->tls_hostname->type == QTYPE_QSTRING);
850         dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
851     }
852 
853     if (params->has_max_bandwidth) {
854         dest->max_bandwidth = params->max_bandwidth;
855     }
856 
857     if (params->has_downtime_limit) {
858         dest->downtime_limit = params->downtime_limit;
859     }
860 
861     if (params->has_x_checkpoint_delay) {
862         dest->x_checkpoint_delay = params->x_checkpoint_delay;
863     }
864 
865     if (params->has_block_incremental) {
866         dest->block_incremental = params->block_incremental;
867     }
868 }
869 
870 static void migrate_params_apply(MigrateSetParameters *params)
871 {
872     MigrationState *s = migrate_get_current();
873 
874     /* TODO use QAPI_CLONE() instead of duplicating it inline */
875 
876     if (params->has_compress_level) {
877         s->parameters.compress_level = params->compress_level;
878     }
879 
880     if (params->has_compress_threads) {
881         s->parameters.compress_threads = params->compress_threads;
882     }
883 
884     if (params->has_decompress_threads) {
885         s->parameters.decompress_threads = params->decompress_threads;
886     }
887 
888     if (params->has_cpu_throttle_initial) {
889         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
890     }
891 
892     if (params->has_cpu_throttle_increment) {
893         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
894     }
895 
896     if (params->has_tls_creds) {
897         g_free(s->parameters.tls_creds);
898         assert(params->tls_creds->type == QTYPE_QSTRING);
899         s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
900     }
901 
902     if (params->has_tls_hostname) {
903         g_free(s->parameters.tls_hostname);
904         assert(params->tls_hostname->type == QTYPE_QSTRING);
905         s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
906     }
907 
908     if (params->has_max_bandwidth) {
909         s->parameters.max_bandwidth = params->max_bandwidth;
910         if (s->to_dst_file) {
911             qemu_file_set_rate_limit(s->to_dst_file,
912                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
913         }
914     }
915 
916     if (params->has_downtime_limit) {
917         s->parameters.downtime_limit = params->downtime_limit;
918     }
919 
920     if (params->has_x_checkpoint_delay) {
921         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
922         if (migration_in_colo_state()) {
923             colo_checkpoint_notify(s);
924         }
925     }
926 
927     if (params->has_block_incremental) {
928         s->parameters.block_incremental = params->block_incremental;
929     }
930     if (params->has_x_multifd_channels) {
931         s->parameters.x_multifd_channels = params->x_multifd_channels;
932     }
933     if (params->has_x_multifd_page_count) {
934         s->parameters.x_multifd_page_count = params->x_multifd_page_count;
935     }
936 }
937 
938 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
939 {
940     MigrationParameters tmp;
941 
942     /* TODO Rewrite "" to null instead */
943     if (params->has_tls_creds
944         && params->tls_creds->type == QTYPE_QNULL) {
945         QDECREF(params->tls_creds->u.n);
946         params->tls_creds->type = QTYPE_QSTRING;
947         params->tls_creds->u.s = strdup("");
948     }
949     /* TODO Rewrite "" to null instead */
950     if (params->has_tls_hostname
951         && params->tls_hostname->type == QTYPE_QNULL) {
952         QDECREF(params->tls_hostname->u.n);
953         params->tls_hostname->type = QTYPE_QSTRING;
954         params->tls_hostname->u.s = strdup("");
955     }
956 
957     migrate_params_test_apply(params, &tmp);
958 
959     if (!migrate_params_check(&tmp, errp)) {
960         /* Invalid parameter */
961         return;
962     }
963 
964     migrate_params_apply(params);
965 }
966 
967 
968 void qmp_migrate_start_postcopy(Error **errp)
969 {
970     MigrationState *s = migrate_get_current();
971 
972     if (!migrate_postcopy_ram()) {
973         error_setg(errp, "Enable postcopy with migrate_set_capability before"
974                          " the start of migration");
975         return;
976     }
977 
978     if (s->state == MIGRATION_STATUS_NONE) {
979         error_setg(errp, "Postcopy must be started after migration has been"
980                          " started");
981         return;
982     }
983     /*
984      * we don't error if migration has finished since that would be racy
985      * with issuing this command.
986      */
987     atomic_set(&s->start_postcopy, true);
988 }
989 
990 /* shared migration helpers */
991 
992 void migrate_set_state(int *state, int old_state, int new_state)
993 {
994     assert(new_state < MIGRATION_STATUS__MAX);
995     if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
996         trace_migrate_set_state(MigrationStatus_str(new_state));
997         migrate_generate_event(new_state);
998     }
999 }
1000 
1001 static MigrationCapabilityStatusList *migrate_cap_add(
1002     MigrationCapabilityStatusList *list,
1003     MigrationCapability index,
1004     bool state)
1005 {
1006     MigrationCapabilityStatusList *cap;
1007 
1008     cap = g_new0(MigrationCapabilityStatusList, 1);
1009     cap->value = g_new0(MigrationCapabilityStatus, 1);
1010     cap->value->capability = index;
1011     cap->value->state = state;
1012     cap->next = list;
1013 
1014     return cap;
1015 }
1016 
1017 void migrate_set_block_enabled(bool value, Error **errp)
1018 {
1019     MigrationCapabilityStatusList *cap;
1020 
1021     cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
1022     qmp_migrate_set_capabilities(cap, errp);
1023     qapi_free_MigrationCapabilityStatusList(cap);
1024 }
1025 
1026 static void migrate_set_block_incremental(MigrationState *s, bool value)
1027 {
1028     s->parameters.block_incremental = value;
1029 }
1030 
1031 static void block_cleanup_parameters(MigrationState *s)
1032 {
1033     if (s->must_remove_block_options) {
1034         /* setting to false can never fail */
1035         migrate_set_block_enabled(false, &error_abort);
1036         migrate_set_block_incremental(s, false);
1037         s->must_remove_block_options = false;
1038     }
1039 }
1040 
1041 static void migrate_fd_cleanup(void *opaque)
1042 {
1043     MigrationState *s = opaque;
1044 
1045     qemu_bh_delete(s->cleanup_bh);
1046     s->cleanup_bh = NULL;
1047 
1048     if (s->to_dst_file) {
1049         Error *local_err = NULL;
1050 
1051         trace_migrate_fd_cleanup();
1052         qemu_mutex_unlock_iothread();
1053         if (s->migration_thread_running) {
1054             qemu_thread_join(&s->thread);
1055             s->migration_thread_running = false;
1056         }
1057         qemu_mutex_lock_iothread();
1058 
1059         if (multifd_save_cleanup(&local_err) != 0) {
1060             error_report_err(local_err);
1061         }
1062         qemu_fclose(s->to_dst_file);
1063         s->to_dst_file = NULL;
1064     }
1065 
1066     assert((s->state != MIGRATION_STATUS_ACTIVE) &&
1067            (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));
1068 
1069     if (s->state == MIGRATION_STATUS_CANCELLING) {
1070         migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
1071                           MIGRATION_STATUS_CANCELLED);
1072     }
1073 
1074     notifier_list_notify(&migration_state_notifiers, s);
1075     block_cleanup_parameters(s);
1076 }
1077 
1078 void migrate_fd_error(MigrationState *s, const Error *error)
1079 {
1080     trace_migrate_fd_error(error_get_pretty(error));
1081     assert(s->to_dst_file == NULL);
1082     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1083                       MIGRATION_STATUS_FAILED);
1084     if (!s->error) {
1085         s->error = error_copy(error);
1086     }
1087     notifier_list_notify(&migration_state_notifiers, s);
1088     block_cleanup_parameters(s);
1089 }
1090 
1091 static void migrate_fd_cancel(MigrationState *s)
1092 {
1093     int old_state ;
1094     QEMUFile *f = migrate_get_current()->to_dst_file;
1095     trace_migrate_fd_cancel();
1096 
1097     if (s->rp_state.from_dst_file) {
1098         /* shutdown the rp socket, so causing the rp thread to shutdown */
1099         qemu_file_shutdown(s->rp_state.from_dst_file);
1100     }
1101 
1102     do {
1103         old_state = s->state;
1104         if (!migration_is_setup_or_active(old_state)) {
1105             break;
1106         }
1107         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
1108     } while (s->state != MIGRATION_STATUS_CANCELLING);
1109 
1110     /*
1111      * If we're unlucky the migration code might be stuck somewhere in a
1112      * send/write while the network has failed and is waiting to timeout;
1113      * if we've got shutdown(2) available then we can force it to quit.
1114      * The outgoing qemu file gets closed in migrate_fd_cleanup that is
1115      * called in a bh, so there is no race against this cancel.
1116      */
1117     if (s->state == MIGRATION_STATUS_CANCELLING && f) {
1118         qemu_file_shutdown(f);
1119     }
1120     if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
1121         Error *local_err = NULL;
1122 
1123         bdrv_invalidate_cache_all(&local_err);
1124         if (local_err) {
1125             error_report_err(local_err);
1126         } else {
1127             s->block_inactive = false;
1128         }
1129     }
1130     block_cleanup_parameters(s);
1131 }
1132 
1133 void add_migration_state_change_notifier(Notifier *notify)
1134 {
1135     notifier_list_add(&migration_state_notifiers, notify);
1136 }
1137 
1138 void remove_migration_state_change_notifier(Notifier *notify)
1139 {
1140     notifier_remove(notify);
1141 }
1142 
1143 bool migration_in_setup(MigrationState *s)
1144 {
1145     return s->state == MIGRATION_STATUS_SETUP;
1146 }
1147 
1148 bool migration_has_finished(MigrationState *s)
1149 {
1150     return s->state == MIGRATION_STATUS_COMPLETED;
1151 }
1152 
1153 bool migration_has_failed(MigrationState *s)
1154 {
1155     return (s->state == MIGRATION_STATUS_CANCELLED ||
1156             s->state == MIGRATION_STATUS_FAILED);
1157 }
1158 
1159 bool migration_in_postcopy(void)
1160 {
1161     MigrationState *s = migrate_get_current();
1162 
1163     return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1164 }
1165 
1166 bool migration_in_postcopy_after_devices(MigrationState *s)
1167 {
1168     return migration_in_postcopy() && s->postcopy_after_devices;
1169 }
1170 
1171 bool migration_is_idle(void)
1172 {
1173     MigrationState *s = migrate_get_current();
1174 
1175     switch (s->state) {
1176     case MIGRATION_STATUS_NONE:
1177     case MIGRATION_STATUS_CANCELLED:
1178     case MIGRATION_STATUS_COMPLETED:
1179     case MIGRATION_STATUS_FAILED:
1180         return true;
1181     case MIGRATION_STATUS_SETUP:
1182     case MIGRATION_STATUS_CANCELLING:
1183     case MIGRATION_STATUS_ACTIVE:
1184     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1185     case MIGRATION_STATUS_COLO:
1186         return false;
1187     case MIGRATION_STATUS__MAX:
1188         g_assert_not_reached();
1189     }
1190 
1191     return false;
1192 }
1193 
1194 MigrationState *migrate_init(void)
1195 {
1196     MigrationState *s = migrate_get_current();
1197 
1198     /*
1199      * Reinitialise all migration state, except
1200      * parameters/capabilities that the user set, and
1201      * locks.
1202      */
1203     s->bytes_xfer = 0;
1204     s->xfer_limit = 0;
1205     s->cleanup_bh = 0;
1206     s->to_dst_file = NULL;
1207     s->state = MIGRATION_STATUS_NONE;
1208     s->rp_state.from_dst_file = NULL;
1209     s->rp_state.error = false;
1210     s->mbps = 0.0;
1211     s->downtime = 0;
1212     s->expected_downtime = 0;
1213     s->setup_time = 0;
1214     s->start_postcopy = false;
1215     s->postcopy_after_devices = false;
1216     s->migration_thread_running = false;
1217     error_free(s->error);
1218     s->error = NULL;
1219 
1220     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1221 
1222     s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1223     return s;
1224 }
1225 
1226 static GSList *migration_blockers;
1227 
1228 int migrate_add_blocker(Error *reason, Error **errp)
1229 {
1230     if (migrate_get_current()->only_migratable) {
1231         error_propagate(errp, error_copy(reason));
1232         error_prepend(errp, "disallowing migration blocker "
1233                           "(--only_migratable) for: ");
1234         return -EACCES;
1235     }
1236 
1237     if (migration_is_idle()) {
1238         migration_blockers = g_slist_prepend(migration_blockers, reason);
1239         return 0;
1240     }
1241 
1242     error_propagate(errp, error_copy(reason));
1243     error_prepend(errp, "disallowing migration blocker (migration in "
1244                       "progress) for: ");
1245     return -EBUSY;
1246 }
1247 
1248 void migrate_del_blocker(Error *reason)
1249 {
1250     migration_blockers = g_slist_remove(migration_blockers, reason);
1251 }
1252 
1253 void qmp_migrate_incoming(const char *uri, Error **errp)
1254 {
1255     Error *local_err = NULL;
1256     static bool once = true;
1257 
1258     if (!deferred_incoming) {
1259         error_setg(errp, "For use with '-incoming defer'");
1260         return;
1261     }
1262     if (!once) {
1263         error_setg(errp, "The incoming migration has already been started");
1264     }
1265 
1266     qemu_start_incoming_migration(uri, &local_err);
1267 
1268     if (local_err) {
1269         error_propagate(errp, local_err);
1270         return;
1271     }
1272 
1273     once = false;
1274 }
1275 
1276 bool migration_is_blocked(Error **errp)
1277 {
1278     if (qemu_savevm_state_blocked(errp)) {
1279         return true;
1280     }
1281 
1282     if (migration_blockers) {
1283         error_propagate(errp, error_copy(migration_blockers->data));
1284         return true;
1285     }
1286 
1287     return false;
1288 }
1289 
1290 void qmp_migrate(const char *uri, bool has_blk, bool blk,
1291                  bool has_inc, bool inc, bool has_detach, bool detach,
1292                  Error **errp)
1293 {
1294     Error *local_err = NULL;
1295     MigrationState *s = migrate_get_current();
1296     const char *p;
1297 
1298     if (migration_is_setup_or_active(s->state) ||
1299         s->state == MIGRATION_STATUS_CANCELLING ||
1300         s->state == MIGRATION_STATUS_COLO) {
1301         error_setg(errp, QERR_MIGRATION_ACTIVE);
1302         return;
1303     }
1304     if (runstate_check(RUN_STATE_INMIGRATE)) {
1305         error_setg(errp, "Guest is waiting for an incoming migration");
1306         return;
1307     }
1308 
1309     if (migration_is_blocked(errp)) {
1310         return;
1311     }
1312 
1313     if ((has_blk && blk) || (has_inc && inc)) {
1314         if (migrate_use_block() || migrate_use_block_incremental()) {
1315             error_setg(errp, "Command options are incompatible with "
1316                        "current migration capabilities");
1317             return;
1318         }
1319         migrate_set_block_enabled(true, &local_err);
1320         if (local_err) {
1321             error_propagate(errp, local_err);
1322             return;
1323         }
1324         s->must_remove_block_options = true;
1325     }
1326 
1327     if (has_inc && inc) {
1328         migrate_set_block_incremental(s, true);
1329     }
1330 
1331     s = migrate_init();
1332 
1333     if (strstart(uri, "tcp:", &p)) {
1334         tcp_start_outgoing_migration(s, p, &local_err);
1335 #ifdef CONFIG_RDMA
1336     } else if (strstart(uri, "rdma:", &p)) {
1337         rdma_start_outgoing_migration(s, p, &local_err);
1338 #endif
1339     } else if (strstart(uri, "exec:", &p)) {
1340         exec_start_outgoing_migration(s, p, &local_err);
1341     } else if (strstart(uri, "unix:", &p)) {
1342         unix_start_outgoing_migration(s, p, &local_err);
1343     } else if (strstart(uri, "fd:", &p)) {
1344         fd_start_outgoing_migration(s, p, &local_err);
1345     } else {
1346         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
1347                    "a valid migration protocol");
1348         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1349                           MIGRATION_STATUS_FAILED);
1350         return;
1351     }
1352 
1353     if (local_err) {
1354         migrate_fd_error(s, local_err);
1355         error_propagate(errp, local_err);
1356         return;
1357     }
1358 }
1359 
1360 void qmp_migrate_cancel(Error **errp)
1361 {
1362     migrate_fd_cancel(migrate_get_current());
1363 }
1364 
1365 void qmp_migrate_set_cache_size(int64_t value, Error **errp)
1366 {
1367     MigrationState *s = migrate_get_current();
1368     int64_t new_size;
1369 
1370     /* Check for truncation */
1371     if (value != (size_t)value) {
1372         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1373                    "exceeding address space");
1374         return;
1375     }
1376 
1377     /* Cache should not be larger than guest ram size */
1378     if (value > ram_bytes_total()) {
1379         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1380                    "exceeds guest ram size ");
1381         return;
1382     }
1383 
1384     new_size = xbzrle_cache_resize(value);
1385     if (new_size < 0) {
1386         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
1387                    "is smaller than page size");
1388         return;
1389     }
1390 
1391     s->xbzrle_cache_size = new_size;
1392 }
1393 
1394 int64_t qmp_query_migrate_cache_size(Error **errp)
1395 {
1396     return migrate_xbzrle_cache_size();
1397 }
1398 
1399 void qmp_migrate_set_speed(int64_t value, Error **errp)
1400 {
1401     MigrateSetParameters p = {
1402         .has_max_bandwidth = true,
1403         .max_bandwidth = value,
1404     };
1405 
1406     qmp_migrate_set_parameters(&p, errp);
1407 }
1408 
1409 void qmp_migrate_set_downtime(double value, Error **errp)
1410 {
1411     if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
1412         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
1413                          "the range of 0 to %d seconds",
1414                          MAX_MIGRATE_DOWNTIME_SECONDS);
1415         return;
1416     }
1417 
1418     value *= 1000; /* Convert to milliseconds */
1419     value = MAX(0, MIN(INT64_MAX, value));
1420 
1421     MigrateSetParameters p = {
1422         .has_downtime_limit = true,
1423         .downtime_limit = value,
1424     };
1425 
1426     qmp_migrate_set_parameters(&p, errp);
1427 }
1428 
1429 bool migrate_release_ram(void)
1430 {
1431     MigrationState *s;
1432 
1433     s = migrate_get_current();
1434 
1435     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
1436 }
1437 
1438 bool migrate_postcopy_ram(void)
1439 {
1440     MigrationState *s;
1441 
1442     s = migrate_get_current();
1443 
1444     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
1445 }
1446 
/* Report whether postcopy is enabled; currently the same as postcopy-ram. */
bool migrate_postcopy(void)
{
    bool ram_postcopy = migrate_postcopy_ram();

    return ram_postcopy;
}
1451 
1452 bool migrate_auto_converge(void)
1453 {
1454     MigrationState *s;
1455 
1456     s = migrate_get_current();
1457 
1458     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
1459 }
1460 
1461 bool migrate_zero_blocks(void)
1462 {
1463     MigrationState *s;
1464 
1465     s = migrate_get_current();
1466 
1467     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
1468 }
1469 
1470 bool migrate_use_compression(void)
1471 {
1472     MigrationState *s;
1473 
1474     s = migrate_get_current();
1475 
1476     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
1477 }
1478 
1479 int migrate_compress_level(void)
1480 {
1481     MigrationState *s;
1482 
1483     s = migrate_get_current();
1484 
1485     return s->parameters.compress_level;
1486 }
1487 
1488 int migrate_compress_threads(void)
1489 {
1490     MigrationState *s;
1491 
1492     s = migrate_get_current();
1493 
1494     return s->parameters.compress_threads;
1495 }
1496 
1497 int migrate_decompress_threads(void)
1498 {
1499     MigrationState *s;
1500 
1501     s = migrate_get_current();
1502 
1503     return s->parameters.decompress_threads;
1504 }
1505 
1506 bool migrate_use_events(void)
1507 {
1508     MigrationState *s;
1509 
1510     s = migrate_get_current();
1511 
1512     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
1513 }
1514 
1515 bool migrate_use_multifd(void)
1516 {
1517     MigrationState *s;
1518 
1519     s = migrate_get_current();
1520 
1521     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
1522 }
1523 
1524 int migrate_multifd_channels(void)
1525 {
1526     MigrationState *s;
1527 
1528     s = migrate_get_current();
1529 
1530     return s->parameters.x_multifd_channels;
1531 }
1532 
1533 int migrate_multifd_page_count(void)
1534 {
1535     MigrationState *s;
1536 
1537     s = migrate_get_current();
1538 
1539     return s->parameters.x_multifd_page_count;
1540 }
1541 
1542 int migrate_use_xbzrle(void)
1543 {
1544     MigrationState *s;
1545 
1546     s = migrate_get_current();
1547 
1548     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
1549 }
1550 
1551 int64_t migrate_xbzrle_cache_size(void)
1552 {
1553     MigrationState *s;
1554 
1555     s = migrate_get_current();
1556 
1557     return s->xbzrle_cache_size;
1558 }
1559 
1560 bool migrate_use_block(void)
1561 {
1562     MigrationState *s;
1563 
1564     s = migrate_get_current();
1565 
1566     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
1567 }
1568 
1569 bool migrate_use_return_path(void)
1570 {
1571     MigrationState *s;
1572 
1573     s = migrate_get_current();
1574 
1575     return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
1576 }
1577 
1578 bool migrate_use_block_incremental(void)
1579 {
1580     MigrationState *s;
1581 
1582     s = migrate_get_current();
1583 
1584     return s->parameters.block_incremental;
1585 }
1586 
1587 /* migration thread support */
1588 /*
1589  * Something bad happened to the RP stream, mark an error
1590  * The caller shall print or trace something to indicate why
1591  */
1592 static void mark_source_rp_bad(MigrationState *s)
1593 {
1594     s->rp_state.error = true;
1595 }
1596 
1597 static struct rp_cmd_args {
1598     ssize_t     len; /* -1 = variable */
1599     const char *name;
1600 } rp_cmd_args[] = {
1601     [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
1602     [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
1603     [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
1604     [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
1605     [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
1606     [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
1607 };
1608 
1609 /*
1610  * Process a request for pages received on the return path,
1611  * We're allowed to send more than requested (e.g. to round to our page size)
1612  * and we don't need to send pages that have already been sent.
1613  */
1614 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
1615                                        ram_addr_t start, size_t len)
1616 {
1617     long our_host_ps = getpagesize();
1618 
1619     trace_migrate_handle_rp_req_pages(rbname, start, len);
1620 
1621     /*
1622      * Since we currently insist on matching page sizes, just sanity check
1623      * we're being asked for whole host pages.
1624      */
1625     if (start & (our_host_ps-1) ||
1626        (len & (our_host_ps-1))) {
1627         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
1628                      " len: %zd", __func__, start, len);
1629         mark_source_rp_bad(ms);
1630         return;
1631     }
1632 
1633     if (ram_save_queue_pages(rbname, start, len)) {
1634         mark_source_rp_bad(ms);
1635     }
1636 }
1637 
1638 /*
1639  * Handles messages sent on the return path towards the source VM
1640  *
1641  */
1642 static void *source_return_path_thread(void *opaque)
1643 {
1644     MigrationState *ms = opaque;
1645     QEMUFile *rp = ms->rp_state.from_dst_file;
1646     uint16_t header_len, header_type;
1647     uint8_t buf[512];
1648     uint32_t tmp32, sibling_error;
1649     ram_addr_t start = 0; /* =0 to silence warning */
1650     size_t  len = 0, expected_len;
1651     int res;
1652 
1653     trace_source_return_path_thread_entry();
1654     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
1655            migration_is_setup_or_active(ms->state)) {
1656         trace_source_return_path_thread_loop_top();
1657         header_type = qemu_get_be16(rp);
1658         header_len = qemu_get_be16(rp);
1659 
1660         if (header_type >= MIG_RP_MSG_MAX ||
1661             header_type == MIG_RP_MSG_INVALID) {
1662             error_report("RP: Received invalid message 0x%04x length 0x%04x",
1663                     header_type, header_len);
1664             mark_source_rp_bad(ms);
1665             goto out;
1666         }
1667 
1668         if ((rp_cmd_args[header_type].len != -1 &&
1669             header_len != rp_cmd_args[header_type].len) ||
1670             header_len > sizeof(buf)) {
1671             error_report("RP: Received '%s' message (0x%04x) with"
1672                     "incorrect length %d expecting %zu",
1673                     rp_cmd_args[header_type].name, header_type, header_len,
1674                     (size_t)rp_cmd_args[header_type].len);
1675             mark_source_rp_bad(ms);
1676             goto out;
1677         }
1678 
1679         /* We know we've got a valid header by this point */
1680         res = qemu_get_buffer(rp, buf, header_len);
1681         if (res != header_len) {
1682             error_report("RP: Failed reading data for message 0x%04x"
1683                          " read %d expected %d",
1684                          header_type, res, header_len);
1685             mark_source_rp_bad(ms);
1686             goto out;
1687         }
1688 
1689         /* OK, we have the message and the data */
1690         switch (header_type) {
1691         case MIG_RP_MSG_SHUT:
1692             sibling_error = ldl_be_p(buf);
1693             trace_source_return_path_thread_shut(sibling_error);
1694             if (sibling_error) {
1695                 error_report("RP: Sibling indicated error %d", sibling_error);
1696                 mark_source_rp_bad(ms);
1697             }
1698             /*
1699              * We'll let the main thread deal with closing the RP
1700              * we could do a shutdown(2) on it, but we're the only user
1701              * anyway, so there's nothing gained.
1702              */
1703             goto out;
1704 
1705         case MIG_RP_MSG_PONG:
1706             tmp32 = ldl_be_p(buf);
1707             trace_source_return_path_thread_pong(tmp32);
1708             break;
1709 
1710         case MIG_RP_MSG_REQ_PAGES:
1711             start = ldq_be_p(buf);
1712             len = ldl_be_p(buf + 8);
1713             migrate_handle_rp_req_pages(ms, NULL, start, len);
1714             break;
1715 
1716         case MIG_RP_MSG_REQ_PAGES_ID:
1717             expected_len = 12 + 1; /* header + termination */
1718 
1719             if (header_len >= expected_len) {
1720                 start = ldq_be_p(buf);
1721                 len = ldl_be_p(buf + 8);
1722                 /* Now we expect an idstr */
1723                 tmp32 = buf[12]; /* Length of the following idstr */
1724                 buf[13 + tmp32] = '\0';
1725                 expected_len += tmp32;
1726             }
1727             if (header_len != expected_len) {
1728                 error_report("RP: Req_Page_id with length %d expecting %zd",
1729                         header_len, expected_len);
1730                 mark_source_rp_bad(ms);
1731                 goto out;
1732             }
1733             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
1734             break;
1735 
1736         default:
1737             break;
1738         }
1739     }
1740     if (qemu_file_get_error(rp)) {
1741         trace_source_return_path_thread_bad_end();
1742         mark_source_rp_bad(ms);
1743     }
1744 
1745     trace_source_return_path_thread_end();
1746 out:
1747     ms->rp_state.from_dst_file = NULL;
1748     qemu_fclose(rp);
1749     return NULL;
1750 }
1751 
1752 static int open_return_path_on_source(MigrationState *ms)
1753 {
1754 
1755     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
1756     if (!ms->rp_state.from_dst_file) {
1757         return -1;
1758     }
1759 
1760     trace_open_return_path_on_source();
1761     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
1762                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
1763 
1764     trace_open_return_path_on_source_continue();
1765 
1766     return 0;
1767 }
1768 
1769 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
1770 static int await_return_path_close_on_source(MigrationState *ms)
1771 {
1772     /*
1773      * If this is a normal exit then the destination will send a SHUT and the
1774      * rp_thread will exit, however if there's an error we need to cause
1775      * it to exit.
1776      */
1777     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
1778         /*
1779          * shutdown(2), if we have it, will cause it to unblock if it's stuck
1780          * waiting for the destination.
1781          */
1782         qemu_file_shutdown(ms->rp_state.from_dst_file);
1783         mark_source_rp_bad(ms);
1784     }
1785     trace_await_return_path_close_on_source_joining();
1786     qemu_thread_join(&ms->rp_state.rp_thread);
1787     trace_await_return_path_close_on_source_close();
1788     return ms->rp_state.error;
1789 }
1790 
1791 /*
1792  * Switch from normal iteration to postcopy
1793  * Returns non-0 on error
1794  */
1795 static int postcopy_start(MigrationState *ms, bool *old_vm_running)
1796 {
1797     int ret;
1798     QIOChannelBuffer *bioc;
1799     QEMUFile *fb;
1800     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1801     bool restart_block = false;
1802     migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
1803                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
1804 
1805     trace_postcopy_start();
1806     qemu_mutex_lock_iothread();
1807     trace_postcopy_start_set_run();
1808 
1809     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1810     *old_vm_running = runstate_is_running();
1811     global_state_store();
1812     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1813     if (ret < 0) {
1814         goto fail;
1815     }
1816 
1817     ret = bdrv_inactivate_all();
1818     if (ret < 0) {
1819         goto fail;
1820     }
1821     restart_block = true;
1822 
1823     /*
1824      * Cause any non-postcopiable, but iterative devices to
1825      * send out their final data.
1826      */
1827     qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
1828 
1829     /*
1830      * in Finish migrate and with the io-lock held everything should
1831      * be quiet, but we've potentially still got dirty pages and we
1832      * need to tell the destination to throw any pages it's already received
1833      * that are dirty
1834      */
1835     if (migrate_postcopy_ram()) {
1836         if (ram_postcopy_send_discard_bitmap(ms)) {
1837             error_report("postcopy send discard bitmap failed");
1838             goto fail;
1839         }
1840     }
1841 
1842     /*
1843      * send rest of state - note things that are doing postcopy
1844      * will notice we're in POSTCOPY_ACTIVE and not actually
1845      * wrap their state up here
1846      */
1847     qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
1848     if (migrate_postcopy_ram()) {
1849         /* Ping just for debugging, helps line traces up */
1850         qemu_savevm_send_ping(ms->to_dst_file, 2);
1851     }
1852 
1853     /*
1854      * While loading the device state we may trigger page transfer
1855      * requests and the fd must be free to process those, and thus
1856      * the destination must read the whole device state off the fd before
1857      * it starts processing it.  Unfortunately the ad-hoc migration format
1858      * doesn't allow the destination to know the size to read without fully
1859      * parsing it through each devices load-state code (especially the open
1860      * coded devices that use get/put).
1861      * So we wrap the device state up in a package with a length at the start;
1862      * to do this we use a qemu_buf to hold the whole of the device state.
1863      */
1864     bioc = qio_channel_buffer_new(4096);
1865     qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
1866     fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
1867     object_unref(OBJECT(bioc));
1868 
1869     /*
1870      * Make sure the receiver can get incoming pages before we send the rest
1871      * of the state
1872      */
1873     qemu_savevm_send_postcopy_listen(fb);
1874 
1875     qemu_savevm_state_complete_precopy(fb, false, false);
1876     if (migrate_postcopy_ram()) {
1877         qemu_savevm_send_ping(fb, 3);
1878     }
1879 
1880     qemu_savevm_send_postcopy_run(fb);
1881 
1882     /* <><> end of stuff going into the package */
1883 
1884     /* Last point of recovery; as soon as we send the package the destination
1885      * can open devices and potentially start running.
1886      * Lets just check again we've not got any errors.
1887      */
1888     ret = qemu_file_get_error(ms->to_dst_file);
1889     if (ret) {
1890         error_report("postcopy_start: Migration stream errored (pre package)");
1891         goto fail_closefb;
1892     }
1893 
1894     restart_block = false;
1895 
1896     /* Now send that blob */
1897     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
1898         goto fail_closefb;
1899     }
1900     qemu_fclose(fb);
1901 
1902     /* Send a notify to give a chance for anything that needs to happen
1903      * at the transition to postcopy and after the device state; in particular
1904      * spice needs to trigger a transition now
1905      */
1906     ms->postcopy_after_devices = true;
1907     notifier_list_notify(&migration_state_notifiers, ms);
1908 
1909     ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
1910 
1911     qemu_mutex_unlock_iothread();
1912 
1913     if (migrate_postcopy_ram()) {
1914         /*
1915          * Although this ping is just for debug, it could potentially be
1916          * used for getting a better measurement of downtime at the source.
1917          */
1918         qemu_savevm_send_ping(ms->to_dst_file, 4);
1919     }
1920 
1921     if (migrate_release_ram()) {
1922         ram_postcopy_migrated_memory_release(ms);
1923     }
1924 
1925     ret = qemu_file_get_error(ms->to_dst_file);
1926     if (ret) {
1927         error_report("postcopy_start: Migration stream errored");
1928         migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1929                               MIGRATION_STATUS_FAILED);
1930     }
1931 
1932     return ret;
1933 
1934 fail_closefb:
1935     qemu_fclose(fb);
1936 fail:
1937     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
1938                           MIGRATION_STATUS_FAILED);
1939     if (restart_block) {
1940         /* A failure happened early enough that we know the destination hasn't
1941          * accessed block devices, so we're safe to recover.
1942          */
1943         Error *local_err = NULL;
1944 
1945         bdrv_invalidate_cache_all(&local_err);
1946         if (local_err) {
1947             error_report_err(local_err);
1948         }
1949     }
1950     qemu_mutex_unlock_iothread();
1951     return -1;
1952 }
1953 
1954 /**
1955  * migration_completion: Used by migration_thread when there's not much left.
1956  *   The caller 'breaks' the loop when this returns.
1957  *
1958  * @s: Current migration state
1959  * @current_active_state: The migration state we expect to be in
1960  * @*old_vm_running: Pointer to old_vm_running flag
1961  * @*start_time: Pointer to time to update
1962  */
1963 static void migration_completion(MigrationState *s, int current_active_state,
1964                                  bool *old_vm_running,
1965                                  int64_t *start_time)
1966 {
1967     int ret;
1968 
1969     if (s->state == MIGRATION_STATUS_ACTIVE) {
1970         qemu_mutex_lock_iothread();
1971         *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1972         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1973         *old_vm_running = runstate_is_running();
1974         ret = global_state_store();
1975 
1976         if (!ret) {
1977             bool inactivate = !migrate_colo_enabled();
1978             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1979             if (ret >= 0) {
1980                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
1981                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
1982                                                          inactivate);
1983             }
1984             if (inactivate && ret >= 0) {
1985                 s->block_inactive = true;
1986             }
1987         }
1988         qemu_mutex_unlock_iothread();
1989 
1990         if (ret < 0) {
1991             goto fail;
1992         }
1993     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1994         trace_migration_completion_postcopy_end();
1995 
1996         qemu_savevm_state_complete_postcopy(s->to_dst_file);
1997         trace_migration_completion_postcopy_end_after_complete();
1998     }
1999 
2000     /*
2001      * If rp was opened we must clean up the thread before
2002      * cleaning everything else up (since if there are no failures
2003      * it will wait for the destination to send it's status in
2004      * a SHUT command).
2005      */
2006     if (s->rp_state.from_dst_file) {
2007         int rp_error;
2008         trace_migration_return_path_end_before();
2009         rp_error = await_return_path_close_on_source(s);
2010         trace_migration_return_path_end_after(rp_error);
2011         if (rp_error) {
2012             goto fail_invalidate;
2013         }
2014     }
2015 
2016     if (qemu_file_get_error(s->to_dst_file)) {
2017         trace_migration_completion_file_err();
2018         goto fail_invalidate;
2019     }
2020 
2021     if (!migrate_colo_enabled()) {
2022         migrate_set_state(&s->state, current_active_state,
2023                           MIGRATION_STATUS_COMPLETED);
2024     }
2025 
2026     return;
2027 
2028 fail_invalidate:
2029     /* If not doing postcopy, vm_start() will be called: let's regain
2030      * control on images.
2031      */
2032     if (s->state == MIGRATION_STATUS_ACTIVE) {
2033         Error *local_err = NULL;
2034 
2035         qemu_mutex_lock_iothread();
2036         bdrv_invalidate_cache_all(&local_err);
2037         if (local_err) {
2038             error_report_err(local_err);
2039         } else {
2040             s->block_inactive = false;
2041         }
2042         qemu_mutex_unlock_iothread();
2043     }
2044 
2045 fail:
2046     migrate_set_state(&s->state, current_active_state,
2047                       MIGRATION_STATUS_FAILED);
2048 }
2049 
2050 bool migrate_colo_enabled(void)
2051 {
2052     MigrationState *s = migrate_get_current();
2053     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
2054 }
2055 
2056 /*
2057  * Master migration thread on the source VM.
2058  * It drives the migration and pumps the data down the outgoing channel.
2059  */
2060 static void *migration_thread(void *opaque)
2061 {
2062     MigrationState *s = opaque;
2063     /* Used by the bandwidth calcs, updated later */
2064     int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2065     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
2066     int64_t initial_bytes = 0;
2067     /*
2068      * The final stage happens when the remaining data is smaller than
2069      * this threshold; it's calculated from the requested downtime and
2070      * measured bandwidth
2071      */
2072     int64_t threshold_size = 0;
2073     int64_t start_time = initial_time;
2074     int64_t end_time;
2075     bool old_vm_running = false;
2076     bool entered_postcopy = false;
2077     /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
2078     enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
2079     bool enable_colo = migrate_colo_enabled();
2080 
2081     rcu_register_thread();
2082 
2083     qemu_savevm_state_header(s->to_dst_file);
2084 
2085     /*
2086      * If we opened the return path, we need to make sure dst has it
2087      * opened as well.
2088      */
2089     if (s->rp_state.from_dst_file) {
2090         /* Now tell the dest that it should open its end so it can reply */
2091         qemu_savevm_send_open_return_path(s->to_dst_file);
2092 
2093         /* And do a ping that will make stuff easier to debug */
2094         qemu_savevm_send_ping(s->to_dst_file, 1);
2095     }
2096 
2097     if (migrate_postcopy()) {
2098         /*
2099          * Tell the destination that we *might* want to do postcopy later;
2100          * if the other end can't do postcopy it should fail now, nice and
2101          * early.
2102          */
2103         qemu_savevm_send_postcopy_advise(s->to_dst_file);
2104     }
2105 
2106     qemu_savevm_state_setup(s->to_dst_file);
2107 
2108     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
2109     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2110                       MIGRATION_STATUS_ACTIVE);
2111 
2112     trace_migration_thread_setup_complete();
2113 
2114     while (s->state == MIGRATION_STATUS_ACTIVE ||
2115            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2116         int64_t current_time;
2117         uint64_t pending_size;
2118 
2119         if (!qemu_file_rate_limit(s->to_dst_file)) {
2120             uint64_t pend_post, pend_nonpost;
2121 
2122             qemu_savevm_state_pending(s->to_dst_file, threshold_size,
2123                                       &pend_nonpost, &pend_post);
2124             pending_size = pend_nonpost + pend_post;
2125             trace_migrate_pending(pending_size, threshold_size,
2126                                   pend_post, pend_nonpost);
2127             if (pending_size && pending_size >= threshold_size) {
2128                 /* Still a significant amount to transfer */
2129 
2130                 if (migrate_postcopy() &&
2131                     s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
2132                     pend_nonpost <= threshold_size &&
2133                     atomic_read(&s->start_postcopy)) {
2134 
2135                     if (!postcopy_start(s, &old_vm_running)) {
2136                         current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
2137                         entered_postcopy = true;
2138                     }
2139 
2140                     continue;
2141                 }
2142                 /* Just another iteration step */
2143                 qemu_savevm_state_iterate(s->to_dst_file, entered_postcopy);
2144             } else {
2145                 trace_migration_thread_low_pending(pending_size);
2146                 migration_completion(s, current_active_state,
2147                                      &old_vm_running, &start_time);
2148                 break;
2149             }
2150         }
2151 
2152         if (qemu_file_get_error(s->to_dst_file)) {
2153             migrate_set_state(&s->state, current_active_state,
2154                               MIGRATION_STATUS_FAILED);
2155             trace_migration_thread_file_err();
2156             break;
2157         }
2158         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2159         if (current_time >= initial_time + BUFFER_DELAY) {
2160             uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
2161                                          initial_bytes;
2162             uint64_t time_spent = current_time - initial_time;
2163             double bandwidth = (double)transferred_bytes / time_spent;
2164             threshold_size = bandwidth * s->parameters.downtime_limit;
2165 
2166             s->mbps = (((double) transferred_bytes * 8.0) /
2167                     ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
2168 
2169             trace_migrate_transferred(transferred_bytes, time_spent,
2170                                       bandwidth, threshold_size);
2171             /* if we haven't sent anything, we don't want to recalculate
2172                10000 is a small enough number for our purposes */
2173             if (ram_counters.dirty_pages_rate && transferred_bytes > 10000) {
2174                 s->expected_downtime = ram_counters.dirty_pages_rate *
2175                     qemu_target_page_size() / bandwidth;
2176             }
2177 
2178             qemu_file_reset_rate_limit(s->to_dst_file);
2179             initial_time = current_time;
2180             initial_bytes = qemu_ftell(s->to_dst_file);
2181         }
2182         if (qemu_file_rate_limit(s->to_dst_file)) {
2183             /* usleep expects microseconds */
2184             g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
2185         }
2186     }
2187 
2188     trace_migration_thread_after_loop();
2189     /* If we enabled cpu throttling for auto-converge, turn it off. */
2190     cpu_throttle_stop();
2191     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2192 
2193     qemu_mutex_lock_iothread();
2194     /*
2195      * The resource has been allocated by migration will be reused in COLO
2196      * process, so don't release them.
2197      */
2198     if (!enable_colo) {
2199         qemu_savevm_state_cleanup();
2200     }
2201     if (s->state == MIGRATION_STATUS_COMPLETED) {
2202         uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
2203         s->total_time = end_time - s->total_time;
2204         if (!entered_postcopy) {
2205             s->downtime = end_time - start_time;
2206         }
2207         if (s->total_time) {
2208             s->mbps = (((double) transferred_bytes * 8.0) /
2209                        ((double) s->total_time)) / 1000;
2210         }
2211         runstate_set(RUN_STATE_POSTMIGRATE);
2212     } else {
2213         if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
2214             migrate_start_colo_process(s);
2215             qemu_savevm_state_cleanup();
2216             /*
2217             * Fixme: we will run VM in COLO no matter its old running state.
2218             * After exited COLO, we will keep running.
2219             */
2220             old_vm_running = true;
2221         }
2222         if (old_vm_running && !entered_postcopy) {
2223             vm_start();
2224         } else {
2225             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
2226                 runstate_set(RUN_STATE_POSTMIGRATE);
2227             }
2228         }
2229     }
2230     qemu_bh_schedule(s->cleanup_bh);
2231     qemu_mutex_unlock_iothread();
2232 
2233     rcu_unregister_thread();
2234     return NULL;
2235 }
2236 
2237 void migrate_fd_connect(MigrationState *s)
2238 {
2239     s->expected_downtime = s->parameters.downtime_limit;
2240     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
2241 
2242     qemu_file_set_blocking(s->to_dst_file, true);
2243     qemu_file_set_rate_limit(s->to_dst_file,
2244                              s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
2245 
2246     /* Notify before starting migration thread */
2247     notifier_list_notify(&migration_state_notifiers, s);
2248 
2249     /*
2250      * Open the return path. For postcopy, it is used exclusively. For
2251      * precopy, only if user specified "return-path" capability would
2252      * QEMU uses the return path.
2253      */
2254     if (migrate_postcopy_ram() || migrate_use_return_path()) {
2255         if (open_return_path_on_source(s)) {
2256             error_report("Unable to open return-path for postcopy");
2257             migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2258                               MIGRATION_STATUS_FAILED);
2259             migrate_fd_cleanup(s);
2260             return;
2261         }
2262     }
2263 
2264     if (multifd_save_setup() != 0) {
2265         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
2266                           MIGRATION_STATUS_FAILED);
2267         migrate_fd_cleanup(s);
2268         return;
2269     }
2270     qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
2271                        QEMU_THREAD_JOINABLE);
2272     s->migration_thread_running = true;
2273 }
2274 
2275 void migration_global_dump(Monitor *mon)
2276 {
2277     MigrationState *ms = migrate_get_current();
2278 
2279     monitor_printf(mon, "globals: store-global-state=%d, only_migratable=%d, "
2280                    "send-configuration=%d, send-section-footer=%d\n",
2281                    ms->store_global_state, ms->only_migratable,
2282                    ms->send_configuration, ms->send_section_footer);
2283 }
2284 
/*
 * Declare a boolean property @prop_name backed by slot @cap of
 * MigrationState.enabled_capabilities[], defaulting to false.
 */
#define DEFINE_PROP_MIG_CAP(prop_name, cap)                       \
    DEFINE_PROP_BOOL(prop_name, MigrationState,                   \
                     enabled_capabilities[cap], false)
2287 
2288 static Property migration_properties[] = {
2289     DEFINE_PROP_BOOL("store-global-state", MigrationState,
2290                      store_global_state, true),
2291     DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, false),
2292     DEFINE_PROP_BOOL("send-configuration", MigrationState,
2293                      send_configuration, true),
2294     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
2295                      send_section_footer, true),
2296 
2297     /* Migration parameters */
2298     DEFINE_PROP_INT64("x-compress-level", MigrationState,
2299                       parameters.compress_level,
2300                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
2301     DEFINE_PROP_INT64("x-compress-threads", MigrationState,
2302                       parameters.compress_threads,
2303                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
2304     DEFINE_PROP_INT64("x-decompress-threads", MigrationState,
2305                       parameters.decompress_threads,
2306                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
2307     DEFINE_PROP_INT64("x-cpu-throttle-initial", MigrationState,
2308                       parameters.cpu_throttle_initial,
2309                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
2310     DEFINE_PROP_INT64("x-cpu-throttle-increment", MigrationState,
2311                       parameters.cpu_throttle_increment,
2312                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
2313     DEFINE_PROP_INT64("x-max-bandwidth", MigrationState,
2314                       parameters.max_bandwidth, MAX_THROTTLE),
2315     DEFINE_PROP_INT64("x-downtime-limit", MigrationState,
2316                       parameters.downtime_limit,
2317                       DEFAULT_MIGRATE_SET_DOWNTIME),
2318     DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState,
2319                       parameters.x_checkpoint_delay,
2320                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
2321     DEFINE_PROP_INT64("x-multifd-channels", MigrationState,
2322                       parameters.x_multifd_channels,
2323                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
2324     DEFINE_PROP_INT64("x-multifd-page-count", MigrationState,
2325                       parameters.x_multifd_page_count,
2326                       DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT),
2327 
2328     /* Migration capabilities */
2329     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
2330     DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
2331     DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
2332     DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
2333     DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
2334     DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
2335     DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
2336     DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
2337     DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
2338     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
2339     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
2340     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD),
2341 
2342     DEFINE_PROP_END_OF_LIST(),
2343 };
2344 
2345 static void migration_class_init(ObjectClass *klass, void *data)
2346 {
2347     DeviceClass *dc = DEVICE_CLASS(klass);
2348 
2349     dc->user_creatable = false;
2350     dc->props = migration_properties;
2351 }
2352 
2353 static void migration_instance_finalize(Object *obj)
2354 {
2355     MigrationState *ms = MIGRATION_OBJ(obj);
2356     MigrationParameters *params = &ms->parameters;
2357 
2358     g_free(params->tls_hostname);
2359     g_free(params->tls_creds);
2360 }
2361 
2362 static void migration_instance_init(Object *obj)
2363 {
2364     MigrationState *ms = MIGRATION_OBJ(obj);
2365     MigrationParameters *params = &ms->parameters;
2366 
2367     ms->state = MIGRATION_STATUS_NONE;
2368     ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
2369     ms->mbps = -1;
2370 
2371     params->tls_hostname = g_strdup("");
2372     params->tls_creds = g_strdup("");
2373 
2374     /* Set has_* up only for parameter checks */
2375     params->has_compress_level = true;
2376     params->has_compress_threads = true;
2377     params->has_decompress_threads = true;
2378     params->has_cpu_throttle_initial = true;
2379     params->has_cpu_throttle_increment = true;
2380     params->has_max_bandwidth = true;
2381     params->has_downtime_limit = true;
2382     params->has_x_checkpoint_delay = true;
2383     params->has_block_incremental = true;
2384     params->has_x_multifd_channels = true;
2385     params->has_x_multifd_page_count = true;
2386 }
2387 
2388 /*
2389  * Return true if check pass, false otherwise. Error will be put
2390  * inside errp if provided.
2391  */
2392 static bool migration_object_check(MigrationState *ms, Error **errp)
2393 {
2394     MigrationCapabilityStatusList *head = NULL;
2395     /* Assuming all off */
2396     bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
2397     int i;
2398 
2399     if (!migrate_params_check(&ms->parameters, errp)) {
2400         return false;
2401     }
2402 
2403     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
2404         if (ms->enabled_capabilities[i]) {
2405             head = migrate_cap_add(head, i, true);
2406         }
2407     }
2408 
2409     ret = migrate_caps_check(cap_list, head, errp);
2410 
2411     /* It works with head == NULL */
2412     qapi_free_MigrationCapabilityStatusList(head);
2413 
2414     return ret;
2415 }
2416 
2417 static const TypeInfo migration_type = {
2418     .name = TYPE_MIGRATION,
2419     /*
2420      * NOTE: TYPE_MIGRATION is not really a device, as the object is
2421      * not created using qdev_create(), it is not attached to the qdev
2422      * device tree, and it is never realized.
2423      *
2424      * TODO: Make this TYPE_OBJECT once QOM provides something like
2425      * TYPE_DEVICE's "-global" properties.
2426      */
2427     .parent = TYPE_DEVICE,
2428     .class_init = migration_class_init,
2429     .class_size = sizeof(MigrationClass),
2430     .instance_size = sizeof(MigrationState),
2431     .instance_init = migration_instance_init,
2432     .instance_finalize = migration_instance_finalize,
2433 };
2434 
2435 static void register_migration_types(void)
2436 {
2437     type_register_static(&migration_type);
2438 }
2439 
2440 type_init(register_migration_types);
2441