1 /* 2 * QEMU live migration 3 * 4 * Copyright IBM, Corp. 2008 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qemu/cutils.h" 18 #include "qemu/error-report.h" 19 #include "migration/blocker.h" 20 #include "exec.h" 21 #include "fd.h" 22 #include "socket.h" 23 #include "rdma.h" 24 #include "ram.h" 25 #include "migration/global_state.h" 26 #include "migration/misc.h" 27 #include "migration.h" 28 #include "savevm.h" 29 #include "qemu-file-channel.h" 30 #include "qemu-file.h" 31 #include "migration/vmstate.h" 32 #include "block/block.h" 33 #include "qapi/error.h" 34 #include "qapi/qmp/qerror.h" 35 #include "qapi/qmp/qnull.h" 36 #include "qemu/rcu.h" 37 #include "block.h" 38 #include "postcopy-ram.h" 39 #include "qemu/thread.h" 40 #include "qmp-commands.h" 41 #include "trace.h" 42 #include "qapi-event.h" 43 #include "exec/target_page.h" 44 #include "io/channel-buffer.h" 45 #include "migration/colo.h" 46 #include "hw/boards.h" 47 #include "monitor/monitor.h" 48 49 #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ 50 51 /* Amount of time to allocate to each "chunk" of bandwidth-throttled 52 * data. */ 53 #define BUFFER_DELAY 100 54 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) 55 56 /* Time in milliseconds we are allowed to stop the source, 57 * for sending the last part */ 58 #define DEFAULT_MIGRATE_SET_DOWNTIME 300 59 60 /* Maximum migrate downtime set to 2000 seconds */ 61 #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 62 #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) 63 64 /* Default compression thread count */ 65 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 66 /* Default decompression thread count, usually decompression is at 67 * least 4 times as fast as compression.*/ 68 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 69 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ 70 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 71 /* Define default autoconverge cpu throttle migration parameters */ 72 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 73 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 74 75 /* Migration XBZRLE default cache size */ 76 #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) 77 78 /* The delay time (in ms) between two COLO checkpoints 79 * Note: Please change this default value to 10000 when we support hybrid mode. 80 */ 81 #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 82 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 83 #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 84 85 static NotifierList migration_state_notifiers = 86 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); 87 88 static bool deferred_incoming; 89 90 /* Messages sent on the return path from destination to source */ 91 enum mig_rp_message_type { 92 MIG_RP_MSG_INVALID = 0, /* Must be 0 */ 93 MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */ 94 MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */ 95 96 MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */ 97 MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ 98 99 MIG_RP_MSG_MAX 100 }; 101 102 /* When we add fault tolerance, we could have several 103 migrations at once. For now we don't need to add 104 dynamic creation of migration */ 105 106 static MigrationState *current_migration; 107 108 static bool migration_object_check(MigrationState *ms, Error **errp); 109 static int migration_maybe_pause(MigrationState *s, 110 int *current_active_state, 111 int new_state); 112 113 void migration_object_init(void) 114 { 115 MachineState *ms = MACHINE(qdev_get_machine()); 116 Error *err = NULL; 117 118 /* This can only be called once. */ 119 assert(!current_migration); 120 current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION)); 121 122 if (!migration_object_check(current_migration, &err)) { 123 error_report_err(err); 124 exit(1); 125 } 126 127 /* 128 * We cannot really do this in migration_instance_init() since at 129 * that time global properties are not yet applied, then this 130 * value will be definitely replaced by something else. 131 */ 132 if (ms->enforce_config_section) { 133 current_migration->send_configuration = true; 134 } 135 } 136 137 void migration_object_finalize(void) 138 { 139 object_unref(OBJECT(current_migration)); 140 } 141 142 /* For outgoing */ 143 MigrationState *migrate_get_current(void) 144 { 145 /* This can only be called after the object created. */ 146 assert(current_migration); 147 return current_migration; 148 } 149 150 MigrationIncomingState *migration_incoming_get_current(void) 151 { 152 static bool once; 153 static MigrationIncomingState mis_current; 154 155 if (!once) { 156 mis_current.state = MIGRATION_STATUS_NONE; 157 memset(&mis_current, 0, sizeof(MigrationIncomingState)); 158 qemu_mutex_init(&mis_current.rp_mutex); 159 qemu_event_init(&mis_current.main_thread_load_event, false); 160 once = true; 161 } 162 return &mis_current; 163 } 164 165 void migration_incoming_state_destroy(void) 166 { 167 struct MigrationIncomingState *mis = migration_incoming_get_current(); 168 169 if (mis->to_src_file) { 170 /* Tell source that we are done */ 171 migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); 172 qemu_fclose(mis->to_src_file); 173 mis->to_src_file = NULL; 174 } 175 176 if (mis->from_src_file) { 177 qemu_fclose(mis->from_src_file); 178 mis->from_src_file = NULL; 179 } 180 181 qemu_event_reset(&mis->main_thread_load_event); 182 } 183 184 static void migrate_generate_event(int new_state) 185 { 186 if (migrate_use_events()) { 187 qapi_event_send_migration(new_state, &error_abort); 188 } 189 } 190 191 /* 192 * Called on -incoming with a defer: uri. 193 * The migration can be started later after any parameters have been 194 * changed. 195 */ 196 static void deferred_incoming_migration(Error **errp) 197 { 198 if (deferred_incoming) { 199 error_setg(errp, "Incoming migration already deferred"); 200 } 201 deferred_incoming = true; 202 } 203 204 /* 205 * Send a message on the return channel back to the source 206 * of the migration. 207 */ 208 static void migrate_send_rp_message(MigrationIncomingState *mis, 209 enum mig_rp_message_type message_type, 210 uint16_t len, void *data) 211 { 212 trace_migrate_send_rp_message((int)message_type, len); 213 qemu_mutex_lock(&mis->rp_mutex); 214 qemu_put_be16(mis->to_src_file, (unsigned int)message_type); 215 qemu_put_be16(mis->to_src_file, len); 216 qemu_put_buffer(mis->to_src_file, data, len); 217 qemu_fflush(mis->to_src_file); 218 qemu_mutex_unlock(&mis->rp_mutex); 219 } 220 221 /* Request a range of pages from the source VM at the given 222 * start address. 223 * rbname: Name of the RAMBlock to request the page in, if NULL it's the same 224 * as the last request (a name must have been given previously) 225 * Start: Address offset within the RB 226 * Len: Length in bytes required - must be a multiple of pagesize 227 */ 228 void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname, 229 ram_addr_t start, size_t len) 230 { 231 uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ 232 size_t msglen = 12; /* start + len */ 233 234 *(uint64_t *)bufc = cpu_to_be64((uint64_t)start); 235 *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len); 236 237 if (rbname) { 238 int rbname_len = strlen(rbname); 239 assert(rbname_len < 256); 240 241 bufc[msglen++] = rbname_len; 242 memcpy(bufc + msglen, rbname, rbname_len); 243 msglen += rbname_len; 244 migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc); 245 } else { 246 migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc); 247 } 248 } 249 250 void qemu_start_incoming_migration(const char *uri, Error **errp) 251 { 252 const char *p; 253 254 qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort); 255 if (!strcmp(uri, "defer")) { 256 deferred_incoming_migration(errp); 257 } else if (strstart(uri, "tcp:", &p)) { 258 tcp_start_incoming_migration(p, errp); 259 #ifdef CONFIG_RDMA 260 } else if (strstart(uri, "rdma:", &p)) { 261 rdma_start_incoming_migration(p, errp); 262 #endif 263 } else if (strstart(uri, "exec:", &p)) { 264 exec_start_incoming_migration(p, errp); 265 } else if (strstart(uri, "unix:", &p)) { 266 unix_start_incoming_migration(p, errp); 267 } else if (strstart(uri, "fd:", &p)) { 268 fd_start_incoming_migration(p, errp); 269 } else { 270 error_setg(errp, "unknown migration protocol: %s", uri); 271 } 272 } 273 274 static void process_incoming_migration_bh(void *opaque) 275 { 276 Error *local_err = NULL; 277 MigrationIncomingState *mis = opaque; 278 279 /* Make sure all file formats flush their mutable metadata. 280 * If we get an error here, just don't restart the VM yet. */ 281 bdrv_invalidate_cache_all(&local_err); 282 if (local_err) { 283 error_report_err(local_err); 284 local_err = NULL; 285 autostart = false; 286 } 287 288 /* 289 * This must happen after all error conditions are dealt with and 290 * we're sure the VM is going to be running on this host. 291 */ 292 qemu_announce_self(); 293 294 if (multifd_load_cleanup(&local_err) != 0) { 295 error_report_err(local_err); 296 autostart = false; 297 } 298 /* If global state section was not received or we are in running 299 state, we need to obey autostart. Any other state is set with 300 runstate_set. */ 301 302 if (!global_state_received() || 303 global_state_get_runstate() == RUN_STATE_RUNNING) { 304 if (autostart) { 305 vm_start(); 306 } else { 307 runstate_set(RUN_STATE_PAUSED); 308 } 309 } else { 310 runstate_set(global_state_get_runstate()); 311 } 312 /* 313 * This must happen after any state changes since as soon as an external 314 * observer sees this event they might start to prod at the VM assuming 315 * it's ready to use. 316 */ 317 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 318 MIGRATION_STATUS_COMPLETED); 319 qemu_bh_delete(mis->bh); 320 migration_incoming_state_destroy(); 321 } 322 323 static void process_incoming_migration_co(void *opaque) 324 { 325 MigrationIncomingState *mis = migration_incoming_get_current(); 326 PostcopyState ps; 327 int ret; 328 329 assert(mis->from_src_file); 330 mis->largest_page_size = qemu_ram_pagesize_largest(); 331 postcopy_state_set(POSTCOPY_INCOMING_NONE); 332 migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, 333 MIGRATION_STATUS_ACTIVE); 334 ret = qemu_loadvm_state(mis->from_src_file); 335 336 ps = postcopy_state_get(); 337 trace_process_incoming_migration_co_end(ret, ps); 338 if (ps != POSTCOPY_INCOMING_NONE) { 339 if (ps == POSTCOPY_INCOMING_ADVISE) { 340 /* 341 * Where a migration had postcopy enabled (and thus went to advise) 342 * but managed to complete within the precopy period, we can use 343 * the normal exit. 344 */ 345 postcopy_ram_incoming_cleanup(mis); 346 } else if (ret >= 0) { 347 /* 348 * Postcopy was started, cleanup should happen at the end of the 349 * postcopy thread. 350 */ 351 trace_process_incoming_migration_co_postcopy_end_main(); 352 return; 353 } 354 /* Else if something went wrong then just fall out of the normal exit */ 355 } 356 357 /* we get COLO info, and know if we are in COLO mode */ 358 if (!ret && migration_incoming_enable_colo()) { 359 mis->migration_incoming_co = qemu_coroutine_self(); 360 qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", 361 colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); 362 mis->have_colo_incoming_thread = true; 363 qemu_coroutine_yield(); 364 365 /* Wait checkpoint incoming thread exit before free resource */ 366 qemu_thread_join(&mis->colo_incoming_thread); 367 } 368 369 if (ret < 0) { 370 Error *local_err = NULL; 371 372 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 373 MIGRATION_STATUS_FAILED); 374 error_report("load of migration failed: %s", strerror(-ret)); 375 qemu_fclose(mis->from_src_file); 376 if (multifd_load_cleanup(&local_err) != 0) { 377 error_report_err(local_err); 378 } 379 exit(EXIT_FAILURE); 380 } 381 mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); 382 qemu_bh_schedule(mis->bh); 383 } 384 385 static void migration_incoming_setup(QEMUFile *f) 386 { 387 MigrationIncomingState *mis = migration_incoming_get_current(); 388 389 if (multifd_load_setup() != 0) { 390 /* We haven't been able to create multifd threads 391 nothing better to do */ 392 exit(EXIT_FAILURE); 393 } 394 395 if (!mis->from_src_file) { 396 mis->from_src_file = f; 397 } 398 qemu_file_set_blocking(f, false); 399 } 400 401 static void migration_incoming_process(void) 402 { 403 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); 404 qemu_coroutine_enter(co); 405 } 406 407 void migration_fd_process_incoming(QEMUFile *f) 408 { 409 migration_incoming_setup(f); 410 migration_incoming_process(); 411 } 412 413 void migration_ioc_process_incoming(QIOChannel *ioc) 414 { 415 MigrationIncomingState *mis = migration_incoming_get_current(); 416 417 if (!mis->from_src_file) { 418 QEMUFile *f = qemu_fopen_channel_input(ioc); 419 migration_fd_process_incoming(f); 420 } 421 /* We still only have a single channel. Nothing to do here yet */ 422 } 423 424 /** 425 * @migration_has_all_channels: We have received all channels that we need 426 * 427 * Returns true when we have got connections to all the channels that 428 * we need for migration. 429 */ 430 bool migration_has_all_channels(void) 431 { 432 return true; 433 } 434 435 /* 436 * Send a 'SHUT' message on the return channel with the given value 437 * to indicate that we've finished with the RP. Non-0 value indicates 438 * error. 439 */ 440 void migrate_send_rp_shut(MigrationIncomingState *mis, 441 uint32_t value) 442 { 443 uint32_t buf; 444 445 buf = cpu_to_be32(value); 446 migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf); 447 } 448 449 /* 450 * Send a 'PONG' message on the return channel with the given value 451 * (normally in response to a 'PING') 452 */ 453 void migrate_send_rp_pong(MigrationIncomingState *mis, 454 uint32_t value) 455 { 456 uint32_t buf; 457 458 buf = cpu_to_be32(value); 459 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); 460 } 461 462 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) 463 { 464 MigrationCapabilityStatusList *head = NULL; 465 MigrationCapabilityStatusList *caps; 466 MigrationState *s = migrate_get_current(); 467 int i; 468 469 caps = NULL; /* silence compiler warning */ 470 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 471 #ifndef CONFIG_LIVE_BLOCK_MIGRATION 472 if (i == MIGRATION_CAPABILITY_BLOCK) { 473 continue; 474 } 475 #endif 476 if (head == NULL) { 477 head = g_malloc0(sizeof(*caps)); 478 caps = head; 479 } else { 480 caps->next = g_malloc0(sizeof(*caps)); 481 caps = caps->next; 482 } 483 caps->value = 484 g_malloc(sizeof(*caps->value)); 485 caps->value->capability = i; 486 caps->value->state = s->enabled_capabilities[i]; 487 } 488 489 return head; 490 } 491 492 MigrationParameters *qmp_query_migrate_parameters(Error **errp) 493 { 494 MigrationParameters *params; 495 MigrationState *s = migrate_get_current(); 496 497 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 498 params = g_malloc0(sizeof(*params)); 499 params->has_compress_level = true; 500 params->compress_level = s->parameters.compress_level; 501 params->has_compress_threads = true; 502 params->compress_threads = s->parameters.compress_threads; 503 params->has_decompress_threads = true; 504 params->decompress_threads = s->parameters.decompress_threads; 505 params->has_cpu_throttle_initial = true; 506 params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; 507 params->has_cpu_throttle_increment = true; 508 params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; 509 params->has_tls_creds = true; 510 params->tls_creds = g_strdup(s->parameters.tls_creds); 511 params->has_tls_hostname = true; 512 params->tls_hostname = g_strdup(s->parameters.tls_hostname); 513 params->has_max_bandwidth = true; 514 params->max_bandwidth = s->parameters.max_bandwidth; 515 params->has_downtime_limit = true; 516 params->downtime_limit = s->parameters.downtime_limit; 517 params->has_x_checkpoint_delay = true; 518 params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; 519 params->has_block_incremental = true; 520 params->block_incremental = s->parameters.block_incremental; 521 params->has_x_multifd_channels = true; 522 params->x_multifd_channels = s->parameters.x_multifd_channels; 523 params->has_x_multifd_page_count = true; 524 params->x_multifd_page_count = s->parameters.x_multifd_page_count; 525 params->has_xbzrle_cache_size = true; 526 params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; 527 528 return params; 529 } 530 531 /* 532 * Return true if we're already in the middle of a migration 533 * (i.e. any of the active or setup states) 534 */ 535 static bool migration_is_setup_or_active(int state) 536 { 537 switch (state) { 538 case MIGRATION_STATUS_ACTIVE: 539 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 540 case MIGRATION_STATUS_SETUP: 541 case MIGRATION_STATUS_PRE_SWITCHOVER: 542 case MIGRATION_STATUS_DEVICE: 543 return true; 544 545 default: 546 return false; 547 548 } 549 } 550 551 static void populate_ram_info(MigrationInfo *info, MigrationState *s) 552 { 553 info->has_ram = true; 554 info->ram = g_malloc0(sizeof(*info->ram)); 555 info->ram->transferred = ram_counters.transferred; 556 info->ram->total = ram_bytes_total(); 557 info->ram->duplicate = ram_counters.duplicate; 558 /* legacy value. It is not used anymore */ 559 info->ram->skipped = 0; 560 info->ram->normal = ram_counters.normal; 561 info->ram->normal_bytes = ram_counters.normal * 562 qemu_target_page_size(); 563 info->ram->mbps = s->mbps; 564 info->ram->dirty_sync_count = ram_counters.dirty_sync_count; 565 info->ram->postcopy_requests = ram_counters.postcopy_requests; 566 info->ram->page_size = qemu_target_page_size(); 567 568 if (migrate_use_xbzrle()) { 569 info->has_xbzrle_cache = true; 570 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); 571 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); 572 info->xbzrle_cache->bytes = xbzrle_counters.bytes; 573 info->xbzrle_cache->pages = xbzrle_counters.pages; 574 info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss; 575 info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate; 576 info->xbzrle_cache->overflow = xbzrle_counters.overflow; 577 } 578 579 if (cpu_throttle_active()) { 580 info->has_cpu_throttle_percentage = true; 581 info->cpu_throttle_percentage = cpu_throttle_get_percentage(); 582 } 583 584 if (s->state != MIGRATION_STATUS_COMPLETED) { 585 info->ram->remaining = ram_bytes_remaining(); 586 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate; 587 } 588 } 589 590 static void populate_disk_info(MigrationInfo *info) 591 { 592 if (blk_mig_active()) { 593 info->has_disk = true; 594 info->disk = g_malloc0(sizeof(*info->disk)); 595 info->disk->transferred = blk_mig_bytes_transferred(); 596 info->disk->remaining = blk_mig_bytes_remaining(); 597 info->disk->total = blk_mig_bytes_total(); 598 } 599 } 600 601 MigrationInfo *qmp_query_migrate(Error **errp) 602 { 603 MigrationInfo *info = g_malloc0(sizeof(*info)); 604 MigrationState *s = migrate_get_current(); 605 606 switch (s->state) { 607 case MIGRATION_STATUS_NONE: 608 /* no migration has happened ever */ 609 break; 610 case MIGRATION_STATUS_SETUP: 611 info->has_status = true; 612 info->has_total_time = false; 613 break; 614 case MIGRATION_STATUS_ACTIVE: 615 case MIGRATION_STATUS_CANCELLING: 616 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 617 case MIGRATION_STATUS_PRE_SWITCHOVER: 618 case MIGRATION_STATUS_DEVICE: 619 /* TODO add some postcopy stats */ 620 info->has_status = true; 621 info->has_total_time = true; 622 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) 623 - s->start_time; 624 info->has_expected_downtime = true; 625 info->expected_downtime = s->expected_downtime; 626 info->has_setup_time = true; 627 info->setup_time = s->setup_time; 628 629 populate_ram_info(info, s); 630 populate_disk_info(info); 631 break; 632 case MIGRATION_STATUS_COLO: 633 info->has_status = true; 634 /* TODO: display COLO specific information (checkpoint info etc.) */ 635 break; 636 case MIGRATION_STATUS_COMPLETED: 637 info->has_status = true; 638 info->has_total_time = true; 639 info->total_time = s->total_time; 640 info->has_downtime = true; 641 info->downtime = s->downtime; 642 info->has_setup_time = true; 643 info->setup_time = s->setup_time; 644 645 populate_ram_info(info, s); 646 break; 647 case MIGRATION_STATUS_FAILED: 648 info->has_status = true; 649 if (s->error) { 650 info->has_error_desc = true; 651 info->error_desc = g_strdup(error_get_pretty(s->error)); 652 } 653 break; 654 case MIGRATION_STATUS_CANCELLED: 655 info->has_status = true; 656 break; 657 } 658 info->status = s->state; 659 660 return info; 661 } 662 663 /** 664 * @migration_caps_check - check capability validity 665 * 666 * @cap_list: old capability list, array of bool 667 * @params: new capabilities to be applied soon 668 * @errp: set *errp if the check failed, with reason 669 * 670 * Returns true if check passed, otherwise false. 671 */ 672 static bool migrate_caps_check(bool *cap_list, 673 MigrationCapabilityStatusList *params, 674 Error **errp) 675 { 676 MigrationCapabilityStatusList *cap; 677 bool old_postcopy_cap; 678 MigrationIncomingState *mis = migration_incoming_get_current(); 679 680 old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 681 682 for (cap = params; cap; cap = cap->next) { 683 cap_list[cap->value->capability] = cap->value->state; 684 } 685 686 #ifndef CONFIG_LIVE_BLOCK_MIGRATION 687 if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { 688 error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " 689 "block migration"); 690 error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); 691 return false; 692 } 693 #endif 694 695 if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { 696 if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { 697 /* The decompression threads asynchronously write into RAM 698 * rather than use the atomic copies needed to avoid 699 * userfaulting. It should be possible to fix the decompression 700 * threads for compatibility in future. 701 */ 702 error_setg(errp, "Postcopy is not currently compatible " 703 "with compression"); 704 return false; 705 } 706 707 /* This check is reasonably expensive, so only when it's being 708 * set the first time, also it's only the destination that needs 709 * special support. 710 */ 711 if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && 712 !postcopy_ram_supported_by_host(mis)) { 713 /* postcopy_ram_supported_by_host will have emitted a more 714 * detailed message 715 */ 716 error_setg(errp, "Postcopy is not supported"); 717 return false; 718 } 719 } 720 721 return true; 722 } 723 724 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, 725 Error **errp) 726 { 727 MigrationState *s = migrate_get_current(); 728 MigrationCapabilityStatusList *cap; 729 730 if (migration_is_setup_or_active(s->state)) { 731 error_setg(errp, QERR_MIGRATION_ACTIVE); 732 return; 733 } 734 735 if (!migrate_caps_check(s->enabled_capabilities, params, errp)) { 736 return; 737 } 738 739 for (cap = params; cap; cap = cap->next) { 740 s->enabled_capabilities[cap->value->capability] = cap->value->state; 741 } 742 } 743 744 /* 745 * Check whether the parameters are valid. Error will be put into errp 746 * (if provided). Return true if valid, otherwise false. 747 */ 748 static bool migrate_params_check(MigrationParameters *params, Error **errp) 749 { 750 if (params->has_compress_level && 751 (params->compress_level > 9)) { 752 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", 753 "is invalid, it should be in the range of 0 to 9"); 754 return false; 755 } 756 757 if (params->has_compress_threads && (params->compress_threads < 1)) { 758 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 759 "compress_threads", 760 "is invalid, it should be in the range of 1 to 255"); 761 return false; 762 } 763 764 if (params->has_decompress_threads && (params->decompress_threads < 1)) { 765 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 766 "decompress_threads", 767 "is invalid, it should be in the range of 1 to 255"); 768 return false; 769 } 770 771 if (params->has_cpu_throttle_initial && 772 (params->cpu_throttle_initial < 1 || 773 params->cpu_throttle_initial > 99)) { 774 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 775 "cpu_throttle_initial", 776 "an integer in the range of 1 to 99"); 777 return false; 778 } 779 780 if (params->has_cpu_throttle_increment && 781 (params->cpu_throttle_increment < 1 || 782 params->cpu_throttle_increment > 99)) { 783 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 784 "cpu_throttle_increment", 785 "an integer in the range of 1 to 99"); 786 return false; 787 } 788 789 if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { 790 error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the" 791 " range of 0 to %zu bytes/second", SIZE_MAX); 792 return false; 793 } 794 795 if (params->has_downtime_limit && 796 (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { 797 error_setg(errp, "Parameter 'downtime_limit' expects an integer in " 798 "the range of 0 to %d milliseconds", 799 MAX_MIGRATE_DOWNTIME); 800 return false; 801 } 802 803 /* x_checkpoint_delay is now always positive */ 804 805 if (params->has_x_multifd_channels && (params->x_multifd_channels < 1)) { 806 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 807 "multifd_channels", 808 "is invalid, it should be in the range of 1 to 255"); 809 return false; 810 } 811 if (params->has_x_multifd_page_count && 812 (params->x_multifd_page_count < 1 || 813 params->x_multifd_page_count > 10000)) { 814 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 815 "multifd_page_count", 816 "is invalid, it should be in the range of 1 to 10000"); 817 return false; 818 } 819 820 if (params->has_xbzrle_cache_size && 821 (params->xbzrle_cache_size < qemu_target_page_size() || 822 !is_power_of_2(params->xbzrle_cache_size))) { 823 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 824 "xbzrle_cache_size", 825 "is invalid, it should be bigger than target page size" 826 " and a power of two"); 827 return false; 828 } 829 830 return true; 831 } 832 833 static void migrate_params_test_apply(MigrateSetParameters *params, 834 MigrationParameters *dest) 835 { 836 *dest = migrate_get_current()->parameters; 837 838 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 839 840 if (params->has_compress_level) { 841 dest->compress_level = params->compress_level; 842 } 843 844 if (params->has_compress_threads) { 845 dest->compress_threads = params->compress_threads; 846 } 847 848 if (params->has_decompress_threads) { 849 dest->decompress_threads = params->decompress_threads; 850 } 851 852 if (params->has_cpu_throttle_initial) { 853 dest->cpu_throttle_initial = params->cpu_throttle_initial; 854 } 855 856 if (params->has_cpu_throttle_increment) { 857 dest->cpu_throttle_increment = params->cpu_throttle_increment; 858 } 859 860 if (params->has_tls_creds) { 861 assert(params->tls_creds->type == QTYPE_QSTRING); 862 dest->tls_creds = g_strdup(params->tls_creds->u.s); 863 } 864 865 if (params->has_tls_hostname) { 866 assert(params->tls_hostname->type == QTYPE_QSTRING); 867 dest->tls_hostname = g_strdup(params->tls_hostname->u.s); 868 } 869 870 if (params->has_max_bandwidth) { 871 dest->max_bandwidth = params->max_bandwidth; 872 } 873 874 if (params->has_downtime_limit) { 875 dest->downtime_limit = params->downtime_limit; 876 } 877 878 if (params->has_x_checkpoint_delay) { 879 dest->x_checkpoint_delay = params->x_checkpoint_delay; 880 } 881 882 if (params->has_block_incremental) { 883 dest->block_incremental = params->block_incremental; 884 } 885 if (params->has_x_multifd_channels) { 886 dest->x_multifd_channels = params->x_multifd_channels; 887 } 888 if (params->has_x_multifd_page_count) { 889 dest->x_multifd_page_count = params->x_multifd_page_count; 890 } 891 if (params->has_xbzrle_cache_size) { 892 dest->xbzrle_cache_size = params->xbzrle_cache_size; 893 } 894 } 895 896 static void migrate_params_apply(MigrateSetParameters *params, Error **errp) 897 { 898 MigrationState *s = migrate_get_current(); 899 900 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 901 902 if (params->has_compress_level) { 903 s->parameters.compress_level = params->compress_level; 904 } 905 906 if (params->has_compress_threads) { 907 s->parameters.compress_threads = params->compress_threads; 908 } 909 910 if (params->has_decompress_threads) { 911 s->parameters.decompress_threads = params->decompress_threads; 912 } 913 914 if (params->has_cpu_throttle_initial) { 915 s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; 916 } 917 918 if (params->has_cpu_throttle_increment) { 919 s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; 920 } 921 922 if (params->has_tls_creds) { 923 g_free(s->parameters.tls_creds); 924 assert(params->tls_creds->type == QTYPE_QSTRING); 925 s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); 926 } 927 928 if (params->has_tls_hostname) { 929 g_free(s->parameters.tls_hostname); 930 assert(params->tls_hostname->type == QTYPE_QSTRING); 931 s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); 932 } 933 934 if (params->has_max_bandwidth) { 935 s->parameters.max_bandwidth = params->max_bandwidth; 936 if (s->to_dst_file) { 937 qemu_file_set_rate_limit(s->to_dst_file, 938 s->parameters.max_bandwidth / XFER_LIMIT_RATIO); 939 } 940 } 941 942 if (params->has_downtime_limit) { 943 s->parameters.downtime_limit = params->downtime_limit; 944 } 945 946 if (params->has_x_checkpoint_delay) { 947 s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; 948 if (migration_in_colo_state()) { 949 colo_checkpoint_notify(s); 950 } 951 } 952 953 if (params->has_block_incremental) { 954 s->parameters.block_incremental = params->block_incremental; 955 } 956 if (params->has_x_multifd_channels) { 957 s->parameters.x_multifd_channels = params->x_multifd_channels; 958 } 959 if (params->has_x_multifd_page_count) { 960 s->parameters.x_multifd_page_count = params->x_multifd_page_count; 961 } 962 if (params->has_xbzrle_cache_size) { 963 s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; 964 xbzrle_cache_resize(params->xbzrle_cache_size, errp); 965 } 966 } 967 968 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) 969 { 970 MigrationParameters tmp; 971 972 /* TODO Rewrite "" to null instead */ 973 if (params->has_tls_creds 974 && params->tls_creds->type == QTYPE_QNULL) { 975 QDECREF(params->tls_creds->u.n); 976 params->tls_creds->type = QTYPE_QSTRING; 977 params->tls_creds->u.s = strdup(""); 978 } 979 /* TODO Rewrite "" to null instead */ 980 if (params->has_tls_hostname 981 && params->tls_hostname->type == QTYPE_QNULL) { 982 QDECREF(params->tls_hostname->u.n); 983 params->tls_hostname->type = QTYPE_QSTRING; 984 params->tls_hostname->u.s = strdup(""); 985 } 986 987 migrate_params_test_apply(params, &tmp); 988 989 if (!migrate_params_check(&tmp, errp)) { 990 /* Invalid parameter */ 991 return; 992 } 993 994 migrate_params_apply(params, errp); 995 } 996 997 998 void qmp_migrate_start_postcopy(Error **errp) 999 { 1000 MigrationState *s = migrate_get_current(); 1001 1002 if (!migrate_postcopy_ram()) { 1003 error_setg(errp, "Enable postcopy with migrate_set_capability before" 1004 " the start of migration"); 1005 return; 1006 } 1007 1008 if (s->state == MIGRATION_STATUS_NONE) { 1009 error_setg(errp, "Postcopy must be started after migration has been" 1010 " started"); 1011 return; 1012 } 1013 /* 1014 * we don't error if migration has finished since that would be racy 1015 * with issuing this command. 1016 */ 1017 atomic_set(&s->start_postcopy, true); 1018 } 1019 1020 /* shared migration helpers */ 1021 1022 void migrate_set_state(int *state, int old_state, int new_state) 1023 { 1024 assert(new_state < MIGRATION_STATUS__MAX); 1025 if (atomic_cmpxchg(state, old_state, new_state) == old_state) { 1026 trace_migrate_set_state(MigrationStatus_str(new_state)); 1027 migrate_generate_event(new_state); 1028 } 1029 } 1030 1031 static MigrationCapabilityStatusList *migrate_cap_add( 1032 MigrationCapabilityStatusList *list, 1033 MigrationCapability index, 1034 bool state) 1035 { 1036 MigrationCapabilityStatusList *cap; 1037 1038 cap = g_new0(MigrationCapabilityStatusList, 1); 1039 cap->value = g_new0(MigrationCapabilityStatus, 1); 1040 cap->value->capability = index; 1041 cap->value->state = state; 1042 cap->next = list; 1043 1044 return cap; 1045 } 1046 1047 void migrate_set_block_enabled(bool value, Error **errp) 1048 { 1049 MigrationCapabilityStatusList *cap; 1050 1051 cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value); 1052 qmp_migrate_set_capabilities(cap, errp); 1053 qapi_free_MigrationCapabilityStatusList(cap); 1054 } 1055 1056 static void migrate_set_block_incremental(MigrationState *s, bool value) 1057 { 1058 s->parameters.block_incremental = value; 1059 } 1060 1061 static void block_cleanup_parameters(MigrationState *s) 1062 { 1063 if (s->must_remove_block_options) { 1064 /* setting to false can never fail */ 1065 migrate_set_block_enabled(false, &error_abort); 1066 migrate_set_block_incremental(s, false); 1067 s->must_remove_block_options = false; 1068 } 1069 } 1070 1071 static void migrate_fd_cleanup(void *opaque) 1072 { 1073 MigrationState *s = opaque; 1074 1075 qemu_bh_delete(s->cleanup_bh); 1076 s->cleanup_bh = NULL; 1077 1078 qemu_savevm_state_cleanup(); 1079 1080 if (s->to_dst_file) { 1081 Error *local_err = NULL; 1082 1083 trace_migrate_fd_cleanup(); 1084 qemu_mutex_unlock_iothread(); 1085 if (s->migration_thread_running) { 1086 qemu_thread_join(&s->thread); 1087 s->migration_thread_running = false; 1088 } 1089 qemu_mutex_lock_iothread(); 1090 1091 if (multifd_save_cleanup(&local_err) != 0) { 1092 error_report_err(local_err); 1093 } 1094 qemu_fclose(s->to_dst_file); 1095 s->to_dst_file = NULL; 1096 } 1097 1098 assert((s->state != MIGRATION_STATUS_ACTIVE) && 1099 (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); 1100 1101 if (s->state == MIGRATION_STATUS_CANCELLING) { 1102 migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, 1103 MIGRATION_STATUS_CANCELLED); 1104 } 1105 1106 if (s->error) { 1107 /* It is used on info migrate. We can't free it */ 1108 error_report_err(error_copy(s->error)); 1109 } 1110 notifier_list_notify(&migration_state_notifiers, s); 1111 block_cleanup_parameters(s); 1112 } 1113 1114 void migrate_set_error(MigrationState *s, const Error *error) 1115 { 1116 qemu_mutex_lock(&s->error_mutex); 1117 if (!s->error) { 1118 s->error = error_copy(error); 1119 } 1120 qemu_mutex_unlock(&s->error_mutex); 1121 } 1122 1123 void migrate_fd_error(MigrationState *s, const Error *error) 1124 { 1125 trace_migrate_fd_error(error_get_pretty(error)); 1126 assert(s->to_dst_file == NULL); 1127 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1128 MIGRATION_STATUS_FAILED); 1129 migrate_set_error(s, error); 1130 } 1131 1132 static void migrate_fd_cancel(MigrationState *s) 1133 { 1134 int old_state ; 1135 QEMUFile *f = migrate_get_current()->to_dst_file; 1136 trace_migrate_fd_cancel(); 1137 1138 if (s->rp_state.from_dst_file) { 1139 /* shutdown the rp socket, so causing the rp thread to shutdown */ 1140 qemu_file_shutdown(s->rp_state.from_dst_file); 1141 } 1142 1143 do { 1144 old_state = s->state; 1145 if (!migration_is_setup_or_active(old_state)) { 1146 break; 1147 } 1148 /* If the migration is paused, kick it out of the pause */ 1149 if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { 1150 qemu_sem_post(&s->pause_sem); 1151 } 1152 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); 1153 } while (s->state != MIGRATION_STATUS_CANCELLING); 1154 1155 /* 1156 * If we're unlucky the migration code might be stuck somewhere in a 1157 * send/write while the network has failed and is waiting to timeout; 1158 * if we've got shutdown(2) available then we can force it to quit. 1159 * The outgoing qemu file gets closed in migrate_fd_cleanup that is 1160 * called in a bh, so there is no race against this cancel. 1161 */ 1162 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 1163 qemu_file_shutdown(f); 1164 } 1165 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1166 Error *local_err = NULL; 1167 1168 bdrv_invalidate_cache_all(&local_err); 1169 if (local_err) { 1170 error_report_err(local_err); 1171 } else { 1172 s->block_inactive = false; 1173 } 1174 } 1175 } 1176 1177 void add_migration_state_change_notifier(Notifier *notify) 1178 { 1179 notifier_list_add(&migration_state_notifiers, notify); 1180 } 1181 1182 void remove_migration_state_change_notifier(Notifier *notify) 1183 { 1184 notifier_remove(notify); 1185 } 1186 1187 bool migration_in_setup(MigrationState *s) 1188 { 1189 return s->state == MIGRATION_STATUS_SETUP; 1190 } 1191 1192 bool migration_has_finished(MigrationState *s) 1193 { 1194 return s->state == MIGRATION_STATUS_COMPLETED; 1195 } 1196 1197 bool migration_has_failed(MigrationState *s) 1198 { 1199 return (s->state == MIGRATION_STATUS_CANCELLED || 1200 s->state == MIGRATION_STATUS_FAILED); 1201 } 1202 1203 bool migration_in_postcopy(void) 1204 { 1205 MigrationState *s = migrate_get_current(); 1206 1207 return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 1208 } 1209 1210 bool migration_in_postcopy_after_devices(MigrationState *s) 1211 { 1212 return migration_in_postcopy() && s->postcopy_after_devices; 1213 } 1214 1215 bool migration_is_idle(void) 1216 { 1217 MigrationState *s = migrate_get_current(); 1218 1219 switch (s->state) { 1220 case MIGRATION_STATUS_NONE: 1221 case MIGRATION_STATUS_CANCELLED: 1222 case MIGRATION_STATUS_COMPLETED: 1223 case MIGRATION_STATUS_FAILED: 1224 return true; 1225 case MIGRATION_STATUS_SETUP: 1226 case MIGRATION_STATUS_CANCELLING: 1227 case MIGRATION_STATUS_ACTIVE: 1228 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1229 case MIGRATION_STATUS_COLO: 1230 case MIGRATION_STATUS_PRE_SWITCHOVER: 1231 case MIGRATION_STATUS_DEVICE: 1232 return false; 1233 case MIGRATION_STATUS__MAX: 1234 g_assert_not_reached(); 1235 } 1236 1237 return false; 1238 } 1239 1240 MigrationState *migrate_init(void) 1241 { 1242 MigrationState *s = migrate_get_current(); 1243 1244 /* 1245 * Reinitialise all migration state, except 1246 * parameters/capabilities that the user set, and 1247 * locks. 1248 */ 1249 s->bytes_xfer = 0; 1250 s->xfer_limit = 0; 1251 s->cleanup_bh = 0; 1252 s->to_dst_file = NULL; 1253 s->state = MIGRATION_STATUS_NONE; 1254 s->rp_state.from_dst_file = NULL; 1255 s->rp_state.error = false; 1256 s->mbps = 0.0; 1257 s->downtime = 0; 1258 s->expected_downtime = 0; 1259 s->setup_time = 0; 1260 s->start_postcopy = false; 1261 s->postcopy_after_devices = false; 1262 s->migration_thread_running = false; 1263 error_free(s->error); 1264 s->error = NULL; 1265 1266 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 1267 1268 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1269 s->total_time = 0; 1270 s->vm_was_running = false; 1271 s->iteration_initial_bytes = 0; 1272 s->threshold_size = 0; 1273 return s; 1274 } 1275 1276 static GSList *migration_blockers; 1277 1278 int migrate_add_blocker(Error *reason, Error **errp) 1279 { 1280 if (migrate_get_current()->only_migratable) { 1281 error_propagate(errp, error_copy(reason)); 1282 error_prepend(errp, "disallowing migration blocker " 1283 "(--only_migratable) for: "); 1284 return -EACCES; 1285 } 1286 1287 if (migration_is_idle()) { 1288 migration_blockers = g_slist_prepend(migration_blockers, reason); 1289 return 0; 1290 } 1291 1292 error_propagate(errp, error_copy(reason)); 1293 error_prepend(errp, "disallowing migration blocker (migration in " 1294 "progress) for: "); 1295 return -EBUSY; 1296 } 1297 1298 void migrate_del_blocker(Error *reason) 1299 { 1300 migration_blockers = g_slist_remove(migration_blockers, reason); 1301 } 1302 1303 void qmp_migrate_incoming(const char *uri, Error **errp) 1304 { 1305 Error *local_err = NULL; 1306 static bool once = true; 1307 1308 if (!deferred_incoming) { 1309 error_setg(errp, "For use with '-incoming defer'"); 1310 return; 1311 } 1312 if (!once) { 1313 error_setg(errp, "The incoming migration has already been started"); 1314 } 1315 1316 qemu_start_incoming_migration(uri, &local_err); 1317 1318 if (local_err) { 1319 error_propagate(errp, local_err); 1320 return; 1321 } 1322 1323 once = false; 1324 } 1325 1326 bool migration_is_blocked(Error **errp) 1327 { 1328 if (qemu_savevm_state_blocked(errp)) { 1329 return true; 1330 } 1331 1332 if (migration_blockers) { 1333 error_propagate(errp, error_copy(migration_blockers->data)); 1334 return true; 1335 } 1336 1337 return false; 1338 } 1339 1340 void qmp_migrate(const char *uri, bool has_blk, bool blk, 1341 bool has_inc, bool inc, bool has_detach, bool detach, 1342 Error **errp) 1343 { 1344 Error *local_err = NULL; 1345 MigrationState *s = migrate_get_current(); 1346 const char *p; 1347 1348 if (migration_is_setup_or_active(s->state) || 1349 s->state == MIGRATION_STATUS_CANCELLING || 1350 s->state == MIGRATION_STATUS_COLO) { 1351 error_setg(errp, QERR_MIGRATION_ACTIVE); 1352 return; 1353 } 1354 if (runstate_check(RUN_STATE_INMIGRATE)) { 1355 error_setg(errp, "Guest is waiting for an incoming migration"); 1356 return; 1357 } 1358 1359 if (migration_is_blocked(errp)) { 1360 return; 1361 } 1362 1363 if ((has_blk && blk) || (has_inc && inc)) { 1364 if (migrate_use_block() || migrate_use_block_incremental()) { 1365 error_setg(errp, "Command options are incompatible with " 1366 "current migration capabilities"); 1367 return; 1368 } 1369 migrate_set_block_enabled(true, &local_err); 1370 if (local_err) { 1371 error_propagate(errp, local_err); 1372 return; 1373 } 1374 s->must_remove_block_options = true; 1375 } 1376 1377 if (has_inc && inc) { 1378 migrate_set_block_incremental(s, true); 1379 } 1380 1381 s = migrate_init(); 1382 1383 if (strstart(uri, "tcp:", &p)) { 1384 tcp_start_outgoing_migration(s, p, &local_err); 1385 #ifdef CONFIG_RDMA 1386 } else if (strstart(uri, "rdma:", &p)) { 1387 rdma_start_outgoing_migration(s, p, &local_err); 1388 #endif 1389 } else if (strstart(uri, "exec:", &p)) { 1390 exec_start_outgoing_migration(s, p, &local_err); 1391 } else if (strstart(uri, "unix:", &p)) { 1392 unix_start_outgoing_migration(s, p, &local_err); 1393 } else if (strstart(uri, "fd:", &p)) { 1394 fd_start_outgoing_migration(s, p, &local_err); 1395 } else { 1396 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 1397 "a valid migration protocol"); 1398 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1399 MIGRATION_STATUS_FAILED); 1400 return; 1401 } 1402 1403 if (local_err) { 1404 migrate_fd_error(s, local_err); 1405 error_propagate(errp, local_err); 1406 return; 1407 } 1408 } 1409 1410 void qmp_migrate_cancel(Error **errp) 1411 { 1412 migrate_fd_cancel(migrate_get_current()); 1413 } 1414 1415 void qmp_migrate_continue(MigrationStatus state, Error **errp) 1416 { 1417 MigrationState *s = migrate_get_current(); 1418 if (s->state != state) { 1419 error_setg(errp, "Migration not in expected state: %s", 1420 MigrationStatus_str(s->state)); 1421 return; 1422 } 1423 qemu_sem_post(&s->pause_sem); 1424 } 1425 1426 void qmp_migrate_set_cache_size(int64_t value, Error **errp) 1427 { 1428 MigrateSetParameters p = { 1429 .has_xbzrle_cache_size = true, 1430 .xbzrle_cache_size = value, 1431 }; 1432 1433 qmp_migrate_set_parameters(&p, errp); 1434 } 1435 1436 int64_t qmp_query_migrate_cache_size(Error **errp) 1437 { 1438 return migrate_xbzrle_cache_size(); 1439 } 1440 1441 void qmp_migrate_set_speed(int64_t value, Error **errp) 1442 { 1443 MigrateSetParameters p = { 1444 .has_max_bandwidth = true, 1445 .max_bandwidth = value, 1446 }; 1447 1448 qmp_migrate_set_parameters(&p, errp); 1449 } 1450 1451 void qmp_migrate_set_downtime(double value, Error **errp) 1452 { 1453 if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { 1454 error_setg(errp, "Parameter 'downtime_limit' expects an integer in " 1455 "the range of 0 to %d seconds", 1456 MAX_MIGRATE_DOWNTIME_SECONDS); 1457 return; 1458 } 1459 1460 value *= 1000; /* Convert to milliseconds */ 1461 value = MAX(0, MIN(INT64_MAX, value)); 1462 1463 MigrateSetParameters p = { 1464 .has_downtime_limit = true, 1465 .downtime_limit = value, 1466 }; 1467 1468 qmp_migrate_set_parameters(&p, errp); 1469 } 1470 1471 bool migrate_release_ram(void) 1472 { 1473 MigrationState *s; 1474 1475 s = migrate_get_current(); 1476 1477 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 1478 } 1479 1480 bool migrate_postcopy_ram(void) 1481 { 1482 MigrationState *s; 1483 1484 s = migrate_get_current(); 1485 1486 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 1487 } 1488 1489 bool migrate_postcopy(void) 1490 { 1491 return migrate_postcopy_ram(); 1492 } 1493 1494 bool migrate_auto_converge(void) 1495 { 1496 MigrationState *s; 1497 1498 s = migrate_get_current(); 1499 1500 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 1501 } 1502 1503 bool migrate_zero_blocks(void) 1504 { 1505 MigrationState *s; 1506 1507 s = migrate_get_current(); 1508 1509 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 1510 } 1511 1512 bool migrate_use_compression(void) 1513 { 1514 MigrationState *s; 1515 1516 s = migrate_get_current(); 1517 1518 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 1519 } 1520 1521 int migrate_compress_level(void) 1522 { 1523 MigrationState *s; 1524 1525 s = migrate_get_current(); 1526 1527 return s->parameters.compress_level; 1528 } 1529 1530 int migrate_compress_threads(void) 1531 { 1532 MigrationState *s; 1533 1534 s = migrate_get_current(); 1535 1536 return s->parameters.compress_threads; 1537 } 1538 1539 int migrate_decompress_threads(void) 1540 { 1541 MigrationState *s; 1542 1543 s = migrate_get_current(); 1544 1545 return s->parameters.decompress_threads; 1546 } 1547 1548 bool migrate_use_events(void) 1549 { 1550 MigrationState *s; 1551 1552 s = migrate_get_current(); 1553 1554 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 1555 } 1556 1557 bool migrate_use_multifd(void) 1558 { 1559 MigrationState *s; 1560 1561 s = migrate_get_current(); 1562 1563 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD]; 1564 } 1565 1566 bool migrate_pause_before_switchover(void) 1567 { 1568 MigrationState *s; 1569 1570 s = migrate_get_current(); 1571 1572 return s->enabled_capabilities[ 1573 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 1574 } 1575 1576 int migrate_multifd_channels(void) 1577 { 1578 MigrationState *s; 1579 1580 s = migrate_get_current(); 1581 1582 return s->parameters.x_multifd_channels; 1583 } 1584 1585 int migrate_multifd_page_count(void) 1586 { 1587 MigrationState *s; 1588 1589 s = migrate_get_current(); 1590 1591 return s->parameters.x_multifd_page_count; 1592 } 1593 1594 int migrate_use_xbzrle(void) 1595 { 1596 MigrationState *s; 1597 1598 s = migrate_get_current(); 1599 1600 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 1601 } 1602 1603 int64_t migrate_xbzrle_cache_size(void) 1604 { 1605 MigrationState *s; 1606 1607 s = migrate_get_current(); 1608 1609 return s->parameters.xbzrle_cache_size; 1610 } 1611 1612 bool migrate_use_block(void) 1613 { 1614 MigrationState *s; 1615 1616 s = migrate_get_current(); 1617 1618 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 1619 } 1620 1621 bool migrate_use_return_path(void) 1622 { 1623 MigrationState *s; 1624 1625 s = migrate_get_current(); 1626 1627 return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 1628 } 1629 1630 bool migrate_use_block_incremental(void) 1631 { 1632 MigrationState *s; 1633 1634 s = migrate_get_current(); 1635 1636 return s->parameters.block_incremental; 1637 } 1638 1639 /* migration thread support */ 1640 /* 1641 * Something bad happened to the RP stream, mark an error 1642 * The caller shall print or trace something to indicate why 1643 */ 1644 static void mark_source_rp_bad(MigrationState *s) 1645 { 1646 s->rp_state.error = true; 1647 } 1648 1649 static struct rp_cmd_args { 1650 ssize_t len; /* -1 = variable */ 1651 const char *name; 1652 } rp_cmd_args[] = { 1653 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 1654 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 1655 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 1656 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 1657 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 1658 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 1659 }; 1660 1661 /* 1662 * Process a request for pages received on the return path, 1663 * We're allowed to send more than requested (e.g. to round to our page size) 1664 * and we don't need to send pages that have already been sent. 1665 */ 1666 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 1667 ram_addr_t start, size_t len) 1668 { 1669 long our_host_ps = getpagesize(); 1670 1671 trace_migrate_handle_rp_req_pages(rbname, start, len); 1672 1673 /* 1674 * Since we currently insist on matching page sizes, just sanity check 1675 * we're being asked for whole host pages. 1676 */ 1677 if (start & (our_host_ps-1) || 1678 (len & (our_host_ps-1))) { 1679 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 1680 " len: %zd", __func__, start, len); 1681 mark_source_rp_bad(ms); 1682 return; 1683 } 1684 1685 if (ram_save_queue_pages(rbname, start, len)) { 1686 mark_source_rp_bad(ms); 1687 } 1688 } 1689 1690 /* 1691 * Handles messages sent on the return path towards the source VM 1692 * 1693 */ 1694 static void *source_return_path_thread(void *opaque) 1695 { 1696 MigrationState *ms = opaque; 1697 QEMUFile *rp = ms->rp_state.from_dst_file; 1698 uint16_t header_len, header_type; 1699 uint8_t buf[512]; 1700 uint32_t tmp32, sibling_error; 1701 ram_addr_t start = 0; /* =0 to silence warning */ 1702 size_t len = 0, expected_len; 1703 int res; 1704 1705 trace_source_return_path_thread_entry(); 1706 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 1707 migration_is_setup_or_active(ms->state)) { 1708 trace_source_return_path_thread_loop_top(); 1709 header_type = qemu_get_be16(rp); 1710 header_len = qemu_get_be16(rp); 1711 1712 if (header_type >= MIG_RP_MSG_MAX || 1713 header_type == MIG_RP_MSG_INVALID) { 1714 error_report("RP: Received invalid message 0x%04x length 0x%04x", 1715 header_type, header_len); 1716 mark_source_rp_bad(ms); 1717 goto out; 1718 } 1719 1720 if ((rp_cmd_args[header_type].len != -1 && 1721 header_len != rp_cmd_args[header_type].len) || 1722 header_len > sizeof(buf)) { 1723 error_report("RP: Received '%s' message (0x%04x) with" 1724 "incorrect length %d expecting %zu", 1725 rp_cmd_args[header_type].name, header_type, header_len, 1726 (size_t)rp_cmd_args[header_type].len); 1727 mark_source_rp_bad(ms); 1728 goto out; 1729 } 1730 1731 /* We know we've got a valid header by this point */ 1732 res = qemu_get_buffer(rp, buf, header_len); 1733 if (res != header_len) { 1734 error_report("RP: Failed reading data for message 0x%04x" 1735 " read %d expected %d", 1736 header_type, res, header_len); 1737 mark_source_rp_bad(ms); 1738 goto out; 1739 } 1740 1741 /* OK, we have the message and the data */ 1742 switch (header_type) { 1743 case MIG_RP_MSG_SHUT: 1744 sibling_error = ldl_be_p(buf); 1745 trace_source_return_path_thread_shut(sibling_error); 1746 if (sibling_error) { 1747 error_report("RP: Sibling indicated error %d", sibling_error); 1748 mark_source_rp_bad(ms); 1749 } 1750 /* 1751 * We'll let the main thread deal with closing the RP 1752 * we could do a shutdown(2) on it, but we're the only user 1753 * anyway, so there's nothing gained. 1754 */ 1755 goto out; 1756 1757 case MIG_RP_MSG_PONG: 1758 tmp32 = ldl_be_p(buf); 1759 trace_source_return_path_thread_pong(tmp32); 1760 break; 1761 1762 case MIG_RP_MSG_REQ_PAGES: 1763 start = ldq_be_p(buf); 1764 len = ldl_be_p(buf + 8); 1765 migrate_handle_rp_req_pages(ms, NULL, start, len); 1766 break; 1767 1768 case MIG_RP_MSG_REQ_PAGES_ID: 1769 expected_len = 12 + 1; /* header + termination */ 1770 1771 if (header_len >= expected_len) { 1772 start = ldq_be_p(buf); 1773 len = ldl_be_p(buf + 8); 1774 /* Now we expect an idstr */ 1775 tmp32 = buf[12]; /* Length of the following idstr */ 1776 buf[13 + tmp32] = '\0'; 1777 expected_len += tmp32; 1778 } 1779 if (header_len != expected_len) { 1780 error_report("RP: Req_Page_id with length %d expecting %zd", 1781 header_len, expected_len); 1782 mark_source_rp_bad(ms); 1783 goto out; 1784 } 1785 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 1786 break; 1787 1788 default: 1789 break; 1790 } 1791 } 1792 if (qemu_file_get_error(rp)) { 1793 trace_source_return_path_thread_bad_end(); 1794 mark_source_rp_bad(ms); 1795 } 1796 1797 trace_source_return_path_thread_end(); 1798 out: 1799 ms->rp_state.from_dst_file = NULL; 1800 qemu_fclose(rp); 1801 return NULL; 1802 } 1803 1804 static int open_return_path_on_source(MigrationState *ms) 1805 { 1806 1807 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 1808 if (!ms->rp_state.from_dst_file) { 1809 return -1; 1810 } 1811 1812 trace_open_return_path_on_source(); 1813 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 1814 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 1815 1816 trace_open_return_path_on_source_continue(); 1817 1818 return 0; 1819 } 1820 1821 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 1822 static int await_return_path_close_on_source(MigrationState *ms) 1823 { 1824 /* 1825 * If this is a normal exit then the destination will send a SHUT and the 1826 * rp_thread will exit, however if there's an error we need to cause 1827 * it to exit. 1828 */ 1829 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 1830 /* 1831 * shutdown(2), if we have it, will cause it to unblock if it's stuck 1832 * waiting for the destination. 1833 */ 1834 qemu_file_shutdown(ms->rp_state.from_dst_file); 1835 mark_source_rp_bad(ms); 1836 } 1837 trace_await_return_path_close_on_source_joining(); 1838 qemu_thread_join(&ms->rp_state.rp_thread); 1839 trace_await_return_path_close_on_source_close(); 1840 return ms->rp_state.error; 1841 } 1842 1843 /* 1844 * Switch from normal iteration to postcopy 1845 * Returns non-0 on error 1846 */ 1847 static int postcopy_start(MigrationState *ms) 1848 { 1849 int ret; 1850 QIOChannelBuffer *bioc; 1851 QEMUFile *fb; 1852 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1853 bool restart_block = false; 1854 int cur_state = MIGRATION_STATUS_ACTIVE; 1855 if (!migrate_pause_before_switchover()) { 1856 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 1857 MIGRATION_STATUS_POSTCOPY_ACTIVE); 1858 } 1859 1860 trace_postcopy_start(); 1861 qemu_mutex_lock_iothread(); 1862 trace_postcopy_start_set_run(); 1863 1864 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); 1865 global_state_store(); 1866 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 1867 if (ret < 0) { 1868 goto fail; 1869 } 1870 1871 ret = migration_maybe_pause(ms, &cur_state, 1872 MIGRATION_STATUS_POSTCOPY_ACTIVE); 1873 if (ret < 0) { 1874 goto fail; 1875 } 1876 1877 ret = bdrv_inactivate_all(); 1878 if (ret < 0) { 1879 goto fail; 1880 } 1881 restart_block = true; 1882 1883 /* 1884 * Cause any non-postcopiable, but iterative devices to 1885 * send out their final data. 1886 */ 1887 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 1888 1889 /* 1890 * in Finish migrate and with the io-lock held everything should 1891 * be quiet, but we've potentially still got dirty pages and we 1892 * need to tell the destination to throw any pages it's already received 1893 * that are dirty 1894 */ 1895 if (migrate_postcopy_ram()) { 1896 if (ram_postcopy_send_discard_bitmap(ms)) { 1897 error_report("postcopy send discard bitmap failed"); 1898 goto fail; 1899 } 1900 } 1901 1902 /* 1903 * send rest of state - note things that are doing postcopy 1904 * will notice we're in POSTCOPY_ACTIVE and not actually 1905 * wrap their state up here 1906 */ 1907 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 1908 if (migrate_postcopy_ram()) { 1909 /* Ping just for debugging, helps line traces up */ 1910 qemu_savevm_send_ping(ms->to_dst_file, 2); 1911 } 1912 1913 /* 1914 * While loading the device state we may trigger page transfer 1915 * requests and the fd must be free to process those, and thus 1916 * the destination must read the whole device state off the fd before 1917 * it starts processing it. Unfortunately the ad-hoc migration format 1918 * doesn't allow the destination to know the size to read without fully 1919 * parsing it through each devices load-state code (especially the open 1920 * coded devices that use get/put). 1921 * So we wrap the device state up in a package with a length at the start; 1922 * to do this we use a qemu_buf to hold the whole of the device state. 1923 */ 1924 bioc = qio_channel_buffer_new(4096); 1925 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 1926 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 1927 object_unref(OBJECT(bioc)); 1928 1929 /* 1930 * Make sure the receiver can get incoming pages before we send the rest 1931 * of the state 1932 */ 1933 qemu_savevm_send_postcopy_listen(fb); 1934 1935 qemu_savevm_state_complete_precopy(fb, false, false); 1936 if (migrate_postcopy_ram()) { 1937 qemu_savevm_send_ping(fb, 3); 1938 } 1939 1940 qemu_savevm_send_postcopy_run(fb); 1941 1942 /* <><> end of stuff going into the package */ 1943 1944 /* Last point of recovery; as soon as we send the package the destination 1945 * can open devices and potentially start running. 1946 * Lets just check again we've not got any errors. 1947 */ 1948 ret = qemu_file_get_error(ms->to_dst_file); 1949 if (ret) { 1950 error_report("postcopy_start: Migration stream errored (pre package)"); 1951 goto fail_closefb; 1952 } 1953 1954 restart_block = false; 1955 1956 /* Now send that blob */ 1957 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 1958 goto fail_closefb; 1959 } 1960 qemu_fclose(fb); 1961 1962 /* Send a notify to give a chance for anything that needs to happen 1963 * at the transition to postcopy and after the device state; in particular 1964 * spice needs to trigger a transition now 1965 */ 1966 ms->postcopy_after_devices = true; 1967 notifier_list_notify(&migration_state_notifiers, ms); 1968 1969 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 1970 1971 qemu_mutex_unlock_iothread(); 1972 1973 if (migrate_postcopy_ram()) { 1974 /* 1975 * Although this ping is just for debug, it could potentially be 1976 * used for getting a better measurement of downtime at the source. 1977 */ 1978 qemu_savevm_send_ping(ms->to_dst_file, 4); 1979 } 1980 1981 if (migrate_release_ram()) { 1982 ram_postcopy_migrated_memory_release(ms); 1983 } 1984 1985 ret = qemu_file_get_error(ms->to_dst_file); 1986 if (ret) { 1987 error_report("postcopy_start: Migration stream errored"); 1988 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 1989 MIGRATION_STATUS_FAILED); 1990 } 1991 1992 return ret; 1993 1994 fail_closefb: 1995 qemu_fclose(fb); 1996 fail: 1997 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 1998 MIGRATION_STATUS_FAILED); 1999 if (restart_block) { 2000 /* A failure happened early enough that we know the destination hasn't 2001 * accessed block devices, so we're safe to recover. 2002 */ 2003 Error *local_err = NULL; 2004 2005 bdrv_invalidate_cache_all(&local_err); 2006 if (local_err) { 2007 error_report_err(local_err); 2008 } 2009 } 2010 qemu_mutex_unlock_iothread(); 2011 return -1; 2012 } 2013 2014 /** 2015 * migration_maybe_pause: Pause if required to by 2016 * migrate_pause_before_switchover called with the iothread locked 2017 * Returns: 0 on success 2018 */ 2019 static int migration_maybe_pause(MigrationState *s, 2020 int *current_active_state, 2021 int new_state) 2022 { 2023 if (!migrate_pause_before_switchover()) { 2024 return 0; 2025 } 2026 2027 /* Since leaving this state is not atomic with posting the semaphore 2028 * it's possible that someone could have issued multiple migrate_continue 2029 * and the semaphore is incorrectly positive at this point; 2030 * the docs say it's undefined to reinit a semaphore that's already 2031 * init'd, so use timedwait to eat up any existing posts. 2032 */ 2033 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 2034 /* This block intentionally left blank */ 2035 } 2036 2037 qemu_mutex_unlock_iothread(); 2038 migrate_set_state(&s->state, *current_active_state, 2039 MIGRATION_STATUS_PRE_SWITCHOVER); 2040 qemu_sem_wait(&s->pause_sem); 2041 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 2042 new_state); 2043 *current_active_state = new_state; 2044 qemu_mutex_lock_iothread(); 2045 2046 return s->state == new_state ? 0 : -EINVAL; 2047 } 2048 2049 /** 2050 * migration_completion: Used by migration_thread when there's not much left. 2051 * The caller 'breaks' the loop when this returns. 2052 * 2053 * @s: Current migration state 2054 */ 2055 static void migration_completion(MigrationState *s) 2056 { 2057 int ret; 2058 int current_active_state = s->state; 2059 2060 if (s->state == MIGRATION_STATUS_ACTIVE) { 2061 qemu_mutex_lock_iothread(); 2062 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2063 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); 2064 s->vm_was_running = runstate_is_running(); 2065 ret = global_state_store(); 2066 2067 if (!ret) { 2068 bool inactivate = !migrate_colo_enabled(); 2069 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2070 if (ret >= 0) { 2071 ret = migration_maybe_pause(s, ¤t_active_state, 2072 MIGRATION_STATUS_DEVICE); 2073 } 2074 if (ret >= 0) { 2075 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 2076 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 2077 inactivate); 2078 } 2079 if (inactivate && ret >= 0) { 2080 s->block_inactive = true; 2081 } 2082 } 2083 qemu_mutex_unlock_iothread(); 2084 2085 if (ret < 0) { 2086 goto fail; 2087 } 2088 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2089 trace_migration_completion_postcopy_end(); 2090 2091 qemu_savevm_state_complete_postcopy(s->to_dst_file); 2092 trace_migration_completion_postcopy_end_after_complete(); 2093 } 2094 2095 /* 2096 * If rp was opened we must clean up the thread before 2097 * cleaning everything else up (since if there are no failures 2098 * it will wait for the destination to send it's status in 2099 * a SHUT command). 2100 */ 2101 if (s->rp_state.from_dst_file) { 2102 int rp_error; 2103 trace_migration_return_path_end_before(); 2104 rp_error = await_return_path_close_on_source(s); 2105 trace_migration_return_path_end_after(rp_error); 2106 if (rp_error) { 2107 goto fail_invalidate; 2108 } 2109 } 2110 2111 if (qemu_file_get_error(s->to_dst_file)) { 2112 trace_migration_completion_file_err(); 2113 goto fail_invalidate; 2114 } 2115 2116 if (!migrate_colo_enabled()) { 2117 migrate_set_state(&s->state, current_active_state, 2118 MIGRATION_STATUS_COMPLETED); 2119 } 2120 2121 return; 2122 2123 fail_invalidate: 2124 /* If not doing postcopy, vm_start() will be called: let's regain 2125 * control on images. 2126 */ 2127 if (s->state == MIGRATION_STATUS_ACTIVE || 2128 s->state == MIGRATION_STATUS_DEVICE) { 2129 Error *local_err = NULL; 2130 2131 qemu_mutex_lock_iothread(); 2132 bdrv_invalidate_cache_all(&local_err); 2133 if (local_err) { 2134 error_report_err(local_err); 2135 } else { 2136 s->block_inactive = false; 2137 } 2138 qemu_mutex_unlock_iothread(); 2139 } 2140 2141 fail: 2142 migrate_set_state(&s->state, current_active_state, 2143 MIGRATION_STATUS_FAILED); 2144 } 2145 2146 bool migrate_colo_enabled(void) 2147 { 2148 MigrationState *s = migrate_get_current(); 2149 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 2150 } 2151 2152 static void migration_calculate_complete(MigrationState *s) 2153 { 2154 uint64_t bytes = qemu_ftell(s->to_dst_file); 2155 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2156 2157 s->total_time = end_time - s->start_time; 2158 if (!s->downtime) { 2159 /* 2160 * It's still not set, so we are precopy migration. For 2161 * postcopy, downtime is calculated during postcopy_start(). 2162 */ 2163 s->downtime = end_time - s->downtime_start; 2164 } 2165 2166 if (s->total_time) { 2167 s->mbps = ((double) bytes * 8.0) / s->total_time / 1000; 2168 } 2169 } 2170 2171 static void migration_update_counters(MigrationState *s, 2172 int64_t current_time) 2173 { 2174 uint64_t transferred, time_spent; 2175 double bandwidth; 2176 2177 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 2178 return; 2179 } 2180 2181 transferred = qemu_ftell(s->to_dst_file) - s->iteration_initial_bytes; 2182 time_spent = current_time - s->iteration_start_time; 2183 bandwidth = (double)transferred / time_spent; 2184 s->threshold_size = bandwidth * s->parameters.downtime_limit; 2185 2186 s->mbps = (((double) transferred * 8.0) / 2187 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 2188 2189 /* 2190 * if we haven't sent anything, we don't want to 2191 * recalculate. 10000 is a small enough number for our purposes 2192 */ 2193 if (ram_counters.dirty_pages_rate && transferred > 10000) { 2194 s->expected_downtime = ram_counters.dirty_pages_rate * 2195 qemu_target_page_size() / bandwidth; 2196 } 2197 2198 qemu_file_reset_rate_limit(s->to_dst_file); 2199 2200 s->iteration_start_time = current_time; 2201 s->iteration_initial_bytes = qemu_ftell(s->to_dst_file); 2202 2203 trace_migrate_transferred(transferred, time_spent, 2204 bandwidth, s->threshold_size); 2205 } 2206 2207 /* Migration thread iteration status */ 2208 typedef enum { 2209 MIG_ITERATE_RESUME, /* Resume current iteration */ 2210 MIG_ITERATE_SKIP, /* Skip current iteration */ 2211 MIG_ITERATE_BREAK, /* Break the loop */ 2212 } MigIterateState; 2213 2214 /* 2215 * Return true if continue to the next iteration directly, false 2216 * otherwise. 2217 */ 2218 static MigIterateState migration_iteration_run(MigrationState *s) 2219 { 2220 uint64_t pending_size, pend_post, pend_nonpost; 2221 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 2222 2223 qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, 2224 &pend_nonpost, &pend_post); 2225 pending_size = pend_nonpost + pend_post; 2226 2227 trace_migrate_pending(pending_size, s->threshold_size, 2228 pend_post, pend_nonpost); 2229 2230 if (pending_size && pending_size >= s->threshold_size) { 2231 /* Still a significant amount to transfer */ 2232 if (migrate_postcopy() && !in_postcopy && 2233 pend_nonpost <= s->threshold_size && 2234 atomic_read(&s->start_postcopy)) { 2235 if (postcopy_start(s)) { 2236 error_report("%s: postcopy failed to start", __func__); 2237 } 2238 return MIG_ITERATE_SKIP; 2239 } 2240 /* Just another iteration step */ 2241 qemu_savevm_state_iterate(s->to_dst_file, 2242 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 2243 } else { 2244 trace_migration_thread_low_pending(pending_size); 2245 migration_completion(s); 2246 return MIG_ITERATE_BREAK; 2247 } 2248 2249 return MIG_ITERATE_RESUME; 2250 } 2251 2252 static void migration_iteration_finish(MigrationState *s) 2253 { 2254 /* If we enabled cpu throttling for auto-converge, turn it off. */ 2255 cpu_throttle_stop(); 2256 2257 qemu_mutex_lock_iothread(); 2258 switch (s->state) { 2259 case MIGRATION_STATUS_COMPLETED: 2260 migration_calculate_complete(s); 2261 runstate_set(RUN_STATE_POSTMIGRATE); 2262 break; 2263 2264 case MIGRATION_STATUS_ACTIVE: 2265 /* 2266 * We should really assert here, but since it's during 2267 * migration, let's try to reduce the usage of assertions. 2268 */ 2269 if (!migrate_colo_enabled()) { 2270 error_report("%s: critical error: calling COLO code without " 2271 "COLO enabled", __func__); 2272 } 2273 migrate_start_colo_process(s); 2274 /* 2275 * Fixme: we will run VM in COLO no matter its old running state. 2276 * After exited COLO, we will keep running. 2277 */ 2278 s->vm_was_running = true; 2279 /* Fallthrough */ 2280 case MIGRATION_STATUS_FAILED: 2281 case MIGRATION_STATUS_CANCELLED: 2282 if (s->vm_was_running) { 2283 vm_start(); 2284 } else { 2285 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 2286 runstate_set(RUN_STATE_POSTMIGRATE); 2287 } 2288 } 2289 break; 2290 2291 default: 2292 /* Should not reach here, but if so, forgive the VM. */ 2293 error_report("%s: Unknown ending state %d", __func__, s->state); 2294 break; 2295 } 2296 qemu_bh_schedule(s->cleanup_bh); 2297 qemu_mutex_unlock_iothread(); 2298 } 2299 2300 /* 2301 * Master migration thread on the source VM. 2302 * It drives the migration and pumps the data down the outgoing channel. 2303 */ 2304 static void *migration_thread(void *opaque) 2305 { 2306 MigrationState *s = opaque; 2307 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 2308 2309 rcu_register_thread(); 2310 2311 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2312 2313 qemu_savevm_state_header(s->to_dst_file); 2314 2315 /* 2316 * If we opened the return path, we need to make sure dst has it 2317 * opened as well. 2318 */ 2319 if (s->rp_state.from_dst_file) { 2320 /* Now tell the dest that it should open its end so it can reply */ 2321 qemu_savevm_send_open_return_path(s->to_dst_file); 2322 2323 /* And do a ping that will make stuff easier to debug */ 2324 qemu_savevm_send_ping(s->to_dst_file, 1); 2325 } 2326 2327 if (migrate_postcopy()) { 2328 /* 2329 * Tell the destination that we *might* want to do postcopy later; 2330 * if the other end can't do postcopy it should fail now, nice and 2331 * early. 2332 */ 2333 qemu_savevm_send_postcopy_advise(s->to_dst_file); 2334 } 2335 2336 qemu_savevm_state_setup(s->to_dst_file); 2337 2338 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 2339 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2340 MIGRATION_STATUS_ACTIVE); 2341 2342 trace_migration_thread_setup_complete(); 2343 2344 while (s->state == MIGRATION_STATUS_ACTIVE || 2345 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2346 int64_t current_time; 2347 2348 if (!qemu_file_rate_limit(s->to_dst_file)) { 2349 MigIterateState iter_state = migration_iteration_run(s); 2350 if (iter_state == MIG_ITERATE_SKIP) { 2351 continue; 2352 } else if (iter_state == MIG_ITERATE_BREAK) { 2353 break; 2354 } 2355 } 2356 2357 if (qemu_file_get_error(s->to_dst_file)) { 2358 if (migration_is_setup_or_active(s->state)) { 2359 migrate_set_state(&s->state, s->state, 2360 MIGRATION_STATUS_FAILED); 2361 } 2362 trace_migration_thread_file_err(); 2363 break; 2364 } 2365 2366 current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2367 2368 migration_update_counters(s, current_time); 2369 2370 if (qemu_file_rate_limit(s->to_dst_file)) { 2371 /* usleep expects microseconds */ 2372 g_usleep((s->iteration_start_time + BUFFER_DELAY - 2373 current_time) * 1000); 2374 } 2375 } 2376 2377 trace_migration_thread_after_loop(); 2378 migration_iteration_finish(s); 2379 rcu_unregister_thread(); 2380 return NULL; 2381 } 2382 2383 void migrate_fd_connect(MigrationState *s, Error *error_in) 2384 { 2385 s->expected_downtime = s->parameters.downtime_limit; 2386 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); 2387 if (error_in) { 2388 migrate_fd_error(s, error_in); 2389 migrate_fd_cleanup(s); 2390 return; 2391 } 2392 2393 qemu_file_set_blocking(s->to_dst_file, true); 2394 qemu_file_set_rate_limit(s->to_dst_file, 2395 s->parameters.max_bandwidth / XFER_LIMIT_RATIO); 2396 2397 /* Notify before starting migration thread */ 2398 notifier_list_notify(&migration_state_notifiers, s); 2399 2400 /* 2401 * Open the return path. For postcopy, it is used exclusively. For 2402 * precopy, only if user specified "return-path" capability would 2403 * QEMU uses the return path. 2404 */ 2405 if (migrate_postcopy_ram() || migrate_use_return_path()) { 2406 if (open_return_path_on_source(s)) { 2407 error_report("Unable to open return-path for postcopy"); 2408 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2409 MIGRATION_STATUS_FAILED); 2410 migrate_fd_cleanup(s); 2411 return; 2412 } 2413 } 2414 2415 if (multifd_save_setup() != 0) { 2416 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2417 MIGRATION_STATUS_FAILED); 2418 migrate_fd_cleanup(s); 2419 return; 2420 } 2421 qemu_thread_create(&s->thread, "live_migration", migration_thread, s, 2422 QEMU_THREAD_JOINABLE); 2423 s->migration_thread_running = true; 2424 } 2425 2426 void migration_global_dump(Monitor *mon) 2427 { 2428 MigrationState *ms = migrate_get_current(); 2429 2430 monitor_printf(mon, "globals:\n"); 2431 monitor_printf(mon, "store-global-state: %s\n", 2432 ms->store_global_state ? "on" : "off"); 2433 monitor_printf(mon, "only-migratable: %s\n", 2434 ms->only_migratable ? "on" : "off"); 2435 monitor_printf(mon, "send-configuration: %s\n", 2436 ms->send_configuration ? "on" : "off"); 2437 monitor_printf(mon, "send-section-footer: %s\n", 2438 ms->send_section_footer ? "on" : "off"); 2439 } 2440 2441 #define DEFINE_PROP_MIG_CAP(name, x) \ 2442 DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) 2443 2444 static Property migration_properties[] = { 2445 DEFINE_PROP_BOOL("store-global-state", MigrationState, 2446 store_global_state, true), 2447 DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, false), 2448 DEFINE_PROP_BOOL("send-configuration", MigrationState, 2449 send_configuration, true), 2450 DEFINE_PROP_BOOL("send-section-footer", MigrationState, 2451 send_section_footer, true), 2452 2453 /* Migration parameters */ 2454 DEFINE_PROP_UINT8("x-compress-level", MigrationState, 2455 parameters.compress_level, 2456 DEFAULT_MIGRATE_COMPRESS_LEVEL), 2457 DEFINE_PROP_UINT8("x-compress-threads", MigrationState, 2458 parameters.compress_threads, 2459 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), 2460 DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, 2461 parameters.decompress_threads, 2462 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), 2463 DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, 2464 parameters.cpu_throttle_initial, 2465 DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), 2466 DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, 2467 parameters.cpu_throttle_increment, 2468 DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), 2469 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, 2470 parameters.max_bandwidth, MAX_THROTTLE), 2471 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, 2472 parameters.downtime_limit, 2473 DEFAULT_MIGRATE_SET_DOWNTIME), 2474 DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, 2475 parameters.x_checkpoint_delay, 2476 DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), 2477 DEFINE_PROP_UINT8("x-multifd-channels", MigrationState, 2478 parameters.x_multifd_channels, 2479 DEFAULT_MIGRATE_MULTIFD_CHANNELS), 2480 DEFINE_PROP_UINT32("x-multifd-page-count", MigrationState, 2481 parameters.x_multifd_page_count, 2482 DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT), 2483 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, 2484 parameters.xbzrle_cache_size, 2485 DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), 2486 2487 /* Migration capabilities */ 2488 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), 2489 DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), 2490 DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), 2491 DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), 2492 DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), 2493 DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), 2494 DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), 2495 DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), 2496 DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), 2497 DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), 2498 DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), 2499 DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD), 2500 2501 DEFINE_PROP_END_OF_LIST(), 2502 }; 2503 2504 static void migration_class_init(ObjectClass *klass, void *data) 2505 { 2506 DeviceClass *dc = DEVICE_CLASS(klass); 2507 2508 dc->user_creatable = false; 2509 dc->props = migration_properties; 2510 } 2511 2512 static void migration_instance_finalize(Object *obj) 2513 { 2514 MigrationState *ms = MIGRATION_OBJ(obj); 2515 MigrationParameters *params = &ms->parameters; 2516 2517 qemu_mutex_destroy(&ms->error_mutex); 2518 g_free(params->tls_hostname); 2519 g_free(params->tls_creds); 2520 qemu_sem_destroy(&ms->pause_sem); 2521 } 2522 2523 static void migration_instance_init(Object *obj) 2524 { 2525 MigrationState *ms = MIGRATION_OBJ(obj); 2526 MigrationParameters *params = &ms->parameters; 2527 2528 ms->state = MIGRATION_STATUS_NONE; 2529 ms->mbps = -1; 2530 qemu_sem_init(&ms->pause_sem, 0); 2531 qemu_mutex_init(&ms->error_mutex); 2532 2533 params->tls_hostname = g_strdup(""); 2534 params->tls_creds = g_strdup(""); 2535 2536 /* Set has_* up only for parameter checks */ 2537 params->has_compress_level = true; 2538 params->has_compress_threads = true; 2539 params->has_decompress_threads = true; 2540 params->has_cpu_throttle_initial = true; 2541 params->has_cpu_throttle_increment = true; 2542 params->has_max_bandwidth = true; 2543 params->has_downtime_limit = true; 2544 params->has_x_checkpoint_delay = true; 2545 params->has_block_incremental = true; 2546 params->has_x_multifd_channels = true; 2547 params->has_x_multifd_page_count = true; 2548 params->has_xbzrle_cache_size = true; 2549 } 2550 2551 /* 2552 * Return true if check pass, false otherwise. Error will be put 2553 * inside errp if provided. 2554 */ 2555 static bool migration_object_check(MigrationState *ms, Error **errp) 2556 { 2557 MigrationCapabilityStatusList *head = NULL; 2558 /* Assuming all off */ 2559 bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; 2560 int i; 2561 2562 if (!migrate_params_check(&ms->parameters, errp)) { 2563 return false; 2564 } 2565 2566 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 2567 if (ms->enabled_capabilities[i]) { 2568 head = migrate_cap_add(head, i, true); 2569 } 2570 } 2571 2572 ret = migrate_caps_check(cap_list, head, errp); 2573 2574 /* It works with head == NULL */ 2575 qapi_free_MigrationCapabilityStatusList(head); 2576 2577 return ret; 2578 } 2579 2580 static const TypeInfo migration_type = { 2581 .name = TYPE_MIGRATION, 2582 /* 2583 * NOTE: TYPE_MIGRATION is not really a device, as the object is 2584 * not created using qdev_create(), it is not attached to the qdev 2585 * device tree, and it is never realized. 2586 * 2587 * TODO: Make this TYPE_OBJECT once QOM provides something like 2588 * TYPE_DEVICE's "-global" properties. 2589 */ 2590 .parent = TYPE_DEVICE, 2591 .class_init = migration_class_init, 2592 .class_size = sizeof(MigrationClass), 2593 .instance_size = sizeof(MigrationState), 2594 .instance_init = migration_instance_init, 2595 .instance_finalize = migration_instance_finalize, 2596 }; 2597 2598 static void register_migration_types(void) 2599 { 2600 type_register_static(&migration_type); 2601 } 2602 2603 type_init(register_migration_types); 2604