/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/qmp/qerror.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
#include "qapi-event.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "monitor/monitor.h"

#define MAX_THROTTLE  (32 << 20)  /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count; decompression is usually at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ..., 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Default auto-converge CPU throttle migration parameters */
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints.
 * Note: Please change this default value to 10000 when we support hybrid mode.
 */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

static bool deferred_incoming;

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */

    MIG_RP_MSG_MAX
};
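
/*
 * For reference, migrate_send_rp_message() below frames each of these
 * messages on the wire as:
 *
 *   | type (be16) | payload length (be16) | payload (length bytes) |
 *
 * so e.g. a PONG carrying its be32 sequence number occupies
 * 2 + 2 + 4 = 8 bytes on the channel.
 */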

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

static MigrationState *current_migration;

static bool migration_object_check(MigrationState *ms, Error **errp);

void migration_object_init(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    Error *err = NULL;

    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    if (!migration_object_check(current_migration, &err)) {
        error_report_err(err);
        exit(1);
    }

    /*
     * We cannot really do this in migration_instance_init() since at
     * that time global properties are not yet applied, then this
     * value will be definitely replaced by something else.
     */
    if (ms->enforce_config_section) {
        current_migration->send_configuration = true;
    }
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    static bool once;
    static MigrationIncomingState mis_current;

    if (!once) {
        /* Zero the whole structure first, then set the initial state;
         * the other order would let memset() wipe the assignment. */
        memset(&mis_current, 0, sizeof(MigrationIncomingState));
        mis_current.state = MIGRATION_STATUS_NONE;
        qemu_mutex_init(&mis_current.rp_mutex);
        qemu_event_init(&mis_current.main_thread_load_event, false);
        once = true;
    }
    return &mis_current;
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }

    qemu_event_reset(&mis->main_thread_load_event);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state, &error_abort);
    }
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static void migrate_send_rp_message(MigrationIncomingState *mis,
                                    enum mig_rp_message_type message_type,
                                    uint16_t len, void *data)
{
    trace_migrate_send_rp_message((int)message_type, len);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);
    qemu_mutex_unlock(&mis->rp_mutex);
}
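
/*
 * Payload layout used by migrate_send_rp_req_pages() below (the
 * counterpart parser is in source_return_path_thread()):
 *
 *   MIG_RP_MSG_REQ_PAGES:    | start (be64) | len (be32) |
 *   MIG_RP_MSG_REQ_PAGES_ID: | start (be64) | len (be32) |
 *                            | rbname_len (u8) | rbname (rbname_len bytes,
 *                              not 0-terminated) |
 */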

/* Request a range of pages from the source VM at the given
 * start address.
 * rbname: Name of the RAMBlock to request the page in; if NULL it's the same
 *         as the last request (a name must have been given previously).
 * start: Address offset within the RB
 * len: Length in bytes required - must be a multiple of pagesize
 */
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
                               ram_addr_t start, size_t len)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname len (1),
                                 * rbname up to 255 chars */
    size_t msglen = 12; /* start + len */

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    if (rbname) {
        int rbname_len = strlen(rbname);
        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
    } else {
        migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
    }
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
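
/*
 * Example -incoming URIs accepted above (values are illustrative only):
 *   tcp:0:4444              listen on port 4444, any address
 *   unix:/tmp/migrate.sock  a UNIX domain socket
 *   fd:42                   an already-connected file descriptor
 *   exec:nc -l 4444         pipe the stream through a command
 *   rdma:host:4444          only when built with CONFIG_RDMA
 *   defer                   configure first, start later via migrate-incoming
 */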

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* Make sure all file formats flush their mutable metadata.
     * If we get an error here, just don't restart the VM yet. */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
        autostart = false;
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self();

    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;

    assert(mis->from_src_file);
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_enable_colo()) {
        mis->migration_incoming_co = qemu_coroutine_self();
        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
            colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        /* Wait for the checkpoint incoming thread to exit before freeing
         * resources */
        qemu_thread_join(&mis->colo_incoming_thread);
    }

    if (ret < 0) {
        Error *local_err = NULL;

        migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        error_report("load of migration failed: %s", strerror(-ret));
        qemu_fclose(mis->from_src_file);
        if (multifd_load_cleanup(&local_err) != 0) {
            error_report_err(local_err);
        }
        exit(EXIT_FAILURE);
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
}

static void migration_incoming_setup(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (multifd_load_setup() != 0) {
        /* We haven't been able to create multifd threads;
           nothing better to do. */
        exit(EXIT_FAILURE);
    }

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
}

static void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

void migration_fd_process_incoming(QEMUFile *f)
{
    migration_incoming_setup(f);
    migration_incoming_process();
}

void migration_ioc_process_incoming(QIOChannel *ioc)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        QEMUFile *f = qemu_fopen_channel_input(ioc);
        migration_fd_process_incoming(f);
    }
    /* We still only have a single channel.  Nothing to do here yet */
}
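
/*
 * Incoming control flow, for reference:
 *   migration_ioc_process_incoming() / migration_fd_process_incoming()
 *     -> migration_incoming_setup()       (multifd + QEMUFile wiring)
 *     -> process_incoming_migration_co()  (coroutine: loads the stream)
 *     -> process_incoming_migration_bh()  (bottom half: restarts the VM
 *                                          and emits COMPLETED)
 */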

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value =
            g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_x_multifd_channels = true;
    params->x_multifd_channels = s->parameters.x_multifd_channels;
    params->has_x_multifd_page_count = true;
    params->x_multifd_page_count = s->parameters.x_multifd_page_count;

    return params;
}

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
static bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_SETUP:
        return true;

    default:
        return false;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_counters.transferred;
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = ram_counters.duplicate;
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = ram_counters.normal;
    info->ram->normal_bytes = ram_counters.normal *
        qemu_target_page_size();
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = qemu_target_page_size();

    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->has_disk = true;
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
        /* TODO add some postcopy stats */
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->total_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);
        populate_disk_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;

    return info;
}

/**
 * @migrate_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            /* The decompression threads asynchronously write into RAM
             * rather than use the atomic copies needed to avoid
             * userfaulting.  It should be possible to fix the decompression
             * threads for compatibility in future.
             */
            error_setg(errp, "Postcopy is not currently compatible "
                       "with compression");
            return false;
        }

        /* This check is reasonably expensive, so only when it's being
         * set the first time, also it's only the destination that needs
         * special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }
    }

    return true;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;

    if (migration_is_setup_or_active(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    if (!migrate_caps_check(s->enabled_capabilities, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
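
/*
 * Illustrative QMP usage of the command above (the capability name and
 * value are examples only):
 *   { "execute": "migrate-set-capabilities",
 *     "arguments": { "capabilities": [
 *       { "capability": "xbzrle", "state": true } ] } }
 */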

/*
 * Check whether the parameters are valid.  Error will be put into errp
 * (if provided).  Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level < 0 || params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
        return false;
    }

    if (params->has_compress_threads &&
        (params->compress_threads < 1 || params->compress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_decompress_threads &&
        (params->decompress_threads < 1 || params->decompress_threads > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth &&
        (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
                   " range of 0 to %zu bytes/second", SIZE_MAX);
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit < 0 ||
         params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                   "the range of 0 to %d milliseconds",
                   MAX_MIGRATE_DOWNTIME);
        return false;
    }

    if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "x_checkpoint_delay",
                   "is invalid, it should be positive");
        return false;
    }
    if (params->has_x_multifd_channels &&
        (params->x_multifd_channels < 1 || params->x_multifd_channels > 255)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }
    if (params->has_x_multifd_page_count &&
        (params->x_multifd_page_count < 1 ||
         params->x_multifd_page_count > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_page_count",
                   "is invalid, it should be in the range of 1 to 10000");
        return false;
    }

    return true;
}

static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
}

static void migrate_params_apply(MigrateSetParameters *params)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_x_multifd_channels) {
        s->parameters.x_multifd_channels = params->x_multifd_channels;
    }
    if (params->has_x_multifd_page_count) {
        s->parameters.x_multifd_page_count = params->x_multifd_page_count;
    }
}

void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->has_tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        QDECREF(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->has_tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        QDECREF(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params);
}


void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy_ram()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * we don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    atomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static MigrationCapabilityStatusList *migrate_cap_add(
    MigrationCapabilityStatusList *list,
    MigrationCapability index,
    bool state)
{
    MigrationCapabilityStatusList *cap;

    cap = g_new0(MigrationCapabilityStatusList, 1);
    cap->value = g_new0(MigrationCapabilityStatus, 1);
    cap->value->capability = index;
    cap->value->state = state;
    cap->next = list;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap;

    cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}

static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    if (s->to_dst_file) {
        Error *local_err = NULL;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        if (multifd_save_cleanup(&local_err) != 0) {
            error_report_err(local_err);
        }
        qemu_fclose(s->to_dst_file);
        s->to_dst_file = NULL;
    }

    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    if (!s->error) {
        s->error = error_copy(error);
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}

static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    if (s->rp_state.from_dst_file) {
        /* shutdown the rp socket, so causing the rp thread to shutdown */
        qemu_file_shutdown(s->rp_state.from_dst_file);
    }

    do {
        old_state = s->state;
        if (!migration_is_setup_or_active(old_state)) {
            break;
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
    block_cleanup_parameters(s);
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}

bool migration_is_idle(void)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
        return false;
    case MIGRATION_STATUS__MAX:
        g_assert_not_reached();
    }

    return false;
}
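
/*
 * Source-side state machine, for reference: migrate_init() below resets
 * the state to NONE and immediately moves it to SETUP, after which
 * migrate_set_state() advances it along
 *   SETUP -> ACTIVE [-> POSTCOPY_ACTIVE] -> COMPLETED
 * with CANCELLING -> CANCELLED and FAILED as the cancel/error exits,
 * and COLO taking the place of COMPLETED when the x-colo capability is
 * enabled.
 */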

MigrationState *migrate_init(void)
{
    MigrationState *s = migrate_get_current();

    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->bytes_xfer = 0;
    s->xfer_limit = 0;
    s->cleanup_bh = 0;
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    return s;
}

static GSList *migration_blockers;

int migrate_add_blocker(Error *reason, Error **errp)
{
    if (migrate_get_current()->only_migratable) {
        error_propagate(errp, error_copy(reason));
        error_prepend(errp, "disallowing migration blocker "
                      "(--only_migratable) for: ");
        return -EACCES;
    }

    if (migration_is_idle()) {
        migration_blockers = g_slist_prepend(migration_blockers, reason);
        return 0;
    }

    error_propagate(errp, error_copy(reason));
    error_prepend(errp, "disallowing migration blocker (migration in "
                  "progress) for: ");
    return -EBUSY;
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!deferred_incoming) {
        error_setg(errp, "For use with '-incoming defer'");
        return;
    }
    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

bool migration_is_blocked(Error **errp)
{
    if (qemu_savevm_state_blocked(errp)) {
        return true;
    }

    if (migration_blockers) {
        error_propagate(errp, error_copy(migration_blockers->data));
        return true;
    }

    return false;
}
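
/*
 * Typical device-side use of the blocker API above (a sketch; the
 * device name and message are illustrative):
 *
 *   Error *blocker = NULL;
 *   error_setg(&blocker, "mydev: device state cannot be migrated");
 *   if (migrate_add_blocker(blocker, errp) < 0) {
 *       error_free(blocker);   // not registered; caller keeps ownership
 *       return;
 *   }
 *   ...
 *   migrate_del_blocker(blocker);   // e.g. on unrealize
 *   error_free(blocker);
 */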

void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    const char *p;

    if (migration_is_setup_or_active(s->state) ||
        s->state == MIGRATION_STATUS_CANCELLING ||
        s->state == MIGRATION_STATUS_COLO) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return;
    }

    if (migration_is_blocked(errp)) {
        return;
    }

    if ((has_blk && blk) || (has_inc && inc)) {
        if (migrate_use_block() || migrate_use_block_incremental()) {
            error_setg(errp, "Command options are incompatible with "
                       "current migration capabilities");
            return;
        }
        migrate_set_block_enabled(true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        s->must_remove_block_options = true;
    }

    if (has_inc && inc) {
        migrate_set_block_incremental(s, true);
    }

    s = migrate_init();

    if (strstart(uri, "tcp:", &p)) {
        tcp_start_outgoing_migration(s, p, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
    } else {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        return;
    }

    if (local_err) {
        migrate_fd_error(s, local_err);
        error_propagate(errp, local_err);
        return;
    }
}

void qmp_migrate_cancel(Error **errp)
{
    migrate_fd_cancel(migrate_get_current());
}

void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
    MigrationState *s = migrate_get_current();
    int64_t new_size;

    /* Check for truncation */
    if (value != (size_t)value) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return;
    }

    /* Cache should not be larger than guest ram size */
    if (value > ram_bytes_total()) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeds guest ram size");
        return;
    }

    new_size = xbzrle_cache_resize(value);
    if (new_size < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "is smaller than page size");
        return;
    }

    s->xbzrle_cache_size = new_size;
}

int64_t qmp_query_migrate_cache_size(Error **errp)
{
    return migrate_xbzrle_cache_size();
}

void qmp_migrate_set_speed(int64_t value, Error **errp)
{
    MigrateSetParameters p = {
        .has_max_bandwidth = true,
        .max_bandwidth = value,
    };

    qmp_migrate_set_parameters(&p, errp);
}

void qmp_migrate_set_downtime(double value, Error **errp)
{
    if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                   "the range of 0 to %d seconds",
                   MAX_MIGRATE_DOWNTIME_SECONDS);
        return;
    }

    value *= 1000; /* Convert to milliseconds */
    value = MAX(0, MIN(INT64_MAX, value));

    MigrateSetParameters p = {
        .has_downtime_limit = true,
        .downtime_limit = value,
    };

    qmp_migrate_set_parameters(&p, errp);
}
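
/*
 * Note: the two legacy commands above are thin wrappers over
 * migrate-set-parameters; e.g. a migrate_set_downtime of 0.3 becomes
 * downtime_limit = 300 ms via the value *= 1000 conversion, the same
 * as setting the downtime-limit parameter directly.
 */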

bool migrate_release_ram(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
}

bool migrate_postcopy_ram(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}

bool migrate_postcopy(void)
{
    return migrate_postcopy_ram();
}

bool migrate_auto_converge(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

bool migrate_zero_blocks(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

bool migrate_use_compression(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_level;
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_threads;
}

int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.decompress_threads;
}

bool migrate_use_events(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

bool migrate_use_multifd(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
}

int migrate_multifd_channels(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.x_multifd_channels;
}

int migrate_multifd_page_count(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.x_multifd_page_count;
}

int migrate_use_xbzrle(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

int64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->xbzrle_cache_size;
}

bool migrate_use_block(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
}

bool migrate_use_return_path(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
}

bool migrate_use_block_incremental(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.block_incremental;
}

/* migration thread support */
/*
 * Something bad happened to the RP stream, mark an error
 * The caller shall print or trace something to indicate why
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}

static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};
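
/*
 * The .len values above are the fixed payload sizes checked by
 * source_return_path_thread(): SHUT and PONG carry one be32 (4 bytes),
 * REQ_PAGES a be64 start plus a be32 length (12 bytes), and -1 marks
 * variable-length messages that are validated separately.
 */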

/*
 * Process a request for pages received on the return path,
 * We're allowed to send more than requested (e.g. to round to our page size)
 * and we don't need to send pages that have already been sent.
 */
static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname,
                                        ram_addr_t start, size_t len)
{
    long our_host_ps = getpagesize();

    trace_migrate_handle_rp_req_pages(rbname, start, len);

    /*
     * Since we currently insist on matching page sizes, just sanity check
     * we're being asked for whole host pages.
     */
    if (start & (our_host_ps - 1) ||
        (len & (our_host_ps - 1))) {
        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
                     " len: %zd", __func__, start, len);
        mark_source_rp_bad(ms);
        return;
    }

    if (ram_save_queue_pages(rbname, start, len)) {
        mark_source_rp_bad(ms);
    }
}
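
/*
 * Worked example of the alignment check above, assuming a 4 KiB host
 * page size: start = 0x2000, len = 0x1000 passes, while start = 0x2400
 * or len = 0x800 is rejected because (x & (4096 - 1)) != 0.
 */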

/*
 * Handles messages sent on the return path towards the source VM
 *
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with"
                         " incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type,
                         header_len, (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            tmp32 = ldl_be_p(buf);
            trace_source_return_path_thread_pong(tmp32);
            break;

        case MIG_RP_MSG_REQ_PAGES:
            start = ldq_be_p(buf);
            len = ldl_be_p(buf + 8);
            migrate_handle_rp_req_pages(ms, NULL, start, len);
            break;

        case MIG_RP_MSG_REQ_PAGES_ID:
            expected_len = 12 + 1; /* header + termination */

            if (header_len >= expected_len) {
                start = ldq_be_p(buf);
                len = ldl_be_p(buf + 8);
                /* Now we expect an idstr */
                tmp32 = buf[12]; /* Length of the following idstr */
                buf[13 + tmp32] = '\0';
                expected_len += tmp32;
            }
            if (header_len != expected_len) {
                error_report("RP: Req_Page_id with length %d expecting %zd",
                             header_len, expected_len);
                mark_source_rp_bad(ms);
                goto out;
            }
            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
            break;

        default:
            break;
        }
    }
    if (qemu_file_get_error(rp)) {
        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
out:
    ms->rp_state.from_dst_file = NULL;
    qemu_fclose(rp);
    return NULL;
}

static int open_return_path_on_source(MigrationState *ms)
{

    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
    if (!ms->rp_state.from_dst_file) {
        return -1;
    }

    trace_open_return_path_on_source();
    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);

    trace_open_return_path_on_source_continue();

    return 0;
}

/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
static int await_return_path_close_on_source(MigrationState *ms)
{
    /*
     * If this is a normal exit then the destination will send a SHUT and the
     * rp_thread will exit, however if there's an error we need to cause
     * it to exit.
     */
    if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
        /*
         * shutdown(2), if we have it, will cause it to unblock if it's stuck
         * waiting for the destination.
         */
        qemu_file_shutdown(ms->rp_state.from_dst_file);
        mark_source_rp_bad(ms);
    }
    trace_await_return_path_close_on_source_joining();
    qemu_thread_join(&ms->rp_state.rp_thread);
    trace_await_return_path_close_on_source_close();
    return ms->rp_state.error;
}
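
/*
 * Return-path thread lifecycle, for reference:
 * open_return_path_on_source() spawns source_return_path_thread(),
 * which runs until it sees a SHUT message or a stream error, and
 * await_return_path_close_on_source() joins it, forcing a
 * qemu_file_shutdown() first if the outgoing stream has already failed.
 */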

/*
 * Switch from normal iteration to postcopy
 * Returns non-0 on error
 */
static int postcopy_start(MigrationState *ms, bool *old_vm_running)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    bool restart_block = false;
    migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
    *old_vm_running = runstate_is_running();
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        if (ram_postcopy_send_discard_bitmap(ms)) {
            error_report("postcopy send discard bitmap failed");
            goto fail;
        }
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /* Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /* Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    }

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /* A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
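
/*
 * Sketch of the packaging trick above: the buffered device state is
 * sent through qemu_savevm_send_packaged() as, in effect,
 *   | MIG_CMD_PACKAGED | length | listen cmd, device state, run cmd |
 * so the destination can pull the whole blob off the channel before
 * executing it, leaving the channel free to serve page requests.
 */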
/**
 * migration_completion: Used by migration_thread when there's not much
 * left.  The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 * @current_active_state: The migration state we expect to be in
 * @*old_vm_running: Pointer to old_vm_running flag
 * @*start_time: Pointer to time to update
 */
static void migration_completion(MigrationState *s, int current_active_state,
                                 bool *old_vm_running,
                                 int64_t *start_time)
{
    int ret;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
        *old_vm_running = runstate_is_running();
        ret = global_state_store();

        if (!ret) {
            bool inactivate = !migrate_colo_enabled();
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            if (ret >= 0) {
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         inactivate);
            }
            if (inactivate && ret >= 0) {
                s->block_inactive = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        trace_migration_completion_postcopy_end_after_complete();
    }

    /*
     * If the return path was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.from_dst_file) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    if (!migrate_colo_enabled()) {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /*
     * If not doing postcopy, vm_start() will be called: let's regain
     * control of the images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}

bool migrate_colo_enabled(void)
{
    MigrationState *s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
}
/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    /* Used by the bandwidth calcs, updated later */
    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    int64_t initial_bytes = 0;
    /*
     * The final stage happens when the remaining data is smaller than
     * this threshold; it's calculated from the requested downtime and
     * measured bandwidth.
     */
    int64_t threshold_size = 0;
    int64_t start_time = initial_time;
    int64_t end_time;
    bool old_vm_running = false;
    bool entered_postcopy = false;
    /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
    enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
    bool enable_colo = migrate_colo_enabled();

    rcu_register_thread();

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure the destination
     * has it opened as well.
     */
    if (s->rp_state.from_dst_file) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);

    trace_migration_thread_setup_complete();

    while (s->state == MIGRATION_STATUS_ACTIVE ||
           s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        int64_t current_time;
        uint64_t pending_size;

        if (!qemu_file_rate_limit(s->to_dst_file)) {
            uint64_t pend_post, pend_nonpost;

            qemu_savevm_state_pending(s->to_dst_file, threshold_size,
                                      &pend_nonpost, &pend_post);
            pending_size = pend_nonpost + pend_post;
            trace_migrate_pending(pending_size, threshold_size,
                                  pend_post, pend_nonpost);
            if (pending_size && pending_size >= threshold_size) {
                /* Still a significant amount to transfer */

                if (migrate_postcopy() &&
                    s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
                    pend_nonpost <= threshold_size &&
                    atomic_read(&s->start_postcopy)) {

                    if (!postcopy_start(s, &old_vm_running)) {
                        current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
                        entered_postcopy = true;
                    }

                    continue;
                }
                /* Just another iteration step */
                qemu_savevm_state_iterate(s->to_dst_file, entered_postcopy);
            } else {
                trace_migration_thread_low_pending(pending_size);
                migration_completion(s, current_active_state,
                                     &old_vm_running, &start_time);
                break;
            }
        }

        if (qemu_file_get_error(s->to_dst_file)) {
            migrate_set_state(&s->state, current_active_state,
                              MIGRATION_STATUS_FAILED);
            trace_migration_thread_file_err();
            break;
        }
        current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        if (current_time >= initial_time + BUFFER_DELAY) {
            uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
                                         initial_bytes;
            uint64_t time_spent = current_time - initial_time;
            double bandwidth = (double)transferred_bytes / time_spent;
            threshold_size = bandwidth * s->parameters.downtime_limit;

            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;

            trace_migrate_transferred(transferred_bytes, time_spent,
                                      bandwidth, threshold_size);
            /*
             * If we haven't sent anything, we don't want to recalculate;
             * 10000 is a small enough number for our purposes.
             */
            if (ram_counters.dirty_pages_rate && transferred_bytes > 10000) {
                s->expected_downtime = ram_counters.dirty_pages_rate *
                    qemu_target_page_size() / bandwidth;
            }

            qemu_file_reset_rate_limit(s->to_dst_file);
            initial_time = current_time;
            initial_bytes = qemu_ftell(s->to_dst_file);
        }
        if (qemu_file_rate_limit(s->to_dst_file)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time) * 1000);
        }
    }

    trace_migration_thread_after_loop();
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();
    /*
     * Resources allocated by migration will be reused in the COLO
     * process, so don't release them here.
     */
    if (!enable_colo) {
        qemu_savevm_state_cleanup();
    }
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
        s->total_time = end_time - s->total_time;
        if (!entered_postcopy) {
            s->downtime = end_time - start_time;
        }
        if (s->total_time) {
            s->mbps = (((double) transferred_bytes * 8.0) /
                       ((double) s->total_time)) / 1000;
        }
        runstate_set(RUN_STATE_POSTMIGRATE);
    } else {
        if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
            migrate_start_colo_process(s);
            qemu_savevm_state_cleanup();
            /*
             * FIXME: we will run the VM in COLO no matter what its old
             * running state was; after exiting COLO, we will keep running.
             */
            old_vm_running = true;
        }
        if (old_vm_running && !entered_postcopy) {
            vm_start();
        } else {
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
    }
    qemu_bh_schedule(s->cleanup_bh);
    qemu_mutex_unlock_iothread();

    rcu_unregister_thread();
    return NULL;
}
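/*
 * Worked example of the two estimates computed in migration_thread()
 * above (hypothetical helpers, numbers purely illustrative).  With
 * bandwidth in bytes/ms: transferring 50MiB in a 100ms interval gives
 * a bandwidth of 524288 bytes/ms; with the default 300ms downtime
 * limit, threshold_size = 524288 * 300 = 157286400 bytes (150MiB),
 * i.e. completion starts once the remaining data would fit into the
 * allowed downtime at the measured rate.
 */
static inline int64_t example_threshold_size(double bandwidth_bytes_per_ms,
                                             int64_t downtime_limit_ms)
{
    /* Same formula as threshold_size in migration_thread() */
    return bandwidth_bytes_per_ms * downtime_limit_ms;
}

static inline double example_expected_downtime(uint64_t dirty_pages_rate,
                                               size_t page_size,
                                               double bandwidth_bytes_per_ms)
{
    /* Mirrors the expected_downtime formula in migration_thread() */
    return dirty_pages_rate * page_size / bandwidth_bytes_per_ms;
}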
void migrate_fd_connect(MigrationState *s)
{
    s->expected_downtime = s->parameters.downtime_limit;
    s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

    qemu_file_set_blocking(s->to_dst_file, true);
    qemu_file_set_rate_limit(s->to_dst_file,
                             s->parameters.max_bandwidth / XFER_LIMIT_RATIO);

    /* Notify before starting migration thread */
    notifier_list_notify(&migration_state_notifiers, s);

    /*
     * Open the return path.  For postcopy, it is used exclusively.  For
     * precopy, QEMU uses the return path only if the user enabled the
     * "return-path" capability.
     */
    if (migrate_postcopy_ram() || migrate_use_return_path()) {
        if (open_return_path_on_source(s)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                              MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    if (multifd_save_setup() != 0) {
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }
    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
                       QEMU_THREAD_JOINABLE);
    s->migration_thread_running = true;
}
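/*
 * Illustrative arithmetic for the rate-limit conversion in
 * migrate_fd_connect() above (hypothetical helper, not used by the
 * code): max_bandwidth is in bytes/second, but the QEMUFile quota is
 * consumed per buffering interval, so the per-interval budget is the
 * per-second figure divided by XFER_LIMIT_RATIO.  With the defaults,
 * MAX_THROTTLE (32MiB/s) becomes a ~3.2MiB budget per 100ms interval.
 */
static inline int64_t example_rate_limit_per_interval(int64_t max_bandwidth)
{
    return max_bandwidth / XFER_LIMIT_RATIO;
}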
void migration_global_dump(Monitor *mon)
{
    MigrationState *ms = migrate_get_current();

    monitor_printf(mon, "globals: store-global-state=%d, only_migratable=%d, "
                   "send-configuration=%d, send-section-footer=%d\n",
                   ms->store_global_state, ms->only_migratable,
                   ms->send_configuration, ms->send_section_footer);
}

#define DEFINE_PROP_MIG_CAP(name, x)             \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)

static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, false),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),

    /* Migration parameters */
    DEFINE_PROP_INT64("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_INT64("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_INT64("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_INT64("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_INT64("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_INT64("x-max-bandwidth", MigrationState,
                      parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_INT64("x-downtime-limit", MigrationState,
                      parameters.downtime_limit,
                      DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState,
                      parameters.x_checkpoint_delay,
                      DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_INT64("x-multifd-channels", MigrationState,
                      parameters.x_multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_INT64("x-multifd-page-count", MigrationState,
                      parameters.x_multifd_page_count,
                      DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT),

    /* Migration capabilities */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD),

    DEFINE_PROP_END_OF_LIST(),
};
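/*
 * Because these are qdev properties on the migration object, they can
 * be tweaked from the command line like other -global properties; an
 * illustrative (not exhaustive) invocation might be:
 *
 *   qemu-system-x86_64 -global migration.x-multifd-channels=4 \
 *                      -global migration.x-max-bandwidth=134217728 ...
 */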
static void migration_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    dc->props = migration_properties;
}

static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    g_free(params->tls_hostname);
    g_free(params->tls_creds);
}

static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
    ms->mbps = -1;

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_decompress_threads = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_x_multifd_channels = true;
    params->has_x_multifd_page_count = true;
}
/*
 * Return true if the check passes, false otherwise.  The error will be
 * stored in errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    /* Assuming all off */
    bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
    int i;

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (ms->enabled_capabilities[i]) {
            head = migrate_cap_add(head, i, true);
        }
    }

    ret = migrate_caps_check(cap_list, head, errp);

    /* It works with head == NULL */
    qapi_free_MigrationCapabilityStatusList(head);

    return ret;
}

static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_create(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);