/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
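/*
 * A worked example of the chunked rate limit: with BUFFER_DELAY = 100 ms,
 * XFER_LIMIT_RATIO = 1000 / 100 = 10, so a bandwidth cap of B bytes/second
 * translates into a budget of B / 10 bytes per 100 ms chunk.  E.g. at the
 * MAX_THROTTLE default of 128 MiB/s, each chunk may carry up to
 * 128 MiB / 10 = 12.8 MiB.
 */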
/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0 means no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0 means no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0 means no compression, 1: best speed, ... 20: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};
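/*
 * For orientation, a sketch of the framing inferred from
 * migrate_send_rp_message() below (not a normative spec): every
 * return-path message is sent as
 *
 *     be16  message type (one of mig_rp_message_type)
 *     be16  payload length in bytes
 *     ...   payload, laid out per the per-message comments above
 */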
/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                       /* Capability set size */
    MigrationCapability caps[];     /* Variadic array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {                      \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ } \
    }

/* Background-snapshot compatibility check list */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID);
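/*
 * To see how the macro works, a hypothetical two-entry set such as
 *
 *     static const INITIALIZE_MIGRATE_CAPS_SET(example_caps,
 *         MIGRATION_CAPABILITY_XBZRLE,
 *         MIGRATION_CAPABILITY_MULTIFD);
 *
 * expands to
 *
 *     static const MigrateCapsSet example_caps = {
 *         .size = sizeof((int []) { MIGRATION_CAPABILITY_XBZRLE,
 *                                   MIGRATION_CAPABILITY_MULTIFD })
 *                 / sizeof(int),
 *         .caps = { MIGRATION_CAPABILITY_XBZRLE,
 *                   MIGRATION_CAPABILITY_MULTIFD }
 *     };
 *
 * i.e. .size is computed at compile time from the argument count (2 here).
 */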
/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration objects */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }
    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
    }

    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the qemu file got an error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}

/* Request one page from the source VM at the given start address.
 * rb: the RAMBlock to request the page in
 * start: address offset within the RB
 * len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We keep track of the last RAMBlock that we requested a page from.
     * Note that we don't need locking because this function will only be
     * called within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
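/*
 * A sketch of the REQ_PAGES_ID payload built above (sizes in bytes,
 * multi-byte fields big-endian on the wire):
 *
 *     +-------+-----+------------+------------+
 *     | start | len | rbname_len | rbname     |
 *     |   8   |  4  |     1      | up to 255  |
 *     +-------+-----+------------+------------+
 *
 * The plain REQ_PAGES variant stops after the first 12 bytes; the block
 * name is omitted because the source caches the last requested RAMBlock.
 */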
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock because once the page has arrived, it stays there.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    migrate_protocol_allow_multifd(false); /* reset it anyway */
    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        migrate_protocol_allow_multifd(true);
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
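/*
 * Examples of URIs accepted above (hosts, ports, paths and fd numbers
 * are placeholders):
 *
 *     tcp:0.0.0.0:4444        listen on a TCP socket
 *     unix:/tmp/migrate.sock  listen on a UNIX domain socket
 *     vsock:3:4444            listen on a vsock (cid:port)
 *     rdma:192.168.1.1:4444   RDMA transport (when CONFIG_RDMA is set)
 *     exec:cat state.bin      read the stream from a command's stdout
 *     fd:42                   read from an already-open file descriptor
 *
 * Note that multifd is only permitted on the socket transports (tcp,
 * unix, vsock), matching migrate_protocol_allow_multifd(true) above.
 */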
static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats flush their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If the global state section was not received, or we are in a
       running state, we need to obey autostart.  Any other state is
       set with runstate_set. */

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats flush their mutable metadata */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
    }
    exit(EXIT_FAILURE);
}
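/*
 * Summary of the incoming state transitions driven by the two functions
 * above (as written; postcopy recovery paths are handled elsewhere):
 *
 *     NONE   -> ACTIVE     on entry to process_incoming_migration_co()
 *     ACTIVE -> FAILED     if qemu_loadvm_state() or COLO setup fails
 *     ACTIVE -> COMPLETED  in process_incoming_migration_bh(), only after
 *                          the runstate is settled, so an observer of the
 *                          COMPLETED event sees a usable VM
 */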
/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (multifd_load_setup(errp) != 0) {
        return false;
    }

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        mis->from_src_file = f;
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(f, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(f);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * fault thread will still be waiting), so that we can receive
         * commands from the source now, and answer them if needed.  The
         * fault thread will be woken up later, once we are sure that the
         * source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (postcopy_try_recover(f)) {
        return;
    }

    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    migration_incoming_process();
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    bool start_migration;

    if (!mis->from_src_file) {
        /* The first connection (multifd may have multiple) */
        QEMUFile *f = qemu_fopen_channel_input(ioc);

        /* If it's a recovery, we're done */
        if (postcopy_try_recover(f)) {
            return;
        }

        if (!migration_incoming_setup(f, errp)) {
            return;
        }

        /*
         * Common migration only needs one channel, so we can start
         * right now.  Multifd needs more than one channel, so we wait.
         */
        start_migration = !migrate_use_multifd();
    } else {
        /* Multiple connections */
        assert(migrate_use_multifd());
        start_migration = multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (start_migration) {
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    bool all_channels;

    all_channels = multifd_recv_all_channels_created();

    return all_channels && mis->from_src_file != NULL;
}
/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-zero value
 * indicates an error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (fault thread is still paused),
     * and it's ok even not taking the mutex. However the best way is
     * to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL, **tail = &head;
    MigrationCapabilityStatus *caps;
    MigrationState *s = migrate_get_current();
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        caps = g_malloc0(sizeof(*caps));
        caps->capability = i;
        caps->state = s->enabled_capabilities[i];
        QAPI_LIST_APPEND(tail, caps);
    }

    return head;
}
MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_tls_authz = true;
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}

AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}
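/*
 * With the defaults above (initial 50 ms, step 100 ms, rounds 5, max
 * 550 ms), and assuming the announce timer grows the delay by 'step'
 * each round while clamping at 'max' (see qemu_announce_self()), the
 * self-announce packets would go out at delays of roughly
 * 50, 150, 250, 350 and 450 ms.
 */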
/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;
    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;
    }
}
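/*
 * The two predicates above differ in exactly two states:
 * migration_is_setup_or_active() counts COLO but not CANCELLING, while
 * migration_is_running() counts CANCELLING but not COLO.  The latter is
 * what qmp_migrate_set_capabilities() uses to reject changes while a
 * migration (including one still being cancelled) is in flight.
 */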
static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_counters.transferred;
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = ram_counters.duplicate;
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = ram_counters.normal;
    info->ram->normal_bytes = ram_counters.normal *
        qemu_target_page_size();
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = qemu_target_page_size();
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;

    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->has_compression = true;
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->has_disk = true;
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}
static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked;
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = s->state;
}

typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
    WT_SUPPORT_AVAILABLE,
    WT_SUPPORT_COMPATIBLE
} WriteTrackingSupport;

static
WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* Check if kernel supports required UFFD features */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }
    /*
     * Check if current memory configuration is
     * compatible with required UFFD features.
     */
    if (!ram_write_tracking_compatible()) {
        return WT_SUPPORT_AVAILABLE;
    }

    return WT_SUPPORT_COMPATIBLE;
}
/**
 * @migration_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /*
         * This check is reasonably expensive, so we only do it the first
         * time the capability is set; also, it's only the destination
         * that needs special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /*
             * postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
        WriteTrackingSupport wt_support;
        int idx;
        /*
         * Check if 'background-snapshot' capability is supported by
         * host kernel and compatible with guest memory configuration.
         */
        wt_support = migrate_query_write_tracking();
        if (wt_support < WT_SUPPORT_AVAILABLE) {
            error_setg(errp, "Background-snapshot is not supported by host kernel");
            return false;
        }
        if (wt_support < WT_SUPPORT_COMPATIBLE) {
            error_setg(errp, "Background-snapshot is not compatible "
                       "with guest memory configuration");
            return false;
        }

        /*
         * Check if there are any migration capabilities
         * incompatible with 'background-snapshot'.
         */
        for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
            int incomp_cap = check_caps_background_snapshot.caps[idx];
            if (cap_list[incomp_cap]) {
                error_setg(errp,
                           "Background-snapshot is not compatible with %s",
                           MigrationCapability_str(incomp_cap));
                return false;
            }
        }
    }

    /* incoming side only */
    if (runstate_check(RUN_STATE_INMIGRATE) &&
        !migrate_multifd_is_allowed() &&
        cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        error_setg(errp, "multifd is not supported by current protocol");
        return false;
    }

    return true;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
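/*
 * Example QMP invocation of the command above (capability names follow
 * the QAPI schema; "xbzrle" is just an illustration):
 *
 *     { "execute": "migrate-set-capabilities",
 *       "arguments": { "capabilities": [
 *           { "capability": "xbzrle", "state": true } ] } }
 */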
/*
 * Check whether the parameters are valid. Error will be put into errp
 * (if provided). Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                    stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "a value between 0 and 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "a power of two no less than the target page size");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }
    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "a value between 0 and 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "a value between 0 and 10000");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

    return true;
}

static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }

    if (params->has_tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}

static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}

void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->has_tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        qobject_unref(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->has_tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        qobject_unref(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params, errp);
}
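/*
 * Example QMP invocation of the command above; the parameter values are
 * arbitrary (cap bandwidth at 32 MiB/s and downtime at 500 ms):
 *
 *     { "execute": "migrate-set-parameters",
 *       "arguments": { "max-bandwidth": 33554432,
 *                      "downtime-limit": 500 } }
 *
 * Note the test-apply/check/apply split above: the new values are first
 * merged into a throwaway copy of the current parameters so that
 * migrate_params_check() can validate the combined result before
 * anything is committed.
 */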
void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * We don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
                                                  bool state)
{
    MigrationCapabilityStatus *cap;

    cap = g_new0(MigrationCapabilityStatus, 1);
    cap->capability = index;
    cap->state = state;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap = NULL;

    QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}

static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    assert(!migration_is_active(s));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }
We can't free it */ 1828 error_report_err(error_copy(s->error)); 1829 } 1830 notifier_list_notify(&migration_state_notifiers, s); 1831 block_cleanup_parameters(s); 1832 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 1833 } 1834 1835 static void migrate_fd_cleanup_schedule(MigrationState *s) 1836 { 1837 /* 1838 * Ref the state for bh, because it may be called when 1839 * there are already no other refs 1840 */ 1841 object_ref(OBJECT(s)); 1842 qemu_bh_schedule(s->cleanup_bh); 1843 } 1844 1845 static void migrate_fd_cleanup_bh(void *opaque) 1846 { 1847 MigrationState *s = opaque; 1848 migrate_fd_cleanup(s); 1849 object_unref(OBJECT(s)); 1850 } 1851 1852 void migrate_set_error(MigrationState *s, const Error *error) 1853 { 1854 QEMU_LOCK_GUARD(&s->error_mutex); 1855 if (!s->error) { 1856 s->error = error_copy(error); 1857 } 1858 } 1859 1860 static void migrate_error_free(MigrationState *s) 1861 { 1862 QEMU_LOCK_GUARD(&s->error_mutex); 1863 if (s->error) { 1864 error_free(s->error); 1865 s->error = NULL; 1866 } 1867 } 1868 1869 void migrate_fd_error(MigrationState *s, const Error *error) 1870 { 1871 trace_migrate_fd_error(error_get_pretty(error)); 1872 assert(s->to_dst_file == NULL); 1873 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1874 MIGRATION_STATUS_FAILED); 1875 migrate_set_error(s, error); 1876 } 1877 1878 static void migrate_fd_cancel(MigrationState *s) 1879 { 1880 int old_state; 1881 QEMUFile *f = migrate_get_current()->to_dst_file; 1882 trace_migrate_fd_cancel(); 1883 1884 WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { 1885 if (s->rp_state.from_dst_file) { 1886 /* shut down the rp socket, causing the rp thread to shut down too */ 1887 qemu_file_shutdown(s->rp_state.from_dst_file); 1888 } 1889 } 1890 1891 do { 1892 old_state = s->state; 1893 if (!migration_is_running(old_state)) { 1894 break; 1895 } 1896 /* If the migration is paused, kick it out of the pause */ 1897 if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { 1898 qemu_sem_post(&s->pause_sem); 1899 } 1900 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); 1901 } while (s->state != MIGRATION_STATUS_CANCELLING); 1902 1903 /* 1904 * If we're unlucky the migration code might be stuck somewhere in a 1905 * send/write while the network has failed and is waiting to time out; 1906 * if we've got shutdown(2) available then we can force it to quit. 1907 * The outgoing qemu file gets closed in migrate_fd_cleanup that is 1908 * called in a bh, so there is no race against this cancel.
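 * (qemu_file_shutdown() below is a shutdown(2)-style operation: it makes blocked sends/receives fail but does not close the fd, which is why issuing it here is safe even while the migration thread may still be using the file.)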
1909 */ 1910 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 1911 qemu_file_shutdown(f); 1912 } 1913 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1914 Error *local_err = NULL; 1915 1916 bdrv_invalidate_cache_all(&local_err); 1917 if (local_err) { 1918 error_report_err(local_err); 1919 } else { 1920 s->block_inactive = false; 1921 } 1922 } 1923 } 1924 1925 void add_migration_state_change_notifier(Notifier *notify) 1926 { 1927 notifier_list_add(&migration_state_notifiers, notify); 1928 } 1929 1930 void remove_migration_state_change_notifier(Notifier *notify) 1931 { 1932 notifier_remove(notify); 1933 } 1934 1935 bool migration_in_setup(MigrationState *s) 1936 { 1937 return s->state == MIGRATION_STATUS_SETUP; 1938 } 1939 1940 bool migration_has_finished(MigrationState *s) 1941 { 1942 return s->state == MIGRATION_STATUS_COMPLETED; 1943 } 1944 1945 bool migration_has_failed(MigrationState *s) 1946 { 1947 return (s->state == MIGRATION_STATUS_CANCELLED || 1948 s->state == MIGRATION_STATUS_FAILED); 1949 } 1950 1951 bool migration_in_postcopy(void) 1952 { 1953 MigrationState *s = migrate_get_current(); 1954 1955 switch (s->state) { 1956 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1957 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1958 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1959 return true; 1960 default: 1961 return false; 1962 } 1963 } 1964 1965 bool migration_in_postcopy_after_devices(MigrationState *s) 1966 { 1967 return migration_in_postcopy() && s->postcopy_after_devices; 1968 } 1969 1970 bool migration_in_incoming_postcopy(void) 1971 { 1972 PostcopyState ps = postcopy_state_get(); 1973 1974 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 1975 } 1976 1977 bool migration_in_bg_snapshot(void) 1978 { 1979 MigrationState *s = migrate_get_current(); 1980 1981 return migrate_background_snapshot() && 1982 migration_is_setup_or_active(s->state); 1983 } 1984 1985 bool migration_is_idle(void) 1986 { 1987 MigrationState *s = current_migration; 1988 1989 if (!s) { 1990 return true; 1991 } 1992 1993 switch (s->state) { 1994 case MIGRATION_STATUS_NONE: 1995 case MIGRATION_STATUS_CANCELLED: 1996 case MIGRATION_STATUS_COMPLETED: 1997 case MIGRATION_STATUS_FAILED: 1998 return true; 1999 case MIGRATION_STATUS_SETUP: 2000 case MIGRATION_STATUS_CANCELLING: 2001 case MIGRATION_STATUS_ACTIVE: 2002 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 2003 case MIGRATION_STATUS_COLO: 2004 case MIGRATION_STATUS_PRE_SWITCHOVER: 2005 case MIGRATION_STATUS_DEVICE: 2006 case MIGRATION_STATUS_WAIT_UNPLUG: 2007 return false; 2008 case MIGRATION_STATUS__MAX: 2009 g_assert_not_reached(); 2010 } 2011 2012 return false; 2013 } 2014 2015 bool migration_is_active(MigrationState *s) 2016 { 2017 return (s->state == MIGRATION_STATUS_ACTIVE || 2018 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 2019 } 2020 2021 void migrate_init(MigrationState *s) 2022 { 2023 /* 2024 * Reinitialise all migration state, except 2025 * parameters/capabilities that the user set, and 2026 * locks. 
2027 */ 2028 s->cleanup_bh = 0; 2029 s->vm_start_bh = 0; 2030 s->to_dst_file = NULL; 2031 s->state = MIGRATION_STATUS_NONE; 2032 s->rp_state.from_dst_file = NULL; 2033 s->rp_state.error = false; 2034 s->mbps = 0.0; 2035 s->pages_per_second = 0.0; 2036 s->downtime = 0; 2037 s->expected_downtime = 0; 2038 s->setup_time = 0; 2039 s->start_postcopy = false; 2040 s->postcopy_after_devices = false; 2041 s->migration_thread_running = false; 2042 error_free(s->error); 2043 s->error = NULL; 2044 s->hostname = NULL; 2045 2046 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 2047 2048 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2049 s->total_time = 0; 2050 s->vm_was_running = false; 2051 s->iteration_initial_bytes = 0; 2052 s->threshold_size = 0; 2053 } 2054 2055 int migrate_add_blocker_internal(Error *reason, Error **errp) 2056 { 2057 /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ 2058 if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { 2059 error_propagate_prepend(errp, error_copy(reason), 2060 "disallowing migration blocker " 2061 "(migration/snapshot in progress) for: "); 2062 return -EBUSY; 2063 } 2064 2065 migration_blockers = g_slist_prepend(migration_blockers, reason); 2066 return 0; 2067 } 2068 2069 int migrate_add_blocker(Error *reason, Error **errp) 2070 { 2071 if (only_migratable) { 2072 error_propagate_prepend(errp, error_copy(reason), 2073 "disallowing migration blocker " 2074 "(--only-migratable) for: "); 2075 return -EACCES; 2076 } 2077 2078 return migrate_add_blocker_internal(reason, errp); 2079 } 2080 2081 void migrate_del_blocker(Error *reason) 2082 { 2083 migration_blockers = g_slist_remove(migration_blockers, reason); 2084 } 2085 2086 void qmp_migrate_incoming(const char *uri, Error **errp) 2087 { 2088 Error *local_err = NULL; 2089 static bool once = true; 2090 2091 if (!once) { 2092 error_setg(errp, "The incoming migration has already been started"); 2093 return; 2094 } 2095 if (!runstate_check(RUN_STATE_INMIGRATE)) { 2096 error_setg(errp, "'-incoming' was not specified on the command line"); 2097 return; 2098 } 2099 2100 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2101 return; 2102 } 2103 2104 qemu_start_incoming_migration(uri, &local_err); 2105 2106 if (local_err) { 2107 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2108 error_propagate(errp, local_err); 2109 return; 2110 } 2111 2112 once = false; 2113 } 2114 2115 void qmp_migrate_recover(const char *uri, Error **errp) 2116 { 2117 MigrationIncomingState *mis = migration_incoming_get_current(); 2118 2119 /* 2120 * Don't even bother to use ERRP_GUARD() as it _must_ always be set by 2121 * callers (no one should ignore a recover failure); if there is, it's a 2122 * programming error. 2123 */ 2124 assert(errp); 2125 2126 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2127 error_setg(errp, "Migrate recover can only be run " 2128 "when postcopy is paused."); 2129 return; 2130 } 2131 2132 if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, 2133 false, true) == true) { 2134 error_setg(errp, "Migrate recovery is triggered already"); 2135 return; 2136 } 2137 2138 /* 2139 * Note that this call will never start a real migration; it will 2140 * only re-setup the migration stream and poke existing migration 2141 * to continue using that newly established channel. 
2142 */ 2143 qemu_start_incoming_migration(uri, errp); 2144 2145 /* Safe to dereference with the assert above */ 2146 if (*errp) { 2147 /* Reset the flag so user could still retry */ 2148 qatomic_set(&mis->postcopy_recover_triggered, false); 2149 } 2150 } 2151 2152 void qmp_migrate_pause(Error **errp) 2153 { 2154 MigrationState *ms = migrate_get_current(); 2155 MigrationIncomingState *mis = migration_incoming_get_current(); 2156 int ret; 2157 2158 if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2159 /* Source side, during postcopy */ 2160 qemu_mutex_lock(&ms->qemu_file_lock); 2161 ret = qemu_file_shutdown(ms->to_dst_file); 2162 qemu_mutex_unlock(&ms->qemu_file_lock); 2163 if (ret) { 2164 error_setg(errp, "Failed to pause source migration"); 2165 } 2166 return; 2167 } 2168 2169 if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2170 ret = qemu_file_shutdown(mis->from_src_file); 2171 if (ret) { 2172 error_setg(errp, "Failed to pause destination migration"); 2173 } 2174 return; 2175 } 2176 2177 error_setg(errp, "migrate-pause is currently only supported " 2178 "during postcopy-active state"); 2179 } 2180 2181 bool migration_is_blocked(Error **errp) 2182 { 2183 if (qemu_savevm_state_blocked(errp)) { 2184 return true; 2185 } 2186 2187 if (migration_blockers) { 2188 error_propagate(errp, error_copy(migration_blockers->data)); 2189 return true; 2190 } 2191 2192 return false; 2193 } 2194 2195 /* Returns true if continue to migrate, or false if error detected */ 2196 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, 2197 bool resume, Error **errp) 2198 { 2199 Error *local_err = NULL; 2200 2201 if (resume) { 2202 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2203 error_setg(errp, "Cannot resume if there is no " 2204 "paused migration"); 2205 return false; 2206 } 2207 2208 /* 2209 * Postcopy recovery won't work well with release-ram 2210 * capability since release-ram will drop the page buffer as 2211 * long as the page is put into the send buffer. So if there 2212 * is a network failure happened, any page buffers that have 2213 * not yet reached the destination VM but have already been 2214 * sent from the source VM will be lost forever. Let's refuse 2215 * the client from resuming such a postcopy migration. 2216 * Luckily release-ram was designed to only be used when src 2217 * and destination VMs are on the same host, so it should be 2218 * fine. 
2219 */ 2220 if (migrate_release_ram()) { 2221 error_setg(errp, "Postcopy recovery cannot work " 2222 "when release-ram capability is set"); 2223 return false; 2224 } 2225 2226 /* This is a resume, skip init status */ 2227 return true; 2228 } 2229 2230 if (migration_is_running(s->state)) { 2231 error_setg(errp, QERR_MIGRATION_ACTIVE); 2232 return false; 2233 } 2234 2235 if (runstate_check(RUN_STATE_INMIGRATE)) { 2236 error_setg(errp, "Guest is waiting for an incoming migration"); 2237 return false; 2238 } 2239 2240 if (runstate_check(RUN_STATE_POSTMIGRATE)) { 2241 error_setg(errp, "Can't migrate the vm that was paused due to " 2242 "previous migration"); 2243 return false; 2244 } 2245 2246 if (migration_is_blocked(errp)) { 2247 return false; 2248 } 2249 2250 if (blk || blk_inc) { 2251 if (migrate_colo_enabled()) { 2252 error_setg(errp, "No disk migration is required in COLO mode"); 2253 return false; 2254 } 2255 if (migrate_use_block() || migrate_use_block_incremental()) { 2256 error_setg(errp, "Command options are incompatible with " 2257 "current migration capabilities"); 2258 return false; 2259 } 2260 migrate_set_block_enabled(true, &local_err); 2261 if (local_err) { 2262 error_propagate(errp, local_err); 2263 return false; 2264 } 2265 s->must_remove_block_options = true; 2266 } 2267 2268 if (blk_inc) { 2269 migrate_set_block_incremental(s, true); 2270 } 2271 2272 migrate_init(s); 2273 /* 2274 * set ram_counters compression_counters memory to zero for a 2275 * new migration 2276 */ 2277 memset(&ram_counters, 0, sizeof(ram_counters)); 2278 memset(&compression_counters, 0, sizeof(compression_counters)); 2279 2280 return true; 2281 } 2282 2283 void qmp_migrate(const char *uri, bool has_blk, bool blk, 2284 bool has_inc, bool inc, bool has_detach, bool detach, 2285 bool has_resume, bool resume, Error **errp) 2286 { 2287 Error *local_err = NULL; 2288 MigrationState *s = migrate_get_current(); 2289 const char *p = NULL; 2290 2291 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 2292 has_resume && resume, errp)) { 2293 /* Error detected, put into errp */ 2294 return; 2295 } 2296 2297 if (!(has_resume && resume)) { 2298 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2299 return; 2300 } 2301 } 2302 2303 migrate_protocol_allow_multifd(false); 2304 if (strstart(uri, "tcp:", &p) || 2305 strstart(uri, "unix:", NULL) || 2306 strstart(uri, "vsock:", NULL)) { 2307 migrate_protocol_allow_multifd(true); 2308 socket_start_outgoing_migration(s, p ? 
p : uri, &local_err); 2309 #ifdef CONFIG_RDMA 2310 } else if (strstart(uri, "rdma:", &p)) { 2311 rdma_start_outgoing_migration(s, p, &local_err); 2312 #endif 2313 } else if (strstart(uri, "exec:", &p)) { 2314 exec_start_outgoing_migration(s, p, &local_err); 2315 } else if (strstart(uri, "fd:", &p)) { 2316 fd_start_outgoing_migration(s, p, &local_err); 2317 } else { 2318 if (!(has_resume && resume)) { 2319 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2320 } 2321 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 2322 "a valid migration protocol"); 2323 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2324 MIGRATION_STATUS_FAILED); 2325 block_cleanup_parameters(s); 2326 return; 2327 } 2328 2329 if (local_err) { 2330 if (!(has_resume && resume)) { 2331 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2332 } 2333 migrate_fd_error(s, local_err); 2334 error_propagate(errp, local_err); 2335 return; 2336 } 2337 } 2338 2339 void qmp_migrate_cancel(Error **errp) 2340 { 2341 migration_cancel(NULL); 2342 } 2343 2344 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2345 { 2346 MigrationState *s = migrate_get_current(); 2347 if (s->state != state) { 2348 error_setg(errp, "Migration not in expected state: %s", 2349 MigrationStatus_str(s->state)); 2350 return; 2351 } 2352 qemu_sem_post(&s->pause_sem); 2353 } 2354 2355 bool migrate_release_ram(void) 2356 { 2357 MigrationState *s; 2358 2359 s = migrate_get_current(); 2360 2361 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 2362 } 2363 2364 bool migrate_postcopy_ram(void) 2365 { 2366 MigrationState *s; 2367 2368 s = migrate_get_current(); 2369 2370 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 2371 } 2372 2373 bool migrate_postcopy(void) 2374 { 2375 return migrate_postcopy_ram() || migrate_dirty_bitmaps(); 2376 } 2377 2378 bool migrate_auto_converge(void) 2379 { 2380 MigrationState *s; 2381 2382 s = migrate_get_current(); 2383 2384 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 2385 } 2386 2387 bool migrate_zero_blocks(void) 2388 { 2389 MigrationState *s; 2390 2391 s = migrate_get_current(); 2392 2393 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 2394 } 2395 2396 bool migrate_postcopy_blocktime(void) 2397 { 2398 MigrationState *s; 2399 2400 s = migrate_get_current(); 2401 2402 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; 2403 } 2404 2405 bool migrate_use_compression(void) 2406 { 2407 MigrationState *s; 2408 2409 s = migrate_get_current(); 2410 2411 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 2412 } 2413 2414 int migrate_compress_level(void) 2415 { 2416 MigrationState *s; 2417 2418 s = migrate_get_current(); 2419 2420 return s->parameters.compress_level; 2421 } 2422 2423 int migrate_compress_threads(void) 2424 { 2425 MigrationState *s; 2426 2427 s = migrate_get_current(); 2428 2429 return s->parameters.compress_threads; 2430 } 2431 2432 int migrate_compress_wait_thread(void) 2433 { 2434 MigrationState *s; 2435 2436 s = migrate_get_current(); 2437 2438 return s->parameters.compress_wait_thread; 2439 } 2440 2441 int migrate_decompress_threads(void) 2442 { 2443 MigrationState *s; 2444 2445 s = migrate_get_current(); 2446 2447 return s->parameters.decompress_threads; 2448 } 2449 2450 bool migrate_dirty_bitmaps(void) 2451 { 2452 MigrationState *s; 2453 2454 s = migrate_get_current(); 2455 2456 return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; 2457 } 2458 2459 bool migrate_ignore_shared(void) 
2460 { 2461 MigrationState *s; 2462 2463 s = migrate_get_current(); 2464 2465 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; 2466 } 2467 2468 bool migrate_validate_uuid(void) 2469 { 2470 MigrationState *s; 2471 2472 s = migrate_get_current(); 2473 2474 return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; 2475 } 2476 2477 bool migrate_use_events(void) 2478 { 2479 MigrationState *s; 2480 2481 s = migrate_get_current(); 2482 2483 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 2484 } 2485 2486 bool migrate_use_multifd(void) 2487 { 2488 MigrationState *s; 2489 2490 s = migrate_get_current(); 2491 2492 return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; 2493 } 2494 2495 bool migrate_pause_before_switchover(void) 2496 { 2497 MigrationState *s; 2498 2499 s = migrate_get_current(); 2500 2501 return s->enabled_capabilities[ 2502 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 2503 } 2504 2505 int migrate_multifd_channels(void) 2506 { 2507 MigrationState *s; 2508 2509 s = migrate_get_current(); 2510 2511 return s->parameters.multifd_channels; 2512 } 2513 2514 MultiFDCompression migrate_multifd_compression(void) 2515 { 2516 MigrationState *s; 2517 2518 s = migrate_get_current(); 2519 2520 return s->parameters.multifd_compression; 2521 } 2522 2523 int migrate_multifd_zlib_level(void) 2524 { 2525 MigrationState *s; 2526 2527 s = migrate_get_current(); 2528 2529 return s->parameters.multifd_zlib_level; 2530 } 2531 2532 int migrate_multifd_zstd_level(void) 2533 { 2534 MigrationState *s; 2535 2536 s = migrate_get_current(); 2537 2538 return s->parameters.multifd_zstd_level; 2539 } 2540 2541 int migrate_use_xbzrle(void) 2542 { 2543 MigrationState *s; 2544 2545 s = migrate_get_current(); 2546 2547 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 2548 } 2549 2550 uint64_t migrate_xbzrle_cache_size(void) 2551 { 2552 MigrationState *s; 2553 2554 s = migrate_get_current(); 2555 2556 return s->parameters.xbzrle_cache_size; 2557 } 2558 2559 static int64_t migrate_max_postcopy_bandwidth(void) 2560 { 2561 MigrationState *s; 2562 2563 s = migrate_get_current(); 2564 2565 return s->parameters.max_postcopy_bandwidth; 2566 } 2567 2568 bool migrate_use_block(void) 2569 { 2570 MigrationState *s; 2571 2572 s = migrate_get_current(); 2573 2574 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 2575 } 2576 2577 bool migrate_use_return_path(void) 2578 { 2579 MigrationState *s; 2580 2581 s = migrate_get_current(); 2582 2583 return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 2584 } 2585 2586 bool migrate_use_block_incremental(void) 2587 { 2588 MigrationState *s; 2589 2590 s = migrate_get_current(); 2591 2592 return s->parameters.block_incremental; 2593 } 2594 2595 bool migrate_background_snapshot(void) 2596 { 2597 MigrationState *s; 2598 2599 s = migrate_get_current(); 2600 2601 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; 2602 } 2603 2604 /* migration thread support */ 2605 /* 2606 * Something bad happened to the RP stream, mark an error 2607 * The caller shall print or trace something to indicate why 2608 */ 2609 static void mark_source_rp_bad(MigrationState *s) 2610 { 2611 s->rp_state.error = true; 2612 } 2613 2614 static struct rp_cmd_args { 2615 ssize_t len; /* -1 = variable */ 2616 const char *name; 2617 } rp_cmd_args[] = { 2618 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2619 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2620 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2621 
[MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2622 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2623 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2624 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2625 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2626 }; 2627 2628 /* 2629 * Process a request for pages received on the return path. 2630 * We're allowed to send more than requested (e.g. to round to our page size) 2631 * and we don't need to send pages that have already been sent. 2632 */ 2633 static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname, 2634 ram_addr_t start, size_t len) 2635 { 2636 long our_host_ps = qemu_real_host_page_size; 2637 2638 trace_migrate_handle_rp_req_pages(rbname, start, len); 2639 2640 /* 2641 * Since we currently insist on matching page sizes, just sanity check 2642 * that we're being asked for whole host pages. 2643 */ 2644 if (!QEMU_IS_ALIGNED(start, our_host_ps) || 2645 !QEMU_IS_ALIGNED(len, our_host_ps)) { 2646 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 2647 " len: %zd", __func__, start, len); 2648 mark_source_rp_bad(ms); 2649 return; 2650 } 2651 2652 if (ram_save_queue_pages(rbname, start, len)) { 2653 mark_source_rp_bad(ms); 2654 } 2655 } 2656 2657 /* Return true to retry, false to quit */ 2658 static bool postcopy_pause_return_path_thread(MigrationState *s) 2659 { 2660 trace_postcopy_pause_return_path(); 2661 2662 qemu_sem_wait(&s->postcopy_pause_rp_sem); 2663 2664 trace_postcopy_pause_return_path_continued(); 2665 2666 return true; 2667 } 2668 2669 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 2670 { 2671 RAMBlock *block = qemu_ram_block_by_name(block_name); 2672 2673 if (!block) { 2674 error_report("%s: invalid block name '%s'", __func__, block_name); 2675 return -EINVAL; 2676 } 2677 2678 /* Fetch the received bitmap and refresh the dirty bitmap */ 2679 return ram_dirty_bitmap_reload(s, block); 2680 } 2681 2682 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 2683 { 2684 trace_source_return_path_thread_resume_ack(value); 2685 2686 if (value != MIGRATION_RESUME_ACK_VALUE) { 2687 error_report("%s: illegal resume_ack value %"PRIu32, 2688 __func__, value); 2689 return -1; 2690 } 2691 2692 /* Now both sides are active.
*/ 2693 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2694 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2695 2696 /* Notify the send thread that it's time to continue sending pages */ 2697 qemu_sem_post(&s->rp_state.rp_sem); 2698 2699 return 0; 2700 } 2701 2702 /* Release ms->rp_state.from_dst_file in a safe way */ 2703 static void migration_release_from_dst_file(MigrationState *ms) 2704 { 2705 QEMUFile *file; 2706 2707 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { 2708 /* 2709 * Reset the from_dst_file pointer first before releasing it, as we 2710 * can't block within the lock section 2711 */ 2712 file = ms->rp_state.from_dst_file; 2713 ms->rp_state.from_dst_file = NULL; 2714 } 2715 2716 qemu_fclose(file); 2717 } 2718 2719 /* 2720 * Handles messages sent on the return path towards the source VM 2721 * 2722 */ 2723 static void *source_return_path_thread(void *opaque) 2724 { 2725 MigrationState *ms = opaque; 2726 QEMUFile *rp = ms->rp_state.from_dst_file; 2727 uint16_t header_len, header_type; 2728 uint8_t buf[512]; 2729 uint32_t tmp32, sibling_error; 2730 ram_addr_t start = 0; /* =0 to silence warning */ 2731 size_t len = 0, expected_len; 2732 int res; 2733 2734 trace_source_return_path_thread_entry(); 2735 rcu_register_thread(); 2736 2737 retry: 2738 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 2739 migration_is_setup_or_active(ms->state)) { 2740 trace_source_return_path_thread_loop_top(); 2741 header_type = qemu_get_be16(rp); 2742 header_len = qemu_get_be16(rp); 2743 2744 if (qemu_file_get_error(rp)) { 2745 mark_source_rp_bad(ms); 2746 goto out; 2747 } 2748 2749 if (header_type >= MIG_RP_MSG_MAX || 2750 header_type == MIG_RP_MSG_INVALID) { 2751 error_report("RP: Received invalid message 0x%04x length 0x%04x", 2752 header_type, header_len); 2753 mark_source_rp_bad(ms); 2754 goto out; 2755 } 2756 2757 if ((rp_cmd_args[header_type].len != -1 && 2758 header_len != rp_cmd_args[header_type].len) || 2759 header_len > sizeof(buf)) { 2760 error_report("RP: Received '%s' message (0x%04x) with " 2761 "incorrect length %d expecting %zu", 2762 rp_cmd_args[header_type].name, header_type, header_len, 2763 (size_t)rp_cmd_args[header_type].len); 2764 mark_source_rp_bad(ms); 2765 goto out; 2766 } 2767 2768 /* We know we've got a valid header by this point */ 2769 res = qemu_get_buffer(rp, buf, header_len); 2770 if (res != header_len) { 2771 error_report("RP: Failed reading data for message 0x%04x" 2772 " read %d expected %d", 2773 header_type, res, header_len); 2774 mark_source_rp_bad(ms); 2775 goto out; 2776 } 2777 2778 /* OK, we have the message and the data */ 2779 switch (header_type) { 2780 case MIG_RP_MSG_SHUT: 2781 sibling_error = ldl_be_p(buf); 2782 trace_source_return_path_thread_shut(sibling_error); 2783 if (sibling_error) { 2784 error_report("RP: Sibling indicated error %d", sibling_error); 2785 mark_source_rp_bad(ms); 2786 } 2787 /* 2788 * We'll let the main thread deal with closing the RP; 2789 * we could do a shutdown(2) on it, but we're the only user 2790 * anyway, so there's nothing gained.
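 * (SHUT carries a single be32 payload, decoded above: 0 for a clean destination shutdown, non-zero if the sibling hit an error.)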
2791 */ 2792 goto out; 2793 2794 case MIG_RP_MSG_PONG: 2795 tmp32 = ldl_be_p(buf); 2796 trace_source_return_path_thread_pong(tmp32); 2797 break; 2798 2799 case MIG_RP_MSG_REQ_PAGES: 2800 start = ldq_be_p(buf); 2801 len = ldl_be_p(buf + 8); 2802 migrate_handle_rp_req_pages(ms, NULL, start, len); 2803 break; 2804 2805 case MIG_RP_MSG_REQ_PAGES_ID: 2806 expected_len = 12 + 1; /* header + termination */ 2807 2808 if (header_len >= expected_len) { 2809 start = ldq_be_p(buf); 2810 len = ldl_be_p(buf + 8); 2811 /* Now we expect an idstr */ 2812 tmp32 = buf[12]; /* Length of the following idstr */ 2813 buf[13 + tmp32] = '\0'; 2814 expected_len += tmp32; 2815 } 2816 if (header_len != expected_len) { 2817 error_report("RP: Req_Page_id with length %d expecting %zd", 2818 header_len, expected_len); 2819 mark_source_rp_bad(ms); 2820 goto out; 2821 } 2822 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 2823 break; 2824 2825 case MIG_RP_MSG_RECV_BITMAP: 2826 if (header_len < 1) { 2827 error_report("%s: missing block name", __func__); 2828 mark_source_rp_bad(ms); 2829 goto out; 2830 } 2831 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 2832 buf[buf[0] + 1] = '\0'; 2833 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 2834 mark_source_rp_bad(ms); 2835 goto out; 2836 } 2837 break; 2838 2839 case MIG_RP_MSG_RESUME_ACK: 2840 tmp32 = ldl_be_p(buf); 2841 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 2842 mark_source_rp_bad(ms); 2843 goto out; 2844 } 2845 break; 2846 2847 default: 2848 break; 2849 } 2850 } 2851 2852 out: 2853 res = qemu_file_get_error(rp); 2854 if (res) { 2855 if (res == -EIO && migration_in_postcopy()) { 2856 /* 2857 * Maybe there is something we can do: it looks like a 2858 * network down issue, and we pause for a recovery. 2859 */ 2860 migration_release_from_dst_file(ms); 2861 rp = NULL; 2862 if (postcopy_pause_return_path_thread(ms)) { 2863 /* 2864 * Reload rp, reset the rest. Referencing it is safe since 2865 * it's reset only by us above, or when migration completes 2866 */ 2867 rp = ms->rp_state.from_dst_file; 2868 ms->rp_state.error = false; 2869 goto retry; 2870 } 2871 } 2872 2873 trace_source_return_path_thread_bad_end(); 2874 mark_source_rp_bad(ms); 2875 } 2876 2877 trace_source_return_path_thread_end(); 2878 migration_release_from_dst_file(ms); 2879 rcu_unregister_thread(); 2880 return NULL; 2881 } 2882 2883 static int open_return_path_on_source(MigrationState *ms, 2884 bool create_thread) 2885 { 2886 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2887 if (!ms->rp_state.from_dst_file) { 2888 return -1; 2889 } 2890 2891 trace_open_return_path_on_source(); 2892 2893 if (!create_thread) { 2894 /* We're done */ 2895 return 0; 2896 } 2897 2898 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 2899 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2900 ms->rp_state.rp_thread_created = true; 2901 2902 trace_open_return_path_on_source_continue(); 2903 2904 return 0; 2905 } 2906 2907 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 2908 static int await_return_path_close_on_source(MigrationState *ms) 2909 { 2910 /* 2911 * If this is a normal exit then the destination will send a SHUT and the 2912 * rp_thread will exit, however if there's an error we need to cause 2913 * it to exit. 2914 */ 2915 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 2916 /* 2917 * shutdown(2), if we have it, will cause it to unblock if it's stuck 2918 * waiting for the destination. 
2919 */ 2920 qemu_file_shutdown(ms->rp_state.from_dst_file); 2921 mark_source_rp_bad(ms); 2922 } 2923 trace_await_return_path_close_on_source_joining(); 2924 qemu_thread_join(&ms->rp_state.rp_thread); 2925 ms->rp_state.rp_thread_created = false; 2926 trace_await_return_path_close_on_source_close(); 2927 return ms->rp_state.error; 2928 } 2929 2930 /* 2931 * Switch from normal iteration to postcopy 2932 * Returns non-0 on error 2933 */ 2934 static int postcopy_start(MigrationState *ms) 2935 { 2936 int ret; 2937 QIOChannelBuffer *bioc; 2938 QEMUFile *fb; 2939 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2940 int64_t bandwidth = migrate_max_postcopy_bandwidth(); 2941 bool restart_block = false; 2942 int cur_state = MIGRATION_STATUS_ACTIVE; 2943 if (!migrate_pause_before_switchover()) { 2944 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 2945 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2946 } 2947 2948 trace_postcopy_start(); 2949 qemu_mutex_lock_iothread(); 2950 trace_postcopy_start_set_run(); 2951 2952 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2953 global_state_store(); 2954 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2955 if (ret < 0) { 2956 goto fail; 2957 } 2958 2959 ret = migration_maybe_pause(ms, &cur_state, 2960 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2961 if (ret < 0) { 2962 goto fail; 2963 } 2964 2965 ret = bdrv_inactivate_all(); 2966 if (ret < 0) { 2967 goto fail; 2968 } 2969 restart_block = true; 2970 2971 /* 2972 * Cause any non-postcopiable, but iterative devices to 2973 * send out their final data. 2974 */ 2975 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 2976 2977 /* 2978 * in Finish migrate and with the io-lock held everything should 2979 * be quiet, but we've potentially still got dirty pages and we 2980 * need to tell the destination to throw any pages it's already received 2981 * that are dirty 2982 */ 2983 if (migrate_postcopy_ram()) { 2984 if (ram_postcopy_send_discard_bitmap(ms)) { 2985 error_report("postcopy send discard bitmap failed"); 2986 goto fail; 2987 } 2988 } 2989 2990 /* 2991 * send rest of state - note things that are doing postcopy 2992 * will notice we're in POSTCOPY_ACTIVE and not actually 2993 * wrap their state up here 2994 */ 2995 /* 0 max-postcopy-bandwidth means unlimited */ 2996 if (!bandwidth) { 2997 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 2998 } else { 2999 qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); 3000 } 3001 if (migrate_postcopy_ram()) { 3002 /* Ping just for debugging, helps line traces up */ 3003 qemu_savevm_send_ping(ms->to_dst_file, 2); 3004 } 3005 3006 /* 3007 * While loading the device state we may trigger page transfer 3008 * requests and the fd must be free to process those, and thus 3009 * the destination must read the whole device state off the fd before 3010 * it starts processing it. Unfortunately the ad-hoc migration format 3011 * doesn't allow the destination to know the size to read without fully 3012 * parsing it through each devices load-state code (especially the open 3013 * coded devices that use get/put). 3014 * So we wrap the device state up in a package with a length at the start; 3015 * to do this we use a qemu_buf to hold the whole of the device state. 
3016 */ 3017 bioc = qio_channel_buffer_new(4096); 3018 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 3019 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 3020 object_unref(OBJECT(bioc)); 3021 3022 /* 3023 * Make sure the receiver can get incoming pages before we send the rest 3024 * of the state 3025 */ 3026 qemu_savevm_send_postcopy_listen(fb); 3027 3028 qemu_savevm_state_complete_precopy(fb, false, false); 3029 if (migrate_postcopy_ram()) { 3030 qemu_savevm_send_ping(fb, 3); 3031 } 3032 3033 qemu_savevm_send_postcopy_run(fb); 3034 3035 /* <><> end of stuff going into the package */ 3036 3037 /* Last point of recovery; as soon as we send the package the destination 3038 * can open devices and potentially start running. 3039 * Lets just check again we've not got any errors. 3040 */ 3041 ret = qemu_file_get_error(ms->to_dst_file); 3042 if (ret) { 3043 error_report("postcopy_start: Migration stream errored (pre package)"); 3044 goto fail_closefb; 3045 } 3046 3047 restart_block = false; 3048 3049 /* Now send that blob */ 3050 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 3051 goto fail_closefb; 3052 } 3053 qemu_fclose(fb); 3054 3055 /* Send a notify to give a chance for anything that needs to happen 3056 * at the transition to postcopy and after the device state; in particular 3057 * spice needs to trigger a transition now 3058 */ 3059 ms->postcopy_after_devices = true; 3060 notifier_list_notify(&migration_state_notifiers, ms); 3061 3062 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 3063 3064 qemu_mutex_unlock_iothread(); 3065 3066 if (migrate_postcopy_ram()) { 3067 /* 3068 * Although this ping is just for debug, it could potentially be 3069 * used for getting a better measurement of downtime at the source. 3070 */ 3071 qemu_savevm_send_ping(ms->to_dst_file, 4); 3072 } 3073 3074 if (migrate_release_ram()) { 3075 ram_postcopy_migrated_memory_release(ms); 3076 } 3077 3078 ret = qemu_file_get_error(ms->to_dst_file); 3079 if (ret) { 3080 error_report("postcopy_start: Migration stream errored"); 3081 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 3082 MIGRATION_STATUS_FAILED); 3083 } 3084 3085 return ret; 3086 3087 fail_closefb: 3088 qemu_fclose(fb); 3089 fail: 3090 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 3091 MIGRATION_STATUS_FAILED); 3092 if (restart_block) { 3093 /* A failure happened early enough that we know the destination hasn't 3094 * accessed block devices, so we're safe to recover. 3095 */ 3096 Error *local_err = NULL; 3097 3098 bdrv_invalidate_cache_all(&local_err); 3099 if (local_err) { 3100 error_report_err(local_err); 3101 } 3102 } 3103 qemu_mutex_unlock_iothread(); 3104 return -1; 3105 } 3106 3107 /** 3108 * migration_maybe_pause: Pause if required to by 3109 * migrate_pause_before_switchover called with the iothread locked 3110 * Returns: 0 on success 3111 */ 3112 static int migration_maybe_pause(MigrationState *s, 3113 int *current_active_state, 3114 int new_state) 3115 { 3116 if (!migrate_pause_before_switchover()) { 3117 return 0; 3118 } 3119 3120 /* Since leaving this state is not atomic with posting the semaphore 3121 * it's possible that someone could have issued multiple migrate_continue 3122 * and the semaphore is incorrectly positive at this point; 3123 * the docs say it's undefined to reinit a semaphore that's already 3124 * init'd, so use timedwait to eat up any existing posts. 
3125 */ 3126 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 3127 /* This block intentionally left blank */ 3128 } 3129 3130 /* 3131 * If the migration is cancelled when it is in the completion phase, 3132 * the migration state is set to MIGRATION_STATUS_CANCELLING. 3133 * So we don't need to wait a semaphore, otherwise we would always 3134 * wait for the 'pause_sem' semaphore. 3135 */ 3136 if (s->state != MIGRATION_STATUS_CANCELLING) { 3137 qemu_mutex_unlock_iothread(); 3138 migrate_set_state(&s->state, *current_active_state, 3139 MIGRATION_STATUS_PRE_SWITCHOVER); 3140 qemu_sem_wait(&s->pause_sem); 3141 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 3142 new_state); 3143 *current_active_state = new_state; 3144 qemu_mutex_lock_iothread(); 3145 } 3146 3147 return s->state == new_state ? 0 : -EINVAL; 3148 } 3149 3150 /** 3151 * migration_completion: Used by migration_thread when there's not much left. 3152 * The caller 'breaks' the loop when this returns. 3153 * 3154 * @s: Current migration state 3155 */ 3156 static void migration_completion(MigrationState *s) 3157 { 3158 int ret; 3159 int current_active_state = s->state; 3160 3161 if (s->state == MIGRATION_STATUS_ACTIVE) { 3162 qemu_mutex_lock_iothread(); 3163 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3164 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3165 s->vm_was_running = runstate_is_running(); 3166 ret = global_state_store(); 3167 3168 if (!ret) { 3169 bool inactivate = !migrate_colo_enabled(); 3170 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 3171 trace_migration_completion_vm_stop(ret); 3172 if (ret >= 0) { 3173 ret = migration_maybe_pause(s, ¤t_active_state, 3174 MIGRATION_STATUS_DEVICE); 3175 } 3176 if (ret >= 0) { 3177 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 3178 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 3179 inactivate); 3180 } 3181 if (inactivate && ret >= 0) { 3182 s->block_inactive = true; 3183 } 3184 } 3185 qemu_mutex_unlock_iothread(); 3186 3187 if (ret < 0) { 3188 goto fail; 3189 } 3190 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3191 trace_migration_completion_postcopy_end(); 3192 3193 qemu_mutex_lock_iothread(); 3194 qemu_savevm_state_complete_postcopy(s->to_dst_file); 3195 qemu_mutex_unlock_iothread(); 3196 3197 trace_migration_completion_postcopy_end_after_complete(); 3198 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3199 goto fail; 3200 } 3201 3202 /* 3203 * If rp was opened we must clean up the thread before 3204 * cleaning everything else up (since if there are no failures 3205 * it will wait for the destination to send it's status in 3206 * a SHUT command). 3207 */ 3208 if (s->rp_state.rp_thread_created) { 3209 int rp_error; 3210 trace_migration_return_path_end_before(); 3211 rp_error = await_return_path_close_on_source(s); 3212 trace_migration_return_path_end_after(rp_error); 3213 if (rp_error) { 3214 goto fail_invalidate; 3215 } 3216 } 3217 3218 if (qemu_file_get_error(s->to_dst_file)) { 3219 trace_migration_completion_file_err(); 3220 goto fail_invalidate; 3221 } 3222 3223 if (!migrate_colo_enabled()) { 3224 migrate_set_state(&s->state, current_active_state, 3225 MIGRATION_STATUS_COMPLETED); 3226 } 3227 3228 return; 3229 3230 fail_invalidate: 3231 /* If not doing postcopy, vm_start() will be called: let's regain 3232 * control on images. 
3233 */ 3234 if (s->state == MIGRATION_STATUS_ACTIVE || 3235 s->state == MIGRATION_STATUS_DEVICE) { 3236 Error *local_err = NULL; 3237 3238 qemu_mutex_lock_iothread(); 3239 bdrv_invalidate_cache_all(&local_err); 3240 if (local_err) { 3241 error_report_err(local_err); 3242 } else { 3243 s->block_inactive = false; 3244 } 3245 qemu_mutex_unlock_iothread(); 3246 } 3247 3248 fail: 3249 migrate_set_state(&s->state, current_active_state, 3250 MIGRATION_STATUS_FAILED); 3251 } 3252 3253 /** 3254 * bg_migration_completion: Used by bg_migration_thread when after all the 3255 * RAM has been saved. The caller 'breaks' the loop when this returns. 3256 * 3257 * @s: Current migration state 3258 */ 3259 static void bg_migration_completion(MigrationState *s) 3260 { 3261 int current_active_state = s->state; 3262 3263 /* 3264 * Stop tracking RAM writes - un-protect memory, un-register UFFD 3265 * memory ranges, flush kernel wait queues and wake up threads 3266 * waiting for write fault to be resolved. 3267 */ 3268 ram_write_tracking_stop(); 3269 3270 if (s->state == MIGRATION_STATUS_ACTIVE) { 3271 /* 3272 * By this moment we have RAM content saved into the migration stream. 3273 * The next step is to flush the non-RAM content (device state) 3274 * right after the ram content. The device state has been stored into 3275 * the temporary buffer before RAM saving started. 3276 */ 3277 qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); 3278 qemu_fflush(s->to_dst_file); 3279 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3280 goto fail; 3281 } 3282 3283 if (qemu_file_get_error(s->to_dst_file)) { 3284 trace_migration_completion_file_err(); 3285 goto fail; 3286 } 3287 3288 migrate_set_state(&s->state, current_active_state, 3289 MIGRATION_STATUS_COMPLETED); 3290 return; 3291 3292 fail: 3293 migrate_set_state(&s->state, current_active_state, 3294 MIGRATION_STATUS_FAILED); 3295 } 3296 3297 bool migrate_colo_enabled(void) 3298 { 3299 MigrationState *s = migrate_get_current(); 3300 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 3301 } 3302 3303 typedef enum MigThrError { 3304 /* No error detected */ 3305 MIG_THR_ERR_NONE = 0, 3306 /* Detected error, but resumed successfully */ 3307 MIG_THR_ERR_RECOVERED = 1, 3308 /* Detected fatal error, need to exit */ 3309 MIG_THR_ERR_FATAL = 2, 3310 } MigThrError; 3311 3312 static int postcopy_resume_handshake(MigrationState *s) 3313 { 3314 qemu_savevm_send_postcopy_resume(s->to_dst_file); 3315 3316 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3317 qemu_sem_wait(&s->rp_state.rp_sem); 3318 } 3319 3320 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3321 return 0; 3322 } 3323 3324 return -1; 3325 } 3326 3327 /* Return zero if success, or <0 for error */ 3328 static int postcopy_do_resume(MigrationState *s) 3329 { 3330 int ret; 3331 3332 /* 3333 * Call all the resume_prepare() hooks, so that modules can be 3334 * ready for the migration resume. 3335 */ 3336 ret = qemu_savevm_state_resume_prepare(s); 3337 if (ret) { 3338 error_report("%s: resume_prepare() failure detected: %d", 3339 __func__, ret); 3340 return ret; 3341 } 3342 3343 /* 3344 * Last handshake with destination on the resume (destination will 3345 * switch to postcopy-active afterwards) 3346 */ 3347 ret = postcopy_resume_handshake(s); 3348 if (ret) { 3349 error_report("%s: handshake failed: %d", __func__, ret); 3350 return ret; 3351 } 3352 3353 return 0; 3354 } 3355 3356 /* 3357 * We don't return until we are in a safe state to continue current 3358 * postcopy migration. 
Returns MIG_THR_ERR_RECOVERED if recovered, or 3359 * MIG_THR_ERR_FATAL if an unrecoverable failure happened. 3360 */ 3361 static MigThrError postcopy_pause(MigrationState *s) 3362 { 3363 assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 3364 3365 while (true) { 3366 QEMUFile *file; 3367 3368 /* 3369 * Current channel is possibly broken. Release it. Note that this is 3370 * guaranteed even without the lock because to_dst_file should only be 3371 * modified by the migration thread. That also guarantees that the 3372 * unregister of yank is safe too without the lock. It should be safe 3373 * even to be within the qemu_file_lock, but we didn't do that to avoid 3374 * taking another mutex (yank_lock) within qemu_file_lock. TL;DR: we make 3375 * the qemu_file_lock critical section as small as possible. 3376 */ 3377 assert(s->to_dst_file); 3378 migration_ioc_unregister_yank_from_file(s->to_dst_file); 3379 qemu_mutex_lock(&s->qemu_file_lock); 3380 file = s->to_dst_file; 3381 s->to_dst_file = NULL; 3382 qemu_mutex_unlock(&s->qemu_file_lock); 3383 3384 qemu_file_shutdown(file); 3385 qemu_fclose(file); 3386 3387 migrate_set_state(&s->state, s->state, 3388 MIGRATION_STATUS_POSTCOPY_PAUSED); 3389 3390 error_report("Detected IO failure for postcopy. " 3391 "Migration paused."); 3392 3393 /* 3394 * We wait until things are fixed up. Then someone will set the 3395 * status back for us. 3396 */ 3397 while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 3398 qemu_sem_wait(&s->postcopy_pause_sem); 3399 } 3400 3401 if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3402 /* Woken up by a recover procedure. Give it a shot */ 3403 3404 /* 3405 * Firstly, let's wake up the return path now, with a new 3406 * return path channel. 3407 */ 3408 qemu_sem_post(&s->postcopy_pause_rp_sem); 3409 3410 /* Do the resume logic */ 3411 if (postcopy_do_resume(s) == 0) { 3412 /* Let's continue! */ 3413 trace_postcopy_pause_continued(); 3414 return MIG_THR_ERR_RECOVERED; 3415 } else { 3416 /* 3417 * Something went wrong during the recovery, let's 3418 * pause again. Pause is always better than throwing 3419 * data away. 3420 */ 3421 continue; 3422 } 3423 } else { 3424 /* This is not right... Time to quit. */ 3425 return MIG_THR_ERR_FATAL; 3426 } 3427 } 3428 } 3429 3430 static MigThrError migration_detect_error(MigrationState *s) 3431 { 3432 int ret; 3433 int state = s->state; 3434 Error *local_error = NULL; 3435 3436 if (state == MIGRATION_STATUS_CANCELLING || 3437 state == MIGRATION_STATUS_CANCELLED) { 3438 /* End the migration, but don't set the state to failed */ 3439 return MIG_THR_ERR_FATAL; 3440 } 3441 3442 /* Try to detect any file errors */ 3443 ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); 3444 if (!ret) { 3445 /* Everything is fine */ 3446 assert(!local_error); 3447 return MIG_THR_ERR_NONE; 3448 } 3449 3450 if (local_error) { 3451 migrate_set_error(s, local_error); 3452 error_free(local_error); 3453 } 3454 3455 if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { 3456 /* 3457 * For postcopy, we allow the network to be down for a 3458 * while. After that, it can be continued by a 3459 * recovery phase. 3460 */ 3461 return postcopy_pause(s); 3462 } else { 3463 /* 3464 * For precopy (or postcopy with an error outside IO), we fail 3465 * right away. 3466 */ 3467 migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); 3468 trace_migration_thread_file_err(); 3469 3470 /* Time to stop the migration, now.
*/ 3471 return MIG_THR_ERR_FATAL; 3472 } 3473 } 3474 3475 /* How many bytes have we transferred since the beginning of the migration */ 3476 static uint64_t migration_total_bytes(MigrationState *s) 3477 { 3478 return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; 3479 } 3480 3481 static void migration_calculate_complete(MigrationState *s) 3482 { 3483 uint64_t bytes = migration_total_bytes(s); 3484 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3485 int64_t transfer_time; 3486 3487 s->total_time = end_time - s->start_time; 3488 if (!s->downtime) { 3489 /* 3490 * It's still not set, so this is a precopy migration. For 3491 * postcopy, downtime is calculated during postcopy_start(). 3492 */ 3493 s->downtime = end_time - s->downtime_start; 3494 } 3495 3496 transfer_time = s->total_time - s->setup_time; 3497 if (transfer_time) { 3498 s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; 3499 } 3500 } 3501 3502 static void update_iteration_initial_status(MigrationState *s) 3503 { 3504 /* 3505 * Update these three fields at the same time to avoid mismatched info 3506 * leading to a wrong speed calculation. 3507 */ 3508 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3509 s->iteration_initial_bytes = migration_total_bytes(s); 3510 s->iteration_initial_pages = ram_get_total_transferred_pages(); 3511 } 3512 3513 static void migration_update_counters(MigrationState *s, 3514 int64_t current_time) 3515 { 3516 uint64_t transferred, transferred_pages, time_spent; 3517 uint64_t current_bytes; /* bytes transferred since the beginning */ 3518 double bandwidth; 3519 3520 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 3521 return; 3522 } 3523 3524 current_bytes = migration_total_bytes(s); 3525 transferred = current_bytes - s->iteration_initial_bytes; 3526 time_spent = current_time - s->iteration_start_time; 3527 bandwidth = (double)transferred / time_spent; 3528 s->threshold_size = bandwidth * s->parameters.downtime_limit; 3529 3530 s->mbps = (((double) transferred * 8.0) / 3531 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 3532 3533 transferred_pages = ram_get_total_transferred_pages() - 3534 s->iteration_initial_pages; 3535 s->pages_per_second = (double) transferred_pages / 3536 (((double) time_spent / 1000.0)); 3537 3538 /* 3539 * If we haven't sent anything, we don't want to 3540 * recalculate. 10000 is a small enough number for our purposes 3541 */ 3542 if (ram_counters.dirty_pages_rate && transferred > 10000) { 3543 s->expected_downtime = ram_counters.remaining / bandwidth; 3544 } 3545 3546 qemu_file_reset_rate_limit(s->to_dst_file); 3547 3548 update_iteration_initial_status(s); 3549 3550 trace_migrate_transferred(transferred, time_spent, 3551 bandwidth, s->threshold_size); 3552 } 3553 3554 /* Migration thread iteration status */ 3555 typedef enum { 3556 MIG_ITERATE_RESUME, /* Resume current iteration */ 3557 MIG_ITERATE_SKIP, /* Skip current iteration */ 3558 MIG_ITERATE_BREAK, /* Break the loop */ 3559 } MigIterateState; 3560 3561 /* 3562 * Return how the caller should proceed: resume the current iteration 3563 * directly, skip it, or break the migration loop.
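 * A worked example of the switchover test below: with measured bandwidth b in bytes/ms and downtime limit d in ms, threshold_size = b * d (see migration_update_counters()); at roughly 1 MiB/ms and d = 300ms that is about 300 MiB, and completion is only attempted once the pending size reported by qemu_savevm_state_pending() drops below that figure.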
3564 */ 3565 static MigIterateState migration_iteration_run(MigrationState *s) 3566 { 3567 uint64_t pending_size, pend_pre, pend_compat, pend_post; 3568 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 3569 3570 qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, 3571 &pend_compat, &pend_post); 3572 pending_size = pend_pre + pend_compat + pend_post; 3573 3574 trace_migrate_pending(pending_size, s->threshold_size, 3575 pend_pre, pend_compat, pend_post); 3576 3577 if (pending_size && pending_size >= s->threshold_size) { 3578 /* Still a significant amount to transfer */ 3579 if (!in_postcopy && pend_pre <= s->threshold_size && 3580 qatomic_read(&s->start_postcopy)) { 3581 if (postcopy_start(s)) { 3582 error_report("%s: postcopy failed to start", __func__); 3583 } 3584 return MIG_ITERATE_SKIP; 3585 } 3586 /* Just another iteration step */ 3587 qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); 3588 } else { 3589 trace_migration_thread_low_pending(pending_size); 3590 migration_completion(s); 3591 return MIG_ITERATE_BREAK; 3592 } 3593 3594 return MIG_ITERATE_RESUME; 3595 } 3596 3597 static void migration_iteration_finish(MigrationState *s) 3598 { 3599 /* If we enabled cpu throttling for auto-converge, turn it off. */ 3600 cpu_throttle_stop(); 3601 3602 qemu_mutex_lock_iothread(); 3603 switch (s->state) { 3604 case MIGRATION_STATUS_COMPLETED: 3605 migration_calculate_complete(s); 3606 runstate_set(RUN_STATE_POSTMIGRATE); 3607 break; 3608 3609 case MIGRATION_STATUS_ACTIVE: 3610 /* 3611 * We should really assert here, but since it's during 3612 * migration, let's try to reduce the usage of assertions. 3613 */ 3614 if (!migrate_colo_enabled()) { 3615 error_report("%s: critical error: calling COLO code without " 3616 "COLO enabled", __func__); 3617 } 3618 migrate_start_colo_process(s); 3619 /* 3620 * Fixme: we will run VM in COLO no matter its old running state. 3621 * After exited COLO, we will keep running. 3622 */ 3623 s->vm_was_running = true; 3624 /* Fallthrough */ 3625 case MIGRATION_STATUS_FAILED: 3626 case MIGRATION_STATUS_CANCELLED: 3627 case MIGRATION_STATUS_CANCELLING: 3628 if (s->vm_was_running) { 3629 vm_start(); 3630 } else { 3631 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 3632 runstate_set(RUN_STATE_POSTMIGRATE); 3633 } 3634 } 3635 break; 3636 3637 default: 3638 /* Should not reach here, but if so, forgive the VM. */ 3639 error_report("%s: Unknown ending state %d", __func__, s->state); 3640 break; 3641 } 3642 migrate_fd_cleanup_schedule(s); 3643 qemu_mutex_unlock_iothread(); 3644 } 3645 3646 static void bg_migration_iteration_finish(MigrationState *s) 3647 { 3648 qemu_mutex_lock_iothread(); 3649 switch (s->state) { 3650 case MIGRATION_STATUS_COMPLETED: 3651 migration_calculate_complete(s); 3652 break; 3653 3654 case MIGRATION_STATUS_ACTIVE: 3655 case MIGRATION_STATUS_FAILED: 3656 case MIGRATION_STATUS_CANCELLED: 3657 case MIGRATION_STATUS_CANCELLING: 3658 break; 3659 3660 default: 3661 /* Should not reach here, but if so, forgive the VM. */ 3662 error_report("%s: Unknown ending state %d", __func__, s->state); 3663 break; 3664 } 3665 3666 migrate_fd_cleanup_schedule(s); 3667 qemu_mutex_unlock_iothread(); 3668 } 3669 3670 /* 3671 * Return true if continue to the next iteration directly, false 3672 * otherwise. 
3673 */ 3674 static MigIterateState bg_migration_iteration_run(MigrationState *s) 3675 { 3676 int res; 3677 3678 res = qemu_savevm_state_iterate(s->to_dst_file, false); 3679 if (res > 0) { 3680 bg_migration_completion(s); 3681 return MIG_ITERATE_BREAK; 3682 } 3683 3684 return MIG_ITERATE_RESUME; 3685 } 3686 3687 void migration_make_urgent_request(void) 3688 { 3689 qemu_sem_post(&migrate_get_current()->rate_limit_sem); 3690 } 3691 3692 void migration_consume_urgent_request(void) 3693 { 3694 qemu_sem_wait(&migrate_get_current()->rate_limit_sem); 3695 } 3696 3697 /* Returns true if the rate limiting was broken by an urgent request */ 3698 bool migration_rate_limit(void) 3699 { 3700 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3701 MigrationState *s = migrate_get_current(); 3702 3703 bool urgent = false; 3704 migration_update_counters(s, now); 3705 if (qemu_file_rate_limit(s->to_dst_file)) { 3706 3707 if (qemu_file_get_error(s->to_dst_file)) { 3708 return false; 3709 } 3710 /* 3711 * Wait for a delay to do rate limiting OR 3712 * something urgent to post the semaphore. 3713 */ 3714 int ms = s->iteration_start_time + BUFFER_DELAY - now; 3715 trace_migration_rate_limit_pre(ms); 3716 if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { 3717 /* 3718 * We were woken by one or more urgent things but 3719 * the timedwait will have consumed one of them. 3720 * The service routine for the urgent wake will dec 3721 * the semaphore itself for each item it consumes, 3722 * so add this one we just eat back. 3723 */ 3724 qemu_sem_post(&s->rate_limit_sem); 3725 urgent = true; 3726 } 3727 trace_migration_rate_limit_post(urgent); 3728 } 3729 return urgent; 3730 } 3731 3732 /* 3733 * if failover devices are present, wait they are completely 3734 * unplugged 3735 */ 3736 3737 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, 3738 int new_state) 3739 { 3740 if (qemu_savevm_state_guest_unplug_pending()) { 3741 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG); 3742 3743 while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && 3744 qemu_savevm_state_guest_unplug_pending()) { 3745 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3746 } 3747 if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) { 3748 int timeout = 120; /* 30 seconds */ 3749 /* 3750 * migration has been canceled 3751 * but as we have started an unplug we must wait the end 3752 * to be able to plug back the card 3753 */ 3754 while (timeout-- && qemu_savevm_state_guest_unplug_pending()) { 3755 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3756 } 3757 if (qemu_savevm_state_guest_unplug_pending()) { 3758 warn_report("migration: partially unplugged device on " 3759 "failure"); 3760 } 3761 } 3762 3763 migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); 3764 } else { 3765 migrate_set_state(&s->state, old_state, new_state); 3766 } 3767 } 3768 3769 /* 3770 * Master migration thread on the source VM. 3771 * It drives the migration and pumps the data down the outgoing channel. 3772 */ 3773 static void *migration_thread(void *opaque) 3774 { 3775 MigrationState *s = opaque; 3776 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3777 MigThrError thr_error; 3778 bool urgent = false; 3779 3780 rcu_register_thread(); 3781 3782 object_ref(OBJECT(s)); 3783 update_iteration_initial_status(s); 3784 3785 qemu_savevm_state_header(s->to_dst_file); 3786 3787 /* 3788 * If we opened the return path, we need to make sure dst has it 3789 * opened as well. 
    /*
     * If we opened the return path, we need to make sure the destination
     * has it opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo_enabled()) {
        /* Notify migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from, e.g., a network failure; reset all
             * the local variables. This is important to avoid
             * breaking the transferred_bytes and bandwidth calculations.
             */
            update_iteration_initial_status(s);
        }

        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    return NULL;
}

static void bg_migration_vm_start_bh(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->vm_start_bh);
    s->vm_start_bh = NULL;

    vm_start();
    s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
}
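
/*
 * Note for the background snapshot path below: s->downtime_start is
 * sampled right after setup completes, before the VM is stopped, and
 * s->downtime is computed in bg_migration_vm_start_bh() above, so the
 * reported downtime covers the whole stop/save/restart window,
 * including the bottom-half scheduling latency.
 */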
/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of the live migration mechanism,
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel. Compared to the existing dirty page
 * logging migration, much less stream traffic is produced, resulting in
 * smaller snapshot images, simply because no page duplicates can get
 * into the stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, whereas
 * with the dirty page logging mechanism the saved snapshot is effectively
 * the state of the VM at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;

    rcu_register_thread();
    object_ref(OBJECT(s));

    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save vmstate for the moment when migration has been
     * initiated but also we want to save RAM content while VM is running.
     * The RAM content should appear first in the vmstate. So, we first
     * stash the non-RAM part of the vmstate to the temporary buffer,
     * then write RAM part of the vmstate to the migration stream
     * with vCPUs running and, finally, write stashed non-RAM part of
     * the vmstate from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
    object_unref(OBJECT(s->bioc));

    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    qemu_savevm_state_header(s->to_dst_file);
    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();

    /*
     * If the VM is currently in a suspended state, then, to make a valid
     * runstate transition in vm_stop_force_state(), we need to wake it up.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    s->vm_was_running = runstate_is_running();

    if (global_state_store()) {
        goto fail;
    }
    /* Forcibly stop VM before saving state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
    }
    /*
     * Put vCPUs in sync with shadow context structures, then
     * save their state to channel-buffer along with devices.
     */
    cpu_synchronize_all_states();
    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;
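
    /*
     * From this point RAM is tracked with userfaultfd write protection.
     * In rough outline (a hypothetical sketch, not the exact calls made
     * by ram_write_tracking_start() in ram.c), the tracking amounts to:
     *
     *     uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
     *     ioctl(uffd, UFFDIO_API, &api);          // negotiate WP feature
     *     ioctl(uffd, UFFDIO_REGISTER, &reg);     // register each RAM block
     *     ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);  // arm write protection
     *
     * Subsequent guest writes fault, and the old page content is saved
     * into the stream before the write is allowed to proceed.
     */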
    /*
     * Start VM from BH handler to avoid write-fault lock here.
     * UFFD-WP protection for the whole RAM is already enabled so
     * calling VM state change notifiers from vm_start() would initiate
     * writes to virtio VQs memory which is in write-protected region.
     */
    s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
    qemu_bh_schedule(s->vm_start_bh);

    qemu_mutex_unlock_iothread();

    while (migration_is_active(s)) {
        MigIterateState iter_state = bg_migration_iteration_run(s);
        if (iter_state == MIG_ITERATE_SKIP) {
            continue;
        } else if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        qemu_mutex_unlock_iothread();
    }

    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}

void migrate_fd_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    int64_t rate_limit;
    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile if migration completes successfully, there won't be an
     * error dumped when calling migrate_fd_cleanup().
     */
    migrate_error_free(s);

    s->expected_downtime = s->parameters.downtime_limit;
    if (resume) {
        assert(s->cleanup_bh);
    } else {
        assert(!s->cleanup_bh);
        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
    }
    if (error_in) {
        migrate_fd_error(s, error_in);
        if (resume) {
            /*
             * Don't do cleanup for resume if channel is invalid, but only dump
             * the error. We wait for another channel connect from the user.
             * The error_report still gives HMP user a hint on what failed.
             * It's normally done in migrate_fd_cleanup(), but call it here
             * explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migrate_fd_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = s->parameters.max_postcopy_bandwidth /
            XFER_LIMIT_RATIO;
    } else {
        /* This is a fresh migration */
        rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;

        /* Notify before starting migration thread */
        notifier_list_notify(&migration_state_notifiers, s);
    }

    qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
    qemu_file_set_blocking(s->to_dst_file, true);
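
    /*
     * Note that the limit set above is in bytes per BUFFER_DELAY window,
     * not bytes per second: the configured bandwidth is divided by
     * XFER_LIMIT_RATIO (the number of BUFFER_DELAY windows per second),
     * matching the chunked throttling done in migration_rate_limit().
     */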
    /*
     * Open the return path. For postcopy, it is used exclusively. For
     * precopy, the return path is only used if the user enabled the
     * "return-path" capability.
     */
    if (migrate_postcopy_ram() || migrate_use_return_path()) {
        if (open_return_path_on_source(s, !resume)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    if (resume) {
        /* Wakeup the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }

    if (multifd_save_setup(&local_err) != 0) {
        error_report_err(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "bg_snapshot",
                           bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, "live_migration",
                           migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
}

void migration_global_dump(Monitor *mon)
{
    MigrationState *ms = migrate_get_current();

    monitor_printf(mon, "globals:\n");
    monitor_printf(mon, "store-global-state: %s\n",
                   ms->store_global_state ? "on" : "off");
    monitor_printf(mon, "only-migratable: %s\n",
                   only_migratable ? "on" : "off");
    monitor_printf(mon, "send-configuration: %s\n",
                   ms->send_configuration ? "on" : "off");
    monitor_printf(mon, "send-section-footer: %s\n",
                   ms->send_section_footer ? "on" : "off");
    monitor_printf(mon, "decompress-error-check: %s\n",
                   ms->decompress_error_check ? "on" : "off");
    monitor_printf(mon, "clear-bitmap-shift: %u\n",
                   ms->clear_bitmap_shift);
}

#define DEFINE_PROP_MIG_CAP(name, x) \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)

static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),
    DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
                     decompress_error_check, true),
    DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                      clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
                     parameters.compress_wait_thread, true),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                      parameters.throttle_trigger_threshold,
                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
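
    /*
     * Roughly, with auto-converge enabled, guest vCPUs start off
     * throttled at x-cpu-throttle-initial percent and, for as long as
     * the migration fails to converge, the throttle is raised by
     * x-cpu-throttle-increment percent per step, capped at
     * max-cpu-throttle (defined further down this list).
     */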
DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, 4168 parameters.cpu_throttle_tailslow, false), 4169 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, 4170 parameters.max_bandwidth, MAX_THROTTLE), 4171 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, 4172 parameters.downtime_limit, 4173 DEFAULT_MIGRATE_SET_DOWNTIME), 4174 DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, 4175 parameters.x_checkpoint_delay, 4176 DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), 4177 DEFINE_PROP_UINT8("multifd-channels", MigrationState, 4178 parameters.multifd_channels, 4179 DEFAULT_MIGRATE_MULTIFD_CHANNELS), 4180 DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, 4181 parameters.multifd_compression, 4182 DEFAULT_MIGRATE_MULTIFD_COMPRESSION), 4183 DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, 4184 parameters.multifd_zlib_level, 4185 DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), 4186 DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, 4187 parameters.multifd_zstd_level, 4188 DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), 4189 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, 4190 parameters.xbzrle_cache_size, 4191 DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), 4192 DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, 4193 parameters.max_postcopy_bandwidth, 4194 DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), 4195 DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, 4196 parameters.max_cpu_throttle, 4197 DEFAULT_MIGRATE_MAX_CPU_THROTTLE), 4198 DEFINE_PROP_SIZE("announce-initial", MigrationState, 4199 parameters.announce_initial, 4200 DEFAULT_MIGRATE_ANNOUNCE_INITIAL), 4201 DEFINE_PROP_SIZE("announce-max", MigrationState, 4202 parameters.announce_max, 4203 DEFAULT_MIGRATE_ANNOUNCE_MAX), 4204 DEFINE_PROP_SIZE("announce-rounds", MigrationState, 4205 parameters.announce_rounds, 4206 DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), 4207 DEFINE_PROP_SIZE("announce-step", MigrationState, 4208 parameters.announce_step, 4209 DEFAULT_MIGRATE_ANNOUNCE_STEP), 4210 4211 /* Migration capabilities */ 4212 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), 4213 DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), 4214 DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), 4215 DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), 4216 DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), 4217 DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), 4218 DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), 4219 DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), 4220 DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), 4221 DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), 4222 DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), 4223 DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), 4224 DEFINE_PROP_MIG_CAP("x-background-snapshot", 4225 MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), 4226 4227 DEFINE_PROP_END_OF_LIST(), 4228 }; 4229 4230 static void migration_class_init(ObjectClass *klass, void *data) 4231 { 4232 DeviceClass *dc = DEVICE_CLASS(klass); 4233 4234 dc->user_creatable = false; 4235 device_class_set_props(dc, migration_properties); 4236 } 4237 4238 static void migration_instance_finalize(Object *obj) 4239 { 4240 MigrationState *ms = MIGRATION_OBJ(obj); 4241 MigrationParameters *params = &ms->parameters; 4242 4243 qemu_mutex_destroy(&ms->error_mutex); 4244 qemu_mutex_destroy(&ms->qemu_file_lock); 4245 g_free(params->tls_hostname); 4246 
static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    g_free(params->tls_hostname);
    g_free(params->tls_creds);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_sem_destroy(&ms->pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    error_free(ms->error);
}

static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_sem_init(&ms->pause_sem, 0);
    qemu_mutex_init(&ms->error_mutex);

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_decompress_threads = true;
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_multifd_channels = true;
    params->has_multifd_compression = true;
    params->has_multifd_zlib_level = true;
    params->has_multifd_zstd_level = true;
    params->has_xbzrle_cache_size = true;
    params->has_max_postcopy_bandwidth = true;
    params->has_max_cpu_throttle = true;
    params->has_announce_initial = true;
    params->has_announce_max = true;
    params->has_announce_rounds = true;
    params->has_announce_step = true;

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
}

/*
 * Return true if the check passes, false otherwise. On failure an
 * error is set in errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    /* Assuming all off */
    bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
    int i;

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (ms->enabled_capabilities[i]) {
            QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
        }
    }

    ret = migrate_caps_check(cap_list, head, errp);

    /* It works with head == NULL */
    qapi_free_MigrationCapabilityStatusList(head);

    return ret;
}
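
/*
 * migration_object_check() above rebuilds the enabled capabilities as a
 * QAPI list so that it can reuse migrate_caps_check(), which validates a
 * set of candidate capabilities against the current (here: all-off) set;
 * the same helper also backs the QMP capability-setting path.
 */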
static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);