/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"

#define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
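
/*
 * For illustration: the bandwidth limit is enforced per BUFFER_DELAY
 * chunk, so a configured max_bandwidth of B bytes/second allows
 * B / XFER_LIMIT_RATIO bytes per 100 ms chunk (see the
 * qemu_file_set_rate_limit() calls in migrate_params_apply() below).
 */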

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: no compression, 1: best speed, ... 20: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send received bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};

/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                       /* Capability set size */
    MigrationCapability caps[];     /* Variadic array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {                      \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ }                   \
    }

/* Background-snapshot compatibility check list */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
                            MIGRATION_CAPABILITY_POSTCOPY_RAM,
                            MIGRATION_CAPABILITY_DIRTY_BITMAPS,
                            MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
                            MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
                            MIGRATION_CAPABILITY_RETURN_PATH,
                            MIGRATION_CAPABILITY_MULTIFD,
                            MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
                            MIGRATION_CAPABILITY_AUTO_CONVERGE,
                            MIGRATION_CAPABILITY_RELEASE_RAM,
                            MIGRATION_CAPABILITY_RDMA_PIN_ALL,
                            MIGRATION_CAPABILITY_COMPRESS,
                            MIGRATION_CAPABILITY_XBZRLE,
                            MIGRATION_CAPABILITY_X_COLO,
                            MIGRATION_CAPABILITY_VALIDATE_UUID);

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}
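
/*
 * The expression above is the branch-free three-way comparison idiom:
 * it yields -1, 0 or 1 for a < b, a == b and a > b respectively, which
 * is what GTree expects of a GCompareFunc.  The tree built with it
 * (page_requested, see migration_object_init() below) is keyed by host
 * page addresses.
 */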

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(void)
{
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel();
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps.  It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps.  Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object has been created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }
    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
    }

    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}
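
/*
 * For reference, the on-wire layout produced by migrate_send_rp_message()
 * below is (all integers big-endian):
 *
 *   | type (be16) | len (be16) | payload (len bytes) |
 *
 * where 'type' is one of enum mig_rp_message_type above.
 */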

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the qemu file got an error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RAMBlock
 *   len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We keep track of the last RAMBlock that we requested a page for.
     * Note that we don't need locking because this function will only be
     * called within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
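
/*
 * Illustrative example (values are hypothetical): requesting the 4 KiB
 * page at offset 0x2000 of a block named "pc.ram" for the first time
 * produces a MIG_RP_MSG_REQ_PAGES_ID message whose payload is
 *
 *   be64 0x2000 | be32 0x1000 | 0x06 | "pc.ram"
 *
 * while later requests against the same block omit the name and use
 * MIG_RP_MSG_REQ_PAGES instead.
 */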

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb)));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need the
     * lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    migrate_protocol_allow_multifd(false); /* reset it anyway */
    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        migrate_protocol_allow_multifd(true);
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
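
/*
 * Example -incoming URIs dispatched above (values are illustrative):
 *
 *   tcp:0.0.0.0:4444        - listen on a TCP socket
 *   unix:/tmp/migrate.sock  - listen on a UNIX domain socket
 *   fd:5                    - read from an already-open file descriptor
 *   exec:cat state.bin      - read the stream from a command's stdout
 *   rdma:host:4444          - RDMA transport (with CONFIG_RDMA only)
 */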

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats flush their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If the global state section was not received, or we are in running
       state, we need to obey autostart.  Any other state is set with
       runstate_set. */

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats flush their mutable metadata */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
    }
    exit(EXIT_FAILURE);
}
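
/*
 * Note on the flow above: process_incoming_migration_co() runs in a
 * coroutine, while the final restart steps are deferred to
 * process_incoming_migration_bh() via a bottom half, so they run from
 * the main loop rather than from coroutine context once the stream has
 * been fully loaded.
 */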

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (multifd_load_setup(errp) != 0) {
        return false;
    }

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        mis->from_src_file = f;
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(f, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(f);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * fault thread will still be waiting), so that we can receive
         * commands from the source now, and answer them if needed.  The
         * fault thread will only be woken up once we are sure that the
         * source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (postcopy_try_recover(f)) {
        return;
    }

    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    migration_incoming_process();
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    bool start_migration;

    if (!mis->from_src_file) {
        /* The first connection (multifd may have multiple) */
        QEMUFile *f = qemu_fopen_channel_input(ioc);

        /* If it's a recovery, we're done */
        if (postcopy_try_recover(f)) {
            return;
        }

        if (!migration_incoming_setup(f, errp)) {
            return;
        }

        /*
         * Common migration only needs one channel, so we can start
         * right now.  Multifd needs more than one channel, we wait.
         */
        start_migration = !migrate_use_multifd();
    } else {
        /* Multiple connections */
        assert(migrate_use_multifd());
        start_migration = multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (start_migration) {
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    bool all_channels;

    all_channels = multifd_recv_all_channels_created();

    return all_channels && mis->from_src_file != NULL;
}
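
/*
 * The helpers below wrap migrate_send_rp_message() for the fixed-size
 * message types.  For example, migration_incoming_state_destroy()
 * above reports the final stream status back to the source with:
 *
 *   migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 */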

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-0 value indicates
 * an error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part.  It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (the fault thread is still paused),
     * so it's OK even without holding the mutex across both writes.
     * However, the cleaner way is to take the lock before sending the
     * message header, and release it only after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL, **tail = &head;
    MigrationCapabilityStatus *caps;
    MigrationState *s = migrate_get_current();
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        caps = g_malloc0(sizeof(*caps));
        caps->capability = i;
        caps->state = s->enabled_capabilities[i];
        QAPI_LIST_APPEND(tail, caps);
    }

    return head;
}
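
/*
 * Illustrative QMP exchange served by the function above (the listed
 * capabilities and values are examples only):
 *
 *   -> { "execute": "query-migrate-capabilities" }
 *   <- { "return": [ { "capability": "xbzrle", "state": false },
 *                    { "capability": "auto-converge", "state": true },
 *                    ... ] }
 */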

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_tls_authz = true;
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}

AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}
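
/*
 * With the defaults above (initial=50, max=550, rounds=5, step=100),
 * qemu_announce_self() sends five rounds of RARP/ARP packets whose
 * inter-round delay is derived from 'initial' and 'step' and clamped
 * to 'max' milliseconds; see net/announce.c for the exact schedule.
 */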

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;

    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;

    }
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_counters.transferred;
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = ram_counters.duplicate;
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = ram_counters.normal;
    info->ram->normal_bytes = ram_counters.normal *
        qemu_target_page_size();
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = qemu_target_page_size();
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;

    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->has_compression = true;
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->has_disk = true;
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}
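
/*
 * For context on the blocker handling in fill_source_migration_info()
 * below: explicit blockers are registered via migration/blocker.h
 * (included above), typically along these lines (hypothetical device
 * name, sketch only):
 *
 *   Error *blocker = NULL;
 *   error_setg(&blocker, "my-device: internal state cannot be migrated");
 *   migrate_add_blocker(blocker, errp);
 */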

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked;
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = s->state;
}

typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
    WT_SUPPORT_AVAILABLE,
    WT_SUPPORT_COMPATIBLE
} WriteTrackingSupport;

static
WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* Check if kernel supports required UFFD features */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }
    /*
     * Check if current memory configuration is
     * compatible with required UFFD features.
     */
    if (!ram_write_tracking_compatible()) {
        return WT_SUPPORT_AVAILABLE;
    }

    return WT_SUPPORT_COMPATIBLE;
}
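
/*
 * Note: migrate_caps_check() below compares these values with '<'
 * (e.g. wt_support < WT_SUPPORT_AVAILABLE), so the ascending order of
 * the WriteTrackingSupport enumerators above is significant.
 */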

/**
 * @migration_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /*
         * This check is reasonably expensive, so we only do it the
         * first time the capability is set; also, only the destination
         * needs the special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
        WriteTrackingSupport wt_support;
        int idx;
        /*
         * Check if 'background-snapshot' capability is supported by
         * host kernel and compatible with guest memory configuration.
         */
        wt_support = migrate_query_write_tracking();
        if (wt_support < WT_SUPPORT_AVAILABLE) {
            error_setg(errp, "Background-snapshot is not supported by host kernel");
            return false;
        }
        if (wt_support < WT_SUPPORT_COMPATIBLE) {
            error_setg(errp, "Background-snapshot is not compatible "
                       "with guest memory configuration");
            return false;
        }

        /*
         * Check if there are any migration capabilities
         * incompatible with 'background-snapshot'.
         */
        for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
            int incomp_cap = check_caps_background_snapshot.caps[idx];
            if (cap_list[incomp_cap]) {
                error_setg(errp,
                           "Background-snapshot is not compatible with %s",
                           MigrationCapability_str(incomp_cap));
                return false;
            }
        }
    }

    /* incoming side only */
    if (runstate_check(RUN_STATE_INMIGRATE) &&
        !migrate_multifd_is_allowed() &&
        cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        error_setg(errp, "multifd is not supported by current protocol");
        return false;
    }

    return true;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
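
/*
 * Illustrative QMP usage of the command above:
 *
 *   -> { "execute": "migrate-set-capabilities",
 *        "arguments": { "capabilities": [
 *            { "capability": "xbzrle", "state": true } ] } }
 *   <- { "return": {} }
 */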

/*
 * Check whether the parameters are valid.  Error will be put into errp
 * (if provided).  Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                    stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "a value between 0 and 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "a power of two no less than the target page size");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "a value between 0 and 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "a value between 1 and 10000");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

    return true;
}

static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }

    if (params->has_tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}

static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}

void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->has_tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        qobject_unref(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->has_tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        qobject_unref(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params, errp);
}
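
/*
 * Illustrative QMP usage of the command above (the 32 MiB/s value is
 * an example only):
 *
 *   -> { "execute": "migrate-set-parameters",
 *        "arguments": { "max-bandwidth": 33554432 } }
 *   <- { "return": {} }
 */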

void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * we don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
                                                  bool state)
{
    MigrationCapabilityStatus *cap;

    cap = g_new0(MigrationCapabilityStatus, 1);
    cap->capability = index;
    cap->state = state;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap = NULL;

    QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}
We can't free it */ 1825 error_report_err(error_copy(s->error)); 1826 } 1827 notifier_list_notify(&migration_state_notifiers, s); 1828 block_cleanup_parameters(s); 1829 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 1830 } 1831 1832 static void migrate_fd_cleanup_schedule(MigrationState *s) 1833 { 1834 /* 1835 * Ref the state for bh, because it may be called when 1836 * there're already no other refs 1837 */ 1838 object_ref(OBJECT(s)); 1839 qemu_bh_schedule(s->cleanup_bh); 1840 } 1841 1842 static void migrate_fd_cleanup_bh(void *opaque) 1843 { 1844 MigrationState *s = opaque; 1845 migrate_fd_cleanup(s); 1846 object_unref(OBJECT(s)); 1847 } 1848 1849 void migrate_set_error(MigrationState *s, const Error *error) 1850 { 1851 QEMU_LOCK_GUARD(&s->error_mutex); 1852 if (!s->error) { 1853 s->error = error_copy(error); 1854 } 1855 } 1856 1857 static void migrate_error_free(MigrationState *s) 1858 { 1859 QEMU_LOCK_GUARD(&s->error_mutex); 1860 if (s->error) { 1861 error_free(s->error); 1862 s->error = NULL; 1863 } 1864 } 1865 1866 void migrate_fd_error(MigrationState *s, const Error *error) 1867 { 1868 trace_migrate_fd_error(error_get_pretty(error)); 1869 assert(s->to_dst_file == NULL); 1870 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1871 MIGRATION_STATUS_FAILED); 1872 migrate_set_error(s, error); 1873 } 1874 1875 static void migrate_fd_cancel(MigrationState *s) 1876 { 1877 int old_state; 1878 QEMUFile *f = migrate_get_current()->to_dst_file; 1879 trace_migrate_fd_cancel(); 1880 1881 WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { 1882 if (s->rp_state.from_dst_file) { 1883 /* shut down the rp socket, causing the rp thread to shut down */ 1884 qemu_file_shutdown(s->rp_state.from_dst_file); 1885 } 1886 } 1887 1888 do { 1889 old_state = s->state; 1890 if (!migration_is_running(old_state)) { 1891 break; 1892 } 1893 /* If the migration is paused, kick it out of the pause */ 1894 if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { 1895 qemu_sem_post(&s->pause_sem); 1896 } 1897 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); 1898 } while (s->state != MIGRATION_STATUS_CANCELLING); 1899 1900 /* 1901 * If we're unlucky the migration code might be stuck somewhere in a 1902 * send/write while the network has failed and is waiting to timeout; 1903 * if we've got shutdown(2) available then we can force it to quit. 1904 * The outgoing qemu file gets closed in migrate_fd_cleanup that is 1905 * called in a bh, so there is no race against this cancel.
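* qemu_file_shutdown() below only shuts down the transport; any thread blocked in a send on it then wakes up with an error instead of hanging, while the file itself is still closed later in migrate_fd_cleanup().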
1906 */ 1907 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 1908 qemu_file_shutdown(f); 1909 } 1910 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1911 Error *local_err = NULL; 1912 1913 bdrv_invalidate_cache_all(&local_err); 1914 if (local_err) { 1915 error_report_err(local_err); 1916 } else { 1917 s->block_inactive = false; 1918 } 1919 } 1920 } 1921 1922 void add_migration_state_change_notifier(Notifier *notify) 1923 { 1924 notifier_list_add(&migration_state_notifiers, notify); 1925 } 1926 1927 void remove_migration_state_change_notifier(Notifier *notify) 1928 { 1929 notifier_remove(notify); 1930 } 1931 1932 bool migration_in_setup(MigrationState *s) 1933 { 1934 return s->state == MIGRATION_STATUS_SETUP; 1935 } 1936 1937 bool migration_has_finished(MigrationState *s) 1938 { 1939 return s->state == MIGRATION_STATUS_COMPLETED; 1940 } 1941 1942 bool migration_has_failed(MigrationState *s) 1943 { 1944 return (s->state == MIGRATION_STATUS_CANCELLED || 1945 s->state == MIGRATION_STATUS_FAILED); 1946 } 1947 1948 bool migration_in_postcopy(void) 1949 { 1950 MigrationState *s = migrate_get_current(); 1951 1952 switch (s->state) { 1953 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1954 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1955 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1956 return true; 1957 default: 1958 return false; 1959 } 1960 } 1961 1962 bool migration_in_postcopy_after_devices(MigrationState *s) 1963 { 1964 return migration_in_postcopy() && s->postcopy_after_devices; 1965 } 1966 1967 bool migration_in_incoming_postcopy(void) 1968 { 1969 PostcopyState ps = postcopy_state_get(); 1970 1971 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 1972 } 1973 1974 bool migration_in_bg_snapshot(void) 1975 { 1976 MigrationState *s = migrate_get_current(); 1977 1978 return migrate_background_snapshot() && 1979 migration_is_setup_or_active(s->state); 1980 } 1981 1982 bool migration_is_idle(void) 1983 { 1984 MigrationState *s = current_migration; 1985 1986 if (!s) { 1987 return true; 1988 } 1989 1990 switch (s->state) { 1991 case MIGRATION_STATUS_NONE: 1992 case MIGRATION_STATUS_CANCELLED: 1993 case MIGRATION_STATUS_COMPLETED: 1994 case MIGRATION_STATUS_FAILED: 1995 return true; 1996 case MIGRATION_STATUS_SETUP: 1997 case MIGRATION_STATUS_CANCELLING: 1998 case MIGRATION_STATUS_ACTIVE: 1999 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 2000 case MIGRATION_STATUS_COLO: 2001 case MIGRATION_STATUS_PRE_SWITCHOVER: 2002 case MIGRATION_STATUS_DEVICE: 2003 case MIGRATION_STATUS_WAIT_UNPLUG: 2004 return false; 2005 case MIGRATION_STATUS__MAX: 2006 g_assert_not_reached(); 2007 } 2008 2009 return false; 2010 } 2011 2012 bool migration_is_active(MigrationState *s) 2013 { 2014 return (s->state == MIGRATION_STATUS_ACTIVE || 2015 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 2016 } 2017 2018 void migrate_init(MigrationState *s) 2019 { 2020 /* 2021 * Reinitialise all migration state, except 2022 * parameters/capabilities that the user set, and 2023 * locks. 
2024 */ 2025 s->cleanup_bh = 0; 2026 s->vm_start_bh = 0; 2027 s->to_dst_file = NULL; 2028 s->state = MIGRATION_STATUS_NONE; 2029 s->rp_state.from_dst_file = NULL; 2030 s->rp_state.error = false; 2031 s->mbps = 0.0; 2032 s->pages_per_second = 0.0; 2033 s->downtime = 0; 2034 s->expected_downtime = 0; 2035 s->setup_time = 0; 2036 s->start_postcopy = false; 2037 s->postcopy_after_devices = false; 2038 s->migration_thread_running = false; 2039 error_free(s->error); 2040 s->error = NULL; 2041 s->hostname = NULL; 2042 2043 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 2044 2045 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2046 s->total_time = 0; 2047 s->vm_was_running = false; 2048 s->iteration_initial_bytes = 0; 2049 s->threshold_size = 0; 2050 } 2051 2052 int migrate_add_blocker(Error *reason, Error **errp) 2053 { 2054 if (only_migratable) { 2055 error_propagate_prepend(errp, error_copy(reason), 2056 "disallowing migration blocker " 2057 "(--only-migratable) for: "); 2058 return -EACCES; 2059 } 2060 2061 if (migration_is_idle()) { 2062 migration_blockers = g_slist_prepend(migration_blockers, reason); 2063 return 0; 2064 } 2065 2066 error_propagate_prepend(errp, error_copy(reason), 2067 "disallowing migration blocker " 2068 "(migration in progress) for: "); 2069 return -EBUSY; 2070 } 2071 2072 void migrate_del_blocker(Error *reason) 2073 { 2074 migration_blockers = g_slist_remove(migration_blockers, reason); 2075 } 2076 2077 void qmp_migrate_incoming(const char *uri, Error **errp) 2078 { 2079 Error *local_err = NULL; 2080 static bool once = true; 2081 2082 if (!once) { 2083 error_setg(errp, "The incoming migration has already been started"); 2084 return; 2085 } 2086 if (!runstate_check(RUN_STATE_INMIGRATE)) { 2087 error_setg(errp, "'-incoming' was not specified on the command line"); 2088 return; 2089 } 2090 2091 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2092 return; 2093 } 2094 2095 qemu_start_incoming_migration(uri, &local_err); 2096 2097 if (local_err) { 2098 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2099 error_propagate(errp, local_err); 2100 return; 2101 } 2102 2103 once = false; 2104 } 2105 2106 void qmp_migrate_recover(const char *uri, Error **errp) 2107 { 2108 MigrationIncomingState *mis = migration_incoming_get_current(); 2109 2110 /* 2111 * Don't even bother to use ERRP_GUARD() as it _must_ always be set by 2112 * callers (no one should ignore a recover failure); if there is, it's a 2113 * programming error. 2114 */ 2115 assert(errp); 2116 2117 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2118 error_setg(errp, "Migrate recover can only be run " 2119 "when postcopy is paused."); 2120 return; 2121 } 2122 2123 if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, 2124 false, true) == true) { 2125 error_setg(errp, "Migrate recovery is triggered already"); 2126 return; 2127 } 2128 2129 /* 2130 * Note that this call will never start a real migration; it will 2131 * only re-setup the migration stream and poke existing migration 2132 * to continue using that newly established channel. 
*/ 2134 qemu_start_incoming_migration(uri, errp); 2135 2136 /* Safe to dereference with the assert above */ 2137 if (*errp) { 2138 /* Reset the flag so the user can still retry */ 2139 qatomic_set(&mis->postcopy_recover_triggered, false); 2140 } 2141 } 2142 2143 void qmp_migrate_pause(Error **errp) 2144 { 2145 MigrationState *ms = migrate_get_current(); 2146 MigrationIncomingState *mis = migration_incoming_get_current(); 2147 int ret; 2148 2149 if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2150 /* Source side, during postcopy */ 2151 qemu_mutex_lock(&ms->qemu_file_lock); 2152 ret = qemu_file_shutdown(ms->to_dst_file); 2153 qemu_mutex_unlock(&ms->qemu_file_lock); 2154 if (ret) { 2155 error_setg(errp, "Failed to pause source migration"); 2156 } 2157 return; 2158 } 2159 2160 if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2161 ret = qemu_file_shutdown(mis->from_src_file); 2162 if (ret) { 2163 error_setg(errp, "Failed to pause destination migration"); 2164 } 2165 return; 2166 } 2167 2168 error_setg(errp, "migrate-pause is currently only supported " 2169 "during postcopy-active state"); 2170 } 2171 2172 bool migration_is_blocked(Error **errp) 2173 { 2174 if (qemu_savevm_state_blocked(errp)) { 2175 return true; 2176 } 2177 2178 if (migration_blockers) { 2179 error_propagate(errp, error_copy(migration_blockers->data)); 2180 return true; 2181 } 2182 2183 return false; 2184 } 2185 2186 /* Returns true if we should continue the migration, or false if an error was detected */ 2187 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, 2188 bool resume, Error **errp) 2189 { 2190 Error *local_err = NULL; 2191 2192 if (resume) { 2193 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2194 error_setg(errp, "Cannot resume if there is no " 2195 "paused migration"); 2196 return false; 2197 } 2198 2199 /* 2200 * Postcopy recovery won't work well with release-ram 2201 * capability since release-ram will drop the page buffer as 2202 * soon as the page is put into the send buffer. So if a 2203 * network failure happens, any page buffers that have 2204 * not yet reached the destination VM but have already been 2205 * sent from the source VM will be lost forever. Let's refuse 2206 * to let the client resume such a postcopy migration. 2207 * Luckily release-ram was designed to only be used when src 2208 * and destination VMs are on the same host, so it should be 2209 * fine.
2210 */ 2211 if (migrate_release_ram()) { 2212 error_setg(errp, "Postcopy recovery cannot work " 2213 "when release-ram capability is set"); 2214 return false; 2215 } 2216 2217 /* This is a resume, skip init status */ 2218 return true; 2219 } 2220 2221 if (migration_is_running(s->state)) { 2222 error_setg(errp, QERR_MIGRATION_ACTIVE); 2223 return false; 2224 } 2225 2226 if (runstate_check(RUN_STATE_INMIGRATE)) { 2227 error_setg(errp, "Guest is waiting for an incoming migration"); 2228 return false; 2229 } 2230 2231 if (runstate_check(RUN_STATE_POSTMIGRATE)) { 2232 error_setg(errp, "Can't migrate the vm that was paused due to " 2233 "previous migration"); 2234 return false; 2235 } 2236 2237 if (migration_is_blocked(errp)) { 2238 return false; 2239 } 2240 2241 if (blk || blk_inc) { 2242 if (migrate_colo_enabled()) { 2243 error_setg(errp, "No disk migration is required in COLO mode"); 2244 return false; 2245 } 2246 if (migrate_use_block() || migrate_use_block_incremental()) { 2247 error_setg(errp, "Command options are incompatible with " 2248 "current migration capabilities"); 2249 return false; 2250 } 2251 migrate_set_block_enabled(true, &local_err); 2252 if (local_err) { 2253 error_propagate(errp, local_err); 2254 return false; 2255 } 2256 s->must_remove_block_options = true; 2257 } 2258 2259 if (blk_inc) { 2260 migrate_set_block_incremental(s, true); 2261 } 2262 2263 migrate_init(s); 2264 /* 2265 * set ram_counters memory to zero for a 2266 * new migration 2267 */ 2268 memset(&ram_counters, 0, sizeof(ram_counters)); 2269 2270 return true; 2271 } 2272 2273 void qmp_migrate(const char *uri, bool has_blk, bool blk, 2274 bool has_inc, bool inc, bool has_detach, bool detach, 2275 bool has_resume, bool resume, Error **errp) 2276 { 2277 Error *local_err = NULL; 2278 MigrationState *s = migrate_get_current(); 2279 const char *p = NULL; 2280 2281 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 2282 has_resume && resume, errp)) { 2283 /* Error detected, put into errp */ 2284 return; 2285 } 2286 2287 if (!(has_resume && resume)) { 2288 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2289 return; 2290 } 2291 } 2292 2293 migrate_protocol_allow_multifd(false); 2294 if (strstart(uri, "tcp:", &p) || 2295 strstart(uri, "unix:", NULL) || 2296 strstart(uri, "vsock:", NULL)) { 2297 migrate_protocol_allow_multifd(true); 2298 socket_start_outgoing_migration(s, p ? 
p : uri, &local_err); 2299 #ifdef CONFIG_RDMA 2300 } else if (strstart(uri, "rdma:", &p)) { 2301 rdma_start_outgoing_migration(s, p, &local_err); 2302 #endif 2303 } else if (strstart(uri, "exec:", &p)) { 2304 exec_start_outgoing_migration(s, p, &local_err); 2305 } else if (strstart(uri, "fd:", &p)) { 2306 fd_start_outgoing_migration(s, p, &local_err); 2307 } else { 2308 if (!(has_resume && resume)) { 2309 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2310 } 2311 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 2312 "a valid migration protocol"); 2313 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2314 MIGRATION_STATUS_FAILED); 2315 block_cleanup_parameters(s); 2316 return; 2317 } 2318 2319 if (local_err) { 2320 if (!(has_resume && resume)) { 2321 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2322 } 2323 migrate_fd_error(s, local_err); 2324 error_propagate(errp, local_err); 2325 return; 2326 } 2327 } 2328 2329 void qmp_migrate_cancel(Error **errp) 2330 { 2331 migration_cancel(); 2332 } 2333 2334 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2335 { 2336 MigrationState *s = migrate_get_current(); 2337 if (s->state != state) { 2338 error_setg(errp, "Migration not in expected state: %s", 2339 MigrationStatus_str(s->state)); 2340 return; 2341 } 2342 qemu_sem_post(&s->pause_sem); 2343 } 2344 2345 bool migrate_release_ram(void) 2346 { 2347 MigrationState *s; 2348 2349 s = migrate_get_current(); 2350 2351 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 2352 } 2353 2354 bool migrate_postcopy_ram(void) 2355 { 2356 MigrationState *s; 2357 2358 s = migrate_get_current(); 2359 2360 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 2361 } 2362 2363 bool migrate_postcopy(void) 2364 { 2365 return migrate_postcopy_ram() || migrate_dirty_bitmaps(); 2366 } 2367 2368 bool migrate_auto_converge(void) 2369 { 2370 MigrationState *s; 2371 2372 s = migrate_get_current(); 2373 2374 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 2375 } 2376 2377 bool migrate_zero_blocks(void) 2378 { 2379 MigrationState *s; 2380 2381 s = migrate_get_current(); 2382 2383 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 2384 } 2385 2386 bool migrate_postcopy_blocktime(void) 2387 { 2388 MigrationState *s; 2389 2390 s = migrate_get_current(); 2391 2392 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; 2393 } 2394 2395 bool migrate_use_compression(void) 2396 { 2397 MigrationState *s; 2398 2399 s = migrate_get_current(); 2400 2401 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 2402 } 2403 2404 int migrate_compress_level(void) 2405 { 2406 MigrationState *s; 2407 2408 s = migrate_get_current(); 2409 2410 return s->parameters.compress_level; 2411 } 2412 2413 int migrate_compress_threads(void) 2414 { 2415 MigrationState *s; 2416 2417 s = migrate_get_current(); 2418 2419 return s->parameters.compress_threads; 2420 } 2421 2422 int migrate_compress_wait_thread(void) 2423 { 2424 MigrationState *s; 2425 2426 s = migrate_get_current(); 2427 2428 return s->parameters.compress_wait_thread; 2429 } 2430 2431 int migrate_decompress_threads(void) 2432 { 2433 MigrationState *s; 2434 2435 s = migrate_get_current(); 2436 2437 return s->parameters.decompress_threads; 2438 } 2439 2440 bool migrate_dirty_bitmaps(void) 2441 { 2442 MigrationState *s; 2443 2444 s = migrate_get_current(); 2445 2446 return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; 2447 } 2448 2449 bool migrate_ignore_shared(void) 2450 
{ 2451 MigrationState *s; 2452 2453 s = migrate_get_current(); 2454 2455 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; 2456 } 2457 2458 bool migrate_validate_uuid(void) 2459 { 2460 MigrationState *s; 2461 2462 s = migrate_get_current(); 2463 2464 return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; 2465 } 2466 2467 bool migrate_use_events(void) 2468 { 2469 MigrationState *s; 2470 2471 s = migrate_get_current(); 2472 2473 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 2474 } 2475 2476 bool migrate_use_multifd(void) 2477 { 2478 MigrationState *s; 2479 2480 s = migrate_get_current(); 2481 2482 return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; 2483 } 2484 2485 bool migrate_pause_before_switchover(void) 2486 { 2487 MigrationState *s; 2488 2489 s = migrate_get_current(); 2490 2491 return s->enabled_capabilities[ 2492 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 2493 } 2494 2495 int migrate_multifd_channels(void) 2496 { 2497 MigrationState *s; 2498 2499 s = migrate_get_current(); 2500 2501 return s->parameters.multifd_channels; 2502 } 2503 2504 MultiFDCompression migrate_multifd_compression(void) 2505 { 2506 MigrationState *s; 2507 2508 s = migrate_get_current(); 2509 2510 return s->parameters.multifd_compression; 2511 } 2512 2513 int migrate_multifd_zlib_level(void) 2514 { 2515 MigrationState *s; 2516 2517 s = migrate_get_current(); 2518 2519 return s->parameters.multifd_zlib_level; 2520 } 2521 2522 int migrate_multifd_zstd_level(void) 2523 { 2524 MigrationState *s; 2525 2526 s = migrate_get_current(); 2527 2528 return s->parameters.multifd_zstd_level; 2529 } 2530 2531 int migrate_use_xbzrle(void) 2532 { 2533 MigrationState *s; 2534 2535 s = migrate_get_current(); 2536 2537 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 2538 } 2539 2540 uint64_t migrate_xbzrle_cache_size(void) 2541 { 2542 MigrationState *s; 2543 2544 s = migrate_get_current(); 2545 2546 return s->parameters.xbzrle_cache_size; 2547 } 2548 2549 static int64_t migrate_max_postcopy_bandwidth(void) 2550 { 2551 MigrationState *s; 2552 2553 s = migrate_get_current(); 2554 2555 return s->parameters.max_postcopy_bandwidth; 2556 } 2557 2558 bool migrate_use_block(void) 2559 { 2560 MigrationState *s; 2561 2562 s = migrate_get_current(); 2563 2564 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 2565 } 2566 2567 bool migrate_use_return_path(void) 2568 { 2569 MigrationState *s; 2570 2571 s = migrate_get_current(); 2572 2573 return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 2574 } 2575 2576 bool migrate_use_block_incremental(void) 2577 { 2578 MigrationState *s; 2579 2580 s = migrate_get_current(); 2581 2582 return s->parameters.block_incremental; 2583 } 2584 2585 bool migrate_background_snapshot(void) 2586 { 2587 MigrationState *s; 2588 2589 s = migrate_get_current(); 2590 2591 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; 2592 } 2593 2594 /* migration thread support */ 2595 /* 2596 * Something bad happened to the RP stream, mark an error 2597 * The caller shall print or trace something to indicate why 2598 */ 2599 static void mark_source_rp_bad(MigrationState *s) 2600 { 2601 s->rp_state.error = true; 2602 } 2603 2604 static struct rp_cmd_args { 2605 ssize_t len; /* -1 = variable */ 2606 const char *name; 2607 } rp_cmd_args[] = { 2608 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2609 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2610 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2611 
[MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2612 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2613 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2614 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2615 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2616 }; 2617 2618 /* 2619 * Process a request for pages received on the return path. 2620 * We're allowed to send more than requested (e.g. to round to our page size) 2621 * and we don't need to send pages that have already been sent. 2622 */ 2623 static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname, 2624 ram_addr_t start, size_t len) 2625 { 2626 long our_host_ps = qemu_real_host_page_size; 2627 2628 trace_migrate_handle_rp_req_pages(rbname, start, len); 2629 2630 /* 2631 * Since we currently insist on matching page sizes, just sanity-check 2632 * that we're being asked for whole host pages. 2633 */ 2634 if (start & (our_host_ps - 1) || 2635 (len & (our_host_ps - 1))) { 2636 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 2637 " len: %zd", __func__, start, len); 2638 mark_source_rp_bad(ms); 2639 return; 2640 } 2641 2642 if (ram_save_queue_pages(rbname, start, len)) { 2643 mark_source_rp_bad(ms); 2644 } 2645 } 2646 2647 /* Return true to retry, false to quit */ 2648 static bool postcopy_pause_return_path_thread(MigrationState *s) 2649 { 2650 trace_postcopy_pause_return_path(); 2651 2652 qemu_sem_wait(&s->postcopy_pause_rp_sem); 2653 2654 trace_postcopy_pause_return_path_continued(); 2655 2656 return true; 2657 } 2658 2659 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 2660 { 2661 RAMBlock *block = qemu_ram_block_by_name(block_name); 2662 2663 if (!block) { 2664 error_report("%s: invalid block name '%s'", __func__, block_name); 2665 return -EINVAL; 2666 } 2667 2668 /* Fetch the received bitmap and refresh the dirty bitmap */ 2669 return ram_dirty_bitmap_reload(s, block); 2670 } 2671 2672 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 2673 { 2674 trace_source_return_path_thread_resume_ack(value); 2675 2676 if (value != MIGRATION_RESUME_ACK_VALUE) { 2677 error_report("%s: illegal resume_ack value %"PRIu32, 2678 __func__, value); 2679 return -1; 2680 } 2681 2682 /* Now both sides are active.
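The destination acked the resume request with the expected value, so the state can go back to POSTCOPY_ACTIVE and the send thread can be kicked below.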
*/ 2683 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2684 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2685 2686 /* Notify the send thread that it's time to continue sending pages */ 2687 qemu_sem_post(&s->rp_state.rp_sem); 2688 2689 return 0; 2690 } 2691 2692 /* Release ms->rp_state.from_dst_file in a safe way */ 2693 static void migration_release_from_dst_file(MigrationState *ms) 2694 { 2695 QEMUFile *file; 2696 2697 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { 2698 /* 2699 * Reset the from_dst_file pointer first before releasing it, as we 2700 * can't block within the lock section 2701 */ 2702 file = ms->rp_state.from_dst_file; 2703 ms->rp_state.from_dst_file = NULL; 2704 } 2705 2706 qemu_fclose(file); 2707 } 2708 2709 /* 2710 * Handles messages sent on the return path towards the source VM 2711 * 2712 */ 2713 static void *source_return_path_thread(void *opaque) 2714 { 2715 MigrationState *ms = opaque; 2716 QEMUFile *rp = ms->rp_state.from_dst_file; 2717 uint16_t header_len, header_type; 2718 uint8_t buf[512]; 2719 uint32_t tmp32, sibling_error; 2720 ram_addr_t start = 0; /* =0 to silence warning */ 2721 size_t len = 0, expected_len; 2722 int res; 2723 2724 trace_source_return_path_thread_entry(); 2725 rcu_register_thread(); 2726 2727 retry: 2728 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 2729 migration_is_setup_or_active(ms->state)) { 2730 trace_source_return_path_thread_loop_top(); 2731 header_type = qemu_get_be16(rp); 2732 header_len = qemu_get_be16(rp); 2733 2734 if (qemu_file_get_error(rp)) { 2735 mark_source_rp_bad(ms); 2736 goto out; 2737 } 2738 2739 if (header_type >= MIG_RP_MSG_MAX || 2740 header_type == MIG_RP_MSG_INVALID) { 2741 error_report("RP: Received invalid message 0x%04x length 0x%04x", 2742 header_type, header_len); 2743 mark_source_rp_bad(ms); 2744 goto out; 2745 } 2746 2747 if ((rp_cmd_args[header_type].len != -1 && 2748 header_len != rp_cmd_args[header_type].len) || 2749 header_len > sizeof(buf)) { 2750 error_report("RP: Received '%s' message (0x%04x) with " 2751 "incorrect length %d expecting %zu", 2752 rp_cmd_args[header_type].name, header_type, header_len, 2753 (size_t)rp_cmd_args[header_type].len); 2754 mark_source_rp_bad(ms); 2755 goto out; 2756 } 2757 2758 /* We know we've got a valid header by this point */ 2759 res = qemu_get_buffer(rp, buf, header_len); 2760 if (res != header_len) { 2761 error_report("RP: Failed reading data for message 0x%04x" 2762 " read %d expected %d", 2763 header_type, res, header_len); 2764 mark_source_rp_bad(ms); 2765 goto out; 2766 } 2767 2768 /* OK, we have the message and the data */ 2769 switch (header_type) { 2770 case MIG_RP_MSG_SHUT: 2771 sibling_error = ldl_be_p(buf); 2772 trace_source_return_path_thread_shut(sibling_error); 2773 if (sibling_error) { 2774 error_report("RP: Sibling indicated error %d", sibling_error); 2775 mark_source_rp_bad(ms); 2776 } 2777 /* 2778 * We'll let the main thread deal with closing the RP; 2779 * we could do a shutdown(2) on it, but we're the only user 2780 * anyway, so there's nothing gained.
2781 */ 2782 goto out; 2783 2784 case MIG_RP_MSG_PONG: 2785 tmp32 = ldl_be_p(buf); 2786 trace_source_return_path_thread_pong(tmp32); 2787 break; 2788 2789 case MIG_RP_MSG_REQ_PAGES: 2790 start = ldq_be_p(buf); 2791 len = ldl_be_p(buf + 8); 2792 migrate_handle_rp_req_pages(ms, NULL, start, len); 2793 break; 2794 2795 case MIG_RP_MSG_REQ_PAGES_ID: 2796 expected_len = 12 + 1; /* header + termination */ 2797 2798 if (header_len >= expected_len) { 2799 start = ldq_be_p(buf); 2800 len = ldl_be_p(buf + 8); 2801 /* Now we expect an idstr */ 2802 tmp32 = buf[12]; /* Length of the following idstr */ 2803 buf[13 + tmp32] = '\0'; 2804 expected_len += tmp32; 2805 } 2806 if (header_len != expected_len) { 2807 error_report("RP: Req_Page_id with length %d expecting %zd", 2808 header_len, expected_len); 2809 mark_source_rp_bad(ms); 2810 goto out; 2811 } 2812 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 2813 break; 2814 2815 case MIG_RP_MSG_RECV_BITMAP: 2816 if (header_len < 1) { 2817 error_report("%s: missing block name", __func__); 2818 mark_source_rp_bad(ms); 2819 goto out; 2820 } 2821 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 2822 buf[buf[0] + 1] = '\0'; 2823 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 2824 mark_source_rp_bad(ms); 2825 goto out; 2826 } 2827 break; 2828 2829 case MIG_RP_MSG_RESUME_ACK: 2830 tmp32 = ldl_be_p(buf); 2831 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 2832 mark_source_rp_bad(ms); 2833 goto out; 2834 } 2835 break; 2836 2837 default: 2838 break; 2839 } 2840 } 2841 2842 out: 2843 res = qemu_file_get_error(rp); 2844 if (res) { 2845 if (res == -EIO && migration_in_postcopy()) { 2846 /* 2847 * Maybe there is something we can do: it looks like a 2848 * network down issue, and we pause for a recovery. 2849 */ 2850 migration_release_from_dst_file(ms); 2851 rp = NULL; 2852 if (postcopy_pause_return_path_thread(ms)) { 2853 /* 2854 * Reload rp, reset the rest. Referencing it is safe since 2855 * it's reset only by us above, or when migration completes 2856 */ 2857 rp = ms->rp_state.from_dst_file; 2858 ms->rp_state.error = false; 2859 goto retry; 2860 } 2861 } 2862 2863 trace_source_return_path_thread_bad_end(); 2864 mark_source_rp_bad(ms); 2865 } 2866 2867 trace_source_return_path_thread_end(); 2868 migration_release_from_dst_file(ms); 2869 rcu_unregister_thread(); 2870 return NULL; 2871 } 2872 2873 static int open_return_path_on_source(MigrationState *ms, 2874 bool create_thread) 2875 { 2876 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2877 if (!ms->rp_state.from_dst_file) { 2878 return -1; 2879 } 2880 2881 trace_open_return_path_on_source(); 2882 2883 if (!create_thread) { 2884 /* We're done */ 2885 return 0; 2886 } 2887 2888 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 2889 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2890 ms->rp_state.rp_thread_created = true; 2891 2892 trace_open_return_path_on_source_continue(); 2893 2894 return 0; 2895 } 2896 2897 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 2898 static int await_return_path_close_on_source(MigrationState *ms) 2899 { 2900 /* 2901 * If this is a normal exit then the destination will send a SHUT and the 2902 * rp_thread will exit, however if there's an error we need to cause 2903 * it to exit. 2904 */ 2905 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 2906 /* 2907 * shutdown(2), if we have it, will cause it to unblock if it's stuck 2908 * waiting for the destination. 
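* Marking the return path bad below makes sure nobody trusts whatever it was carrying when the stream broke.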
*/ 2910 qemu_file_shutdown(ms->rp_state.from_dst_file); 2911 mark_source_rp_bad(ms); 2912 } 2913 trace_await_return_path_close_on_source_joining(); 2914 qemu_thread_join(&ms->rp_state.rp_thread); 2915 ms->rp_state.rp_thread_created = false; 2916 trace_await_return_path_close_on_source_close(); 2917 return ms->rp_state.error; 2918 } 2919 2920 /* 2921 * Switch from normal iteration to postcopy. 2922 * Returns non-0 on error 2923 */ 2924 static int postcopy_start(MigrationState *ms) 2925 { 2926 int ret; 2927 QIOChannelBuffer *bioc; 2928 QEMUFile *fb; 2929 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2930 int64_t bandwidth = migrate_max_postcopy_bandwidth(); 2931 bool restart_block = false; 2932 int cur_state = MIGRATION_STATUS_ACTIVE; 2933 if (!migrate_pause_before_switchover()) { 2934 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 2935 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2936 } 2937 2938 trace_postcopy_start(); 2939 qemu_mutex_lock_iothread(); 2940 trace_postcopy_start_set_run(); 2941 2942 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2943 global_state_store(); 2944 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2945 if (ret < 0) { 2946 goto fail; 2947 } 2948 2949 ret = migration_maybe_pause(ms, &cur_state, 2950 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2951 if (ret < 0) { 2952 goto fail; 2953 } 2954 2955 ret = bdrv_inactivate_all(); 2956 if (ret < 0) { 2957 goto fail; 2958 } 2959 restart_block = true; 2960 2961 /* 2962 * Cause any non-postcopiable, but iterative devices to 2963 * send out their final data. 2964 */ 2965 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 2966 2967 /* 2968 * In Finish migrate, and with the io-lock held, everything should 2969 * be quiet, but we've potentially still got dirty pages and we 2970 * need to tell the destination to throw away any pages it's already received 2971 * that are dirty 2972 */ 2973 if (migrate_postcopy_ram()) { 2974 if (ram_postcopy_send_discard_bitmap(ms)) { 2975 error_report("postcopy send discard bitmap failed"); 2976 goto fail; 2977 } 2978 } 2979 2980 /* 2981 * Send the rest of the state - note that things that are doing postcopy 2982 * will notice we're in POSTCOPY_ACTIVE and not actually 2983 * wrap their state up here 2984 */ 2985 /* 0 max-postcopy-bandwidth means unlimited */ 2986 if (!bandwidth) { 2987 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 2988 } else { 2989 qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); 2990 } 2991 if (migrate_postcopy_ram()) { 2992 /* Ping just for debugging, helps line traces up */ 2993 qemu_savevm_send_ping(ms->to_dst_file, 2); 2994 } 2995 2996 /* 2997 * While loading the device state we may trigger page transfer 2998 * requests and the fd must be free to process those, and thus 2999 * the destination must read the whole device state off the fd before 3000 * it starts processing it. Unfortunately the ad-hoc migration format 3001 * doesn't allow the destination to know the size to read without fully 3002 * parsing it through each device's load-state code (especially the open- 3003 * coded devices that use get/put). 3004 * So we wrap the device state up in a package with a length at the start; 3005 * to do this we use a qemu_buf to hold the whole of the device state.
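* The buffered channel created below collects that package; it is then pushed to the wire in one piece by qemu_savevm_send_packaged().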
*/ 3007 bioc = qio_channel_buffer_new(4096); 3008 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 3009 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 3010 object_unref(OBJECT(bioc)); 3011 3012 /* 3013 * Make sure the receiver can get incoming pages before we send the rest 3014 * of the state 3015 */ 3016 qemu_savevm_send_postcopy_listen(fb); 3017 3018 qemu_savevm_state_complete_precopy(fb, false, false); 3019 if (migrate_postcopy_ram()) { 3020 qemu_savevm_send_ping(fb, 3); 3021 } 3022 3023 qemu_savevm_send_postcopy_run(fb); 3024 3025 /* <><> end of stuff going into the package */ 3026 3027 /* Last point of recovery; as soon as we send the package the destination 3028 * can open devices and potentially start running. 3029 * Let's just check again that we've not got any errors. 3030 */ 3031 ret = qemu_file_get_error(ms->to_dst_file); 3032 if (ret) { 3033 error_report("postcopy_start: Migration stream errored (pre package)"); 3034 goto fail_closefb; 3035 } 3036 3037 restart_block = false; 3038 3039 /* Now send that blob */ 3040 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 3041 goto fail_closefb; 3042 } 3043 qemu_fclose(fb); 3044 3045 /* Send a notify to give a chance for anything that needs to happen 3046 * at the transition to postcopy and after the device state; in particular 3047 * spice needs to trigger a transition now 3048 */ 3049 ms->postcopy_after_devices = true; 3050 notifier_list_notify(&migration_state_notifiers, ms); 3051 3052 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 3053 3054 qemu_mutex_unlock_iothread(); 3055 3056 if (migrate_postcopy_ram()) { 3057 /* 3058 * Although this ping is just for debug, it could potentially be 3059 * used for getting a better measurement of downtime at the source. 3060 */ 3061 qemu_savevm_send_ping(ms->to_dst_file, 4); 3062 } 3063 3064 if (migrate_release_ram()) { 3065 ram_postcopy_migrated_memory_release(ms); 3066 } 3067 3068 ret = qemu_file_get_error(ms->to_dst_file); 3069 if (ret) { 3070 error_report("postcopy_start: Migration stream errored"); 3071 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 3072 MIGRATION_STATUS_FAILED); 3073 } 3074 3075 return ret; 3076 3077 fail_closefb: 3078 qemu_fclose(fb); 3079 fail: 3080 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 3081 MIGRATION_STATUS_FAILED); 3082 if (restart_block) { 3083 /* A failure happened early enough that we know the destination hasn't 3084 * accessed block devices, so we're safe to recover. 3085 */ 3086 Error *local_err = NULL; 3087 3088 bdrv_invalidate_cache_all(&local_err); 3089 if (local_err) { 3090 error_report_err(local_err); 3091 } 3092 } 3093 qemu_mutex_unlock_iothread(); 3094 return -1; 3095 } 3096 3097 /** 3098 * migration_maybe_pause: Pause if required to by 3099 * migrate_pause_before_switchover; called with the iothread locked 3100 * Returns: 0 on success 3101 */ 3102 static int migration_maybe_pause(MigrationState *s, 3103 int *current_active_state, 3104 int new_state) 3105 { 3106 if (!migrate_pause_before_switchover()) { 3107 return 0; 3108 } 3109 3110 /* Since leaving this state is not atomic with posting the semaphore 3111 * it's possible that someone could have issued multiple migrate_continue 3112 * commands and the semaphore is incorrectly positive at this point; 3113 * the docs say it's undefined to reinit a semaphore that's already 3114 * init'd, so use timedwait to eat up any existing posts.
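* Each 1ms timedwait below consumes one leftover post without blocking for long.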
*/ 3116 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 3117 /* This block intentionally left blank */ 3118 } 3119 3120 /* 3121 * If the migration is cancelled when it is in the completion phase, 3122 * the migration state is set to MIGRATION_STATUS_CANCELLING. 3123 * So we don't need to wait on the semaphore; otherwise we would always 3124 * wait for the 'pause_sem' semaphore. 3125 */ 3126 if (s->state != MIGRATION_STATUS_CANCELLING) { 3127 qemu_mutex_unlock_iothread(); 3128 migrate_set_state(&s->state, *current_active_state, 3129 MIGRATION_STATUS_PRE_SWITCHOVER); 3130 qemu_sem_wait(&s->pause_sem); 3131 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 3132 new_state); 3133 *current_active_state = new_state; 3134 qemu_mutex_lock_iothread(); 3135 } 3136 3137 return s->state == new_state ? 0 : -EINVAL; 3138 } 3139 3140 /** 3141 * migration_completion: Used by migration_thread when there's not much left. 3142 * The caller 'breaks' the loop when this returns. 3143 * 3144 * @s: Current migration state 3145 */ 3146 static void migration_completion(MigrationState *s) 3147 { 3148 int ret; 3149 int current_active_state = s->state; 3150 3151 if (s->state == MIGRATION_STATUS_ACTIVE) { 3152 qemu_mutex_lock_iothread(); 3153 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3154 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3155 s->vm_was_running = runstate_is_running(); 3156 ret = global_state_store(); 3157 3158 if (!ret) { 3159 bool inactivate = !migrate_colo_enabled(); 3160 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 3161 trace_migration_completion_vm_stop(ret); 3162 if (ret >= 0) { 3163 ret = migration_maybe_pause(s, &current_active_state, 3164 MIGRATION_STATUS_DEVICE); 3165 } 3166 if (ret >= 0) { 3167 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 3168 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 3169 inactivate); 3170 } 3171 if (inactivate && ret >= 0) { 3172 s->block_inactive = true; 3173 } 3174 } 3175 qemu_mutex_unlock_iothread(); 3176 3177 if (ret < 0) { 3178 goto fail; 3179 } 3180 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3181 trace_migration_completion_postcopy_end(); 3182 3183 qemu_mutex_lock_iothread(); 3184 qemu_savevm_state_complete_postcopy(s->to_dst_file); 3185 qemu_mutex_unlock_iothread(); 3186 3187 trace_migration_completion_postcopy_end_after_complete(); 3188 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3189 goto fail; 3190 } 3191 3192 /* 3193 * If rp was opened we must clean up the thread before 3194 * cleaning everything else up (since if there are no failures 3195 * it will wait for the destination to send its status in 3196 * a SHUT command). 3197 */ 3198 if (s->rp_state.rp_thread_created) { 3199 int rp_error; 3200 trace_migration_return_path_end_before(); 3201 rp_error = await_return_path_close_on_source(s); 3202 trace_migration_return_path_end_after(rp_error); 3203 if (rp_error) { 3204 goto fail_invalidate; 3205 } 3206 } 3207 3208 if (qemu_file_get_error(s->to_dst_file)) { 3209 trace_migration_completion_file_err(); 3210 goto fail_invalidate; 3211 } 3212 3213 if (!migrate_colo_enabled()) { 3214 migrate_set_state(&s->state, current_active_state, 3215 MIGRATION_STATUS_COMPLETED); 3216 } 3217 3218 return; 3219 3220 fail_invalidate: 3221 /* If not doing postcopy, vm_start() will be called: let's regain 3222 * control on images.
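* bdrv_invalidate_cache_all() below gives control of the images back to this QEMU and clears block_inactive on success.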
*/ 3224 if (s->state == MIGRATION_STATUS_ACTIVE || 3225 s->state == MIGRATION_STATUS_DEVICE) { 3226 Error *local_err = NULL; 3227 3228 qemu_mutex_lock_iothread(); 3229 bdrv_invalidate_cache_all(&local_err); 3230 if (local_err) { 3231 error_report_err(local_err); 3232 } else { 3233 s->block_inactive = false; 3234 } 3235 qemu_mutex_unlock_iothread(); 3236 } 3237 3238 fail: 3239 migrate_set_state(&s->state, current_active_state, 3240 MIGRATION_STATUS_FAILED); 3241 } 3242 3243 /** 3244 * bg_migration_completion: Used by bg_migration_thread after all the 3245 * RAM has been saved. The caller 'breaks' the loop when this returns. 3246 * 3247 * @s: Current migration state 3248 */ 3249 static void bg_migration_completion(MigrationState *s) 3250 { 3251 int current_active_state = s->state; 3252 3253 /* 3254 * Stop tracking RAM writes - un-protect memory, un-register UFFD 3255 * memory ranges, flush kernel wait queues and wake up threads 3256 * waiting for write fault to be resolved. 3257 */ 3258 ram_write_tracking_stop(); 3259 3260 if (s->state == MIGRATION_STATUS_ACTIVE) { 3261 /* 3262 * By this moment we have RAM content saved into the migration stream. 3263 * The next step is to flush the non-RAM content (device state) 3264 * right after the ram content. The device state has been stored into 3265 * the temporary buffer before RAM saving started. 3266 */ 3267 qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); 3268 qemu_fflush(s->to_dst_file); 3269 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3270 goto fail; 3271 } 3272 3273 if (qemu_file_get_error(s->to_dst_file)) { 3274 trace_migration_completion_file_err(); 3275 goto fail; 3276 } 3277 3278 migrate_set_state(&s->state, current_active_state, 3279 MIGRATION_STATUS_COMPLETED); 3280 return; 3281 3282 fail: 3283 migrate_set_state(&s->state, current_active_state, 3284 MIGRATION_STATUS_FAILED); 3285 } 3286 3287 bool migrate_colo_enabled(void) 3288 { 3289 MigrationState *s = migrate_get_current(); 3290 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 3291 } 3292 3293 typedef enum MigThrError { 3294 /* No error detected */ 3295 MIG_THR_ERR_NONE = 0, 3296 /* Detected error, but resumed successfully */ 3297 MIG_THR_ERR_RECOVERED = 1, 3298 /* Detected fatal error, need to exit */ 3299 MIG_THR_ERR_FATAL = 2, 3300 } MigThrError; 3301 3302 static int postcopy_resume_handshake(MigrationState *s) 3303 { 3304 qemu_savevm_send_postcopy_resume(s->to_dst_file); 3305 3306 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3307 qemu_sem_wait(&s->rp_state.rp_sem); 3308 } 3309 3310 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3311 return 0; 3312 } 3313 3314 return -1; 3315 } 3316 3317 /* Return zero on success, or <0 on error */ 3318 static int postcopy_do_resume(MigrationState *s) 3319 { 3320 int ret; 3321 3322 /* 3323 * Call all the resume_prepare() hooks, so that modules can be 3324 * ready for the migration resume. 3325 */ 3326 ret = qemu_savevm_state_resume_prepare(s); 3327 if (ret) { 3328 error_report("%s: resume_prepare() failure detected: %d", 3329 __func__, ret); 3330 return ret; 3331 } 3332 3333 /* 3334 * Last handshake with destination on the resume (destination will 3335 * switch to postcopy-active afterwards) 3336 */ 3337 ret = postcopy_resume_handshake(s); 3338 if (ret) { 3339 error_report("%s: handshake failed: %d", __func__, ret); 3340 return ret; 3341 } 3342 3343 return 0; 3344 } 3345 3346 /* 3347 * We don't return until we are in a safe state to continue the current 3348 * postcopy migration.
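* The pause/recover cycle below can repeat any number of times until either the resume succeeds or we hit an unexpected state.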
Returns MIG_THR_ERR_RECOVERED if recovered, or 3349 * MIG_THR_ERR_FATAL if an unrecoverable failure happened. 3350 */ 3351 static MigThrError postcopy_pause(MigrationState *s) 3352 { 3353 assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 3354 3355 while (true) { 3356 QEMUFile *file; 3357 3358 /* 3359 * Current channel is possibly broken. Release it. Note that this is 3360 * guaranteed even without the lock because to_dst_file should only be 3361 * modified by the migration thread. That also guarantees that the 3362 * unregister of yank is safe too without the lock. It should be safe 3363 * even to be within the qemu_file_lock, but we didn't do that to avoid 3364 * taking another mutex (yank_lock) within qemu_file_lock. TL;DR: we make 3365 * the qemu_file_lock critical section as small as possible. 3366 */ 3367 assert(s->to_dst_file); 3368 migration_ioc_unregister_yank_from_file(s->to_dst_file); 3369 qemu_mutex_lock(&s->qemu_file_lock); 3370 file = s->to_dst_file; 3371 s->to_dst_file = NULL; 3372 qemu_mutex_unlock(&s->qemu_file_lock); 3373 3374 qemu_file_shutdown(file); 3375 qemu_fclose(file); 3376 3377 migrate_set_state(&s->state, s->state, 3378 MIGRATION_STATUS_POSTCOPY_PAUSED); 3379 3380 error_report("Detected IO failure for postcopy. " 3381 "Migration paused."); 3382 3383 /* 3384 * We wait until things are fixed up. Then someone will set the 3385 * status back for us. 3386 */ 3387 while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 3388 qemu_sem_wait(&s->postcopy_pause_sem); 3389 } 3390 3391 if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3392 /* Woken up by a recover procedure. Give it a shot */ 3393 3394 /* 3395 * Firstly, let's wake up the return path now, with a new 3396 * return path channel. 3397 */ 3398 qemu_sem_post(&s->postcopy_pause_rp_sem); 3399 3400 /* Do the resume logic */ 3401 if (postcopy_do_resume(s) == 0) { 3402 /* Let's continue! */ 3403 trace_postcopy_pause_continued(); 3404 return MIG_THR_ERR_RECOVERED; 3405 } else { 3406 /* 3407 * Something went wrong during the recovery, let's 3408 * pause again. Pausing is always better than throwing 3409 * data away. 3410 */ 3411 continue; 3412 } 3413 } else { 3414 /* This is not right... Time to quit. */ 3415 return MIG_THR_ERR_FATAL; 3416 } 3417 } 3418 } 3419 3420 static MigThrError migration_detect_error(MigrationState *s) 3421 { 3422 int ret; 3423 int state = s->state; 3424 Error *local_error = NULL; 3425 3426 if (state == MIGRATION_STATUS_CANCELLING || 3427 state == MIGRATION_STATUS_CANCELLED) { 3428 /* End the migration, but don't set the state to failed */ 3429 return MIG_THR_ERR_FATAL; 3430 } 3431 3432 /* Try to detect any file errors */ 3433 ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); 3434 if (!ret) { 3435 /* Everything is fine */ 3436 assert(!local_error); 3437 return MIG_THR_ERR_NONE; 3438 } 3439 3440 if (local_error) { 3441 migrate_set_error(s, local_error); 3442 error_free(local_error); 3443 } 3444 3445 if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { 3446 /* 3447 * For postcopy, we allow the network to be down for a 3448 * while. After that, it can be continued by a 3449 * recovery phase. 3450 */ 3451 return postcopy_pause(s); 3452 } else { 3453 /* 3454 * For precopy (or postcopy with an error outside IO), we fail 3455 * immediately. 3456 */ 3457 migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); 3458 trace_migration_thread_file_err(); 3459 3460 /* Time to stop the migration, now.
*/ 3461 return MIG_THR_ERR_FATAL; 3462 } 3463 } 3464 3465 /* How many bytes have we transferred since the beginning of the migration */ 3466 static uint64_t migration_total_bytes(MigrationState *s) 3467 { 3468 return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; 3469 } 3470 3471 static void migration_calculate_complete(MigrationState *s) 3472 { 3473 uint64_t bytes = migration_total_bytes(s); 3474 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3475 int64_t transfer_time; 3476 3477 s->total_time = end_time - s->start_time; 3478 if (!s->downtime) { 3479 /* 3480 * It's still not set, so this is a precopy migration. For 3481 * postcopy, downtime is calculated during postcopy_start(). 3482 */ 3483 s->downtime = end_time - s->downtime_start; 3484 } 3485 3486 transfer_time = s->total_time - s->setup_time; 3487 if (transfer_time) { 3488 s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; 3489 } 3490 } 3491 3492 static void update_iteration_initial_status(MigrationState *s) 3493 { 3494 /* 3495 * Update these three fields at the same time to avoid mismatched info 3496 * leading to wrong speed calculations. 3497 */ 3498 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3499 s->iteration_initial_bytes = migration_total_bytes(s); 3500 s->iteration_initial_pages = ram_get_total_transferred_pages(); 3501 } 3502 3503 static void migration_update_counters(MigrationState *s, 3504 int64_t current_time) 3505 { 3506 uint64_t transferred, transferred_pages, time_spent; 3507 uint64_t current_bytes; /* bytes transferred since the beginning */ 3508 double bandwidth; 3509 3510 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 3511 return; 3512 } 3513 3514 current_bytes = migration_total_bytes(s); 3515 transferred = current_bytes - s->iteration_initial_bytes; 3516 time_spent = current_time - s->iteration_start_time; 3517 bandwidth = (double)transferred / time_spent; 3518 s->threshold_size = bandwidth * s->parameters.downtime_limit; 3519 3520 s->mbps = (((double) transferred * 8.0) / 3521 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 3522 3523 transferred_pages = ram_get_total_transferred_pages() - 3524 s->iteration_initial_pages; 3525 s->pages_per_second = (double) transferred_pages / 3526 (((double) time_spent / 1000.0)); 3527 3528 /* 3529 * If we haven't sent anything, we don't want to 3530 * recalculate. 10000 is a small enough number for our purposes 3531 */ 3532 if (ram_counters.dirty_pages_rate && transferred > 10000) { 3533 s->expected_downtime = ram_counters.remaining / bandwidth; 3534 } 3535 3536 qemu_file_reset_rate_limit(s->to_dst_file); 3537 3538 update_iteration_initial_status(s); 3539 3540 trace_migrate_transferred(transferred, time_spent, 3541 bandwidth, s->threshold_size); 3542 } 3543 3544 /* Migration thread iteration status */ 3545 typedef enum { 3546 MIG_ITERATE_RESUME, /* Resume current iteration */ 3547 MIG_ITERATE_SKIP, /* Skip current iteration */ 3548 MIG_ITERATE_BREAK, /* Break the loop */ 3549 } MigIterateState; 3550 3551 /* 3552 * Return true if we should continue to the next iteration directly, false 3553 * otherwise.
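* (MIG_ITERATE_SKIP is returned right after postcopy_start(); the caller then restarts the loop without the rate-limit bookkeeping for that pass.)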
*/ 3555 static MigIterateState migration_iteration_run(MigrationState *s) 3556 { 3557 uint64_t pending_size, pend_pre, pend_compat, pend_post; 3558 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 3559 3560 qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, 3561 &pend_compat, &pend_post); 3562 pending_size = pend_pre + pend_compat + pend_post; 3563 3564 trace_migrate_pending(pending_size, s->threshold_size, 3565 pend_pre, pend_compat, pend_post); 3566 3567 if (pending_size && pending_size >= s->threshold_size) { 3568 /* Still a significant amount to transfer */ 3569 if (!in_postcopy && pend_pre <= s->threshold_size && 3570 qatomic_read(&s->start_postcopy)) { 3571 if (postcopy_start(s)) { 3572 error_report("%s: postcopy failed to start", __func__); 3573 } 3574 return MIG_ITERATE_SKIP; 3575 } 3576 /* Just another iteration step */ 3577 qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); 3578 } else { 3579 trace_migration_thread_low_pending(pending_size); 3580 migration_completion(s); 3581 return MIG_ITERATE_BREAK; 3582 } 3583 3584 return MIG_ITERATE_RESUME; 3585 } 3586 3587 static void migration_iteration_finish(MigrationState *s) 3588 { 3589 /* If we enabled cpu throttling for auto-converge, turn it off. */ 3590 cpu_throttle_stop(); 3591 3592 qemu_mutex_lock_iothread(); 3593 switch (s->state) { 3594 case MIGRATION_STATUS_COMPLETED: 3595 migration_calculate_complete(s); 3596 runstate_set(RUN_STATE_POSTMIGRATE); 3597 break; 3598 3599 case MIGRATION_STATUS_ACTIVE: 3600 /* 3601 * We should really assert here, but since it's during 3602 * migration, let's try to reduce the usage of assertions. 3603 */ 3604 if (!migrate_colo_enabled()) { 3605 error_report("%s: critical error: calling COLO code without " 3606 "COLO enabled", __func__); 3607 } 3608 migrate_start_colo_process(s); 3609 /* 3610 * Fixme: we will run the VM in COLO no matter what its old running 3611 * state was. After exiting COLO, we will keep running. 3612 */ 3613 s->vm_was_running = true; 3614 /* Fallthrough */ 3615 case MIGRATION_STATUS_FAILED: 3616 case MIGRATION_STATUS_CANCELLED: 3617 case MIGRATION_STATUS_CANCELLING: 3618 if (s->vm_was_running) { 3619 vm_start(); 3620 } else { 3621 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 3622 runstate_set(RUN_STATE_POSTMIGRATE); 3623 } 3624 } 3625 break; 3626 3627 default: 3628 /* Should not reach here, but if so, forgive the VM. */ 3629 error_report("%s: Unknown ending state %d", __func__, s->state); 3630 break; 3631 } 3632 migrate_fd_cleanup_schedule(s); 3633 qemu_mutex_unlock_iothread(); 3634 } 3635 3636 static void bg_migration_iteration_finish(MigrationState *s) 3637 { 3638 qemu_mutex_lock_iothread(); 3639 switch (s->state) { 3640 case MIGRATION_STATUS_COMPLETED: 3641 migration_calculate_complete(s); 3642 break; 3643 3644 case MIGRATION_STATUS_ACTIVE: 3645 case MIGRATION_STATUS_FAILED: 3646 case MIGRATION_STATUS_CANCELLED: 3647 case MIGRATION_STATUS_CANCELLING: 3648 break; 3649 3650 default: 3651 /* Should not reach here, but if so, forgive the VM. */ 3652 error_report("%s: Unknown ending state %d", __func__, s->state); 3653 break; 3654 } 3655 3656 migrate_fd_cleanup_schedule(s); 3657 qemu_mutex_unlock_iothread(); 3658 } 3659 3660 /* 3661 * Return true if we should continue to the next iteration directly, false 3662 * otherwise.
*/ 3664 static MigIterateState bg_migration_iteration_run(MigrationState *s) 3665 { 3666 int res; 3667 3668 res = qemu_savevm_state_iterate(s->to_dst_file, false); 3669 if (res > 0) { 3670 bg_migration_completion(s); 3671 return MIG_ITERATE_BREAK; 3672 } 3673 3674 return MIG_ITERATE_RESUME; 3675 } 3676 3677 void migration_make_urgent_request(void) 3678 { 3679 qemu_sem_post(&migrate_get_current()->rate_limit_sem); 3680 } 3681 3682 void migration_consume_urgent_request(void) 3683 { 3684 qemu_sem_wait(&migrate_get_current()->rate_limit_sem); 3685 } 3686 3687 /* Returns true if the rate limiting was broken by an urgent request */ 3688 bool migration_rate_limit(void) 3689 { 3690 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3691 MigrationState *s = migrate_get_current(); 3692 3693 bool urgent = false; 3694 migration_update_counters(s, now); 3695 if (qemu_file_rate_limit(s->to_dst_file)) { 3696 3697 if (qemu_file_get_error(s->to_dst_file)) { 3698 return false; 3699 } 3700 /* 3701 * Wait for the rate-limiting delay, OR for 3702 * something urgent to post the semaphore. 3703 */ 3704 int ms = s->iteration_start_time + BUFFER_DELAY - now; 3705 trace_migration_rate_limit_pre(ms); 3706 if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { 3707 /* 3708 * We were woken by one or more urgent things but 3709 * the timedwait will have consumed one of them. 3710 * The service routine for the urgent wake will dec 3711 * the semaphore itself for each item it consumes, 3712 * so add back the one we just consumed here. 3713 */ 3714 qemu_sem_post(&s->rate_limit_sem); 3715 urgent = true; 3716 } 3717 trace_migration_rate_limit_post(urgent); 3718 } 3719 return urgent; 3720 } 3721 3722 /* 3723 * If failover devices are present, wait until they are completely 3724 * unplugged 3725 */ 3726 3727 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, 3728 int new_state) 3729 { 3730 if (qemu_savevm_state_guest_unplug_pending()) { 3731 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG); 3732 3733 while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && 3734 qemu_savevm_state_guest_unplug_pending()) { 3735 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3736 } 3737 if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) { 3738 int timeout = 120; /* 30 seconds */ 3739 /* 3740 * migration has been canceled, 3741 * but as we have started an unplug we must wait until the end 3742 * to be able to plug the card back 3743 */ 3744 while (timeout-- && qemu_savevm_state_guest_unplug_pending()) { 3745 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3746 } 3747 if (qemu_savevm_state_guest_unplug_pending()) { 3748 warn_report("migration: partially unplugged device on " 3749 "failure"); 3750 } 3751 } 3752 3753 migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); 3754 } else { 3755 migrate_set_state(&s->state, old_state, new_state); 3756 } 3757 } 3758 3759 /* 3760 * Master migration thread on the source VM. 3761 * It drives the migration and pumps the data down the outgoing channel. 3762 */ 3763 static void *migration_thread(void *opaque) 3764 { 3765 MigrationState *s = opaque; 3766 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3767 MigThrError thr_error; 3768 bool urgent = false; 3769 3770 rcu_register_thread(); 3771 3772 object_ref(OBJECT(s)); 3773 update_iteration_initial_status(s); 3774 3775 qemu_savevm_state_header(s->to_dst_file); 3776 3777 /* 3778 * If we opened the return path, we need to make sure dst has it 3779 * opened as well.
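* qemu_savevm_send_open_return_path() below asks for exactly that, and the PING that follows should come back as a PONG handled in source_return_path_thread().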
*/ 3781 if (s->rp_state.rp_thread_created) { 3782 /* Now tell the dest that it should open its end so it can reply */ 3783 qemu_savevm_send_open_return_path(s->to_dst_file); 3784 3785 /* And do a ping that will make stuff easier to debug */ 3786 qemu_savevm_send_ping(s->to_dst_file, 1); 3787 } 3788 3789 if (migrate_postcopy()) { 3790 /* 3791 * Tell the destination that we *might* want to do postcopy later; 3792 * if the other end can't do postcopy it should fail now, nice and 3793 * early. 3794 */ 3795 qemu_savevm_send_postcopy_advise(s->to_dst_file); 3796 } 3797 3798 if (migrate_colo_enabled()) { 3799 /* Notify migration destination that we enable COLO */ 3800 qemu_savevm_send_colo_enable(s->to_dst_file); 3801 } 3802 3803 qemu_savevm_state_setup(s->to_dst_file); 3804 3805 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, 3806 MIGRATION_STATUS_ACTIVE); 3807 3808 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 3809 3810 trace_migration_thread_setup_complete(); 3811 3812 while (migration_is_active(s)) { 3813 if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { 3814 MigIterateState iter_state = migration_iteration_run(s); 3815 if (iter_state == MIG_ITERATE_SKIP) { 3816 continue; 3817 } else if (iter_state == MIG_ITERATE_BREAK) { 3818 break; 3819 } 3820 } 3821 3822 /* 3823 * Try to detect any kind of failures, and see whether we 3824 * should stop the migration now. 3825 */ 3826 thr_error = migration_detect_error(s); 3827 if (thr_error == MIG_THR_ERR_FATAL) { 3828 /* Stop migration */ 3829 break; 3830 } else if (thr_error == MIG_THR_ERR_RECOVERED) { 3831 /* 3832 * Just recovered from, e.g., a network failure; reset all 3833 * the local variables. This is important to avoid 3834 * breaking the transferred_bytes and bandwidth calculations. 3835 */ 3836 update_iteration_initial_status(s); 3837 } 3838 3839 urgent = migration_rate_limit(); 3840 } 3841 3842 trace_migration_thread_after_loop(); 3843 migration_iteration_finish(s); 3844 object_unref(OBJECT(s)); 3845 rcu_unregister_thread(); 3846 return NULL; 3847 } 3848 3849 static void bg_migration_vm_start_bh(void *opaque) 3850 { 3851 MigrationState *s = opaque; 3852 3853 qemu_bh_delete(s->vm_start_bh); 3854 s->vm_start_bh = NULL; 3855 3856 vm_start(); 3857 s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start; 3858 } 3859 3860 /** 3861 * Background snapshot thread, based on live migration code. 3862 * This is an alternative implementation of the live migration mechanism, 3863 * introduced specifically to support background snapshots. 3864 * 3865 * It takes advantage of the userfault_fd write protection mechanism introduced 3866 * in the v5.7 kernel. Compared to the existing dirty page logging migration, 3867 * much less stream traffic is produced, resulting in smaller snapshot images, 3868 * simply because no page duplicates can get into the stream. 3869 * 3870 * Another key point is that the generated vmstate stream reflects the machine 3871 * state 'frozen' at the beginning of snapshot creation, unlike the dirty page 3872 * logging mechanism, where the saved snapshot is effectively the state of the VM 3873 * at the end of the process.
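* To get that 'frozen' state on the wire, the device state is stashed into a buffer-backed channel first and is appended to the stream only after all of the RAM has been sent (see bg_migration_completion()).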
/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of the live migration mechanism,
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel.  Compared to the existing dirty page
 * logging migration it produces much less stream traffic, resulting in
 * smaller snapshot images, simply because no page duplicates can get
 * into the stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, whereas
 * with the dirty page logging mechanism the saved snapshot is effectively
 * the state of the VM at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;

    rcu_register_thread();
    object_ref(OBJECT(s));

    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save the vmstate for the moment when migration was
     * initiated, but we also want to save the RAM content while the VM
     * is running.  The RAM content should appear first in the vmstate.
     * So we first stash the non-RAM part of the vmstate in a temporary
     * buffer, then write the RAM part of the vmstate to the migration
     * stream with the vCPUs running and, finally, append the stashed
     * non-RAM part from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
    object_unref(OBJECT(s->bioc));

    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    qemu_savevm_state_header(s->to_dst_file);
    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();

    /*
     * If the VM is currently suspended, we need to wake it up first so
     * that vm_stop_force_state() can make a valid runstate transition.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    s->vm_was_running = runstate_is_running();

    if (global_state_store()) {
        goto fail;
    }
    /* Forcibly stop the VM before saving the state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
    }
    /*
     * Put vCPUs in sync with shadow context structures, then
     * save their state to the channel buffer along with the devices.
     */
    cpu_synchronize_all_states();
    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, an explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize the UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;
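    /*
     * From this point on, every guest write to a protected page is
     * reported on the userfaultfd and the old page content is copied
     * into the snapshot before the protection is dropped.  A minimal
     * sketch of the kernel API that ram_write_tracking_start() is built
     * on (Linux >= 5.7; the actual details live in migration/ram.c):
     *
     *     struct uffdio_writeprotect wp = {
     *         .range = { .start = (uint64_t)addr, .len = len },
     *         .mode  = UFFDIO_WRITEPROTECT_MODE_WP,
     *     };
     *     ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);  // arm protection
     *     wp.mode = 0;
     *     ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);  // drop it after copying
     */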
    /*
     * Start the VM from a BH handler to avoid a write fault here:
     * UFFD-WP protection for the whole of RAM is already enabled, so
     * calling the VM state change notifiers from vm_start() directly
     * would trigger writes to virtio VQ memory, which lies in the
     * write-protected region.
     */
    s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
    qemu_bh_schedule(s->vm_start_bh);

    qemu_mutex_unlock_iothread();

    while (migration_is_active(s)) {
        MigIterateState iter_state = bg_migration_iteration_run(s);
        if (iter_state == MIG_ITERATE_SKIP) {
            continue;
        } else if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        qemu_mutex_unlock_iothread();
    }

    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}

void migrate_fd_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    int64_t rate_limit;
    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile, if migration completes successfully, no stale error
     * will be dumped when calling migrate_fd_cleanup().
     */
    migrate_error_free(s);

    s->expected_downtime = s->parameters.downtime_limit;
    if (resume) {
        assert(s->cleanup_bh);
    } else {
        assert(!s->cleanup_bh);
        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
    }
    if (error_in) {
        migrate_fd_error(s, error_in);
        if (resume) {
            /*
             * If the channel is invalid on resume, don't do cleanup;
             * only dump the error and wait for another channel connect
             * from the user.  The error_report still gives the HMP user
             * a hint on what failed.  It is normally done in
             * migrate_fd_cleanup(), but call it here explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migrate_fd_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = s->parameters.max_postcopy_bandwidth /
            XFER_LIMIT_RATIO;
    } else {
        /* This is a fresh migration */
        rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;

        /* Notify before starting the migration thread */
        notifier_list_notify(&migration_state_notifiers, s);
    }

    qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
    qemu_file_set_blocking(s->to_dst_file, true);
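    /*
     * Note that rate_limit is in bytes per BUFFER_DELAY window rather
     * than bytes per second: XFER_LIMIT_RATIO is 1000 / BUFFER_DELAY,
     * so with the default 100ms window a max_bandwidth of, say, 1 GiB/s
     * becomes a budget of roughly 100 MiB per window for
     * qemu_file_rate_limit() to check against.
     */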
    /*
     * Open the return path.  For postcopy, it is used exclusively.  For
     * precopy, the return path is used only if the user enabled the
     * "return-path" capability.
     */
    if (migrate_postcopy_ram() || migrate_use_return_path()) {
        if (open_return_path_on_source(s, !resume)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    if (resume) {
        /* Wake up the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }

    if (multifd_save_setup(&local_err) != 0) {
        error_report_err(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "bg_snapshot",
                           bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, "live_migration",
                           migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
}

void migration_global_dump(Monitor *mon)
{
    MigrationState *ms = migrate_get_current();

    monitor_printf(mon, "globals:\n");
    monitor_printf(mon, "store-global-state: %s\n",
                   ms->store_global_state ? "on" : "off");
    monitor_printf(mon, "only-migratable: %s\n",
                   only_migratable ? "on" : "off");
    monitor_printf(mon, "send-configuration: %s\n",
                   ms->send_configuration ? "on" : "off");
    monitor_printf(mon, "send-section-footer: %s\n",
                   ms->send_section_footer ? "on" : "off");
    monitor_printf(mon, "decompress-error-check: %s\n",
                   ms->decompress_error_check ? "on" : "off");
    monitor_printf(mon, "clear-bitmap-shift: %u\n",
                   ms->clear_bitmap_shift);
}
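/*
 * The properties below follow the usual QEMU convention that an "x-"
 * prefix marks an experimental/unstable interface: such names may be
 * renamed or removed without the deprecation grace period that stable
 * properties get.
 */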
#define DEFINE_PROP_MIG_CAP(name, x)             \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)

static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),
    DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
                     decompress_error_check, true),
    DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                      clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
                     parameters.compress_wait_thread, true),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                      parameters.throttle_trigger_threshold,
                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
                     parameters.cpu_throttle_tailslow, false),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                     parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                       parameters.downtime_limit,
                       DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                       parameters.x_checkpoint_delay,
                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_UINT8("multifd-channels", MigrationState,
                      parameters.multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
                      parameters.multifd_compression,
                      DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
    DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
                      parameters.multifd_zlib_level,
                      DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
    DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
                      parameters.multifd_zstd_level,
                      DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
    DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                     parameters.xbzrle_cache_size,
                     DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
    DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
                     parameters.max_postcopy_bandwidth,
                     DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
    DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
                      parameters.max_cpu_throttle,
                      DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
    DEFINE_PROP_SIZE("announce-initial", MigrationState,
                     parameters.announce_initial,
                     DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
    DEFINE_PROP_SIZE("announce-max", MigrationState,
                     parameters.announce_max,
                     DEFAULT_MIGRATE_ANNOUNCE_MAX),
    DEFINE_PROP_SIZE("announce-rounds", MigrationState,
                     parameters.announce_rounds,
                     DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
    DEFINE_PROP_SIZE("announce-step", MigrationState,
                     parameters.announce_step,
                     DEFAULT_MIGRATE_ANNOUNCE_STEP),

    /* Migration capabilities */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
    DEFINE_PROP_MIG_CAP("x-background-snapshot",
            MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),

    DEFINE_PROP_END_OF_LIST(),
};

static void migration_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    device_class_set_props(dc, migration_properties);
}
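/*
 * instance_finalize below is the mirror image of
 * migration_instance_init(): every mutex and semaphore initialized and
 * every string duplicated at init time must be destroyed or freed here,
 * since the QOM object can in principle be torn down at any point after
 * creation.
 */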
static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    g_free(params->tls_hostname);
    g_free(params->tls_creds);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_sem_destroy(&ms->pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    error_free(ms->error);
}

static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_sem_init(&ms->pause_sem, 0);
    qemu_mutex_init(&ms->error_mutex);

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_decompress_threads = true;
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_multifd_channels = true;
    params->has_multifd_compression = true;
    params->has_multifd_zlib_level = true;
    params->has_multifd_zstd_level = true;
    params->has_xbzrle_cache_size = true;
    params->has_max_postcopy_bandwidth = true;
    params->has_max_cpu_throttle = true;
    params->has_announce_initial = true;
    params->has_announce_max = true;
    params->has_announce_rounds = true;
    params->has_announce_step = true;

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
}

/*
 * Return true if the check passes, false otherwise.  The error will be
 * put in errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    /* Assuming all off */
    bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
    int i;

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (ms->enabled_capabilities[i]) {
            QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
        }
    }

    ret = migrate_caps_check(cap_list, head, errp);

    /* It works with head == NULL */
    qapi_free_MigrationCapabilityStatusList(head);

    return ret;
}
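/*
 * migration_object_check() is expected to run once during startup,
 * after any -global overrides have been applied, so that an invalid
 * combination of parameter defaults or capabilities is reported early
 * rather than at migrate time.
 */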
static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);
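/*
 * Since TYPE_MIGRATION hangs off TYPE_DEVICE (see the NOTE above), the
 * properties in migration_properties can be tweaked from the command
 * line with -global, for instance (hypothetical invocation):
 *
 *     qemu-system-x86_64 -global migration.multifd-channels=8 \
 *                        -global migration.send-section-footer=off ...
 */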