/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "options.h"

#define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling */

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1
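
/*
 * For reference: with the values above, DEFAULT_MIGRATE_X_CHECKPOINT_DELAY
 * evaluates to 200 * 100 = 20000 ms, i.e. 20 seconds between COLO
 * checkpoints, and the default XBZRLE cache is 64 MiB.
 */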

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to support
   dynamic creation of migrations. */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static bool migration_needs_multiple_sockets(void)
{
    return migrate_multifd() || migrate_postcopy_preempt();
}

static bool uri_supports_multi_channels(const char *uri)
{
    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
           strstart(uri, "vsock:", NULL);
}

static bool
migration_channels_and_uri_compatible(const char *uri, Error **errp)
{
    if (migration_needs_multiple_sockets() &&
        !uri_supports_multi_channels(uri)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    return true;
}

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);

    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be waiting
     * on a semaphore, so wake it up before migration shutdown.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_load_cleanup();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_events()) {
        qapi_event_send_migration(new_state);
    }
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the QEMUFile got an error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}
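
/*
 * For illustration, the resulting wire format of a return-path message
 * as framed above (a sketch derived from migrate_send_rp_message(), not
 * a separate protocol definition):
 *
 *   +-------------+------------+------------------+
 *   | type (be16) | len (be16) | data (len bytes) |
 *   +-------------+------------+------------------+
 *
 * e.g. a PONG is sent as type = MIG_RP_MSG_PONG, len = 4, followed by
 * one be32 value.
 */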

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   Start: Address offset within the RB
 *   Len: Length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We keep track of the last RAMBlock that we requested pages from.
     * Note that we don't need locking because this function will only
     * be called within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so
             * that things like g_tree_lookup() will return TRUE (1) when
             * found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}
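
/*
 * The URI forms accepted below mirror the outgoing side; for illustration
 * (argument shapes shown are an assumption of typical usage):
 *   tcp:<host>:<port>, unix:<path>, vsock:<cid>:<port>,
 *   rdma:<host>:<port> (with CONFIG_RDMA), exec:<command>, fd:<fd>.
 */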

static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    multifd_load_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to
             * advise) but managed to complete within the precopy period,
             * we can use the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* We get COLO info, and know whether we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
                           colo_process_incoming_thread, mis,
                           QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);

    multifd_load_cleanup();

    exit(EXIT_FAILURE);
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the rest
         * of the threads will still be waiting), so that we can receive
         * commands from the source now, and answer them if needed.  The
         * rest of the threads will be woken up afterwards, once we are
         * sure that the source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

/*
 * Returns true when we want to start a new incoming migration process,
 * false otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_multifd()) {
        return migration_has_all_channels();
    }

    /* Preempt channel only starts when the main channel is created */
    if (migrate_postcopy_preempt()) {
        return main_channel;
    }

    /*
     * For all the rest types of migration, we should only reach here when
     * it's the main channel that's being created, and we should always
     * proceed with this channel.
     */
    assert(main_channel);
    return true;
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    bool default_channel = true;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (migrate_multifd() && !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /*
         * With multiple channels, it is possible that we receive channels
         * out of order on the destination side, causing incorrect mapping
         * of the source channels.  Check the channel MAGIC to decide the
         * channel type.  Note this is best effort: the postcopy preempt
         * channel does not send any magic number, so avoid it for postcopy
         * live migration.  Also, TLS live migration already does a TLS
         * handshake while initializing the main channel, so with TLS this
         * issue does not exist.
         */
        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                          sizeof(channel_magic), &local_err);

        if (ret != 0) {
            error_propagate(errp, local_err);
            return;
        }

        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

    if (multifd_load_setup(errp) != 0) {
        error_setg(errp, "Failed to setup multifd channels");
        return;
    }

    if (default_channel) {
        f = qemu_file_new_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_multifd()) {
            multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (migration_should_start_incoming(default_channel)) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part.  It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MIG_RP_MSG_RECV_BITMAP is only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (fault thread is still paused),
     * and it's OK even without taking the mutex.  However the best way
     * is to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}
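
/*
 * Note: the two predicates below accept the same set of states except
 * for their last entry: migration_is_setup_or_active() also counts
 * COLO, while migration_is_running() counts CANCELLING instead.
 */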

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;
    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;
    }
}

static bool migrate_show_downtime(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;

    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
    }

    if (migrate_show_downtime(s)) {
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = stat64_get(&ram_counters.transferred);
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&ram_counters.zero_pages);
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = stat64_get(&ram_counters.normal_pages);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count =
        stat64_get(&ram_counters.dirty_sync_count);
    info->ram->dirty_sync_missed_zero_copy =
        stat64_get(&ram_counters.dirty_sync_missed_zero_copy);
    info->ram->postcopy_requests =
        stat64_get(&ram_counters.postcopy_requests);
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes);
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes);
    info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes);
    info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes);

    if (migrate_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_compress()) {
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked;
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}
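
/*
 * For illustration, qmp_query_migrate() backs the QMP command
 *   {"execute": "query-migrate"}
 * whose reply carries the status plus the ram/disk/xbzrle blocks
 * populated above, e.g. {"return": {"status": "active", "ram": {...}}}
 * (reply shape sketched, not exhaustive).
 */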

void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * we don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}

static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    g_free(s->hostname);
    s->hostname = NULL;
    json_writer_free(s->vmdesc);
    s->vmdesc = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    if (s->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
        qemu_fclose(s->postcopy_qemufile_src);
        s->postcopy_qemufile_src = NULL;
    }

    assert(!migration_is_active(s));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for the bh, because it may be called when
     * there are already no other refs
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
    MigrationState *s = opaque;
    migrate_fd_cleanup(s);
    object_unref(OBJECT(s));
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (!s->error) {
        s->error = error_copy(error);
    }
}

static void migrate_error_free(MigrationState *s)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        error_free(s->error);
        s->error = NULL;
    }
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}

static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shutdown the rp socket, which causes the rp thread to exit */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    do {
        old_state = s->state;
        if (!migration_is_running(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        return true;
    default:
        return false;
    }
}

bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}

bool migration_in_incoming_postcopy(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
}

bool migration_incoming_postcopy_advised(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

bool migration_in_bg_snapshot(void)
{
    MigrationState *s = migrate_get_current();

    return migrate_background_snapshot() &&
           migration_is_setup_or_active(s->state);
}

bool migration_is_idle(void)
{
    MigrationState *s = current_migration;

    if (!s) {
        return true;
    }

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
        return false;
    case MIGRATION_STATUS__MAX:
        g_assert_not_reached();
    }

    return false;
}

bool migration_is_active(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

void migrate_init(MigrationState *s)
{
    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->cleanup_bh = 0;
    s->vm_start_bh = 0;
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->pages_per_second = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;
    s->hostname = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->total_time = 0;
    s->vm_was_running = false;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
}

int migrate_add_blocker_internal(Error *reason, Error **errp)
{
    /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
    if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
        error_propagate_prepend(errp, error_copy(reason),
                                "disallowing migration blocker "
                                "(migration/snapshot in progress) for: ");
        return -EBUSY;
    }

    migration_blockers = g_slist_prepend(migration_blockers, reason);
    return 0;
}

int migrate_add_blocker(Error *reason, Error **errp)
{
    if (only_migratable) {
        error_propagate_prepend(errp, error_copy(reason),
                                "disallowing migration blocker "
                                "(--only-migratable) for: ");
        return -EACCES;
    }

    return migrate_add_blocker_internal(reason, errp);
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }
    if (!runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "'-incoming' was not specified on the command line");
        return;
    }

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

void qmp_migrate_recover(const char *uri, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    /*
     * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
     * callers (no one should ignore a recover failure); if there is, it's a
     * programming error.
     */
    assert(errp);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
        error_setg(errp, "Migrate recover can only be run "
                   "when postcopy is paused.");
        return;
    }

    /* If there's an existing transport, release it */
    migration_incoming_transport_cleanup(mis);

    /*
     * Note that this call will never start a real migration; it will
     * only re-setup the migration stream and poke existing migration
     * to continue using that newly established channel.
     */
    qemu_start_incoming_migration(uri, errp);
}

void qmp_migrate_pause(Error **errp)
{
    MigrationState *ms = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /* Source side, during postcopy */
        qemu_mutex_lock(&ms->qemu_file_lock);
        ret = qemu_file_shutdown(ms->to_dst_file);
        qemu_mutex_unlock(&ms->qemu_file_lock);
        if (ret) {
            error_setg(errp, "Failed to pause source migration");
        }
        return;
    }

    if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        ret = qemu_file_shutdown(mis->from_src_file);
        if (ret) {
            error_setg(errp, "Failed to pause destination migration");
        }
        return;
    }

    error_setg(errp, "migrate-pause is currently only supported "
               "during postcopy-active state");
}
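
/*
 * For illustration, an assumed QMP sequence driving the pause/recover
 * path above (the destination URI is a placeholder):
 *   {"execute": "migrate-pause"}
 *   {"execute": "migrate-recover",
 *    "arguments": {"uri": "tcp:<host>:<port>"}}
 */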

bool migration_is_blocked(Error **errp)
{
    if (qemu_savevm_state_blocked(errp)) {
        return true;
    }

    if (migration_blockers) {
        error_propagate(errp, error_copy(migration_blockers->data));
        return true;
    }

    return false;
}

/* Returns true if we should continue the migration, or false on error */
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
                            bool resume, Error **errp)
{
    Error *local_err = NULL;

    if (resume) {
        if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
            error_setg(errp, "Cannot resume if there is no "
                       "paused migration");
            return false;
        }

        /*
         * Postcopy recovery won't work well with release-ram
         * capability since release-ram will drop the page buffer as
         * long as the page is put into the send buffer.  So if there
         * is a network failure while in postcopy, any page buffers that
         * have not yet reached the destination VM but have already been
         * sent from the source VM will be lost forever.  Let's refuse
         * the client from resuming such a postcopy migration.
         * Luckily release-ram was designed to only be used when src
         * and destination VMs are on the same host, so it should be
         * fine.
         */
        if (migrate_release_ram()) {
            error_setg(errp, "Postcopy recovery cannot work "
                       "when release-ram capability is set");
            return false;
        }

        /* This is a resume, skip init status */
        return true;
    }

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return false;
    }

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return false;
    }

    if (runstate_check(RUN_STATE_POSTMIGRATE)) {
        error_setg(errp, "Can't migrate a vm that was paused due to "
                   "a previous migration");
        return false;
    }

    if (migration_is_blocked(errp)) {
        return false;
    }

    if (blk || blk_inc) {
        if (migrate_colo()) {
            error_setg(errp, "No disk migration is required in COLO mode");
            return false;
        }
        if (migrate_block() || migrate_block_incremental()) {
            error_setg(errp, "Command options are incompatible with "
                       "current migration capabilities");
            return false;
        }
        if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) {
            error_propagate(errp, local_err);
            return false;
        }
        s->must_remove_block_options = true;
    }

    if (blk_inc) {
        migrate_set_block_incremental(s, true);
    }

    migrate_init(s);
    /*
     * Zero the ram_counters and compression_counters memory for a
     * new migration.
     */
    memset(&ram_counters, 0, sizeof(ram_counters));
    memset(&compression_counters, 0, sizeof(compression_counters));

    return true;
}

void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 bool has_resume, bool resume, Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
                         has_resume && resume, errp)) {
        /* Error detected, put into errp */
        return;
    }

    if (!(has_resume && resume)) {
        if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
            return;
        }
    }

    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_outgoing_migration(s, p ? p : uri, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
    } else {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        block_cleanup_parameters(s);
        return;
    }

    if (local_err) {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        migrate_fd_error(s, local_err);
        error_propagate(errp, local_err);
        return;
    }
}

void qmp_migrate_cancel(Error **errp)
{
    migration_cancel(NULL);
}

void qmp_migrate_continue(MigrationStatus state, Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (s->state != state) {
        error_setg(errp, "Migration not in expected state: %s",
                   MigrationStatus_str(s->state));
        return;
    }
    qemu_sem_post(&s->pause_sem);
}

/* migration thread support */
/*
 * Something bad happened to the RP stream, mark an error.
 * The caller shall print or trace something to indicate why.
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}

static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]      = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]         = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]         = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]    = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]  = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]   = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_MAX]          = { .len = -1, .name = "MAX" },
};
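
/*
 * A sketch of how the fixed lengths above map onto payloads: SHUT, PONG
 * and RESUME_ACK each carry a single be32 value (4 bytes); REQ_PAGES
 * carries start (be64) + len (be32) = 12 bytes; the variable-length
 * messages (len = -1) prepend a one-byte idstr length, so REQ_PAGES_ID
 * is 12 + 1 + idstr bytes and the RECV_BITMAP header is 1 + idstr bytes,
 * followed by the bitmap dumped separately on the stream.
 */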

/*
 * Process a request for pages received on the return path,
 * We're allowed to send more than requested (e.g. to round to our page size)
 * and we don't need to send pages that have already been sent.
 */
static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname,
                                        ram_addr_t start, size_t len)
{
    long our_host_ps = qemu_real_host_page_size();

    trace_migrate_handle_rp_req_pages(rbname, start, len);

    /*
     * Since we currently insist on matching page sizes, just sanity check
     * we're being asked for whole host pages.
     */
    if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
        !QEMU_IS_ALIGNED(len, our_host_ps)) {
        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
                     " len: %zd", __func__, start, len);
        mark_source_rp_bad(ms);
        return;
    }

    if (ram_save_queue_pages(rbname, start, len)) {
        mark_source_rp_bad(ms);
    }
}

/* Return true to retry, false to quit */
static bool postcopy_pause_return_path_thread(MigrationState *s)
{
    trace_postcopy_pause_return_path();

    qemu_sem_wait(&s->postcopy_pause_rp_sem);

    trace_postcopy_pause_return_path_continued();

    return true;
}

static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);

    if (!block) {
        error_report("%s: invalid block name '%s'", __func__, block_name);
        return -EINVAL;
    }

    /* Fetch the received bitmap and refresh the dirty bitmap */
    return ram_dirty_bitmap_reload(s, block);
}

static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
{
    trace_source_return_path_thread_resume_ack(value);

    if (value != MIGRATION_RESUME_ACK_VALUE) {
        error_report("%s: illegal resume_ack value %"PRIu32,
                     __func__, value);
        return -1;
    }

    /* Now both sides are active. */
    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    /* Notify the send thread that it's time to continue sending pages */
    qemu_sem_post(&s->rp_state.rp_sem);

    return 0;
}

/*
 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if it
 * exists) in a safe way.
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within the lock section
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same to the postcopy fast path socket too, if there is one.
     * No locking needed because this qemufile should only be managed by
     * the return path thread.
/*
 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if it
 * exists) in a safe way.
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within the lock section
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same for the postcopy fast path socket, if there is one.
     * No locking needed because this qemufile should only be managed by
     * the return path thread.
     */
    if (ms->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
        qemu_file_shutdown(ms->postcopy_qemufile_src);
        qemu_fclose(ms->postcopy_qemufile_src);
        ms->postcopy_qemufile_src = NULL;
    }

    qemu_fclose(file);
}

/*
 * Handles messages sent on the return path towards the source VM
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    rcu_register_thread();

retry:
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            mark_source_rp_bad(ms);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with "
                         "incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type,
                         header_len, (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP.
             * We could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            tmp32 = ldl_be_p(buf);
            trace_source_return_path_thread_pong(tmp32);
            qemu_sem_post(&ms->rp_state.rp_pong_acks);
            break;

        case MIG_RP_MSG_REQ_PAGES:
            start = ldq_be_p(buf);
            len = ldl_be_p(buf + 8);
            migrate_handle_rp_req_pages(ms, NULL, start, len);
            break;

        case MIG_RP_MSG_REQ_PAGES_ID:
            expected_len = 12 + 1; /* header + termination */

            if (header_len >= expected_len) {
                start = ldq_be_p(buf);
                len = ldl_be_p(buf + 8);
                /* Now we expect an idstr */
                tmp32 = buf[12]; /* Length of the following idstr */
                buf[13 + tmp32] = '\0';
                expected_len += tmp32;
            }
            if (header_len != expected_len) {
                error_report("RP: Req_Page_id with length %d expecting %zd",
                             header_len, expected_len);
                mark_source_rp_bad(ms);
                goto out;
            }
            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
            break;

        case MIG_RP_MSG_RECV_BITMAP:
            if (header_len < 1) {
                error_report("%s: missing block name", __func__);
                mark_source_rp_bad(ms);
                goto out;
            }
            /* Format: len (1B) + idstr (<255B). This ends the idstr. */
            buf[buf[0] + 1] = '\0';
            if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
                mark_source_rp_bad(ms);
                goto out;
            }
            break;

        case MIG_RP_MSG_RESUME_ACK:
            tmp32 = ldl_be_p(buf);
            if (migrate_handle_rp_resume_ack(ms, tmp32)) {
                mark_source_rp_bad(ms);
                goto out;
            }
            break;

        default:
            break;
        }
    }

out:
    res = qemu_file_get_error(rp);
    if (res) {
        if (migration_in_postcopy()) {
            /*
             * Maybe there is something we can do: it looks like a
             * network-down issue, so we pause for a recovery.
             */
            migration_release_dst_files(ms);
            rp = NULL;
            if (postcopy_pause_return_path_thread(ms)) {
                /*
                 * Reload rp, reset the rest.  Referencing it is safe since
                 * it's reset only by us above, or when migration completes.
                 */
                rp = ms->rp_state.from_dst_file;
                ms->rp_state.error = false;
                goto retry;
            }
        }

        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
    migration_release_dst_files(ms);
    rcu_unregister_thread();
    return NULL;
}

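/*
 * Worked example for the variable-length REQ_PAGES_ID payload parsed
 * above (values invented): a request for 8 KiB at offset 0x0 in a
 * RAMBlock named "pc.ram" carries
 *
 *   bytes  0..7    start = 0x0     (be64)
 *   bytes  8..11   len   = 0x2000  (be32)
 *   byte   12      idstr length = 6
 *   bytes 13..18   "pc.ram" (not NUL terminated on the wire; the
 *                  receiver writes the terminating '\0' itself)
 *
 * so header_len is 13 + 6 = 19, matching expected_len.
 */
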
static int open_return_path_on_source(MigrationState *ms,
                                      bool create_thread)
{
    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
    if (!ms->rp_state.from_dst_file) {
        return -1;
    }

    trace_open_return_path_on_source();

    if (!create_thread) {
        /* We're done */
        return 0;
    }

    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
    ms->rp_state.rp_thread_created = true;

    trace_open_return_path_on_source_continue();

    return 0;
}

/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
static int await_return_path_close_on_source(MigrationState *ms)
{
    /*
     * If this is a normal exit then the destination will send a SHUT
     * and the rp_thread will exit; however, if there's an error we
     * need to cause it to exit.
     */
    if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
        /*
         * shutdown(2), if we have it, will cause it to unblock if it's stuck
         * waiting for the destination.
         */
        qemu_file_shutdown(ms->rp_state.from_dst_file);
        mark_source_rp_bad(ms);
    }
    trace_await_return_path_close_on_source_joining();
    qemu_thread_join(&ms->rp_state.rp_thread);
    ms->rp_state.rp_thread_created = false;
    trace_await_return_path_close_on_source_close();
    return ms->rp_state.error;
}

static inline void
migration_wait_main_channel(MigrationState *ms)
{
    /* Wait until one PONG message is received */
    qemu_sem_wait(&ms->rp_state.rp_pong_acks);
}

/*
 * Switch from normal iteration to postcopy.
 * Returns non-0 on error.
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;

    if (migrate_postcopy_preempt()) {
        migration_wait_main_channel(ms);
        if (postcopy_preempt_establish_channel(ms)) {
            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
            return -1;
        }
    }

    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * In "finish migrate" state, with the io-lock held, everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw away any pages it's already
     * received that are dirty.
     */
    if (migrate_postcopy_ram()) {
        ram_postcopy_send_discard_bitmap(ms);
    }

    /*
     * Send the rest of the state - note that things doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here.
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

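    /*
     * Illustrative sketch of the framing produced by the packaging below
     * (exact command byte values omitted; see qemu_savevm_send_packaged()
     * for the authoritative encoding):
     *
     *   ... stream so far ...
     *   MIG_CMD_PACKAGED command
     *   be32 length of the package
     *   <length> bytes: a complete sub-stream holding the LISTEN
     *       command, the remaining device state and the RUN command
     *
     * The destination can therefore read the exact size of the blob
     * before it starts parsing any device state.
     */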
    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each device's load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the
     * start; to do this we use a buffer-backed QEMUFile to hold the whole
     * of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /*
     * Last point of recovery; as soon as we send the package the
     * destination can open devices and potentially start running.
     * Let's just check again that we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /*
     * Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in
     * particular spice needs to trigger a transition now.
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    }

    trace_postcopy_preempt_enabled(migrate_postcopy_preempt());

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /*
         * A failure happened early enough that we know the destination
         * hasn't accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}

/**
 * migration_maybe_pause: Pause if required to by
 * migrate_pause_before_switchover; called with the iothread locked.
 * Returns: 0 on success
 */
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state)
{
    if (!migrate_pause_before_switchover()) {
        return 0;
    }

    /*
     * Since leaving this state is not atomic with posting the semaphore
     * it's possible that someone could have issued multiple migrate_continue
     * and the semaphore is incorrectly positive at this point;
     * the docs say it's undefined to reinit a semaphore that's already
     * init'd, so use timedwait to eat up any existing posts.
     */
    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
        /* This block intentionally left blank */
    }

    /*
     * If the migration is cancelled when it is in the completion phase,
     * the migration state is set to MIGRATION_STATUS_CANCELLING.
     * So we don't need to wait on the semaphore; otherwise we would
     * always wait for the 'pause_sem' semaphore.
     */
    if (s->state != MIGRATION_STATUS_CANCELLING) {
        qemu_mutex_unlock_iothread();
        migrate_set_state(&s->state, *current_active_state,
                          MIGRATION_STATUS_PRE_SWITCHOVER);
        qemu_sem_wait(&s->pause_sem);
        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                          new_state);
        *current_active_state = new_state;
        qemu_mutex_lock_iothread();
    }

    return s->state == new_state ? 0 : -EINVAL;
}

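/*
 * Hypothetical QMP flow for the pre-switchover pause above (the command
 * and capability names are real; the session is illustrative):
 *
 *   {"execute": "migrate-set-capabilities", "arguments":
 *       {"capabilities": [{"capability": "pause-before-switchover",
 *                          "state": true}]}}
 *   ... migration runs, then parks in state "pre-switchover" ...
 *   {"execute": "migrate-continue",
 *    "arguments": {"state": "pre-switchover"}}
 *
 * qmp_migrate_continue() checks the state and posts pause_sem, which
 * migration_maybe_pause() is waiting on.
 */
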
/**
 * migration_completion: Used by migration_thread when there's not much left.
 * The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
        s->vm_was_running = runstate_is_running();
        ret = global_state_store();

        if (!ret) {
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            trace_migration_completion_vm_stop(ret);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                s->block_inactive = !migrate_colo();
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         s->block_inactive);
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_mutex_lock_iothread();
        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        qemu_mutex_unlock_iothread();

        /*
         * Shutdown the postcopy fast path thread.  This is only needed
         * when the dest QEMU binary is old (7.1/7.2).  QEMU 8.0+ doesn't
         * need this.
         */
        if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
            postcopy_preempt_shutdown_file(s);
        }

        trace_migration_completion_postcopy_end_after_complete();
    } else {
        goto fail;
    }

    /*
     * If the rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.rp_thread_created) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
        /* COLO does not support postcopy */
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_COLO);
    } else {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /*
     * If not doing postcopy, vm_start() will be called: let's regain
     * control of the images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_DEVICE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            s->block_inactive = true;
        } else {
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}

/**
 * bg_migration_completion: Used by bg_migration_thread after all the
 * RAM has been saved.  The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 */
static void bg_migration_completion(MigrationState *s)
{
    int current_active_state = s->state;

    /*
     * Stop tracking RAM writes - un-protect memory, un-register UFFD
     * memory ranges, flush kernel wait queues and wake up threads
     * waiting for write faults to be resolved.
     */
    ram_write_tracking_stop();

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        /*
         * By this moment we have RAM content saved into the migration stream.
         * The next step is to flush the non-RAM content (device state)
         * right after the RAM content.  The device state has been stored
         * into the temporary buffer before RAM saving started.
         */
        qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
        qemu_fflush(s->to_dst_file);
    } else if (s->state == MIGRATION_STATUS_CANCELLING) {
        goto fail;
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail;
    }

    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_COMPLETED);
    return;

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}

typedef enum MigThrError {
    /* No error detected */
    MIG_THR_ERR_NONE = 0,
    /* Detected error, but resumed successfully */
    MIG_THR_ERR_RECOVERED = 1,
    /* Detected fatal error, need to exit */
    MIG_THR_ERR_FATAL = 2,
} MigThrError;

static int postcopy_resume_handshake(MigrationState *s)
{
    qemu_savevm_send_postcopy_resume(s->to_dst_file);

    while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
        qemu_sem_wait(&s->rp_state.rp_sem);
    }

    if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        return 0;
    }

    return -1;
}

/* Return zero on success, or <0 for error */
static int postcopy_do_resume(MigrationState *s)
{
    int ret;

    /*
     * Call all the resume_prepare() hooks, so that modules can be
     * ready for the migration resume.
     */
    ret = qemu_savevm_state_resume_prepare(s);
    if (ret) {
        error_report("%s: resume_prepare() failure detected: %d",
                     __func__, ret);
        return ret;
    }

    /*
     * If preempt is enabled, re-establish the preempt channel.  Note that
     * we do it after resume prepare to make sure the main channel will be
     * created before the preempt channel.  E.g. with a weak network, the
     * dest QEMU may get messed up with the preempt and main channels on
     * the order of connection setup.  This guarantees the correct order.
     */
    ret = postcopy_preempt_establish_channel(s);
    if (ret) {
        error_report("%s: postcopy_preempt_establish_channel(): %d",
                     __func__, ret);
        return ret;
    }

    /*
     * Last handshake with destination on the resume (destination will
     * switch to postcopy-active afterwards)
     */
    ret = postcopy_resume_handshake(s);
    if (ret) {
        error_report("%s: handshake failed: %d", __func__, ret);
        return ret;
    }

    return 0;
}

/*
 * We don't return until we are in a safe state to continue the current
 * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
 * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
 */
static MigThrError postcopy_pause(MigrationState *s)
{
    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

    while (true) {
        QEMUFile *file;

        /*
         * The current channel is possibly broken.  Release it.  Note that
         * this is guaranteed even without the lock because to_dst_file
         * should only be modified by the migration thread.  That also
         * guarantees that the unregister of yank is safe too without the
         * lock.  It should be safe even to be within the qemu_file_lock,
         * but we didn't do that to avoid taking more mutexes (yank_lock)
         * within qemu_file_lock.  TL;DR: we make the qemu_file_lock
         * critical section as small as possible.
         */
        assert(s->to_dst_file);
        migration_ioc_unregister_yank_from_file(s->to_dst_file);
        qemu_mutex_lock(&s->qemu_file_lock);
        file = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);

        qemu_file_shutdown(file);
        qemu_fclose(file);

        migrate_set_state(&s->state, s->state,
                          MIGRATION_STATUS_POSTCOPY_PAUSED);

        error_report("Detected IO failure for postcopy. "
                     "Migration paused.");

        /*
         * We wait until things are fixed up.  Then someone will set the
         * status back for us.
         */
        while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
            qemu_sem_wait(&s->postcopy_pause_sem);
        }

        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
            /* Woken up by a recover procedure. Give it a shot */

            /*
             * Firstly, let's wake up the return path now, with a new
             * return path channel.
             */
            qemu_sem_post(&s->postcopy_pause_rp_sem);

            /* Do the resume logic */
            if (postcopy_do_resume(s) == 0) {
                /* Let's continue! */
                trace_postcopy_pause_continued();
                return MIG_THR_ERR_RECOVERED;
            } else {
                /*
                 * Something wrong happened during the recovery; let's
                 * pause again.  Pause is always better than throwing
                 * data away.
                 */
                continue;
            }
        } else {
            /* This is not right... Time to quit. */
            return MIG_THR_ERR_FATAL;
        }
    }
}

static MigThrError migration_detect_error(MigrationState *s)
{
    int ret;
    int state = s->state;
    Error *local_error = NULL;

    if (state == MIGRATION_STATUS_CANCELLING ||
        state == MIGRATION_STATUS_CANCELLED) {
        /* End the migration, but don't set the state to failed */
        return MIG_THR_ERR_FATAL;
    }

    /*
     * Try to detect any file errors.  Note that postcopy_qemufile_src will
     * be NULL when postcopy preempt is not enabled.
     */
    ret = qemu_file_get_error_obj_any(s->to_dst_file,
                                      s->postcopy_qemufile_src,
                                      &local_error);
    if (!ret) {
        /* Everything is fine */
        assert(!local_error);
        return MIG_THR_ERR_NONE;
    }

    if (local_error) {
        migrate_set_error(s, local_error);
        error_free(local_error);
    }

    if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
        /*
         * For postcopy, we allow the network to be down for a
         * while.  After that, it can be continued by a
         * recovery phase.
         */
        return postcopy_pause(s);
    } else {
        /*
         * For precopy (or postcopy with an error outside IO), we fail
         * immediately.
         */
        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
        trace_migration_thread_file_err();

        /* Time to stop the migration, now. */
        return MIG_THR_ERR_FATAL;
    }
}

/* How many bytes have we transferred since the beginning of the migration */
static uint64_t migration_total_bytes(MigrationState *s)
{
    return qemu_file_total_transferred(s->to_dst_file) +
        stat64_get(&ram_counters.multifd_bytes);
}

static void migration_calculate_complete(MigrationState *s)
{
    uint64_t bytes = migration_total_bytes(s);
    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t transfer_time;

    s->total_time = end_time - s->start_time;
    if (!s->downtime) {
        /*
         * It's still not set, so we are a precopy migration.  For
         * postcopy, downtime is calculated during postcopy_start().
         */
        s->downtime = end_time - s->downtime_start;
    }

    transfer_time = s->total_time - s->setup_time;
    if (transfer_time) {
        s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
    }
}

static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields at the same time to avoid mismatched data
     * leading to wrong speed calculations.
     */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->iteration_initial_bytes = migration_total_bytes(s);
    s->iteration_initial_pages = ram_get_total_transferred_pages();
}

static void migration_update_counters(MigrationState *s,
                                      int64_t current_time)
{
    uint64_t transferred, transferred_pages, time_spent;
    uint64_t current_bytes; /* bytes transferred since the beginning */
    double bandwidth;

    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
        return;
    }

    current_bytes = migration_total_bytes(s);
    transferred = current_bytes - s->iteration_initial_bytes;
    time_spent = current_time - s->iteration_start_time;
    bandwidth = (double)transferred / time_spent;
    s->threshold_size = bandwidth * s->parameters.downtime_limit;

    s->mbps = (((double) transferred * 8.0) /
               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;

    transferred_pages = ram_get_total_transferred_pages() -
                        s->iteration_initial_pages;
    s->pages_per_second = (double) transferred_pages /
                          (((double) time_spent / 1000.0));

    /*
     * If we haven't sent anything, we don't want to
     * recalculate. 10000 is a small enough number for our purposes.
     */
    if (ram_counters.dirty_pages_rate && transferred > 10000) {
        s->expected_downtime = ram_counters.remaining / bandwidth;
    }

    qemu_file_reset_rate_limit(s->to_dst_file);

    update_iteration_initial_status(s);

    trace_migrate_transferred(transferred, time_spent,
                              bandwidth, s->threshold_size);
}

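/*
 * Worked example for the arithmetic above (numbers invented): if an
 * iteration window of time_spent = 100 ms moved transferred = 125 MB,
 * then bandwidth = 125e6 / 100 = 1.25e6 bytes per millisecond.  With
 * downtime_limit = 300 ms, threshold_size = 1.25e6 * 300 = 375 MB:
 * once the remaining dirty data fits in 375 MB, switchover is deemed
 * affordable.  The same window gives mbps = (125e6 * 8) / 0.1 / 1e6 =
 * 10000, i.e. a 10 Gbps link running flat out.
 */
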
/* Migration thread iteration status */
typedef enum {
    MIG_ITERATE_RESUME, /* Resume current iteration */
    MIG_ITERATE_SKIP,   /* Skip current iteration */
    MIG_ITERATE_BREAK,  /* Break the loop */
} MigIterateState;

/*
 * Return MIG_ITERATE_RESUME to continue to the next iteration directly,
 * or another MigIterateState otherwise.
 */
static MigIterateState migration_iteration_run(MigrationState *s)
{
    uint64_t must_precopy, can_postcopy;
    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;

    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
    uint64_t pending_size = must_precopy + can_postcopy;

    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);

    if (must_precopy <= s->threshold_size) {
        qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
        pending_size = must_precopy + can_postcopy;
        trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
    }

    if (!pending_size || pending_size < s->threshold_size) {
        trace_migration_thread_low_pending(pending_size);
        migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    /* Still a significant amount to transfer */
    if (!in_postcopy && must_precopy <= s->threshold_size &&
        qatomic_read(&s->start_postcopy)) {
        if (postcopy_start(s)) {
            error_report("%s: postcopy failed to start", __func__);
        }
        return MIG_ITERATE_SKIP;
    }

    /* Just another iteration step */
    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
    return MIG_ITERATE_RESUME;
}

static void migration_iteration_finish(MigrationState *s)
{
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
        break;
    case MIGRATION_STATUS_COLO:
        if (!migrate_colo()) {
            error_report("%s: critical error: calling COLO code without "
                         "COLO enabled", __func__);
        }
        migrate_start_colo_process(s);
        s->vm_was_running = true;
        /* Fallthrough */
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        if (s->vm_was_running) {
            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                vm_start();
            }
        } else {
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }
    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}

static void bg_migration_iteration_finish(MigrationState *s)
{
    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        break;

    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }

    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}

/*
 * Return MIG_ITERATE_RESUME to continue to the next iteration directly,
 * or another MigIterateState otherwise.
 */
static MigIterateState bg_migration_iteration_run(MigrationState *s)
{
    int res;

    res = qemu_savevm_state_iterate(s->to_dst_file, false);
    if (res > 0) {
        bg_migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}

void migration_make_urgent_request(void)
{
    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
}

void migration_consume_urgent_request(void)
{
    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
}

/* Returns true if the rate limiting was broken by an urgent request */
bool migration_rate_limit(void)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    MigrationState *s = migrate_get_current();

    bool urgent = false;
    migration_update_counters(s, now);
    if (qemu_file_rate_limit(s->to_dst_file)) {

        if (qemu_file_get_error(s->to_dst_file)) {
            return false;
        }
        /*
         * Wait for a delay to do rate limiting OR
         * something urgent to post the semaphore.
         */
        int ms = s->iteration_start_time + BUFFER_DELAY - now;
        trace_migration_rate_limit_pre(ms);
        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
            /*
             * We were woken by one or more urgent things but
             * the timedwait will have consumed one of them.
             * The service routine for the urgent wake will dec
             * the semaphore itself for each item it consumes,
             * so add back the one we just ate.
             */
            qemu_sem_post(&s->rate_limit_sem);
            urgent = true;
        }
        trace_migration_rate_limit_post(urgent);
    }
    return urgent;
}

/*
 * If failover devices are present, wait until they are completely
 * unplugged.
 */
static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                    int new_state)
{
    if (qemu_savevm_state_guest_unplug_pending()) {
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);

        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
               qemu_savevm_state_guest_unplug_pending()) {
            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
        }
        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
            int timeout = 120; /* 30 seconds */
            /*
             * Migration has been cancelled, but as we have started an
             * unplug we must wait for it to finish in order to be able
             * to plug the card back in.
             */
            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
            }
            if (qemu_savevm_state_guest_unplug_pending() &&
                !qtest_enabled()) {
                warn_report("migration: partially unplugged device on "
                            "failure");
            }
        }

        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
    } else {
        migrate_set_state(&s->state, old_state, new_state);
    }
}

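/*
 * Sketch of the urgent-request pattern used with migration_rate_limit()
 * above (illustrative, not a new API): a producer that must get data
 * out immediately, e.g. a postcopy page request, calls
 *
 *     migration_make_urgent_request();   // posts rate_limit_sem
 *
 * which wakes the qemu_sem_timedwait() in migration_rate_limit() early;
 * the service routine for the urgent work then calls
 * migration_consume_urgent_request() once per item it handles, keeping
 * the semaphore balanced.
 */
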
/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    MigrationThread *thread = NULL;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    MigThrError thr_error;
    bool urgent = false;

    thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());

    rcu_register_thread();

    object_ref(OBJECT(s));
    update_iteration_initial_status(s);

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure dst has it
     * opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo()) {
        /* Notify migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from, e.g., a network failure; reset all
             * the local variables.  This is important to avoid breaking
             * the transferred_bytes and bandwidth calculations.
             */
            update_iteration_initial_status(s);
        }

        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    MigrationThreadDel(thread);
    return NULL;
}

static void bg_migration_vm_start_bh(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->vm_start_bh);
    s->vm_start_bh = NULL;

    vm_start();
    s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
}

/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of the live migration mechanism
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel.  Compared to existing dirty page logging
 * migration, much less stream traffic is produced, resulting in smaller
 * snapshot images, simply because no page duplicates can get into the
 * stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, unlike
 * the dirty page logging mechanism, which effectively results in the
 * saved snapshot being the state of the VM at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;

    rcu_register_thread();
    object_ref(OBJECT(s));

    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save the vmstate for the moment when migration has been
     * initiated, but we also want to save RAM content while the VM is
     * running.  The RAM content should appear first in the vmstate.  So,
     * we first stash the non-RAM part of the vmstate to a temporary
     * buffer, then write the RAM part of the vmstate to the migration
     * stream with vCPUs running and, finally, write the stashed non-RAM
     * part of the vmstate from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
    object_unref(OBJECT(s->bioc));

    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    qemu_savevm_state_header(s->to_dst_file);
    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();

    /*
     * If the VM is currently in a suspended state, then, to make a valid
     * runstate transition in vm_stop_force_state(), we need to wake it up.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    s->vm_was_running = runstate_is_running();

    if (global_state_store()) {
        goto fail;
    }
    /* Forcibly stop the VM before saving the state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
    }
    /*
     * Put vCPUs in sync with shadow context structures, then
     * save their state to the channel-buffer along with the devices.
     */
    cpu_synchronize_all_states();
    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, an explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize the UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;

    /*
     * Start the VM from the BH handler to avoid a write-fault lock here.
     * UFFD-WP protection for the whole RAM is already enabled, so
     * calling VM state change notifiers from vm_start() would initiate
     * writes to virtio VQ memory which is in the write-protected region.
     */
    s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
    qemu_bh_schedule(s->vm_start_bh);

    qemu_mutex_unlock_iothread();

    while (migration_is_active(s)) {
        MigIterateState iter_state = bg_migration_iteration_run(s);
        if (iter_state == MIG_ITERATE_SKIP) {
            continue;
        } else if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        qemu_mutex_unlock_iothread();
    }

    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}

void migrate_fd_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    int64_t rate_limit;
    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile if migration completes successfully, there won't be an
     * error dumped when calling migrate_fd_cleanup().
     */
    migrate_error_free(s);

    s->expected_downtime = s->parameters.downtime_limit;
    if (resume) {
        assert(s->cleanup_bh);
    } else {
        assert(!s->cleanup_bh);
        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
    }
    if (error_in) {
        migrate_fd_error(s, error_in);
        if (resume) {
            /*
             * Don't do cleanup for resume if the channel is invalid, but
             * only dump the error.  We wait for another channel connect
             * from the user.  The error_report still gives the HMP user a
             * hint on what failed.  It's normally done in
             * migrate_fd_cleanup(), but call it here explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migrate_fd_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = migrate_max_postcopy_bandwidth() /
            XFER_LIMIT_RATIO;
    } else {
        /* This is a fresh new migration */
        rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO;

        /* Notify before starting migration thread */
        notifier_list_notify(&migration_state_notifiers, s);
    }

    qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
    qemu_file_set_blocking(s->to_dst_file, true);

    /*
     * Open the return path.  For postcopy, it is used exclusively.  For
     * precopy, only if the user specified the "return-path" capability
     * would QEMU use the return path.
     */
    if (migrate_postcopy_ram() || migrate_return_path()) {
        if (open_return_path_on_source(s, !resume)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

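    /*
     * Illustrative recovery flow that re-enters this function with
     * resume == true (the command names are real QMP commands; the URIs
     * are invented for the example):
     *
     *   destination: {"execute": "migrate-recover",
     *                 "arguments": {"uri": "tcp:0:4444"}}
     *   source:      {"execute": "migrate",
     *                 "arguments": {"uri": "tcp:dst-host:4444",
     *                               "resume": true}}
     *
     * The source is in POSTCOPY_PAUSED at that point, so the resume
     * branch below only pokes the existing migration thread instead of
     * spawning a new one.
     */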
    /*
     * This needs to be done before resuming a postcopy.  Note: for newer
     * QEMUs we will delay the channel creation until postcopy_start(), to
     * avoid disorder of channel creations.
     */
    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
        postcopy_preempt_setup(s);
    }

    if (resume) {
        /* Wakeup the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }

    if (multifd_save_setup(&local_err) != 0) {
        error_report_err(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "bg_snapshot",
                           bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, "live_migration",
                           migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
}

#define DEFINE_PROP_MIG_CAP(name, x) \
    DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false)

static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),
    DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
                     decompress_error_check, true),
    DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                      clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
    DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
                     preempt_pre_7_2, false),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
                     parameters.compress_wait_thread, true),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                      parameters.throttle_trigger_threshold,
                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
                     parameters.cpu_throttle_tailslow, false),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                     parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                       parameters.downtime_limit,
                       DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                       parameters.x_checkpoint_delay,
                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_UINT8("multifd-channels", MigrationState,
                      parameters.multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
                                    parameters.multifd_compression,
                                    DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
    DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
                      parameters.multifd_zlib_level,
                      DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
    DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
                      parameters.multifd_zstd_level,
                      DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
    DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                     parameters.xbzrle_cache_size,
                     DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
    DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
                     parameters.max_postcopy_bandwidth,
                     DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
    DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
                      parameters.max_cpu_throttle,
                      DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
    DEFINE_PROP_SIZE("announce-initial", MigrationState,
                     parameters.announce_initial,
                     DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
    DEFINE_PROP_SIZE("announce-max", MigrationState,
                     parameters.announce_max,
                     DEFAULT_MIGRATE_ANNOUNCE_MAX),
    DEFINE_PROP_SIZE("announce-rounds", MigrationState,
                     parameters.announce_rounds,
                     DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
    DEFINE_PROP_SIZE("announce-step", MigrationState,
                     parameters.announce_step,
                     DEFAULT_MIGRATE_ANNOUNCE_STEP),
    DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
    DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
    DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),

    /* Migration capabilities */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
                        MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
    DEFINE_PROP_MIG_CAP("x-background-snapshot",
                        MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
#ifdef CONFIG_LINUX
    DEFINE_PROP_MIG_CAP("x-zero-copy-send",
                        MIGRATION_CAPABILITY_ZERO_COPY_SEND),
#endif

    DEFINE_PROP_END_OF_LIST(),
};

static void migration_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    device_class_set_props(dc, migration_properties);
}

static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_sem_destroy(&ms->pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
    error_free(ms->error);
}

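/*
 * Usage sketch for the migration_properties[] array above (the -global
 * mechanism is real; the values are invented): since TYPE_MIGRATION
 * derives from TYPE_DEVICE, its properties can be tuned from the
 * command line, e.g.
 *
 *   qemu-system-x86_64 -global migration.multifd-channels=4 \
 *                      -global migration.send-configuration=off ...
 *
 * which is precisely why this object stays a "device" for now (see the
 * TODO in migration_type below).
 */
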
static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_sem_init(&ms->pause_sem, 0);
    qemu_mutex_init(&ms->error_mutex);

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_compress_wait_thread = true;
    params->has_decompress_threads = true;
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_multifd_channels = true;
    params->has_multifd_compression = true;
    params->has_multifd_zlib_level = true;
    params->has_multifd_zstd_level = true;
    params->has_xbzrle_cache_size = true;
    params->has_max_postcopy_bandwidth = true;
    params->has_max_cpu_throttle = true;
    params->has_announce_initial = true;
    params->has_announce_max = true;
    params->has_announce_rounds = true;
    params->has_announce_step = true;

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
}

/*
 * Return true if the check passes, false otherwise.  An error will be
 * put inside errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    /* Assuming all off */
    bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    return migrate_caps_check(old_caps, ms->capabilities, errp);
}

static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);