1 /* 2 * QEMU live migration 3 * 4 * Copyright IBM, Corp. 2008 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qemu/cutils.h" 18 #include "qemu/error-report.h" 19 #include "qemu/main-loop.h" 20 #include "migration/blocker.h" 21 #include "exec.h" 22 #include "fd.h" 23 #include "file.h" 24 #include "socket.h" 25 #include "sysemu/runstate.h" 26 #include "sysemu/sysemu.h" 27 #include "sysemu/cpu-throttle.h" 28 #include "rdma.h" 29 #include "ram.h" 30 #include "ram-compress.h" 31 #include "migration/global_state.h" 32 #include "migration/misc.h" 33 #include "migration.h" 34 #include "migration-stats.h" 35 #include "savevm.h" 36 #include "qemu-file.h" 37 #include "channel.h" 38 #include "migration/vmstate.h" 39 #include "block/block.h" 40 #include "qapi/error.h" 41 #include "qapi/clone-visitor.h" 42 #include "qapi/qapi-visit-migration.h" 43 #include "qapi/qapi-visit-sockets.h" 44 #include "qapi/qapi-commands-migration.h" 45 #include "qapi/qapi-events-migration.h" 46 #include "qapi/qmp/qerror.h" 47 #include "qapi/qmp/qnull.h" 48 #include "qemu/rcu.h" 49 #include "block.h" 50 #include "postcopy-ram.h" 51 #include "qemu/thread.h" 52 #include "trace.h" 53 #include "exec/target_page.h" 54 #include "io/channel-buffer.h" 55 #include "io/channel-tls.h" 56 #include "migration/colo.h" 57 #include "hw/boards.h" 58 #include "monitor/monitor.h" 59 #include "net/announce.h" 60 #include "qemu/queue.h" 61 #include "multifd.h" 62 #include "threadinfo.h" 63 #include "qemu/yank.h" 64 #include "sysemu/cpus.h" 65 #include "yank_functions.h" 66 #include "sysemu/qtest.h" 67 #include "options.h" 68 #include "sysemu/dirtylimit.h" 69 70 static NotifierList migration_state_notifiers = 71 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); 72 73 /* Messages sent on the return path from destination to source */ 74 enum mig_rp_message_type { 75 MIG_RP_MSG_INVALID = 0, /* Must be 0 */ 76 MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */ 77 MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */ 78 79 MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */ 80 MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ 81 MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ 82 MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ 83 MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ 84 85 MIG_RP_MSG_MAX 86 }; 87 88 /* When we add fault tolerance, we could have several 89 migrations at once. For now we don't need to add 90 dynamic creation of migration */ 91 92 static MigrationState *current_migration; 93 static MigrationIncomingState *current_incoming; 94 95 static GSList *migration_blockers; 96 97 static bool migration_object_check(MigrationState *ms, Error **errp); 98 static int migration_maybe_pause(MigrationState *s, 99 int *current_active_state, 100 int new_state); 101 static void migrate_fd_cancel(MigrationState *s); 102 static int close_return_path_on_source(MigrationState *s); 103 104 static bool migration_needs_multiple_sockets(void) 105 { 106 return migrate_multifd() || migrate_postcopy_preempt(); 107 } 108 109 static bool uri_supports_multi_channels(const char *uri) 110 { 111 return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) || 112 strstart(uri, "vsock:", NULL); 113 } 114 115 static bool 116 migration_channels_and_uri_compatible(const char *uri, Error **errp) 117 { 118 if (migration_needs_multiple_sockets() && 119 !uri_supports_multi_channels(uri)) { 120 error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)"); 121 return false; 122 } 123 124 return true; 125 } 126 127 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) 128 { 129 uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; 130 131 return (a > b) - (a < b); 132 } 133 134 void migration_object_init(void) 135 { 136 /* This can only be called once. */ 137 assert(!current_migration); 138 current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION)); 139 140 /* 141 * Init the migrate incoming object as well no matter whether 142 * we'll use it or not. 143 */ 144 assert(!current_incoming); 145 current_incoming = g_new0(MigrationIncomingState, 1); 146 current_incoming->state = MIGRATION_STATUS_NONE; 147 current_incoming->postcopy_remote_fds = 148 g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD)); 149 qemu_mutex_init(¤t_incoming->rp_mutex); 150 qemu_mutex_init(¤t_incoming->postcopy_prio_thread_mutex); 151 qemu_event_init(¤t_incoming->main_thread_load_event, false); 152 qemu_sem_init(¤t_incoming->postcopy_pause_sem_dst, 0); 153 qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0); 154 qemu_sem_init(¤t_incoming->postcopy_pause_sem_fast_load, 0); 155 qemu_sem_init(¤t_incoming->postcopy_qemufile_dst_done, 0); 156 157 qemu_mutex_init(¤t_incoming->page_request_mutex); 158 qemu_cond_init(¤t_incoming->page_request_cond); 159 current_incoming->page_requested = g_tree_new(page_request_addr_cmp); 160 161 migration_object_check(current_migration, &error_fatal); 162 163 blk_mig_init(); 164 ram_mig_init(); 165 dirty_bitmap_mig_init(); 166 } 167 168 void migration_cancel(const Error *error) 169 { 170 if (error) { 171 migrate_set_error(current_migration, error); 172 } 173 if (migrate_dirty_limit()) { 174 qmp_cancel_vcpu_dirty_limit(false, -1, NULL); 175 } 176 migrate_fd_cancel(current_migration); 177 } 178 179 void migration_shutdown(void) 180 { 181 /* 182 * When the QEMU main thread exit, the COLO thread 183 * may wait a semaphore. So, we should wakeup the 184 * COLO thread before migration shutdown. 185 */ 186 colo_shutdown(); 187 /* 188 * Cancel the current migration - that will (eventually) 189 * stop the migration using this structure 190 */ 191 migration_cancel(NULL); 192 object_unref(OBJECT(current_migration)); 193 194 /* 195 * Cancel outgoing migration of dirty bitmaps. It should 196 * at least unref used block nodes. 197 */ 198 dirty_bitmap_mig_cancel_outgoing(); 199 200 /* 201 * Cancel incoming migration of dirty bitmaps. Dirty bitmaps 202 * are non-critical data, and their loss never considered as 203 * something serious. 204 */ 205 dirty_bitmap_mig_cancel_incoming(); 206 } 207 208 /* For outgoing */ 209 MigrationState *migrate_get_current(void) 210 { 211 /* This can only be called after the object created. */ 212 assert(current_migration); 213 return current_migration; 214 } 215 216 MigrationIncomingState *migration_incoming_get_current(void) 217 { 218 assert(current_incoming); 219 return current_incoming; 220 } 221 222 void migration_incoming_transport_cleanup(MigrationIncomingState *mis) 223 { 224 if (mis->socket_address_list) { 225 qapi_free_SocketAddressList(mis->socket_address_list); 226 mis->socket_address_list = NULL; 227 } 228 229 if (mis->transport_cleanup) { 230 mis->transport_cleanup(mis->transport_data); 231 mis->transport_data = mis->transport_cleanup = NULL; 232 } 233 } 234 235 void migration_incoming_state_destroy(void) 236 { 237 struct MigrationIncomingState *mis = migration_incoming_get_current(); 238 239 multifd_load_cleanup(); 240 compress_threads_load_cleanup(); 241 242 if (mis->to_src_file) { 243 /* Tell source that we are done */ 244 migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); 245 qemu_fclose(mis->to_src_file); 246 mis->to_src_file = NULL; 247 } 248 249 if (mis->from_src_file) { 250 migration_ioc_unregister_yank_from_file(mis->from_src_file); 251 qemu_fclose(mis->from_src_file); 252 mis->from_src_file = NULL; 253 } 254 if (mis->postcopy_remote_fds) { 255 g_array_free(mis->postcopy_remote_fds, TRUE); 256 mis->postcopy_remote_fds = NULL; 257 } 258 259 migration_incoming_transport_cleanup(mis); 260 qemu_event_reset(&mis->main_thread_load_event); 261 262 if (mis->page_requested) { 263 g_tree_destroy(mis->page_requested); 264 mis->page_requested = NULL; 265 } 266 267 if (mis->postcopy_qemufile_dst) { 268 migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst); 269 qemu_fclose(mis->postcopy_qemufile_dst); 270 mis->postcopy_qemufile_dst = NULL; 271 } 272 273 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 274 } 275 276 static void migrate_generate_event(int new_state) 277 { 278 if (migrate_events()) { 279 qapi_event_send_migration(new_state); 280 } 281 } 282 283 /* 284 * Send a message on the return channel back to the source 285 * of the migration. 286 */ 287 static int migrate_send_rp_message(MigrationIncomingState *mis, 288 enum mig_rp_message_type message_type, 289 uint16_t len, void *data) 290 { 291 int ret = 0; 292 293 trace_migrate_send_rp_message((int)message_type, len); 294 QEMU_LOCK_GUARD(&mis->rp_mutex); 295 296 /* 297 * It's possible that the file handle got lost due to network 298 * failures. 299 */ 300 if (!mis->to_src_file) { 301 ret = -EIO; 302 return ret; 303 } 304 305 qemu_put_be16(mis->to_src_file, (unsigned int)message_type); 306 qemu_put_be16(mis->to_src_file, len); 307 qemu_put_buffer(mis->to_src_file, data, len); 308 qemu_fflush(mis->to_src_file); 309 310 /* It's possible that qemu file got error during sending */ 311 ret = qemu_file_get_error(mis->to_src_file); 312 313 return ret; 314 } 315 316 /* Request one page from the source VM at the given start address. 317 * rb: the RAMBlock to request the page in 318 * Start: Address offset within the RB 319 * Len: Length in bytes required - must be a multiple of pagesize 320 */ 321 int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, 322 RAMBlock *rb, ram_addr_t start) 323 { 324 uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ 325 size_t msglen = 12; /* start + len */ 326 size_t len = qemu_ram_pagesize(rb); 327 enum mig_rp_message_type msg_type; 328 const char *rbname; 329 int rbname_len; 330 331 *(uint64_t *)bufc = cpu_to_be64((uint64_t)start); 332 *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len); 333 334 /* 335 * We maintain the last ramblock that we requested for page. Note that we 336 * don't need locking because this function will only be called within the 337 * postcopy ram fault thread. 338 */ 339 if (rb != mis->last_rb) { 340 mis->last_rb = rb; 341 342 rbname = qemu_ram_get_idstr(rb); 343 rbname_len = strlen(rbname); 344 345 assert(rbname_len < 256); 346 347 bufc[msglen++] = rbname_len; 348 memcpy(bufc + msglen, rbname, rbname_len); 349 msglen += rbname_len; 350 msg_type = MIG_RP_MSG_REQ_PAGES_ID; 351 } else { 352 msg_type = MIG_RP_MSG_REQ_PAGES; 353 } 354 355 return migrate_send_rp_message(mis, msg_type, msglen, bufc); 356 } 357 358 int migrate_send_rp_req_pages(MigrationIncomingState *mis, 359 RAMBlock *rb, ram_addr_t start, uint64_t haddr) 360 { 361 void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb)); 362 bool received = false; 363 364 WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { 365 received = ramblock_recv_bitmap_test_byte_offset(rb, start); 366 if (!received && !g_tree_lookup(mis->page_requested, aligned)) { 367 /* 368 * The page has not been received, and it's not yet in the page 369 * request list. Queue it. Set the value of element to 1, so that 370 * things like g_tree_lookup() will return TRUE (1) when found. 371 */ 372 g_tree_insert(mis->page_requested, aligned, (gpointer)1); 373 qatomic_inc(&mis->page_requested_count); 374 trace_postcopy_page_req_add(aligned, mis->page_requested_count); 375 } 376 } 377 378 /* 379 * If the page is there, skip sending the message. We don't even need the 380 * lock because as long as the page arrived, it'll be there forever. 381 */ 382 if (received) { 383 return 0; 384 } 385 386 return migrate_send_rp_message_req_pages(mis, rb, start); 387 } 388 389 static bool migration_colo_enabled; 390 bool migration_incoming_colo_enabled(void) 391 { 392 return migration_colo_enabled; 393 } 394 395 void migration_incoming_disable_colo(void) 396 { 397 ram_block_discard_disable(false); 398 migration_colo_enabled = false; 399 } 400 401 int migration_incoming_enable_colo(void) 402 { 403 #ifndef CONFIG_REPLICATION 404 error_report("ENABLE_COLO command come in migration stream, but COLO " 405 "module is not built in"); 406 return -ENOTSUP; 407 #endif 408 409 if (!migrate_colo()) { 410 error_report("ENABLE_COLO command come in migration stream, but c-colo " 411 "capability is not set"); 412 return -EINVAL; 413 } 414 415 if (ram_block_discard_disable(true)) { 416 error_report("COLO: cannot disable RAM discard"); 417 return -EBUSY; 418 } 419 migration_colo_enabled = true; 420 return 0; 421 } 422 423 void migrate_add_address(SocketAddress *address) 424 { 425 MigrationIncomingState *mis = migration_incoming_get_current(); 426 427 QAPI_LIST_PREPEND(mis->socket_address_list, 428 QAPI_CLONE(SocketAddress, address)); 429 } 430 431 static void qemu_start_incoming_migration(const char *uri, Error **errp) 432 { 433 const char *p = NULL; 434 MigrationIncomingState *mis = migration_incoming_get_current(); 435 436 /* URI is not suitable for migration? */ 437 if (!migration_channels_and_uri_compatible(uri, errp)) { 438 return; 439 } 440 441 migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, 442 MIGRATION_STATUS_SETUP); 443 444 if (strstart(uri, "tcp:", &p) || 445 strstart(uri, "unix:", NULL) || 446 strstart(uri, "vsock:", NULL)) { 447 socket_start_incoming_migration(p ? p : uri, errp); 448 #ifdef CONFIG_RDMA 449 } else if (strstart(uri, "rdma:", &p)) { 450 if (migrate_compress()) { 451 error_setg(errp, "RDMA and compression can't be used together"); 452 return; 453 } 454 if (migrate_xbzrle()) { 455 error_setg(errp, "RDMA and XBZRLE can't be used together"); 456 return; 457 } 458 if (migrate_multifd()) { 459 error_setg(errp, "RDMA and multifd can't be used together"); 460 return; 461 } 462 rdma_start_incoming_migration(p, errp); 463 #endif 464 } else if (strstart(uri, "exec:", &p)) { 465 exec_start_incoming_migration(p, errp); 466 } else if (strstart(uri, "fd:", &p)) { 467 fd_start_incoming_migration(p, errp); 468 } else if (strstart(uri, "file:", &p)) { 469 file_start_incoming_migration(p, errp); 470 } else { 471 error_setg(errp, "unknown migration protocol: %s", uri); 472 } 473 } 474 475 static void process_incoming_migration_bh(void *opaque) 476 { 477 Error *local_err = NULL; 478 MigrationIncomingState *mis = opaque; 479 480 /* If capability late_block_activate is set: 481 * Only fire up the block code now if we're going to restart the 482 * VM, else 'cont' will do it. 483 * This causes file locking to happen; so we don't want it to happen 484 * unless we really are starting the VM. 485 */ 486 if (!migrate_late_block_activate() || 487 (autostart && (!global_state_received() || 488 global_state_get_runstate() == RUN_STATE_RUNNING))) { 489 /* Make sure all file formats throw away their mutable metadata. 490 * If we get an error here, just don't restart the VM yet. */ 491 bdrv_activate_all(&local_err); 492 if (local_err) { 493 error_report_err(local_err); 494 local_err = NULL; 495 autostart = false; 496 } 497 } 498 499 /* 500 * This must happen after all error conditions are dealt with and 501 * we're sure the VM is going to be running on this host. 502 */ 503 qemu_announce_self(&mis->announce_timer, migrate_announce_params()); 504 505 multifd_load_shutdown(); 506 507 dirty_bitmap_mig_before_vm_start(); 508 509 if (!global_state_received() || 510 global_state_get_runstate() == RUN_STATE_RUNNING) { 511 if (autostart) { 512 vm_start(); 513 } else { 514 runstate_set(RUN_STATE_PAUSED); 515 } 516 } else if (migration_incoming_colo_enabled()) { 517 migration_incoming_disable_colo(); 518 vm_start(); 519 } else { 520 runstate_set(global_state_get_runstate()); 521 } 522 /* 523 * This must happen after any state changes since as soon as an external 524 * observer sees this event they might start to prod at the VM assuming 525 * it's ready to use. 526 */ 527 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 528 MIGRATION_STATUS_COMPLETED); 529 qemu_bh_delete(mis->bh); 530 migration_incoming_state_destroy(); 531 } 532 533 static void coroutine_fn 534 process_incoming_migration_co(void *opaque) 535 { 536 MigrationIncomingState *mis = migration_incoming_get_current(); 537 PostcopyState ps; 538 int ret; 539 540 assert(mis->from_src_file); 541 542 if (compress_threads_load_setup(mis->from_src_file)) { 543 error_report("Failed to setup decompress threads"); 544 goto fail; 545 } 546 547 mis->largest_page_size = qemu_ram_pagesize_largest(); 548 postcopy_state_set(POSTCOPY_INCOMING_NONE); 549 migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP, 550 MIGRATION_STATUS_ACTIVE); 551 552 mis->loadvm_co = qemu_coroutine_self(); 553 ret = qemu_loadvm_state(mis->from_src_file); 554 mis->loadvm_co = NULL; 555 556 ps = postcopy_state_get(); 557 trace_process_incoming_migration_co_end(ret, ps); 558 if (ps != POSTCOPY_INCOMING_NONE) { 559 if (ps == POSTCOPY_INCOMING_ADVISE) { 560 /* 561 * Where a migration had postcopy enabled (and thus went to advise) 562 * but managed to complete within the precopy period, we can use 563 * the normal exit. 564 */ 565 postcopy_ram_incoming_cleanup(mis); 566 } else if (ret >= 0) { 567 /* 568 * Postcopy was started, cleanup should happen at the end of the 569 * postcopy thread. 570 */ 571 trace_process_incoming_migration_co_postcopy_end_main(); 572 return; 573 } 574 /* Else if something went wrong then just fall out of the normal exit */ 575 } 576 577 if (ret < 0) { 578 error_report("load of migration failed: %s", strerror(-ret)); 579 goto fail; 580 } 581 582 if (colo_incoming_co() < 0) { 583 goto fail; 584 } 585 586 mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); 587 qemu_bh_schedule(mis->bh); 588 return; 589 fail: 590 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 591 MIGRATION_STATUS_FAILED); 592 qemu_fclose(mis->from_src_file); 593 594 multifd_load_cleanup(); 595 compress_threads_load_cleanup(); 596 597 exit(EXIT_FAILURE); 598 } 599 600 /** 601 * migration_incoming_setup: Setup incoming migration 602 * @f: file for main migration channel 603 * @errp: where to put errors 604 * 605 * Returns: %true on success, %false on error. 606 */ 607 static bool migration_incoming_setup(QEMUFile *f, Error **errp) 608 { 609 MigrationIncomingState *mis = migration_incoming_get_current(); 610 611 if (!mis->from_src_file) { 612 mis->from_src_file = f; 613 } 614 qemu_file_set_blocking(f, false); 615 return true; 616 } 617 618 void migration_incoming_process(void) 619 { 620 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); 621 qemu_coroutine_enter(co); 622 } 623 624 /* Returns true if recovered from a paused migration, otherwise false */ 625 static bool postcopy_try_recover(void) 626 { 627 MigrationIncomingState *mis = migration_incoming_get_current(); 628 629 if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 630 /* Resumed from a paused postcopy migration */ 631 632 /* This should be set already in migration_incoming_setup() */ 633 assert(mis->from_src_file); 634 /* Postcopy has standalone thread to do vm load */ 635 qemu_file_set_blocking(mis->from_src_file, true); 636 637 /* Re-configure the return path */ 638 mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); 639 640 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 641 MIGRATION_STATUS_POSTCOPY_RECOVER); 642 643 /* 644 * Here, we only wake up the main loading thread (while the 645 * rest threads will still be waiting), so that we can receive 646 * commands from source now, and answer it if needed. The 647 * rest threads will be woken up afterwards until we are sure 648 * that source is ready to reply to page requests. 649 */ 650 qemu_sem_post(&mis->postcopy_pause_sem_dst); 651 return true; 652 } 653 654 return false; 655 } 656 657 void migration_fd_process_incoming(QEMUFile *f, Error **errp) 658 { 659 if (!migration_incoming_setup(f, errp)) { 660 return; 661 } 662 if (postcopy_try_recover()) { 663 return; 664 } 665 migration_incoming_process(); 666 } 667 668 /* 669 * Returns true when we want to start a new incoming migration process, 670 * false otherwise. 671 */ 672 static bool migration_should_start_incoming(bool main_channel) 673 { 674 /* Multifd doesn't start unless all channels are established */ 675 if (migrate_multifd()) { 676 return migration_has_all_channels(); 677 } 678 679 /* Preempt channel only starts when the main channel is created */ 680 if (migrate_postcopy_preempt()) { 681 return main_channel; 682 } 683 684 /* 685 * For all the rest types of migration, we should only reach here when 686 * it's the main channel that's being created, and we should always 687 * proceed with this channel. 688 */ 689 assert(main_channel); 690 return true; 691 } 692 693 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) 694 { 695 MigrationIncomingState *mis = migration_incoming_get_current(); 696 Error *local_err = NULL; 697 QEMUFile *f; 698 bool default_channel = true; 699 uint32_t channel_magic = 0; 700 int ret = 0; 701 702 if (migrate_multifd() && !migrate_postcopy_ram() && 703 qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { 704 /* 705 * With multiple channels, it is possible that we receive channels 706 * out of order on destination side, causing incorrect mapping of 707 * source channels on destination side. Check channel MAGIC to 708 * decide type of channel. Please note this is best effort, postcopy 709 * preempt channel does not send any magic number so avoid it for 710 * postcopy live migration. Also tls live migration already does 711 * tls handshake while initializing main channel so with tls this 712 * issue is not possible. 713 */ 714 ret = migration_channel_read_peek(ioc, (void *)&channel_magic, 715 sizeof(channel_magic), &local_err); 716 717 if (ret != 0) { 718 error_propagate(errp, local_err); 719 return; 720 } 721 722 default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); 723 } else { 724 default_channel = !mis->from_src_file; 725 } 726 727 if (multifd_load_setup(errp) != 0) { 728 error_setg(errp, "Failed to setup multifd channels"); 729 return; 730 } 731 732 if (default_channel) { 733 f = qemu_file_new_input(ioc); 734 735 if (!migration_incoming_setup(f, errp)) { 736 return; 737 } 738 } else { 739 /* Multiple connections */ 740 assert(migration_needs_multiple_sockets()); 741 if (migrate_multifd()) { 742 multifd_recv_new_channel(ioc, &local_err); 743 } else { 744 assert(migrate_postcopy_preempt()); 745 f = qemu_file_new_input(ioc); 746 postcopy_preempt_new_channel(mis, f); 747 } 748 if (local_err) { 749 error_propagate(errp, local_err); 750 return; 751 } 752 } 753 754 if (migration_should_start_incoming(default_channel)) { 755 /* If it's a recovery, we're done */ 756 if (postcopy_try_recover()) { 757 return; 758 } 759 migration_incoming_process(); 760 } 761 } 762 763 /** 764 * @migration_has_all_channels: We have received all channels that we need 765 * 766 * Returns true when we have got connections to all the channels that 767 * we need for migration. 768 */ 769 bool migration_has_all_channels(void) 770 { 771 MigrationIncomingState *mis = migration_incoming_get_current(); 772 773 if (!mis->from_src_file) { 774 return false; 775 } 776 777 if (migrate_multifd()) { 778 return multifd_recv_all_channels_created(); 779 } 780 781 if (migrate_postcopy_preempt()) { 782 return mis->postcopy_qemufile_dst != NULL; 783 } 784 785 return true; 786 } 787 788 int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) 789 { 790 return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); 791 } 792 793 /* 794 * Send a 'SHUT' message on the return channel with the given value 795 * to indicate that we've finished with the RP. Non-0 value indicates 796 * error. 797 */ 798 void migrate_send_rp_shut(MigrationIncomingState *mis, 799 uint32_t value) 800 { 801 uint32_t buf; 802 803 buf = cpu_to_be32(value); 804 migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf); 805 } 806 807 /* 808 * Send a 'PONG' message on the return channel with the given value 809 * (normally in response to a 'PING') 810 */ 811 void migrate_send_rp_pong(MigrationIncomingState *mis, 812 uint32_t value) 813 { 814 uint32_t buf; 815 816 buf = cpu_to_be32(value); 817 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); 818 } 819 820 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, 821 char *block_name) 822 { 823 char buf[512]; 824 int len; 825 int64_t res; 826 827 /* 828 * First, we send the header part. It contains only the len of 829 * idstr, and the idstr itself. 830 */ 831 len = strlen(block_name); 832 buf[0] = len; 833 memcpy(buf + 1, block_name, len); 834 835 if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { 836 error_report("%s: MSG_RP_RECV_BITMAP only used for recovery", 837 __func__); 838 return; 839 } 840 841 migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf); 842 843 /* 844 * Next, we dump the received bitmap to the stream. 845 * 846 * TODO: currently we are safe since we are the only one that is 847 * using the to_src_file handle (fault thread is still paused), 848 * and it's ok even not taking the mutex. However the best way is 849 * to take the lock before sending the message header, and release 850 * the lock after sending the bitmap. 851 */ 852 qemu_mutex_lock(&mis->rp_mutex); 853 res = ramblock_recv_bitmap_send(mis->to_src_file, block_name); 854 qemu_mutex_unlock(&mis->rp_mutex); 855 856 trace_migrate_send_rp_recv_bitmap(block_name, res); 857 } 858 859 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) 860 { 861 uint32_t buf; 862 863 buf = cpu_to_be32(value); 864 migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); 865 } 866 867 /* 868 * Return true if we're already in the middle of a migration 869 * (i.e. any of the active or setup states) 870 */ 871 bool migration_is_setup_or_active(int state) 872 { 873 switch (state) { 874 case MIGRATION_STATUS_ACTIVE: 875 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 876 case MIGRATION_STATUS_POSTCOPY_PAUSED: 877 case MIGRATION_STATUS_POSTCOPY_RECOVER: 878 case MIGRATION_STATUS_SETUP: 879 case MIGRATION_STATUS_PRE_SWITCHOVER: 880 case MIGRATION_STATUS_DEVICE: 881 case MIGRATION_STATUS_WAIT_UNPLUG: 882 case MIGRATION_STATUS_COLO: 883 return true; 884 885 default: 886 return false; 887 888 } 889 } 890 891 bool migration_is_running(int state) 892 { 893 switch (state) { 894 case MIGRATION_STATUS_ACTIVE: 895 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 896 case MIGRATION_STATUS_POSTCOPY_PAUSED: 897 case MIGRATION_STATUS_POSTCOPY_RECOVER: 898 case MIGRATION_STATUS_SETUP: 899 case MIGRATION_STATUS_PRE_SWITCHOVER: 900 case MIGRATION_STATUS_DEVICE: 901 case MIGRATION_STATUS_WAIT_UNPLUG: 902 case MIGRATION_STATUS_CANCELLING: 903 return true; 904 905 default: 906 return false; 907 908 } 909 } 910 911 static bool migrate_show_downtime(MigrationState *s) 912 { 913 return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy(); 914 } 915 916 static void populate_time_info(MigrationInfo *info, MigrationState *s) 917 { 918 info->has_status = true; 919 info->has_setup_time = true; 920 info->setup_time = s->setup_time; 921 922 if (s->state == MIGRATION_STATUS_COMPLETED) { 923 info->has_total_time = true; 924 info->total_time = s->total_time; 925 } else { 926 info->has_total_time = true; 927 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - 928 s->start_time; 929 } 930 931 if (migrate_show_downtime(s)) { 932 info->has_downtime = true; 933 info->downtime = s->downtime; 934 } else { 935 info->has_expected_downtime = true; 936 info->expected_downtime = s->expected_downtime; 937 } 938 } 939 940 static void populate_ram_info(MigrationInfo *info, MigrationState *s) 941 { 942 size_t page_size = qemu_target_page_size(); 943 944 info->ram = g_malloc0(sizeof(*info->ram)); 945 info->ram->transferred = stat64_get(&mig_stats.transferred); 946 info->ram->total = ram_bytes_total(); 947 info->ram->duplicate = stat64_get(&mig_stats.zero_pages); 948 /* legacy value. It is not used anymore */ 949 info->ram->skipped = 0; 950 info->ram->normal = stat64_get(&mig_stats.normal_pages); 951 info->ram->normal_bytes = info->ram->normal * page_size; 952 info->ram->mbps = s->mbps; 953 info->ram->dirty_sync_count = 954 stat64_get(&mig_stats.dirty_sync_count); 955 info->ram->dirty_sync_missed_zero_copy = 956 stat64_get(&mig_stats.dirty_sync_missed_zero_copy); 957 info->ram->postcopy_requests = 958 stat64_get(&mig_stats.postcopy_requests); 959 info->ram->page_size = page_size; 960 info->ram->multifd_bytes = stat64_get(&mig_stats.multifd_bytes); 961 info->ram->pages_per_second = s->pages_per_second; 962 info->ram->precopy_bytes = stat64_get(&mig_stats.precopy_bytes); 963 info->ram->downtime_bytes = stat64_get(&mig_stats.downtime_bytes); 964 info->ram->postcopy_bytes = stat64_get(&mig_stats.postcopy_bytes); 965 966 if (migrate_xbzrle()) { 967 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); 968 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); 969 info->xbzrle_cache->bytes = xbzrle_counters.bytes; 970 info->xbzrle_cache->pages = xbzrle_counters.pages; 971 info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss; 972 info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate; 973 info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate; 974 info->xbzrle_cache->overflow = xbzrle_counters.overflow; 975 } 976 977 populate_compress(info); 978 979 if (cpu_throttle_active()) { 980 info->has_cpu_throttle_percentage = true; 981 info->cpu_throttle_percentage = cpu_throttle_get_percentage(); 982 } 983 984 if (s->state != MIGRATION_STATUS_COMPLETED) { 985 info->ram->remaining = ram_bytes_remaining(); 986 info->ram->dirty_pages_rate = 987 stat64_get(&mig_stats.dirty_pages_rate); 988 } 989 990 if (migrate_dirty_limit() && dirtylimit_in_service()) { 991 info->has_dirty_limit_throttle_time_per_round = true; 992 info->dirty_limit_throttle_time_per_round = 993 dirtylimit_throttle_time_per_round(); 994 995 info->has_dirty_limit_ring_full_time = true; 996 info->dirty_limit_ring_full_time = dirtylimit_ring_full_time(); 997 } 998 } 999 1000 static void populate_disk_info(MigrationInfo *info) 1001 { 1002 if (blk_mig_active()) { 1003 info->disk = g_malloc0(sizeof(*info->disk)); 1004 info->disk->transferred = blk_mig_bytes_transferred(); 1005 info->disk->remaining = blk_mig_bytes_remaining(); 1006 info->disk->total = blk_mig_bytes_total(); 1007 } 1008 } 1009 1010 static void fill_source_migration_info(MigrationInfo *info) 1011 { 1012 MigrationState *s = migrate_get_current(); 1013 int state = qatomic_read(&s->state); 1014 GSList *cur_blocker = migration_blockers; 1015 1016 info->blocked_reasons = NULL; 1017 1018 /* 1019 * There are two types of reasons a migration might be blocked; 1020 * a) devices marked in VMState as non-migratable, and 1021 * b) Explicit migration blockers 1022 * We need to add both of them here. 1023 */ 1024 qemu_savevm_non_migratable_list(&info->blocked_reasons); 1025 1026 while (cur_blocker) { 1027 QAPI_LIST_PREPEND(info->blocked_reasons, 1028 g_strdup(error_get_pretty(cur_blocker->data))); 1029 cur_blocker = g_slist_next(cur_blocker); 1030 } 1031 info->has_blocked_reasons = info->blocked_reasons != NULL; 1032 1033 switch (state) { 1034 case MIGRATION_STATUS_NONE: 1035 /* no migration has happened ever */ 1036 /* do not overwrite destination migration status */ 1037 return; 1038 case MIGRATION_STATUS_SETUP: 1039 info->has_status = true; 1040 info->has_total_time = false; 1041 break; 1042 case MIGRATION_STATUS_ACTIVE: 1043 case MIGRATION_STATUS_CANCELLING: 1044 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1045 case MIGRATION_STATUS_PRE_SWITCHOVER: 1046 case MIGRATION_STATUS_DEVICE: 1047 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1048 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1049 /* TODO add some postcopy stats */ 1050 populate_time_info(info, s); 1051 populate_ram_info(info, s); 1052 populate_disk_info(info); 1053 migration_populate_vfio_info(info); 1054 break; 1055 case MIGRATION_STATUS_COLO: 1056 info->has_status = true; 1057 /* TODO: display COLO specific information (checkpoint info etc.) */ 1058 break; 1059 case MIGRATION_STATUS_COMPLETED: 1060 populate_time_info(info, s); 1061 populate_ram_info(info, s); 1062 migration_populate_vfio_info(info); 1063 break; 1064 case MIGRATION_STATUS_FAILED: 1065 info->has_status = true; 1066 break; 1067 case MIGRATION_STATUS_CANCELLED: 1068 info->has_status = true; 1069 break; 1070 case MIGRATION_STATUS_WAIT_UNPLUG: 1071 info->has_status = true; 1072 break; 1073 } 1074 info->status = state; 1075 1076 QEMU_LOCK_GUARD(&s->error_mutex); 1077 if (s->error) { 1078 info->error_desc = g_strdup(error_get_pretty(s->error)); 1079 } 1080 } 1081 1082 static void fill_destination_migration_info(MigrationInfo *info) 1083 { 1084 MigrationIncomingState *mis = migration_incoming_get_current(); 1085 1086 if (mis->socket_address_list) { 1087 info->has_socket_address = true; 1088 info->socket_address = 1089 QAPI_CLONE(SocketAddressList, mis->socket_address_list); 1090 } 1091 1092 switch (mis->state) { 1093 case MIGRATION_STATUS_NONE: 1094 return; 1095 case MIGRATION_STATUS_SETUP: 1096 case MIGRATION_STATUS_CANCELLING: 1097 case MIGRATION_STATUS_CANCELLED: 1098 case MIGRATION_STATUS_ACTIVE: 1099 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1100 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1101 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1102 case MIGRATION_STATUS_FAILED: 1103 case MIGRATION_STATUS_COLO: 1104 info->has_status = true; 1105 break; 1106 case MIGRATION_STATUS_COMPLETED: 1107 info->has_status = true; 1108 fill_destination_postcopy_migration_info(info); 1109 break; 1110 } 1111 info->status = mis->state; 1112 } 1113 1114 MigrationInfo *qmp_query_migrate(Error **errp) 1115 { 1116 MigrationInfo *info = g_malloc0(sizeof(*info)); 1117 1118 fill_destination_migration_info(info); 1119 fill_source_migration_info(info); 1120 1121 return info; 1122 } 1123 1124 void qmp_migrate_start_postcopy(Error **errp) 1125 { 1126 MigrationState *s = migrate_get_current(); 1127 1128 if (!migrate_postcopy()) { 1129 error_setg(errp, "Enable postcopy with migrate_set_capability before" 1130 " the start of migration"); 1131 return; 1132 } 1133 1134 if (s->state == MIGRATION_STATUS_NONE) { 1135 error_setg(errp, "Postcopy must be started after migration has been" 1136 " started"); 1137 return; 1138 } 1139 /* 1140 * we don't error if migration has finished since that would be racy 1141 * with issuing this command. 1142 */ 1143 qatomic_set(&s->start_postcopy, true); 1144 } 1145 1146 /* shared migration helpers */ 1147 1148 void migrate_set_state(int *state, int old_state, int new_state) 1149 { 1150 assert(new_state < MIGRATION_STATUS__MAX); 1151 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { 1152 trace_migrate_set_state(MigrationStatus_str(new_state)); 1153 migrate_generate_event(new_state); 1154 } 1155 } 1156 1157 static void migrate_fd_cleanup(MigrationState *s) 1158 { 1159 qemu_bh_delete(s->cleanup_bh); 1160 s->cleanup_bh = NULL; 1161 1162 g_free(s->hostname); 1163 s->hostname = NULL; 1164 json_writer_free(s->vmdesc); 1165 s->vmdesc = NULL; 1166 1167 qemu_savevm_state_cleanup(); 1168 1169 if (s->to_dst_file) { 1170 QEMUFile *tmp; 1171 1172 trace_migrate_fd_cleanup(); 1173 qemu_mutex_unlock_iothread(); 1174 if (s->migration_thread_running) { 1175 qemu_thread_join(&s->thread); 1176 s->migration_thread_running = false; 1177 } 1178 qemu_mutex_lock_iothread(); 1179 1180 multifd_save_cleanup(); 1181 qemu_mutex_lock(&s->qemu_file_lock); 1182 tmp = s->to_dst_file; 1183 s->to_dst_file = NULL; 1184 qemu_mutex_unlock(&s->qemu_file_lock); 1185 /* 1186 * Close the file handle without the lock to make sure the 1187 * critical section won't block for long. 1188 */ 1189 migration_ioc_unregister_yank_from_file(tmp); 1190 qemu_fclose(tmp); 1191 } 1192 1193 /* 1194 * We already cleaned up to_dst_file, so errors from the return 1195 * path might be due to that, ignore them. 1196 */ 1197 close_return_path_on_source(s); 1198 1199 assert(!migration_is_active(s)); 1200 1201 if (s->state == MIGRATION_STATUS_CANCELLING) { 1202 migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, 1203 MIGRATION_STATUS_CANCELLED); 1204 } 1205 1206 if (s->error) { 1207 /* It is used on info migrate. We can't free it */ 1208 error_report_err(error_copy(s->error)); 1209 } 1210 migration_call_notifiers(s); 1211 block_cleanup_parameters(); 1212 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 1213 } 1214 1215 static void migrate_fd_cleanup_schedule(MigrationState *s) 1216 { 1217 /* 1218 * Ref the state for bh, because it may be called when 1219 * there're already no other refs 1220 */ 1221 object_ref(OBJECT(s)); 1222 qemu_bh_schedule(s->cleanup_bh); 1223 } 1224 1225 static void migrate_fd_cleanup_bh(void *opaque) 1226 { 1227 MigrationState *s = opaque; 1228 migrate_fd_cleanup(s); 1229 object_unref(OBJECT(s)); 1230 } 1231 1232 void migrate_set_error(MigrationState *s, const Error *error) 1233 { 1234 QEMU_LOCK_GUARD(&s->error_mutex); 1235 if (!s->error) { 1236 s->error = error_copy(error); 1237 } 1238 } 1239 1240 bool migrate_has_error(MigrationState *s) 1241 { 1242 /* The lock is not helpful here, but still follow the rule */ 1243 QEMU_LOCK_GUARD(&s->error_mutex); 1244 return qatomic_read(&s->error); 1245 } 1246 1247 static void migrate_error_free(MigrationState *s) 1248 { 1249 QEMU_LOCK_GUARD(&s->error_mutex); 1250 if (s->error) { 1251 error_free(s->error); 1252 s->error = NULL; 1253 } 1254 } 1255 1256 static void migrate_fd_error(MigrationState *s, const Error *error) 1257 { 1258 trace_migrate_fd_error(error_get_pretty(error)); 1259 assert(s->to_dst_file == NULL); 1260 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1261 MIGRATION_STATUS_FAILED); 1262 migrate_set_error(s, error); 1263 } 1264 1265 static void migrate_fd_cancel(MigrationState *s) 1266 { 1267 int old_state ; 1268 1269 trace_migrate_fd_cancel(); 1270 1271 WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { 1272 if (s->rp_state.from_dst_file) { 1273 /* shutdown the rp socket, so causing the rp thread to shutdown */ 1274 qemu_file_shutdown(s->rp_state.from_dst_file); 1275 } 1276 } 1277 1278 do { 1279 old_state = s->state; 1280 if (!migration_is_running(old_state)) { 1281 break; 1282 } 1283 /* If the migration is paused, kick it out of the pause */ 1284 if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { 1285 qemu_sem_post(&s->pause_sem); 1286 } 1287 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); 1288 } while (s->state != MIGRATION_STATUS_CANCELLING); 1289 1290 /* 1291 * If we're unlucky the migration code might be stuck somewhere in a 1292 * send/write while the network has failed and is waiting to timeout; 1293 * if we've got shutdown(2) available then we can force it to quit. 1294 */ 1295 if (s->state == MIGRATION_STATUS_CANCELLING) { 1296 WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { 1297 if (s->to_dst_file) { 1298 qemu_file_shutdown(s->to_dst_file); 1299 } 1300 } 1301 } 1302 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1303 Error *local_err = NULL; 1304 1305 bdrv_activate_all(&local_err); 1306 if (local_err) { 1307 error_report_err(local_err); 1308 } else { 1309 s->block_inactive = false; 1310 } 1311 } 1312 } 1313 1314 void migration_add_notifier(Notifier *notify, 1315 void (*func)(Notifier *notifier, void *data)) 1316 { 1317 notify->notify = func; 1318 notifier_list_add(&migration_state_notifiers, notify); 1319 } 1320 1321 void migration_remove_notifier(Notifier *notify) 1322 { 1323 if (notify->notify) { 1324 notifier_remove(notify); 1325 notify->notify = NULL; 1326 } 1327 } 1328 1329 void migration_call_notifiers(MigrationState *s) 1330 { 1331 notifier_list_notify(&migration_state_notifiers, s); 1332 } 1333 1334 bool migration_in_setup(MigrationState *s) 1335 { 1336 return s->state == MIGRATION_STATUS_SETUP; 1337 } 1338 1339 bool migration_has_finished(MigrationState *s) 1340 { 1341 return s->state == MIGRATION_STATUS_COMPLETED; 1342 } 1343 1344 bool migration_has_failed(MigrationState *s) 1345 { 1346 return (s->state == MIGRATION_STATUS_CANCELLED || 1347 s->state == MIGRATION_STATUS_FAILED); 1348 } 1349 1350 bool migration_in_postcopy(void) 1351 { 1352 MigrationState *s = migrate_get_current(); 1353 1354 switch (s->state) { 1355 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1356 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1357 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1358 return true; 1359 default: 1360 return false; 1361 } 1362 } 1363 1364 bool migration_in_postcopy_after_devices(MigrationState *s) 1365 { 1366 return migration_in_postcopy() && s->postcopy_after_devices; 1367 } 1368 1369 bool migration_in_incoming_postcopy(void) 1370 { 1371 PostcopyState ps = postcopy_state_get(); 1372 1373 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 1374 } 1375 1376 bool migration_incoming_postcopy_advised(void) 1377 { 1378 PostcopyState ps = postcopy_state_get(); 1379 1380 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; 1381 } 1382 1383 bool migration_in_bg_snapshot(void) 1384 { 1385 MigrationState *s = migrate_get_current(); 1386 1387 return migrate_background_snapshot() && 1388 migration_is_setup_or_active(s->state); 1389 } 1390 1391 bool migration_is_idle(void) 1392 { 1393 MigrationState *s = current_migration; 1394 1395 if (!s) { 1396 return true; 1397 } 1398 1399 switch (s->state) { 1400 case MIGRATION_STATUS_NONE: 1401 case MIGRATION_STATUS_CANCELLED: 1402 case MIGRATION_STATUS_COMPLETED: 1403 case MIGRATION_STATUS_FAILED: 1404 return true; 1405 case MIGRATION_STATUS_SETUP: 1406 case MIGRATION_STATUS_CANCELLING: 1407 case MIGRATION_STATUS_ACTIVE: 1408 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1409 case MIGRATION_STATUS_COLO: 1410 case MIGRATION_STATUS_PRE_SWITCHOVER: 1411 case MIGRATION_STATUS_DEVICE: 1412 case MIGRATION_STATUS_WAIT_UNPLUG: 1413 return false; 1414 case MIGRATION_STATUS__MAX: 1415 g_assert_not_reached(); 1416 } 1417 1418 return false; 1419 } 1420 1421 bool migration_is_active(MigrationState *s) 1422 { 1423 return (s->state == MIGRATION_STATUS_ACTIVE || 1424 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 1425 } 1426 1427 int migrate_init(MigrationState *s, Error **errp) 1428 { 1429 int ret; 1430 1431 ret = qemu_savevm_state_prepare(errp); 1432 if (ret) { 1433 return ret; 1434 } 1435 1436 /* 1437 * Reinitialise all migration state, except 1438 * parameters/capabilities that the user set, and 1439 * locks. 1440 */ 1441 s->cleanup_bh = 0; 1442 s->vm_start_bh = 0; 1443 s->to_dst_file = NULL; 1444 s->state = MIGRATION_STATUS_NONE; 1445 s->rp_state.from_dst_file = NULL; 1446 s->rp_state.error = false; 1447 s->mbps = 0.0; 1448 s->pages_per_second = 0.0; 1449 s->downtime = 0; 1450 s->expected_downtime = 0; 1451 s->setup_time = 0; 1452 s->start_postcopy = false; 1453 s->postcopy_after_devices = false; 1454 s->migration_thread_running = false; 1455 error_free(s->error); 1456 s->error = NULL; 1457 s->hostname = NULL; 1458 s->vmdesc = NULL; 1459 1460 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 1461 1462 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1463 s->total_time = 0; 1464 s->vm_old_state = -1; 1465 s->iteration_initial_bytes = 0; 1466 s->threshold_size = 0; 1467 s->switchover_acked = false; 1468 s->rdma_migration = false; 1469 /* 1470 * set mig_stats memory to zero for a new migration 1471 */ 1472 memset(&mig_stats, 0, sizeof(mig_stats)); 1473 migration_reset_vfio_bytes_transferred(); 1474 1475 return 0; 1476 } 1477 1478 int migrate_add_blocker_internal(Error **reasonp, Error **errp) 1479 { 1480 /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ 1481 if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { 1482 error_propagate_prepend(errp, *reasonp, 1483 "disallowing migration blocker " 1484 "(migration/snapshot in progress) for: "); 1485 *reasonp = NULL; 1486 return -EBUSY; 1487 } 1488 1489 migration_blockers = g_slist_prepend(migration_blockers, *reasonp); 1490 return 0; 1491 } 1492 1493 int migrate_add_blocker(Error **reasonp, Error **errp) 1494 { 1495 if (only_migratable) { 1496 error_propagate_prepend(errp, *reasonp, 1497 "disallowing migration blocker " 1498 "(--only-migratable) for: "); 1499 *reasonp = NULL; 1500 return -EACCES; 1501 } 1502 1503 return migrate_add_blocker_internal(reasonp, errp); 1504 } 1505 1506 void migrate_del_blocker(Error **reasonp) 1507 { 1508 if (*reasonp) { 1509 migration_blockers = g_slist_remove(migration_blockers, *reasonp); 1510 error_free(*reasonp); 1511 *reasonp = NULL; 1512 } 1513 } 1514 1515 void qmp_migrate_incoming(const char *uri, Error **errp) 1516 { 1517 Error *local_err = NULL; 1518 static bool once = true; 1519 1520 if (!once) { 1521 error_setg(errp, "The incoming migration has already been started"); 1522 return; 1523 } 1524 if (!runstate_check(RUN_STATE_INMIGRATE)) { 1525 error_setg(errp, "'-incoming' was not specified on the command line"); 1526 return; 1527 } 1528 1529 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 1530 return; 1531 } 1532 1533 qemu_start_incoming_migration(uri, &local_err); 1534 1535 if (local_err) { 1536 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 1537 error_propagate(errp, local_err); 1538 return; 1539 } 1540 1541 once = false; 1542 } 1543 1544 void qmp_migrate_recover(const char *uri, Error **errp) 1545 { 1546 MigrationIncomingState *mis = migration_incoming_get_current(); 1547 1548 /* 1549 * Don't even bother to use ERRP_GUARD() as it _must_ always be set by 1550 * callers (no one should ignore a recover failure); if there is, it's a 1551 * programming error. 1552 */ 1553 assert(errp); 1554 1555 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 1556 error_setg(errp, "Migrate recover can only be run " 1557 "when postcopy is paused."); 1558 return; 1559 } 1560 1561 /* If there's an existing transport, release it */ 1562 migration_incoming_transport_cleanup(mis); 1563 1564 /* 1565 * Note that this call will never start a real migration; it will 1566 * only re-setup the migration stream and poke existing migration 1567 * to continue using that newly established channel. 1568 */ 1569 qemu_start_incoming_migration(uri, errp); 1570 } 1571 1572 void qmp_migrate_pause(Error **errp) 1573 { 1574 MigrationState *ms = migrate_get_current(); 1575 MigrationIncomingState *mis = migration_incoming_get_current(); 1576 int ret = 0; 1577 1578 if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 1579 /* Source side, during postcopy */ 1580 qemu_mutex_lock(&ms->qemu_file_lock); 1581 if (ms->to_dst_file) { 1582 ret = qemu_file_shutdown(ms->to_dst_file); 1583 } 1584 qemu_mutex_unlock(&ms->qemu_file_lock); 1585 if (ret) { 1586 error_setg(errp, "Failed to pause source migration"); 1587 } 1588 return; 1589 } 1590 1591 if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 1592 ret = qemu_file_shutdown(mis->from_src_file); 1593 if (ret) { 1594 error_setg(errp, "Failed to pause destination migration"); 1595 } 1596 return; 1597 } 1598 1599 error_setg(errp, "migrate-pause is currently only supported " 1600 "during postcopy-active state"); 1601 } 1602 1603 bool migration_is_blocked(Error **errp) 1604 { 1605 if (qemu_savevm_state_blocked(errp)) { 1606 return true; 1607 } 1608 1609 if (migration_blockers) { 1610 error_propagate(errp, error_copy(migration_blockers->data)); 1611 return true; 1612 } 1613 1614 return false; 1615 } 1616 1617 /* Returns true if continue to migrate, or false if error detected */ 1618 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, 1619 bool resume, Error **errp) 1620 { 1621 Error *local_err = NULL; 1622 1623 if (resume) { 1624 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 1625 error_setg(errp, "Cannot resume if there is no " 1626 "paused migration"); 1627 return false; 1628 } 1629 1630 /* 1631 * Postcopy recovery won't work well with release-ram 1632 * capability since release-ram will drop the page buffer as 1633 * long as the page is put into the send buffer. So if there 1634 * is a network failure happened, any page buffers that have 1635 * not yet reached the destination VM but have already been 1636 * sent from the source VM will be lost forever. Let's refuse 1637 * the client from resuming such a postcopy migration. 1638 * Luckily release-ram was designed to only be used when src 1639 * and destination VMs are on the same host, so it should be 1640 * fine. 1641 */ 1642 if (migrate_release_ram()) { 1643 error_setg(errp, "Postcopy recovery cannot work " 1644 "when release-ram capability is set"); 1645 return false; 1646 } 1647 1648 /* This is a resume, skip init status */ 1649 return true; 1650 } 1651 1652 if (migration_is_running(s->state)) { 1653 error_setg(errp, QERR_MIGRATION_ACTIVE); 1654 return false; 1655 } 1656 1657 if (runstate_check(RUN_STATE_INMIGRATE)) { 1658 error_setg(errp, "Guest is waiting for an incoming migration"); 1659 return false; 1660 } 1661 1662 if (runstate_check(RUN_STATE_POSTMIGRATE)) { 1663 error_setg(errp, "Can't migrate the vm that was paused due to " 1664 "previous migration"); 1665 return false; 1666 } 1667 1668 if (migration_is_blocked(errp)) { 1669 return false; 1670 } 1671 1672 if (blk || blk_inc) { 1673 if (migrate_colo()) { 1674 error_setg(errp, "No disk migration is required in COLO mode"); 1675 return false; 1676 } 1677 if (migrate_block() || migrate_block_incremental()) { 1678 error_setg(errp, "Command options are incompatible with " 1679 "current migration capabilities"); 1680 return false; 1681 } 1682 if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { 1683 error_propagate(errp, local_err); 1684 return false; 1685 } 1686 s->must_remove_block_options = true; 1687 } 1688 1689 if (blk_inc) { 1690 migrate_set_block_incremental(true); 1691 } 1692 1693 if (migrate_init(s, errp)) { 1694 return false; 1695 } 1696 1697 return true; 1698 } 1699 1700 void qmp_migrate(const char *uri, bool has_blk, bool blk, 1701 bool has_inc, bool inc, bool has_detach, bool detach, 1702 bool has_resume, bool resume, Error **errp) 1703 { 1704 bool resume_requested; 1705 Error *local_err = NULL; 1706 MigrationState *s = migrate_get_current(); 1707 const char *p = NULL; 1708 1709 /* URI is not suitable for migration? */ 1710 if (!migration_channels_and_uri_compatible(uri, errp)) { 1711 return; 1712 } 1713 1714 resume_requested = has_resume && resume; 1715 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 1716 resume_requested, errp)) { 1717 /* Error detected, put into errp */ 1718 return; 1719 } 1720 1721 if (!resume_requested) { 1722 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 1723 return; 1724 } 1725 } 1726 1727 if (strstart(uri, "tcp:", &p) || 1728 strstart(uri, "unix:", NULL) || 1729 strstart(uri, "vsock:", NULL)) { 1730 socket_start_outgoing_migration(s, p ? p : uri, &local_err); 1731 #ifdef CONFIG_RDMA 1732 } else if (strstart(uri, "rdma:", &p)) { 1733 rdma_start_outgoing_migration(s, p, &local_err); 1734 #endif 1735 } else if (strstart(uri, "exec:", &p)) { 1736 exec_start_outgoing_migration(s, p, &local_err); 1737 } else if (strstart(uri, "fd:", &p)) { 1738 fd_start_outgoing_migration(s, p, &local_err); 1739 } else if (strstart(uri, "file:", &p)) { 1740 file_start_outgoing_migration(s, p, &local_err); 1741 } else { 1742 error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", 1743 "a valid migration protocol"); 1744 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1745 MIGRATION_STATUS_FAILED); 1746 block_cleanup_parameters(); 1747 } 1748 1749 if (local_err) { 1750 if (!resume_requested) { 1751 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 1752 } 1753 migrate_fd_error(s, local_err); 1754 error_propagate(errp, local_err); 1755 return; 1756 } 1757 } 1758 1759 void qmp_migrate_cancel(Error **errp) 1760 { 1761 migration_cancel(NULL); 1762 } 1763 1764 void qmp_migrate_continue(MigrationStatus state, Error **errp) 1765 { 1766 MigrationState *s = migrate_get_current(); 1767 if (s->state != state) { 1768 error_setg(errp, "Migration not in expected state: %s", 1769 MigrationStatus_str(s->state)); 1770 return; 1771 } 1772 qemu_sem_post(&s->pause_sem); 1773 } 1774 1775 /* migration thread support */ 1776 /* 1777 * Something bad happened to the RP stream, mark an error 1778 * The caller shall print or trace something to indicate why 1779 */ 1780 static void mark_source_rp_bad(MigrationState *s) 1781 { 1782 s->rp_state.error = true; 1783 } 1784 1785 void migration_rp_wait(MigrationState *s) 1786 { 1787 qemu_sem_wait(&s->rp_state.rp_sem); 1788 } 1789 1790 void migration_rp_kick(MigrationState *s) 1791 { 1792 qemu_sem_post(&s->rp_state.rp_sem); 1793 } 1794 1795 static struct rp_cmd_args { 1796 ssize_t len; /* -1 = variable */ 1797 const char *name; 1798 } rp_cmd_args[] = { 1799 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 1800 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 1801 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 1802 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 1803 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 1804 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 1805 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 1806 [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, 1807 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 1808 }; 1809 1810 /* 1811 * Process a request for pages received on the return path, 1812 * We're allowed to send more than requested (e.g. to round to our page size) 1813 * and we don't need to send pages that have already been sent. 1814 */ 1815 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 1816 ram_addr_t start, size_t len) 1817 { 1818 long our_host_ps = qemu_real_host_page_size(); 1819 1820 trace_migrate_handle_rp_req_pages(rbname, start, len); 1821 1822 /* 1823 * Since we currently insist on matching page sizes, just sanity check 1824 * we're being asked for whole host pages. 1825 */ 1826 if (!QEMU_IS_ALIGNED(start, our_host_ps) || 1827 !QEMU_IS_ALIGNED(len, our_host_ps)) { 1828 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 1829 " len: %zd", __func__, start, len); 1830 mark_source_rp_bad(ms); 1831 return; 1832 } 1833 1834 if (ram_save_queue_pages(rbname, start, len)) { 1835 mark_source_rp_bad(ms); 1836 } 1837 } 1838 1839 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 1840 { 1841 RAMBlock *block = qemu_ram_block_by_name(block_name); 1842 1843 if (!block) { 1844 error_report("%s: invalid block name '%s'", __func__, block_name); 1845 return -EINVAL; 1846 } 1847 1848 /* Fetch the received bitmap and refresh the dirty bitmap */ 1849 return ram_dirty_bitmap_reload(s, block); 1850 } 1851 1852 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 1853 { 1854 trace_source_return_path_thread_resume_ack(value); 1855 1856 if (value != MIGRATION_RESUME_ACK_VALUE) { 1857 error_report("%s: illegal resume_ack value %"PRIu32, 1858 __func__, value); 1859 return -1; 1860 } 1861 1862 /* Now both sides are active. */ 1863 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 1864 MIGRATION_STATUS_POSTCOPY_ACTIVE); 1865 1866 /* Notify send thread that time to continue send pages */ 1867 migration_rp_kick(s); 1868 1869 return 0; 1870 } 1871 1872 /* 1873 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if 1874 * existed) in a safe way. 1875 */ 1876 static void migration_release_dst_files(MigrationState *ms) 1877 { 1878 QEMUFile *file; 1879 1880 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { 1881 /* 1882 * Reset the from_dst_file pointer first before releasing it, as we 1883 * can't block within lock section 1884 */ 1885 file = ms->rp_state.from_dst_file; 1886 ms->rp_state.from_dst_file = NULL; 1887 } 1888 1889 /* 1890 * Do the same to postcopy fast path socket too if there is. No 1891 * locking needed because this qemufile should only be managed by 1892 * return path thread. 1893 */ 1894 if (ms->postcopy_qemufile_src) { 1895 migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src); 1896 qemu_file_shutdown(ms->postcopy_qemufile_src); 1897 qemu_fclose(ms->postcopy_qemufile_src); 1898 ms->postcopy_qemufile_src = NULL; 1899 } 1900 1901 qemu_fclose(file); 1902 } 1903 1904 /* 1905 * Handles messages sent on the return path towards the source VM 1906 * 1907 */ 1908 static void *source_return_path_thread(void *opaque) 1909 { 1910 MigrationState *ms = opaque; 1911 QEMUFile *rp = ms->rp_state.from_dst_file; 1912 uint16_t header_len, header_type; 1913 uint8_t buf[512]; 1914 uint32_t tmp32, sibling_error; 1915 ram_addr_t start = 0; /* =0 to silence warning */ 1916 size_t len = 0, expected_len; 1917 int res; 1918 1919 trace_source_return_path_thread_entry(); 1920 rcu_register_thread(); 1921 1922 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 1923 migration_is_setup_or_active(ms->state)) { 1924 trace_source_return_path_thread_loop_top(); 1925 header_type = qemu_get_be16(rp); 1926 header_len = qemu_get_be16(rp); 1927 1928 if (qemu_file_get_error(rp)) { 1929 mark_source_rp_bad(ms); 1930 goto out; 1931 } 1932 1933 if (header_type >= MIG_RP_MSG_MAX || 1934 header_type == MIG_RP_MSG_INVALID) { 1935 error_report("RP: Received invalid message 0x%04x length 0x%04x", 1936 header_type, header_len); 1937 mark_source_rp_bad(ms); 1938 goto out; 1939 } 1940 1941 if ((rp_cmd_args[header_type].len != -1 && 1942 header_len != rp_cmd_args[header_type].len) || 1943 header_len > sizeof(buf)) { 1944 error_report("RP: Received '%s' message (0x%04x) with" 1945 "incorrect length %d expecting %zu", 1946 rp_cmd_args[header_type].name, header_type, header_len, 1947 (size_t)rp_cmd_args[header_type].len); 1948 mark_source_rp_bad(ms); 1949 goto out; 1950 } 1951 1952 /* We know we've got a valid header by this point */ 1953 res = qemu_get_buffer(rp, buf, header_len); 1954 if (res != header_len) { 1955 error_report("RP: Failed reading data for message 0x%04x" 1956 " read %d expected %d", 1957 header_type, res, header_len); 1958 mark_source_rp_bad(ms); 1959 goto out; 1960 } 1961 1962 /* OK, we have the message and the data */ 1963 switch (header_type) { 1964 case MIG_RP_MSG_SHUT: 1965 sibling_error = ldl_be_p(buf); 1966 trace_source_return_path_thread_shut(sibling_error); 1967 if (sibling_error) { 1968 error_report("RP: Sibling indicated error %d", sibling_error); 1969 mark_source_rp_bad(ms); 1970 } 1971 /* 1972 * We'll let the main thread deal with closing the RP 1973 * we could do a shutdown(2) on it, but we're the only user 1974 * anyway, so there's nothing gained. 1975 */ 1976 goto out; 1977 1978 case MIG_RP_MSG_PONG: 1979 tmp32 = ldl_be_p(buf); 1980 trace_source_return_path_thread_pong(tmp32); 1981 qemu_sem_post(&ms->rp_state.rp_pong_acks); 1982 break; 1983 1984 case MIG_RP_MSG_REQ_PAGES: 1985 start = ldq_be_p(buf); 1986 len = ldl_be_p(buf + 8); 1987 migrate_handle_rp_req_pages(ms, NULL, start, len); 1988 break; 1989 1990 case MIG_RP_MSG_REQ_PAGES_ID: 1991 expected_len = 12 + 1; /* header + termination */ 1992 1993 if (header_len >= expected_len) { 1994 start = ldq_be_p(buf); 1995 len = ldl_be_p(buf + 8); 1996 /* Now we expect an idstr */ 1997 tmp32 = buf[12]; /* Length of the following idstr */ 1998 buf[13 + tmp32] = '\0'; 1999 expected_len += tmp32; 2000 } 2001 if (header_len != expected_len) { 2002 error_report("RP: Req_Page_id with length %d expecting %zd", 2003 header_len, expected_len); 2004 mark_source_rp_bad(ms); 2005 goto out; 2006 } 2007 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 2008 break; 2009 2010 case MIG_RP_MSG_RECV_BITMAP: 2011 if (header_len < 1) { 2012 error_report("%s: missing block name", __func__); 2013 mark_source_rp_bad(ms); 2014 goto out; 2015 } 2016 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 2017 buf[buf[0] + 1] = '\0'; 2018 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 2019 mark_source_rp_bad(ms); 2020 goto out; 2021 } 2022 break; 2023 2024 case MIG_RP_MSG_RESUME_ACK: 2025 tmp32 = ldl_be_p(buf); 2026 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 2027 mark_source_rp_bad(ms); 2028 goto out; 2029 } 2030 break; 2031 2032 case MIG_RP_MSG_SWITCHOVER_ACK: 2033 ms->switchover_acked = true; 2034 trace_source_return_path_thread_switchover_acked(); 2035 break; 2036 2037 default: 2038 break; 2039 } 2040 } 2041 2042 out: 2043 if (qemu_file_get_error(rp)) { 2044 trace_source_return_path_thread_bad_end(); 2045 mark_source_rp_bad(ms); 2046 } 2047 2048 trace_source_return_path_thread_end(); 2049 rcu_unregister_thread(); 2050 return NULL; 2051 } 2052 2053 static int open_return_path_on_source(MigrationState *ms) 2054 { 2055 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2056 if (!ms->rp_state.from_dst_file) { 2057 return -1; 2058 } 2059 2060 trace_open_return_path_on_source(); 2061 2062 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 2063 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2064 ms->rp_state.rp_thread_created = true; 2065 2066 trace_open_return_path_on_source_continue(); 2067 2068 return 0; 2069 } 2070 2071 static int close_return_path_on_source(MigrationState *ms) 2072 { 2073 int ret; 2074 2075 if (!ms->rp_state.rp_thread_created) { 2076 return 0; 2077 } 2078 2079 trace_migration_return_path_end_before(); 2080 2081 /* 2082 * If this is a normal exit then the destination will send a SHUT 2083 * and the rp_thread will exit, however if there's an error we 2084 * need to cause it to exit. shutdown(2), if we have it, will 2085 * cause it to unblock if it's stuck waiting for the destination. 2086 */ 2087 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { 2088 if (ms->to_dst_file && ms->rp_state.from_dst_file && 2089 qemu_file_get_error(ms->to_dst_file)) { 2090 qemu_file_shutdown(ms->rp_state.from_dst_file); 2091 } 2092 } 2093 2094 trace_await_return_path_close_on_source_joining(); 2095 qemu_thread_join(&ms->rp_state.rp_thread); 2096 ms->rp_state.rp_thread_created = false; 2097 trace_await_return_path_close_on_source_close(); 2098 2099 ret = ms->rp_state.error; 2100 ms->rp_state.error = false; 2101 2102 migration_release_dst_files(ms); 2103 2104 trace_migration_return_path_end_after(ret); 2105 return ret; 2106 } 2107 2108 static inline void 2109 migration_wait_main_channel(MigrationState *ms) 2110 { 2111 /* Wait until one PONG message received */ 2112 qemu_sem_wait(&ms->rp_state.rp_pong_acks); 2113 } 2114 2115 /* 2116 * Switch from normal iteration to postcopy 2117 * Returns non-0 on error 2118 */ 2119 static int postcopy_start(MigrationState *ms, Error **errp) 2120 { 2121 int ret; 2122 QIOChannelBuffer *bioc; 2123 QEMUFile *fb; 2124 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2125 uint64_t bandwidth = migrate_max_postcopy_bandwidth(); 2126 bool restart_block = false; 2127 int cur_state = MIGRATION_STATUS_ACTIVE; 2128 2129 if (migrate_postcopy_preempt()) { 2130 migration_wait_main_channel(ms); 2131 if (postcopy_preempt_establish_channel(ms)) { 2132 migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); 2133 return -1; 2134 } 2135 } 2136 2137 if (!migrate_pause_before_switchover()) { 2138 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 2139 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2140 } 2141 2142 trace_postcopy_start(); 2143 qemu_mutex_lock_iothread(); 2144 trace_postcopy_start_set_run(); 2145 2146 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2147 global_state_store(); 2148 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2149 if (ret < 0) { 2150 goto fail; 2151 } 2152 2153 ret = migration_maybe_pause(ms, &cur_state, 2154 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2155 if (ret < 0) { 2156 goto fail; 2157 } 2158 2159 ret = bdrv_inactivate_all(); 2160 if (ret < 0) { 2161 goto fail; 2162 } 2163 restart_block = true; 2164 2165 /* 2166 * Cause any non-postcopiable, but iterative devices to 2167 * send out their final data. 2168 */ 2169 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 2170 2171 /* 2172 * in Finish migrate and with the io-lock held everything should 2173 * be quiet, but we've potentially still got dirty pages and we 2174 * need to tell the destination to throw any pages it's already received 2175 * that are dirty 2176 */ 2177 if (migrate_postcopy_ram()) { 2178 ram_postcopy_send_discard_bitmap(ms); 2179 } 2180 2181 /* 2182 * send rest of state - note things that are doing postcopy 2183 * will notice we're in POSTCOPY_ACTIVE and not actually 2184 * wrap their state up here 2185 */ 2186 migration_rate_set(bandwidth); 2187 if (migrate_postcopy_ram()) { 2188 /* Ping just for debugging, helps line traces up */ 2189 qemu_savevm_send_ping(ms->to_dst_file, 2); 2190 } 2191 2192 /* 2193 * While loading the device state we may trigger page transfer 2194 * requests and the fd must be free to process those, and thus 2195 * the destination must read the whole device state off the fd before 2196 * it starts processing it. Unfortunately the ad-hoc migration format 2197 * doesn't allow the destination to know the size to read without fully 2198 * parsing it through each devices load-state code (especially the open 2199 * coded devices that use get/put). 2200 * So we wrap the device state up in a package with a length at the start; 2201 * to do this we use a qemu_buf to hold the whole of the device state. 2202 */ 2203 bioc = qio_channel_buffer_new(4096); 2204 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 2205 fb = qemu_file_new_output(QIO_CHANNEL(bioc)); 2206 object_unref(OBJECT(bioc)); 2207 2208 /* 2209 * Make sure the receiver can get incoming pages before we send the rest 2210 * of the state 2211 */ 2212 qemu_savevm_send_postcopy_listen(fb); 2213 2214 qemu_savevm_state_complete_precopy(fb, false, false); 2215 if (migrate_postcopy_ram()) { 2216 qemu_savevm_send_ping(fb, 3); 2217 } 2218 2219 qemu_savevm_send_postcopy_run(fb); 2220 2221 /* <><> end of stuff going into the package */ 2222 2223 /* Last point of recovery; as soon as we send the package the destination 2224 * can open devices and potentially start running. 2225 * Lets just check again we've not got any errors. 2226 */ 2227 ret = qemu_file_get_error(ms->to_dst_file); 2228 if (ret) { 2229 error_setg(errp, "postcopy_start: Migration stream errored (pre package)"); 2230 goto fail_closefb; 2231 } 2232 2233 restart_block = false; 2234 2235 /* Now send that blob */ 2236 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 2237 goto fail_closefb; 2238 } 2239 qemu_fclose(fb); 2240 2241 /* Send a notify to give a chance for anything that needs to happen 2242 * at the transition to postcopy and after the device state; in particular 2243 * spice needs to trigger a transition now 2244 */ 2245 ms->postcopy_after_devices = true; 2246 migration_call_notifiers(ms); 2247 2248 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 2249 2250 qemu_mutex_unlock_iothread(); 2251 2252 if (migrate_postcopy_ram()) { 2253 /* 2254 * Although this ping is just for debug, it could potentially be 2255 * used for getting a better measurement of downtime at the source. 2256 */ 2257 qemu_savevm_send_ping(ms->to_dst_file, 4); 2258 } 2259 2260 if (migrate_release_ram()) { 2261 ram_postcopy_migrated_memory_release(ms); 2262 } 2263 2264 ret = qemu_file_get_error(ms->to_dst_file); 2265 if (ret) { 2266 error_setg(errp, "postcopy_start: Migration stream errored"); 2267 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2268 MIGRATION_STATUS_FAILED); 2269 } 2270 2271 trace_postcopy_preempt_enabled(migrate_postcopy_preempt()); 2272 2273 return ret; 2274 2275 fail_closefb: 2276 qemu_fclose(fb); 2277 fail: 2278 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2279 MIGRATION_STATUS_FAILED); 2280 if (restart_block) { 2281 /* A failure happened early enough that we know the destination hasn't 2282 * accessed block devices, so we're safe to recover. 2283 */ 2284 Error *local_err = NULL; 2285 2286 bdrv_activate_all(&local_err); 2287 if (local_err) { 2288 error_report_err(local_err); 2289 } 2290 } 2291 qemu_mutex_unlock_iothread(); 2292 return -1; 2293 } 2294 2295 /** 2296 * migration_maybe_pause: Pause if required to by 2297 * migrate_pause_before_switchover called with the iothread locked 2298 * Returns: 0 on success 2299 */ 2300 static int migration_maybe_pause(MigrationState *s, 2301 int *current_active_state, 2302 int new_state) 2303 { 2304 if (!migrate_pause_before_switchover()) { 2305 return 0; 2306 } 2307 2308 /* Since leaving this state is not atomic with posting the semaphore 2309 * it's possible that someone could have issued multiple migrate_continue 2310 * and the semaphore is incorrectly positive at this point; 2311 * the docs say it's undefined to reinit a semaphore that's already 2312 * init'd, so use timedwait to eat up any existing posts. 2313 */ 2314 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 2315 /* This block intentionally left blank */ 2316 } 2317 2318 /* 2319 * If the migration is cancelled when it is in the completion phase, 2320 * the migration state is set to MIGRATION_STATUS_CANCELLING. 2321 * So we don't need to wait a semaphore, otherwise we would always 2322 * wait for the 'pause_sem' semaphore. 2323 */ 2324 if (s->state != MIGRATION_STATUS_CANCELLING) { 2325 qemu_mutex_unlock_iothread(); 2326 migrate_set_state(&s->state, *current_active_state, 2327 MIGRATION_STATUS_PRE_SWITCHOVER); 2328 qemu_sem_wait(&s->pause_sem); 2329 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 2330 new_state); 2331 *current_active_state = new_state; 2332 qemu_mutex_lock_iothread(); 2333 } 2334 2335 return s->state == new_state ? 0 : -EINVAL; 2336 } 2337 2338 static int migration_completion_precopy(MigrationState *s, 2339 int *current_active_state) 2340 { 2341 int ret; 2342 2343 qemu_mutex_lock_iothread(); 2344 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2345 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2346 2347 s->vm_old_state = runstate_get(); 2348 global_state_store(); 2349 2350 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2351 trace_migration_completion_vm_stop(ret); 2352 if (ret < 0) { 2353 goto out_unlock; 2354 } 2355 2356 ret = migration_maybe_pause(s, current_active_state, 2357 MIGRATION_STATUS_DEVICE); 2358 if (ret < 0) { 2359 goto out_unlock; 2360 } 2361 2362 /* 2363 * Inactivate disks except in COLO, and track that we have done so in order 2364 * to remember to reactivate them if migration fails or is cancelled. 2365 */ 2366 s->block_inactive = !migrate_colo(); 2367 migration_rate_set(RATE_LIMIT_DISABLED); 2368 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 2369 s->block_inactive); 2370 out_unlock: 2371 qemu_mutex_unlock_iothread(); 2372 return ret; 2373 } 2374 2375 static void migration_completion_postcopy(MigrationState *s) 2376 { 2377 trace_migration_completion_postcopy_end(); 2378 2379 qemu_mutex_lock_iothread(); 2380 qemu_savevm_state_complete_postcopy(s->to_dst_file); 2381 qemu_mutex_unlock_iothread(); 2382 2383 /* 2384 * Shutdown the postcopy fast path thread. This is only needed when dest 2385 * QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this. 2386 */ 2387 if (migrate_postcopy_preempt() && s->preempt_pre_7_2) { 2388 postcopy_preempt_shutdown_file(s); 2389 } 2390 2391 trace_migration_completion_postcopy_end_after_complete(); 2392 } 2393 2394 static void migration_completion_failed(MigrationState *s, 2395 int current_active_state) 2396 { 2397 if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || 2398 s->state == MIGRATION_STATUS_DEVICE)) { 2399 /* 2400 * If not doing postcopy, vm_start() will be called: let's 2401 * regain control on images. 2402 */ 2403 Error *local_err = NULL; 2404 2405 qemu_mutex_lock_iothread(); 2406 bdrv_activate_all(&local_err); 2407 if (local_err) { 2408 error_report_err(local_err); 2409 } else { 2410 s->block_inactive = false; 2411 } 2412 qemu_mutex_unlock_iothread(); 2413 } 2414 2415 migrate_set_state(&s->state, current_active_state, 2416 MIGRATION_STATUS_FAILED); 2417 } 2418 2419 /** 2420 * migration_completion: Used by migration_thread when there's not much left. 2421 * The caller 'breaks' the loop when this returns. 2422 * 2423 * @s: Current migration state 2424 */ 2425 static void migration_completion(MigrationState *s) 2426 { 2427 int ret = 0; 2428 int current_active_state = s->state; 2429 2430 if (s->state == MIGRATION_STATUS_ACTIVE) { 2431 ret = migration_completion_precopy(s, ¤t_active_state); 2432 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2433 migration_completion_postcopy(s); 2434 } else { 2435 ret = -1; 2436 } 2437 2438 if (ret < 0) { 2439 goto fail; 2440 } 2441 2442 if (close_return_path_on_source(s)) { 2443 goto fail; 2444 } 2445 2446 if (qemu_file_get_error(s->to_dst_file)) { 2447 trace_migration_completion_file_err(); 2448 goto fail; 2449 } 2450 2451 if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { 2452 /* COLO does not support postcopy */ 2453 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, 2454 MIGRATION_STATUS_COLO); 2455 } else { 2456 migrate_set_state(&s->state, current_active_state, 2457 MIGRATION_STATUS_COMPLETED); 2458 } 2459 2460 return; 2461 2462 fail: 2463 migration_completion_failed(s, current_active_state); 2464 } 2465 2466 /** 2467 * bg_migration_completion: Used by bg_migration_thread when after all the 2468 * RAM has been saved. The caller 'breaks' the loop when this returns. 2469 * 2470 * @s: Current migration state 2471 */ 2472 static void bg_migration_completion(MigrationState *s) 2473 { 2474 int current_active_state = s->state; 2475 2476 if (s->state == MIGRATION_STATUS_ACTIVE) { 2477 /* 2478 * By this moment we have RAM content saved into the migration stream. 2479 * The next step is to flush the non-RAM content (device state) 2480 * right after the ram content. The device state has been stored into 2481 * the temporary buffer before RAM saving started. 2482 */ 2483 qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); 2484 qemu_fflush(s->to_dst_file); 2485 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 2486 goto fail; 2487 } 2488 2489 if (qemu_file_get_error(s->to_dst_file)) { 2490 trace_migration_completion_file_err(); 2491 goto fail; 2492 } 2493 2494 migrate_set_state(&s->state, current_active_state, 2495 MIGRATION_STATUS_COMPLETED); 2496 return; 2497 2498 fail: 2499 migrate_set_state(&s->state, current_active_state, 2500 MIGRATION_STATUS_FAILED); 2501 } 2502 2503 typedef enum MigThrError { 2504 /* No error detected */ 2505 MIG_THR_ERR_NONE = 0, 2506 /* Detected error, but resumed successfully */ 2507 MIG_THR_ERR_RECOVERED = 1, 2508 /* Detected fatal error, need to exit */ 2509 MIG_THR_ERR_FATAL = 2, 2510 } MigThrError; 2511 2512 static int postcopy_resume_handshake(MigrationState *s) 2513 { 2514 qemu_savevm_send_postcopy_resume(s->to_dst_file); 2515 2516 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 2517 migration_rp_wait(s); 2518 } 2519 2520 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2521 return 0; 2522 } 2523 2524 return -1; 2525 } 2526 2527 /* Return zero if success, or <0 for error */ 2528 static int postcopy_do_resume(MigrationState *s) 2529 { 2530 int ret; 2531 2532 /* 2533 * Call all the resume_prepare() hooks, so that modules can be 2534 * ready for the migration resume. 2535 */ 2536 ret = qemu_savevm_state_resume_prepare(s); 2537 if (ret) { 2538 error_report("%s: resume_prepare() failure detected: %d", 2539 __func__, ret); 2540 return ret; 2541 } 2542 2543 /* 2544 * If preempt is enabled, re-establish the preempt channel. Note that 2545 * we do it after resume prepare to make sure the main channel will be 2546 * created before the preempt channel. E.g. with weak network, the 2547 * dest QEMU may get messed up with the preempt and main channels on 2548 * the order of connection setup. This guarantees the correct order. 2549 */ 2550 ret = postcopy_preempt_establish_channel(s); 2551 if (ret) { 2552 error_report("%s: postcopy_preempt_establish_channel(): %d", 2553 __func__, ret); 2554 return ret; 2555 } 2556 2557 /* 2558 * Last handshake with destination on the resume (destination will 2559 * switch to postcopy-active afterwards) 2560 */ 2561 ret = postcopy_resume_handshake(s); 2562 if (ret) { 2563 error_report("%s: handshake failed: %d", __func__, ret); 2564 return ret; 2565 } 2566 2567 return 0; 2568 } 2569 2570 /* 2571 * We don't return until we are in a safe state to continue current 2572 * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or 2573 * MIG_THR_ERR_FATAL if unrecovery failure happened. 2574 */ 2575 static MigThrError postcopy_pause(MigrationState *s) 2576 { 2577 assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 2578 2579 while (true) { 2580 QEMUFile *file; 2581 2582 /* 2583 * Current channel is possibly broken. Release it. Note that this is 2584 * guaranteed even without lock because to_dst_file should only be 2585 * modified by the migration thread. That also guarantees that the 2586 * unregister of yank is safe too without the lock. It should be safe 2587 * even to be within the qemu_file_lock, but we didn't do that to avoid 2588 * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make 2589 * the qemu_file_lock critical section as small as possible. 2590 */ 2591 assert(s->to_dst_file); 2592 migration_ioc_unregister_yank_from_file(s->to_dst_file); 2593 qemu_mutex_lock(&s->qemu_file_lock); 2594 file = s->to_dst_file; 2595 s->to_dst_file = NULL; 2596 qemu_mutex_unlock(&s->qemu_file_lock); 2597 2598 qemu_file_shutdown(file); 2599 qemu_fclose(file); 2600 2601 /* 2602 * We're already pausing, so ignore any errors on the return 2603 * path and just wait for the thread to finish. It will be 2604 * re-created when we resume. 2605 */ 2606 close_return_path_on_source(s); 2607 2608 migrate_set_state(&s->state, s->state, 2609 MIGRATION_STATUS_POSTCOPY_PAUSED); 2610 2611 error_report("Detected IO failure for postcopy. " 2612 "Migration paused."); 2613 2614 /* 2615 * We wait until things fixed up. Then someone will setup the 2616 * status back for us. 2617 */ 2618 while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 2619 qemu_sem_wait(&s->postcopy_pause_sem); 2620 } 2621 2622 if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 2623 /* Woken up by a recover procedure. Give it a shot */ 2624 2625 /* Do the resume logic */ 2626 if (postcopy_do_resume(s) == 0) { 2627 /* Let's continue! */ 2628 trace_postcopy_pause_continued(); 2629 return MIG_THR_ERR_RECOVERED; 2630 } else { 2631 /* 2632 * Something wrong happened during the recovery, let's 2633 * pause again. Pause is always better than throwing 2634 * data away. 2635 */ 2636 continue; 2637 } 2638 } else { 2639 /* This is not right... Time to quit. */ 2640 return MIG_THR_ERR_FATAL; 2641 } 2642 } 2643 } 2644 2645 static MigThrError migration_detect_error(MigrationState *s) 2646 { 2647 int ret; 2648 int state = s->state; 2649 Error *local_error = NULL; 2650 2651 if (state == MIGRATION_STATUS_CANCELLING || 2652 state == MIGRATION_STATUS_CANCELLED) { 2653 /* End the migration, but don't set the state to failed */ 2654 return MIG_THR_ERR_FATAL; 2655 } 2656 2657 /* 2658 * Try to detect any file errors. Note that postcopy_qemufile_src will 2659 * be NULL when postcopy preempt is not enabled. 2660 */ 2661 ret = qemu_file_get_error_obj_any(s->to_dst_file, 2662 s->postcopy_qemufile_src, 2663 &local_error); 2664 if (!ret) { 2665 /* Everything is fine */ 2666 assert(!local_error); 2667 return MIG_THR_ERR_NONE; 2668 } 2669 2670 if (local_error) { 2671 migrate_set_error(s, local_error); 2672 error_free(local_error); 2673 } 2674 2675 if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) { 2676 /* 2677 * For postcopy, we allow the network to be down for a 2678 * while. After that, it can be continued by a 2679 * recovery phase. 2680 */ 2681 return postcopy_pause(s); 2682 } else { 2683 /* 2684 * For precopy (or postcopy with error outside IO), we fail 2685 * with no time. 2686 */ 2687 migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); 2688 trace_migration_thread_file_err(); 2689 2690 /* Time to stop the migration, now. */ 2691 return MIG_THR_ERR_FATAL; 2692 } 2693 } 2694 2695 static void migration_calculate_complete(MigrationState *s) 2696 { 2697 uint64_t bytes = migration_transferred_bytes(s->to_dst_file); 2698 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2699 int64_t transfer_time; 2700 2701 s->total_time = end_time - s->start_time; 2702 if (!s->downtime) { 2703 /* 2704 * It's still not set, so we are precopy migration. For 2705 * postcopy, downtime is calculated during postcopy_start(). 2706 */ 2707 s->downtime = end_time - s->downtime_start; 2708 } 2709 2710 transfer_time = s->total_time - s->setup_time; 2711 if (transfer_time) { 2712 s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; 2713 } 2714 } 2715 2716 static void update_iteration_initial_status(MigrationState *s) 2717 { 2718 /* 2719 * Update these three fields at the same time to avoid mismatch info lead 2720 * wrong speed calculation. 2721 */ 2722 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2723 s->iteration_initial_bytes = migration_transferred_bytes(s->to_dst_file); 2724 s->iteration_initial_pages = ram_get_total_transferred_pages(); 2725 } 2726 2727 static void migration_update_counters(MigrationState *s, 2728 int64_t current_time) 2729 { 2730 uint64_t transferred, transferred_pages, time_spent; 2731 uint64_t current_bytes; /* bytes transferred since the beginning */ 2732 uint64_t switchover_bw; 2733 /* Expected bandwidth when switching over to destination QEMU */ 2734 double expected_bw_per_ms; 2735 double bandwidth; 2736 2737 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 2738 return; 2739 } 2740 2741 switchover_bw = migrate_avail_switchover_bandwidth(); 2742 current_bytes = migration_transferred_bytes(s->to_dst_file); 2743 transferred = current_bytes - s->iteration_initial_bytes; 2744 time_spent = current_time - s->iteration_start_time; 2745 bandwidth = (double)transferred / time_spent; 2746 2747 if (switchover_bw) { 2748 /* 2749 * If the user specified a switchover bandwidth, let's trust the 2750 * user so that can be more accurate than what we estimated. 2751 */ 2752 expected_bw_per_ms = switchover_bw / 1000; 2753 } else { 2754 /* If the user doesn't specify bandwidth, we use the estimated */ 2755 expected_bw_per_ms = bandwidth; 2756 } 2757 2758 s->threshold_size = expected_bw_per_ms * migrate_downtime_limit(); 2759 2760 s->mbps = (((double) transferred * 8.0) / 2761 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 2762 2763 transferred_pages = ram_get_total_transferred_pages() - 2764 s->iteration_initial_pages; 2765 s->pages_per_second = (double) transferred_pages / 2766 (((double) time_spent / 1000.0)); 2767 2768 /* 2769 * if we haven't sent anything, we don't want to 2770 * recalculate. 10000 is a small enough number for our purposes 2771 */ 2772 if (stat64_get(&mig_stats.dirty_pages_rate) && 2773 transferred > 10000) { 2774 s->expected_downtime = 2775 stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms; 2776 } 2777 2778 migration_rate_reset(s->to_dst_file); 2779 2780 update_iteration_initial_status(s); 2781 2782 trace_migrate_transferred(transferred, time_spent, 2783 /* Both in unit bytes/ms */ 2784 bandwidth, switchover_bw / 1000, 2785 s->threshold_size); 2786 } 2787 2788 static bool migration_can_switchover(MigrationState *s) 2789 { 2790 if (!migrate_switchover_ack()) { 2791 return true; 2792 } 2793 2794 /* No reason to wait for switchover ACK if VM is stopped */ 2795 if (!runstate_is_running()) { 2796 return true; 2797 } 2798 2799 return s->switchover_acked; 2800 } 2801 2802 /* Migration thread iteration status */ 2803 typedef enum { 2804 MIG_ITERATE_RESUME, /* Resume current iteration */ 2805 MIG_ITERATE_SKIP, /* Skip current iteration */ 2806 MIG_ITERATE_BREAK, /* Break the loop */ 2807 } MigIterateState; 2808 2809 /* 2810 * Return true if continue to the next iteration directly, false 2811 * otherwise. 2812 */ 2813 static MigIterateState migration_iteration_run(MigrationState *s) 2814 { 2815 uint64_t must_precopy, can_postcopy; 2816 Error *local_err = NULL; 2817 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 2818 bool can_switchover = migration_can_switchover(s); 2819 2820 qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); 2821 uint64_t pending_size = must_precopy + can_postcopy; 2822 2823 trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy); 2824 2825 if (must_precopy <= s->threshold_size) { 2826 qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy); 2827 pending_size = must_precopy + can_postcopy; 2828 trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); 2829 } 2830 2831 if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { 2832 trace_migration_thread_low_pending(pending_size); 2833 migration_completion(s); 2834 return MIG_ITERATE_BREAK; 2835 } 2836 2837 /* Still a significant amount to transfer */ 2838 if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && 2839 qatomic_read(&s->start_postcopy)) { 2840 if (postcopy_start(s, &local_err)) { 2841 migrate_set_error(s, local_err); 2842 error_report_err(local_err); 2843 } 2844 return MIG_ITERATE_SKIP; 2845 } 2846 2847 /* Just another iteration step */ 2848 qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); 2849 return MIG_ITERATE_RESUME; 2850 } 2851 2852 static void migration_iteration_finish(MigrationState *s) 2853 { 2854 /* If we enabled cpu throttling for auto-converge, turn it off. */ 2855 cpu_throttle_stop(); 2856 2857 qemu_mutex_lock_iothread(); 2858 switch (s->state) { 2859 case MIGRATION_STATUS_COMPLETED: 2860 migration_calculate_complete(s); 2861 runstate_set(RUN_STATE_POSTMIGRATE); 2862 break; 2863 case MIGRATION_STATUS_COLO: 2864 assert(migrate_colo()); 2865 migrate_start_colo_process(s); 2866 s->vm_old_state = RUN_STATE_RUNNING; 2867 /* Fallthrough */ 2868 case MIGRATION_STATUS_FAILED: 2869 case MIGRATION_STATUS_CANCELLED: 2870 case MIGRATION_STATUS_CANCELLING: 2871 if (s->vm_old_state == RUN_STATE_RUNNING) { 2872 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2873 vm_start(); 2874 } 2875 } else { 2876 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 2877 runstate_set(s->vm_old_state); 2878 } 2879 } 2880 break; 2881 2882 default: 2883 /* Should not reach here, but if so, forgive the VM. */ 2884 error_report("%s: Unknown ending state %d", __func__, s->state); 2885 break; 2886 } 2887 migrate_fd_cleanup_schedule(s); 2888 qemu_mutex_unlock_iothread(); 2889 } 2890 2891 static void bg_migration_iteration_finish(MigrationState *s) 2892 { 2893 /* 2894 * Stop tracking RAM writes - un-protect memory, un-register UFFD 2895 * memory ranges, flush kernel wait queues and wake up threads 2896 * waiting for write fault to be resolved. 2897 */ 2898 ram_write_tracking_stop(); 2899 2900 qemu_mutex_lock_iothread(); 2901 switch (s->state) { 2902 case MIGRATION_STATUS_COMPLETED: 2903 migration_calculate_complete(s); 2904 break; 2905 2906 case MIGRATION_STATUS_ACTIVE: 2907 case MIGRATION_STATUS_FAILED: 2908 case MIGRATION_STATUS_CANCELLED: 2909 case MIGRATION_STATUS_CANCELLING: 2910 break; 2911 2912 default: 2913 /* Should not reach here, but if so, forgive the VM. */ 2914 error_report("%s: Unknown ending state %d", __func__, s->state); 2915 break; 2916 } 2917 2918 migrate_fd_cleanup_schedule(s); 2919 qemu_mutex_unlock_iothread(); 2920 } 2921 2922 /* 2923 * Return true if continue to the next iteration directly, false 2924 * otherwise. 2925 */ 2926 static MigIterateState bg_migration_iteration_run(MigrationState *s) 2927 { 2928 int res; 2929 2930 res = qemu_savevm_state_iterate(s->to_dst_file, false); 2931 if (res > 0) { 2932 bg_migration_completion(s); 2933 return MIG_ITERATE_BREAK; 2934 } 2935 2936 return MIG_ITERATE_RESUME; 2937 } 2938 2939 void migration_make_urgent_request(void) 2940 { 2941 qemu_sem_post(&migrate_get_current()->rate_limit_sem); 2942 } 2943 2944 void migration_consume_urgent_request(void) 2945 { 2946 qemu_sem_wait(&migrate_get_current()->rate_limit_sem); 2947 } 2948 2949 /* Returns true if the rate limiting was broken by an urgent request */ 2950 bool migration_rate_limit(void) 2951 { 2952 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2953 MigrationState *s = migrate_get_current(); 2954 2955 bool urgent = false; 2956 migration_update_counters(s, now); 2957 if (migration_rate_exceeded(s->to_dst_file)) { 2958 2959 if (qemu_file_get_error(s->to_dst_file)) { 2960 return false; 2961 } 2962 /* 2963 * Wait for a delay to do rate limiting OR 2964 * something urgent to post the semaphore. 2965 */ 2966 int ms = s->iteration_start_time + BUFFER_DELAY - now; 2967 trace_migration_rate_limit_pre(ms); 2968 if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { 2969 /* 2970 * We were woken by one or more urgent things but 2971 * the timedwait will have consumed one of them. 2972 * The service routine for the urgent wake will dec 2973 * the semaphore itself for each item it consumes, 2974 * so add this one we just eat back. 2975 */ 2976 qemu_sem_post(&s->rate_limit_sem); 2977 urgent = true; 2978 } 2979 trace_migration_rate_limit_post(urgent); 2980 } 2981 return urgent; 2982 } 2983 2984 /* 2985 * if failover devices are present, wait they are completely 2986 * unplugged 2987 */ 2988 2989 static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, 2990 int new_state) 2991 { 2992 if (qemu_savevm_state_guest_unplug_pending()) { 2993 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG); 2994 2995 while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && 2996 qemu_savevm_state_guest_unplug_pending()) { 2997 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 2998 } 2999 if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) { 3000 int timeout = 120; /* 30 seconds */ 3001 /* 3002 * migration has been canceled 3003 * but as we have started an unplug we must wait the end 3004 * to be able to plug back the card 3005 */ 3006 while (timeout-- && qemu_savevm_state_guest_unplug_pending()) { 3007 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3008 } 3009 if (qemu_savevm_state_guest_unplug_pending() && 3010 !qtest_enabled()) { 3011 warn_report("migration: partially unplugged device on " 3012 "failure"); 3013 } 3014 } 3015 3016 migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); 3017 } else { 3018 migrate_set_state(&s->state, old_state, new_state); 3019 } 3020 } 3021 3022 /* 3023 * Master migration thread on the source VM. 3024 * It drives the migration and pumps the data down the outgoing channel. 3025 */ 3026 static void *migration_thread(void *opaque) 3027 { 3028 MigrationState *s = opaque; 3029 MigrationThread *thread = NULL; 3030 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3031 MigThrError thr_error; 3032 bool urgent = false; 3033 3034 thread = migration_threads_add("live_migration", qemu_get_thread_id()); 3035 3036 rcu_register_thread(); 3037 3038 object_ref(OBJECT(s)); 3039 update_iteration_initial_status(s); 3040 3041 qemu_mutex_lock_iothread(); 3042 qemu_savevm_state_header(s->to_dst_file); 3043 qemu_mutex_unlock_iothread(); 3044 3045 /* 3046 * If we opened the return path, we need to make sure dst has it 3047 * opened as well. 3048 */ 3049 if (s->rp_state.rp_thread_created) { 3050 /* Now tell the dest that it should open its end so it can reply */ 3051 qemu_savevm_send_open_return_path(s->to_dst_file); 3052 3053 /* And do a ping that will make stuff easier to debug */ 3054 qemu_savevm_send_ping(s->to_dst_file, 1); 3055 } 3056 3057 if (migrate_postcopy()) { 3058 /* 3059 * Tell the destination that we *might* want to do postcopy later; 3060 * if the other end can't do postcopy it should fail now, nice and 3061 * early. 3062 */ 3063 qemu_savevm_send_postcopy_advise(s->to_dst_file); 3064 } 3065 3066 if (migrate_colo()) { 3067 /* Notify migration destination that we enable COLO */ 3068 qemu_savevm_send_colo_enable(s->to_dst_file); 3069 } 3070 3071 qemu_mutex_lock_iothread(); 3072 qemu_savevm_state_setup(s->to_dst_file); 3073 qemu_mutex_unlock_iothread(); 3074 3075 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, 3076 MIGRATION_STATUS_ACTIVE); 3077 3078 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 3079 3080 trace_migration_thread_setup_complete(); 3081 3082 while (migration_is_active(s)) { 3083 if (urgent || !migration_rate_exceeded(s->to_dst_file)) { 3084 MigIterateState iter_state = migration_iteration_run(s); 3085 if (iter_state == MIG_ITERATE_SKIP) { 3086 continue; 3087 } else if (iter_state == MIG_ITERATE_BREAK) { 3088 break; 3089 } 3090 } 3091 3092 /* 3093 * Try to detect any kind of failures, and see whether we 3094 * should stop the migration now. 3095 */ 3096 thr_error = migration_detect_error(s); 3097 if (thr_error == MIG_THR_ERR_FATAL) { 3098 /* Stop migration */ 3099 break; 3100 } else if (thr_error == MIG_THR_ERR_RECOVERED) { 3101 /* 3102 * Just recovered from a e.g. network failure, reset all 3103 * the local variables. This is important to avoid 3104 * breaking transferred_bytes and bandwidth calculation 3105 */ 3106 update_iteration_initial_status(s); 3107 } 3108 3109 urgent = migration_rate_limit(); 3110 } 3111 3112 trace_migration_thread_after_loop(); 3113 migration_iteration_finish(s); 3114 object_unref(OBJECT(s)); 3115 rcu_unregister_thread(); 3116 migration_threads_remove(thread); 3117 return NULL; 3118 } 3119 3120 static void bg_migration_vm_start_bh(void *opaque) 3121 { 3122 MigrationState *s = opaque; 3123 3124 qemu_bh_delete(s->vm_start_bh); 3125 s->vm_start_bh = NULL; 3126 3127 vm_start(); 3128 s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start; 3129 } 3130 3131 /** 3132 * Background snapshot thread, based on live migration code. 3133 * This is an alternative implementation of live migration mechanism 3134 * introduced specifically to support background snapshots. 3135 * 3136 * It takes advantage of userfault_fd write protection mechanism introduced 3137 * in v5.7 kernel. Compared to existing dirty page logging migration much 3138 * lesser stream traffic is produced resulting in smaller snapshot images, 3139 * simply cause of no page duplicates can get into the stream. 3140 * 3141 * Another key point is that generated vmstate stream reflects machine state 3142 * 'frozen' at the beginning of snapshot creation compared to dirty page logging 3143 * mechanism, which effectively results in that saved snapshot is the state of VM 3144 * at the end of the process. 3145 */ 3146 static void *bg_migration_thread(void *opaque) 3147 { 3148 MigrationState *s = opaque; 3149 int64_t setup_start; 3150 MigThrError thr_error; 3151 QEMUFile *fb; 3152 bool early_fail = true; 3153 3154 rcu_register_thread(); 3155 object_ref(OBJECT(s)); 3156 3157 migration_rate_set(RATE_LIMIT_DISABLED); 3158 3159 setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3160 /* 3161 * We want to save vmstate for the moment when migration has been 3162 * initiated but also we want to save RAM content while VM is running. 3163 * The RAM content should appear first in the vmstate. So, we first 3164 * stash the non-RAM part of the vmstate to the temporary buffer, 3165 * then write RAM part of the vmstate to the migration stream 3166 * with vCPUs running and, finally, write stashed non-RAM part of 3167 * the vmstate from the buffer to the migration stream. 3168 */ 3169 s->bioc = qio_channel_buffer_new(512 * 1024); 3170 qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer"); 3171 fb = qemu_file_new_output(QIO_CHANNEL(s->bioc)); 3172 object_unref(OBJECT(s->bioc)); 3173 3174 update_iteration_initial_status(s); 3175 3176 /* 3177 * Prepare for tracking memory writes with UFFD-WP - populate 3178 * RAM pages before protecting. 3179 */ 3180 #ifdef __linux__ 3181 ram_write_tracking_prepare(); 3182 #endif 3183 3184 qemu_mutex_lock_iothread(); 3185 qemu_savevm_state_header(s->to_dst_file); 3186 qemu_savevm_state_setup(s->to_dst_file); 3187 qemu_mutex_unlock_iothread(); 3188 3189 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, 3190 MIGRATION_STATUS_ACTIVE); 3191 3192 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 3193 3194 trace_migration_thread_setup_complete(); 3195 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3196 3197 qemu_mutex_lock_iothread(); 3198 3199 /* 3200 * If VM is currently in suspended state, then, to make a valid runstate 3201 * transition in vm_stop_force_state() we need to wakeup it up. 3202 */ 3203 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3204 s->vm_old_state = runstate_get(); 3205 3206 global_state_store(); 3207 /* Forcibly stop VM before saving state of vCPUs and devices */ 3208 if (vm_stop_force_state(RUN_STATE_PAUSED)) { 3209 goto fail; 3210 } 3211 /* 3212 * Put vCPUs in sync with shadow context structures, then 3213 * save their state to channel-buffer along with devices. 3214 */ 3215 cpu_synchronize_all_states(); 3216 if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) { 3217 goto fail; 3218 } 3219 /* 3220 * Since we are going to get non-iterable state data directly 3221 * from s->bioc->data, explicit flush is needed here. 3222 */ 3223 qemu_fflush(fb); 3224 3225 /* Now initialize UFFD context and start tracking RAM writes */ 3226 if (ram_write_tracking_start()) { 3227 goto fail; 3228 } 3229 early_fail = false; 3230 3231 /* 3232 * Start VM from BH handler to avoid write-fault lock here. 3233 * UFFD-WP protection for the whole RAM is already enabled so 3234 * calling VM state change notifiers from vm_start() would initiate 3235 * writes to virtio VQs memory which is in write-protected region. 3236 */ 3237 s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); 3238 qemu_bh_schedule(s->vm_start_bh); 3239 3240 qemu_mutex_unlock_iothread(); 3241 3242 while (migration_is_active(s)) { 3243 MigIterateState iter_state = bg_migration_iteration_run(s); 3244 if (iter_state == MIG_ITERATE_SKIP) { 3245 continue; 3246 } else if (iter_state == MIG_ITERATE_BREAK) { 3247 break; 3248 } 3249 3250 /* 3251 * Try to detect any kind of failures, and see whether we 3252 * should stop the migration now. 3253 */ 3254 thr_error = migration_detect_error(s); 3255 if (thr_error == MIG_THR_ERR_FATAL) { 3256 /* Stop migration */ 3257 break; 3258 } 3259 3260 migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); 3261 } 3262 3263 trace_migration_thread_after_loop(); 3264 3265 fail: 3266 if (early_fail) { 3267 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, 3268 MIGRATION_STATUS_FAILED); 3269 qemu_mutex_unlock_iothread(); 3270 } 3271 3272 bg_migration_iteration_finish(s); 3273 3274 qemu_fclose(fb); 3275 object_unref(OBJECT(s)); 3276 rcu_unregister_thread(); 3277 3278 return NULL; 3279 } 3280 3281 void migrate_fd_connect(MigrationState *s, Error *error_in) 3282 { 3283 Error *local_err = NULL; 3284 uint64_t rate_limit; 3285 bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; 3286 3287 /* 3288 * If there's a previous error, free it and prepare for another one. 3289 * Meanwhile if migration completes successfully, there won't have an error 3290 * dumped when calling migrate_fd_cleanup(). 3291 */ 3292 migrate_error_free(s); 3293 3294 s->expected_downtime = migrate_downtime_limit(); 3295 if (resume) { 3296 assert(s->cleanup_bh); 3297 } else { 3298 assert(!s->cleanup_bh); 3299 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); 3300 } 3301 if (error_in) { 3302 migrate_fd_error(s, error_in); 3303 if (resume) { 3304 /* 3305 * Don't do cleanup for resume if channel is invalid, but only dump 3306 * the error. We wait for another channel connect from the user. 3307 * The error_report still gives HMP user a hint on what failed. 3308 * It's normally done in migrate_fd_cleanup(), but call it here 3309 * explicitly. 3310 */ 3311 error_report_err(error_copy(s->error)); 3312 } else { 3313 migrate_fd_cleanup(s); 3314 } 3315 return; 3316 } 3317 3318 if (resume) { 3319 /* This is a resumed migration */ 3320 rate_limit = migrate_max_postcopy_bandwidth(); 3321 } else { 3322 /* This is a fresh new migration */ 3323 rate_limit = migrate_max_bandwidth(); 3324 3325 /* Notify before starting migration thread */ 3326 migration_call_notifiers(s); 3327 } 3328 3329 migration_rate_set(rate_limit); 3330 qemu_file_set_blocking(s->to_dst_file, true); 3331 3332 /* 3333 * Open the return path. For postcopy, it is used exclusively. For 3334 * precopy, only if user specified "return-path" capability would 3335 * QEMU uses the return path. 3336 */ 3337 if (migrate_postcopy_ram() || migrate_return_path()) { 3338 if (open_return_path_on_source(s)) { 3339 error_setg(&local_err, "Unable to open return-path for postcopy"); 3340 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); 3341 migrate_set_error(s, local_err); 3342 error_report_err(local_err); 3343 migrate_fd_cleanup(s); 3344 return; 3345 } 3346 } 3347 3348 /* 3349 * This needs to be done before resuming a postcopy. Note: for newer 3350 * QEMUs we will delay the channel creation until postcopy_start(), to 3351 * avoid disorder of channel creations. 3352 */ 3353 if (migrate_postcopy_preempt() && s->preempt_pre_7_2) { 3354 postcopy_preempt_setup(s); 3355 } 3356 3357 if (resume) { 3358 /* Wakeup the main migration thread to do the recovery */ 3359 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 3360 MIGRATION_STATUS_POSTCOPY_RECOVER); 3361 qemu_sem_post(&s->postcopy_pause_sem); 3362 return; 3363 } 3364 3365 if (multifd_save_setup(&local_err) != 0) { 3366 migrate_set_error(s, local_err); 3367 error_report_err(local_err); 3368 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3369 MIGRATION_STATUS_FAILED); 3370 migrate_fd_cleanup(s); 3371 return; 3372 } 3373 3374 if (migrate_background_snapshot()) { 3375 qemu_thread_create(&s->thread, "bg_snapshot", 3376 bg_migration_thread, s, QEMU_THREAD_JOINABLE); 3377 } else { 3378 qemu_thread_create(&s->thread, "live_migration", 3379 migration_thread, s, QEMU_THREAD_JOINABLE); 3380 } 3381 s->migration_thread_running = true; 3382 } 3383 3384 static void migration_class_init(ObjectClass *klass, void *data) 3385 { 3386 DeviceClass *dc = DEVICE_CLASS(klass); 3387 3388 dc->user_creatable = false; 3389 device_class_set_props(dc, migration_properties); 3390 } 3391 3392 static void migration_instance_finalize(Object *obj) 3393 { 3394 MigrationState *ms = MIGRATION_OBJ(obj); 3395 3396 qemu_mutex_destroy(&ms->error_mutex); 3397 qemu_mutex_destroy(&ms->qemu_file_lock); 3398 qemu_sem_destroy(&ms->wait_unplug_sem); 3399 qemu_sem_destroy(&ms->rate_limit_sem); 3400 qemu_sem_destroy(&ms->pause_sem); 3401 qemu_sem_destroy(&ms->postcopy_pause_sem); 3402 qemu_sem_destroy(&ms->rp_state.rp_sem); 3403 qemu_sem_destroy(&ms->rp_state.rp_pong_acks); 3404 qemu_sem_destroy(&ms->postcopy_qemufile_src_sem); 3405 error_free(ms->error); 3406 } 3407 3408 static void migration_instance_init(Object *obj) 3409 { 3410 MigrationState *ms = MIGRATION_OBJ(obj); 3411 3412 ms->state = MIGRATION_STATUS_NONE; 3413 ms->mbps = -1; 3414 ms->pages_per_second = -1; 3415 qemu_sem_init(&ms->pause_sem, 0); 3416 qemu_mutex_init(&ms->error_mutex); 3417 3418 migrate_params_init(&ms->parameters); 3419 3420 qemu_sem_init(&ms->postcopy_pause_sem, 0); 3421 qemu_sem_init(&ms->rp_state.rp_sem, 0); 3422 qemu_sem_init(&ms->rp_state.rp_pong_acks, 0); 3423 qemu_sem_init(&ms->rate_limit_sem, 0); 3424 qemu_sem_init(&ms->wait_unplug_sem, 0); 3425 qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0); 3426 qemu_mutex_init(&ms->qemu_file_lock); 3427 } 3428 3429 /* 3430 * Return true if check pass, false otherwise. Error will be put 3431 * inside errp if provided. 3432 */ 3433 static bool migration_object_check(MigrationState *ms, Error **errp) 3434 { 3435 /* Assuming all off */ 3436 bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; 3437 3438 if (!migrate_params_check(&ms->parameters, errp)) { 3439 return false; 3440 } 3441 3442 return migrate_caps_check(old_caps, ms->capabilities, errp); 3443 } 3444 3445 static const TypeInfo migration_type = { 3446 .name = TYPE_MIGRATION, 3447 /* 3448 * NOTE: TYPE_MIGRATION is not really a device, as the object is 3449 * not created using qdev_new(), it is not attached to the qdev 3450 * device tree, and it is never realized. 3451 * 3452 * TODO: Make this TYPE_OBJECT once QOM provides something like 3453 * TYPE_DEVICE's "-global" properties. 3454 */ 3455 .parent = TYPE_DEVICE, 3456 .class_init = migration_class_init, 3457 .class_size = sizeof(MigrationClass), 3458 .instance_size = sizeof(MigrationState), 3459 .instance_init = migration_instance_init, 3460 .instance_finalize = migration_instance_finalize, 3461 }; 3462 3463 static void register_migration_types(void) 3464 { 3465 type_register_static(&migration_type); 3466 } 3467 3468 type_init(register_migration_types); 3469