/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "ui/qemu-spice.h"

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
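/*
 * Illustrative arithmetic (a comment sketch, not used by the code): the
 * rate limiter hands out bandwidth in BUFFER_DELAY-sized windows, so a
 * limit of B bytes/second becomes B / XFER_LIMIT_RATIO bytes per 100 ms
 * window.  With the MAX_THROTTLE default of 128 MiB/s that is:
 *
 *   (128 << 20) / (1000 / 100) == 13421772 bytes (~12.8 MiB) per window
 */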
/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: no compression, 1: best speed, ... 9: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: no compression, 1: best speed, ... 20: best compression ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};

/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                       /* Capability set size */
    MigrationCapability caps[];     /* Variadic array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {                      \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ }                   \
    }
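/*
 * Expansion sketch (illustrative only): the sizeof trick above counts the
 * variadic arguments, so
 *
 *   INITIALIZE_MIGRATE_CAPS_SET(example_set,
 *                               MIGRATION_CAPABILITY_XBZRLE,
 *                               MIGRATION_CAPABILITY_MULTIFD);
 *
 * yields a MigrateCapsSet with .size = 2 and
 * .caps = { MIGRATION_CAPABILITY_XBZRLE, MIGRATION_CAPABILITY_MULTIFD }.
 * "example_set" is a hypothetical name used only in this sketch.
 */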
/* Background-snapshot compatibility check list */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID,
    MIGRATION_CAPABILITY_ZERO_COPY_SEND);

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static bool migration_needs_multiple_sockets(void)
{
    return migrate_use_multifd() || migrate_postcopy_preempt();
}

static bool uri_supports_multi_channels(const char *uri)
{
    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
           strstart(uri, "vsock:", NULL);
}

static bool
migration_channels_and_uri_compatible(const char *uri, Error **errp)
{
    if (migration_needs_multiple_sockets() &&
        !uri_supports_multi_channels(uri)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    return true;
}

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);

    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be waiting
     * on a semaphore, so wake it up before migration shutdown.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object has been created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_load_cleanup();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the QEMUFile hit an error during the send */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}
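/*
 * On-the-wire framing produced by migrate_send_rp_message() (a descriptive
 * sketch of the code above, not a normative spec):
 *
 *   +-------------+-------------+---------------------+
 *   | type (be16) | len (be16)  | payload (len bytes) |
 *   +-------------+-------------+---------------------+
 *
 * where "type" is one of the mig_rp_message_type values.
 */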
/*
 * Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RB
 *   len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 255 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We maintain the last ramblock that we requested a page from.  Note
     * that we don't need locking because this function will only be called
     * within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
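/*
 * Payload layouts built above (a descriptive sketch of the code; values in
 * network byte order):
 *
 *   MIG_RP_MSG_REQ_PAGES:    start (be64) | len (be32)
 *   MIG_RP_MSG_REQ_PAGES_ID: start (be64) | len (be32) |
 *                            rbname_len (u8) | rbname (rbname_len bytes)
 *
 * The _ID form is only sent when the requested RAMBlock differs from the
 * previous request's, saving the name bytes on the common same-block path.
 */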
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need the
     * lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
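/*
 * Example URI forms accepted above (illustrative; the hosts, ports and
 * paths are placeholders):
 *
 *   tcp:192.168.0.1:4444    - listen on a TCP socket
 *   unix:/tmp/migrate.sock  - listen on a UNIX domain socket
 *   vsock:3:4444            - listen on a vsock (cid:port)
 *   rdma:192.168.0.1:4444   - RDMA transport (when CONFIG_RDMA is set)
 *   exec:cat /tmp/vm.img    - read the stream from a command's stdout
 *   fd:42                   - use an already-open file descriptor
 */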
static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    multifd_load_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
                           colo_process_incoming_thread, mis,
                           QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);

    multifd_load_cleanup();

    exit(EXIT_FAILURE);
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has a standalone thread to do the vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the other
         * threads will still be waiting), so that we can receive commands
         * from the source now, and answer them if needed.  The other
         * threads will be woken up afterwards, once we are sure that the
         * source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

/*
 * Returns true when we want to start a new incoming migration process,
 * false otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_use_multifd()) {
        return migration_has_all_channels();
    }

    /* Preempt channel only starts when the main channel is created */
    if (migrate_postcopy_preempt()) {
        return main_channel;
    }

    /*
     * For all the other types of migration, we should only reach here when
     * it's the main channel that's being created, and we should always
     * proceed with this channel.
     */
    assert(main_channel);
    return true;
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    bool default_channel = true;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /*
         * With multiple channels, it is possible that we receive channels
         * out of order on the destination side, causing an incorrect mapping
         * of source channels on the destination side.  Check the channel
         * MAGIC to decide the type of channel.  Please note this is best
         * effort: the postcopy preempt channel does not send any magic
         * number, so avoid it for postcopy live migration.  Also, TLS live
         * migration already does a TLS handshake while initializing the
         * main channel, so with TLS this issue is not possible.
         */
        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                          sizeof(channel_magic), &local_err);

        if (ret != 0) {
            error_propagate(errp, local_err);
            return;
        }

        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

    if (multifd_load_setup(errp) != 0) {
        error_setg(errp, "Failed to setup multifd channels");
        return;
    }

    if (default_channel) {
        f = qemu_file_new_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_use_multifd()) {
            multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (migration_should_start_incoming(default_channel)) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}
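/*
 * Illustrative note on the channel peek above (an observation about the
 * code, not a protocol spec): the main migration stream always begins with
 * QEMU_VM_FILE_MAGIC, so peeking at the first four bytes without consuming
 * them is enough to tell the main channel apart; multifd channels begin
 * with multifd's own initial packet instead.
 */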
/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_use_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-0 value indicates
 * an error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part.  It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (the fault thread is still paused),
     * so it's ok even without taking the mutex.  However, the best way
     * is to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}
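/*
 * RECV_BITMAP framing as produced above (a descriptive sketch of the code):
 *
 *   header: idstr_len (u8) | idstr (idstr_len bytes)
 *   body:   the received-pages bitmap, written separately by
 *           ramblock_recv_bitmap_send() on the same return-path stream
 */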
void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL, **tail = &head;
    MigrationCapabilityStatus *caps;
    MigrationState *s = migrate_get_current();
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        caps = g_malloc0(sizeof(*caps));
        caps->capability = i;
        caps->state = s->enabled_capabilities[i];
        QAPI_LIST_APPEND(tail, caps);
    }

    return head;
}

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}

void qmp_client_migrate_info(const char *protocol, const char *hostname,
                             bool has_port, int64_t port,
                             bool has_tls_port, int64_t tls_port,
                             const char *cert_subject,
                             Error **errp)
{
    if (strcmp(protocol, "spice") == 0) {
        if (!qemu_using_spice(errp)) {
            return;
        }

        if (!has_port && !has_tls_port) {
            error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
            return;
        }

        if (qemu_spice.migrate_info(hostname,
                                    has_port ? port : -1,
                                    has_tls_port ? tls_port : -1,
                                    cert_subject)) {
            error_setg(errp, "Could not set up display for migration");
            return;
        }
        return;
    }

    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
}

AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}
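/*
 * A rough sketch of how the announce defaults combine (values from the
 * DEFAULT_MIGRATE_ANNOUNCE_* defines above): the delay before each of the
 * 5 announce rounds grows by announce-step, i.e. approximately
 * 50, 150, 250, 350, 450 ms, and any computed delay is clamped to
 * announce-max (550 ms).
 */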
/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;
    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;
    }
}

static bool migrate_show_downtime(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;

    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
    }

    if (migrate_show_downtime(s)) {
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = stat64_get(&ram_atomic_counters.transferred);
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate);
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = stat64_get(&ram_atomic_counters.normal);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->dirty_sync_missed_zero_copy =
        ram_counters.dirty_sync_missed_zero_copy;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = ram_counters.precopy_bytes;
    info->ram->downtime_bytes = ram_counters.downtime_bytes;
    info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);

    if (migrate_use_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked;
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;
}

typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
    WT_SUPPORT_AVAILABLE,
    WT_SUPPORT_COMPATIBLE
} WriteTrackingSupport;

static
WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* Check if kernel supports required UFFD features */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }
    /*
     * Check if current memory configuration is
     * compatible with required UFFD features.
     */
    if (!ram_write_tracking_compatible()) {
        return WT_SUPPORT_AVAILABLE;
    }

    return WT_SUPPORT_COMPATIBLE;
}
/**
 * @migration_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /* This check is reasonably expensive, so only when it's being
         * set the first time, also it's only the destination that needs
         * special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
        WriteTrackingSupport wt_support;
        int idx;
        /*
         * Check if 'background-snapshot' capability is supported by
         * host kernel and compatible with guest memory configuration.
         */
        wt_support = migrate_query_write_tracking();
        if (wt_support < WT_SUPPORT_AVAILABLE) {
            error_setg(errp,
                       "Background-snapshot is not supported by host kernel");
            return false;
        }
        if (wt_support < WT_SUPPORT_COMPATIBLE) {
            error_setg(errp, "Background-snapshot is not compatible "
                       "with guest memory configuration");
            return false;
        }

        /*
         * Check if there are any migration capabilities
         * incompatible with 'background-snapshot'.
         */
        for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
            int incomp_cap = check_caps_background_snapshot.caps[idx];
            if (cap_list[incomp_cap]) {
                error_setg(errp,
                           "Background-snapshot is not compatible with %s",
                           MigrationCapability_str(incomp_cap));
                return false;
            }
        }
    }

#ifdef CONFIG_LINUX
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
        (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
         cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
         cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
         migrate_multifd_compression() ||
         migrate_use_tls())) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#else
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
        error_setg(errp,
                   "Zero copy currently only available on Linux");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
        if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
            error_setg(errp, "Postcopy preempt requires postcopy-ram");
            return false;
        }

        /*
         * Preempt mode requires urgent pages to be sent in a separate
         * channel; OTOH compression logic will disorder all pages into
         * different compression channels, which is not compatible with the
         * preempt assumptions on channel assignments.
         */
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Postcopy preempt not compatible with compress");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Multifd is not compatible with compress");
            return false;
        }
    }

    return true;
}
static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
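/*
 * Illustrative QMP usage of the command above (not part of this file's
 * code; the capability shown is just an example):
 *
 *   { "execute": "migrate-set-capabilities",
 *     "arguments": { "capabilities": [
 *         { "capability": "postcopy-ram", "state": true } ] } }
 */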
/*
 * Check whether the parameters are valid.  Error will be put into errp
 * (if provided).  Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                   stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "a value between 0 and 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "a power of two no less than the target page size");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "a value between 0 and 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "a value between 1 and 10000");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

#ifdef CONFIG_LINUX
    if (migrate_use_zero_copy_send() &&
        ((params->has_multifd_compression && params->multifd_compression) ||
         (params->tls_creds && *params->tls_creds))) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#endif

    return true;
}
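/*
 * Worked example for the xbzrle_cache_size check above (illustrative
 * values): with a 4 KiB target page size, 1 MiB (0x100000) is accepted
 * because it is a power of two >= 4096; 1000000 is rejected for not being
 * a power of two, and 2048 is rejected for being smaller than the page
 * size.
 */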
static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }

    if (params->tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}
s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; 1834 if (migration_in_colo_state()) { 1835 colo_checkpoint_notify(s); 1836 } 1837 } 1838 1839 if (params->has_block_incremental) { 1840 s->parameters.block_incremental = params->block_incremental; 1841 } 1842 if (params->has_multifd_channels) { 1843 s->parameters.multifd_channels = params->multifd_channels; 1844 } 1845 if (params->has_multifd_compression) { 1846 s->parameters.multifd_compression = params->multifd_compression; 1847 } 1848 if (params->has_xbzrle_cache_size) { 1849 s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; 1850 xbzrle_cache_resize(params->xbzrle_cache_size, errp); 1851 } 1852 if (params->has_max_postcopy_bandwidth) { 1853 s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; 1854 if (s->to_dst_file && migration_in_postcopy()) { 1855 qemu_file_set_rate_limit(s->to_dst_file, 1856 s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); 1857 } 1858 } 1859 if (params->has_max_cpu_throttle) { 1860 s->parameters.max_cpu_throttle = params->max_cpu_throttle; 1861 } 1862 if (params->has_announce_initial) { 1863 s->parameters.announce_initial = params->announce_initial; 1864 } 1865 if (params->has_announce_max) { 1866 s->parameters.announce_max = params->announce_max; 1867 } 1868 if (params->has_announce_rounds) { 1869 s->parameters.announce_rounds = params->announce_rounds; 1870 } 1871 if (params->has_announce_step) { 1872 s->parameters.announce_step = params->announce_step; 1873 } 1874 1875 if (params->has_block_bitmap_mapping) { 1876 qapi_free_BitmapMigrationNodeAliasList( 1877 s->parameters.block_bitmap_mapping); 1878 1879 s->parameters.has_block_bitmap_mapping = true; 1880 s->parameters.block_bitmap_mapping = 1881 QAPI_CLONE(BitmapMigrationNodeAliasList, 1882 params->block_bitmap_mapping); 1883 } 1884 } 1885 1886 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) 1887 { 1888 MigrationParameters tmp; 1889 1890 /* TODO Rewrite "" to null instead */ 1891 if (params->tls_creds 1892 && params->tls_creds->type == QTYPE_QNULL) { 1893 qobject_unref(params->tls_creds->u.n); 1894 params->tls_creds->type = QTYPE_QSTRING; 1895 params->tls_creds->u.s = strdup(""); 1896 } 1897 /* TODO Rewrite "" to null instead */ 1898 if (params->tls_hostname 1899 && params->tls_hostname->type == QTYPE_QNULL) { 1900 qobject_unref(params->tls_hostname->u.n); 1901 params->tls_hostname->type = QTYPE_QSTRING; 1902 params->tls_hostname->u.s = strdup(""); 1903 } 1904 1905 migrate_params_test_apply(params, &tmp); 1906 1907 if (!migrate_params_check(&tmp, errp)) { 1908 /* Invalid parameter */ 1909 return; 1910 } 1911 1912 migrate_params_apply(params, errp); 1913 } 1914 1915 1916 void qmp_migrate_start_postcopy(Error **errp) 1917 { 1918 MigrationState *s = migrate_get_current(); 1919 1920 if (!migrate_postcopy()) { 1921 error_setg(errp, "Enable postcopy with migrate_set_capability before" 1922 " the start of migration"); 1923 return; 1924 } 1925 1926 if (s->state == MIGRATION_STATUS_NONE) { 1927 error_setg(errp, "Postcopy must be started after migration has been" 1928 " started"); 1929 return; 1930 } 1931 /* 1932 * we don't error if migration has finished since that would be racy 1933 * with issuing this command. 
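 *
 * For reference, the matching QMP command takes no arguments
 * (illustrative): { "execute": "migrate-start-postcopy" }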
1934 */ 1935 qatomic_set(&s->start_postcopy, true); 1936 } 1937 1938 /* shared migration helpers */ 1939 1940 void migrate_set_state(int *state, int old_state, int new_state) 1941 { 1942 assert(new_state < MIGRATION_STATUS__MAX); 1943 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { 1944 trace_migrate_set_state(MigrationStatus_str(new_state)); 1945 migrate_generate_event(new_state); 1946 } 1947 } 1948 1949 static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, 1950 bool state) 1951 { 1952 MigrationCapabilityStatus *cap; 1953 1954 cap = g_new0(MigrationCapabilityStatus, 1); 1955 cap->capability = index; 1956 cap->state = state; 1957 1958 return cap; 1959 } 1960 1961 void migrate_set_block_enabled(bool value, Error **errp) 1962 { 1963 MigrationCapabilityStatusList *cap = NULL; 1964 1965 QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); 1966 qmp_migrate_set_capabilities(cap, errp); 1967 qapi_free_MigrationCapabilityStatusList(cap); 1968 } 1969 1970 static void migrate_set_block_incremental(MigrationState *s, bool value) 1971 { 1972 s->parameters.block_incremental = value; 1973 } 1974 1975 static void block_cleanup_parameters(MigrationState *s) 1976 { 1977 if (s->must_remove_block_options) { 1978 /* setting to false can never fail */ 1979 migrate_set_block_enabled(false, &error_abort); 1980 migrate_set_block_incremental(s, false); 1981 s->must_remove_block_options = false; 1982 } 1983 } 1984 1985 static void migrate_fd_cleanup(MigrationState *s) 1986 { 1987 qemu_bh_delete(s->cleanup_bh); 1988 s->cleanup_bh = NULL; 1989 1990 g_free(s->hostname); 1991 s->hostname = NULL; 1992 json_writer_free(s->vmdesc); 1993 s->vmdesc = NULL; 1994 1995 qemu_savevm_state_cleanup(); 1996 1997 if (s->to_dst_file) { 1998 QEMUFile *tmp; 1999 2000 trace_migrate_fd_cleanup(); 2001 qemu_mutex_unlock_iothread(); 2002 if (s->migration_thread_running) { 2003 qemu_thread_join(&s->thread); 2004 s->migration_thread_running = false; 2005 } 2006 qemu_mutex_lock_iothread(); 2007 2008 multifd_save_cleanup(); 2009 qemu_mutex_lock(&s->qemu_file_lock); 2010 tmp = s->to_dst_file; 2011 s->to_dst_file = NULL; 2012 qemu_mutex_unlock(&s->qemu_file_lock); 2013 /* 2014 * Close the file handle without the lock to make sure the 2015 * critical section won't block for long. 2016 */ 2017 migration_ioc_unregister_yank_from_file(tmp); 2018 qemu_fclose(tmp); 2019 } 2020 2021 if (s->postcopy_qemufile_src) { 2022 migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src); 2023 qemu_fclose(s->postcopy_qemufile_src); 2024 s->postcopy_qemufile_src = NULL; 2025 } 2026 2027 assert(!migration_is_active(s)); 2028 2029 if (s->state == MIGRATION_STATUS_CANCELLING) { 2030 migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, 2031 MIGRATION_STATUS_CANCELLED); 2032 } 2033 2034 if (s->error) { 2035 /* It is used on info migrate. 
       We can't free it. */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for the bh, because it may be called when
     * there are already no other refs
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
    MigrationState *s = opaque;
    migrate_fd_cleanup(s);
    object_unref(OBJECT(s));
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (!s->error) {
        s->error = error_copy(error);
    }
}

static void migrate_error_free(MigrationState *s)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        error_free(s->error);
        s->error = NULL;
    }
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}

static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shut down the rp socket, causing the rp thread to exit */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    do {
        old_state = s->state;
        if (!migration_is_running(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup(), which is
     * called in a bh, so there is no race against this cancel.
2117 */ 2118 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 2119 qemu_file_shutdown(f); 2120 } 2121 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 2122 Error *local_err = NULL; 2123 2124 bdrv_activate_all(&local_err); 2125 if (local_err) { 2126 error_report_err(local_err); 2127 } else { 2128 s->block_inactive = false; 2129 } 2130 } 2131 } 2132 2133 void add_migration_state_change_notifier(Notifier *notify) 2134 { 2135 notifier_list_add(&migration_state_notifiers, notify); 2136 } 2137 2138 void remove_migration_state_change_notifier(Notifier *notify) 2139 { 2140 notifier_remove(notify); 2141 } 2142 2143 bool migration_in_setup(MigrationState *s) 2144 { 2145 return s->state == MIGRATION_STATUS_SETUP; 2146 } 2147 2148 bool migration_has_finished(MigrationState *s) 2149 { 2150 return s->state == MIGRATION_STATUS_COMPLETED; 2151 } 2152 2153 bool migration_has_failed(MigrationState *s) 2154 { 2155 return (s->state == MIGRATION_STATUS_CANCELLED || 2156 s->state == MIGRATION_STATUS_FAILED); 2157 } 2158 2159 bool migration_in_postcopy(void) 2160 { 2161 MigrationState *s = migrate_get_current(); 2162 2163 switch (s->state) { 2164 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 2165 case MIGRATION_STATUS_POSTCOPY_PAUSED: 2166 case MIGRATION_STATUS_POSTCOPY_RECOVER: 2167 return true; 2168 default: 2169 return false; 2170 } 2171 } 2172 2173 bool migration_in_postcopy_after_devices(MigrationState *s) 2174 { 2175 return migration_in_postcopy() && s->postcopy_after_devices; 2176 } 2177 2178 bool migration_in_incoming_postcopy(void) 2179 { 2180 PostcopyState ps = postcopy_state_get(); 2181 2182 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 2183 } 2184 2185 bool migration_incoming_postcopy_advised(void) 2186 { 2187 PostcopyState ps = postcopy_state_get(); 2188 2189 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; 2190 } 2191 2192 bool migration_in_bg_snapshot(void) 2193 { 2194 MigrationState *s = migrate_get_current(); 2195 2196 return migrate_background_snapshot() && 2197 migration_is_setup_or_active(s->state); 2198 } 2199 2200 bool migration_is_idle(void) 2201 { 2202 MigrationState *s = current_migration; 2203 2204 if (!s) { 2205 return true; 2206 } 2207 2208 switch (s->state) { 2209 case MIGRATION_STATUS_NONE: 2210 case MIGRATION_STATUS_CANCELLED: 2211 case MIGRATION_STATUS_COMPLETED: 2212 case MIGRATION_STATUS_FAILED: 2213 return true; 2214 case MIGRATION_STATUS_SETUP: 2215 case MIGRATION_STATUS_CANCELLING: 2216 case MIGRATION_STATUS_ACTIVE: 2217 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 2218 case MIGRATION_STATUS_COLO: 2219 case MIGRATION_STATUS_PRE_SWITCHOVER: 2220 case MIGRATION_STATUS_DEVICE: 2221 case MIGRATION_STATUS_WAIT_UNPLUG: 2222 return false; 2223 case MIGRATION_STATUS__MAX: 2224 g_assert_not_reached(); 2225 } 2226 2227 return false; 2228 } 2229 2230 bool migration_is_active(MigrationState *s) 2231 { 2232 return (s->state == MIGRATION_STATUS_ACTIVE || 2233 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 2234 } 2235 2236 void migrate_init(MigrationState *s) 2237 { 2238 /* 2239 * Reinitialise all migration state, except 2240 * parameters/capabilities that the user set, and 2241 * locks. 
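 * On return the status has been moved NONE -> SETUP via the
 * migrate_set_state() call below, which also emits a MIGRATION QMP event
 * when the 'events' capability is enabled (see migrate_generate_event()).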
2242 */ 2243 s->cleanup_bh = 0; 2244 s->vm_start_bh = 0; 2245 s->to_dst_file = NULL; 2246 s->state = MIGRATION_STATUS_NONE; 2247 s->rp_state.from_dst_file = NULL; 2248 s->rp_state.error = false; 2249 s->mbps = 0.0; 2250 s->pages_per_second = 0.0; 2251 s->downtime = 0; 2252 s->expected_downtime = 0; 2253 s->setup_time = 0; 2254 s->start_postcopy = false; 2255 s->postcopy_after_devices = false; 2256 s->migration_thread_running = false; 2257 error_free(s->error); 2258 s->error = NULL; 2259 s->hostname = NULL; 2260 2261 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 2262 2263 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2264 s->total_time = 0; 2265 s->vm_was_running = false; 2266 s->iteration_initial_bytes = 0; 2267 s->threshold_size = 0; 2268 } 2269 2270 int migrate_add_blocker_internal(Error *reason, Error **errp) 2271 { 2272 /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ 2273 if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { 2274 error_propagate_prepend(errp, error_copy(reason), 2275 "disallowing migration blocker " 2276 "(migration/snapshot in progress) for: "); 2277 return -EBUSY; 2278 } 2279 2280 migration_blockers = g_slist_prepend(migration_blockers, reason); 2281 return 0; 2282 } 2283 2284 int migrate_add_blocker(Error *reason, Error **errp) 2285 { 2286 if (only_migratable) { 2287 error_propagate_prepend(errp, error_copy(reason), 2288 "disallowing migration blocker " 2289 "(--only-migratable) for: "); 2290 return -EACCES; 2291 } 2292 2293 return migrate_add_blocker_internal(reason, errp); 2294 } 2295 2296 void migrate_del_blocker(Error *reason) 2297 { 2298 migration_blockers = g_slist_remove(migration_blockers, reason); 2299 } 2300 2301 void qmp_migrate_incoming(const char *uri, Error **errp) 2302 { 2303 Error *local_err = NULL; 2304 static bool once = true; 2305 2306 if (!once) { 2307 error_setg(errp, "The incoming migration has already been started"); 2308 return; 2309 } 2310 if (!runstate_check(RUN_STATE_INMIGRATE)) { 2311 error_setg(errp, "'-incoming' was not specified on the command line"); 2312 return; 2313 } 2314 2315 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2316 return; 2317 } 2318 2319 qemu_start_incoming_migration(uri, &local_err); 2320 2321 if (local_err) { 2322 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2323 error_propagate(errp, local_err); 2324 return; 2325 } 2326 2327 once = false; 2328 } 2329 2330 void qmp_migrate_recover(const char *uri, Error **errp) 2331 { 2332 MigrationIncomingState *mis = migration_incoming_get_current(); 2333 2334 /* 2335 * Don't even bother to use ERRP_GUARD() as it _must_ always be set by 2336 * callers (no one should ignore a recover failure); if there is, it's a 2337 * programming error. 2338 */ 2339 assert(errp); 2340 2341 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2342 error_setg(errp, "Migrate recover can only be run " 2343 "when postcopy is paused."); 2344 return; 2345 } 2346 2347 /* If there's an existing transport, release it */ 2348 migration_incoming_transport_cleanup(mis); 2349 2350 /* 2351 * Note that this call will never start a real migration; it will 2352 * only re-setup the migration stream and poke existing migration 2353 * to continue using that newly established channel. 
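 *
 * Typical QMP usage on the destination side (the URI here is
 * illustrative):
 *   { "execute": "migrate-recover",
 *     "arguments": { "uri": "tcp:0.0.0.0:4444" } }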
 */
    qemu_start_incoming_migration(uri, errp);
}

void qmp_migrate_pause(Error **errp)
{
    MigrationState *ms = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /* Source side, during postcopy */
        qemu_mutex_lock(&ms->qemu_file_lock);
        ret = qemu_file_shutdown(ms->to_dst_file);
        qemu_mutex_unlock(&ms->qemu_file_lock);
        if (ret) {
            error_setg(errp, "Failed to pause source migration");
        }
        return;
    }

    if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        ret = qemu_file_shutdown(mis->from_src_file);
        if (ret) {
            error_setg(errp, "Failed to pause destination migration");
        }
        return;
    }

    error_setg(errp, "migrate-pause is currently only supported "
               "during postcopy-active state");
}

bool migration_is_blocked(Error **errp)
{
    if (qemu_savevm_state_blocked(errp)) {
        return true;
    }

    if (migration_blockers) {
        error_propagate(errp, error_copy(migration_blockers->data));
        return true;
    }

    return false;
}

/* Returns true if we should continue to migrate, or false if an error was
 * detected */
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
                            bool resume, Error **errp)
{
    Error *local_err = NULL;

    if (resume) {
        if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
            error_setg(errp, "Cannot resume if there is no "
                       "paused migration");
            return false;
        }

        /*
         * Postcopy recovery won't work well with release-ram
         * capability since release-ram will drop the page buffer as
         * soon as the page is put into the send buffer.  So if a
         * network failure happens, any page buffers that have
         * not yet reached the destination VM but have already been
         * sent from the source VM will be lost forever.  Let's refuse
         * to let the client resume such a postcopy migration.
         * Luckily release-ram was designed to only be used when src
         * and destination VMs are on the same host, so it should be
         * fine.
2425 */ 2426 if (migrate_release_ram()) { 2427 error_setg(errp, "Postcopy recovery cannot work " 2428 "when release-ram capability is set"); 2429 return false; 2430 } 2431 2432 /* This is a resume, skip init status */ 2433 return true; 2434 } 2435 2436 if (migration_is_running(s->state)) { 2437 error_setg(errp, QERR_MIGRATION_ACTIVE); 2438 return false; 2439 } 2440 2441 if (runstate_check(RUN_STATE_INMIGRATE)) { 2442 error_setg(errp, "Guest is waiting for an incoming migration"); 2443 return false; 2444 } 2445 2446 if (runstate_check(RUN_STATE_POSTMIGRATE)) { 2447 error_setg(errp, "Can't migrate the vm that was paused due to " 2448 "previous migration"); 2449 return false; 2450 } 2451 2452 if (migration_is_blocked(errp)) { 2453 return false; 2454 } 2455 2456 if (blk || blk_inc) { 2457 if (migrate_colo_enabled()) { 2458 error_setg(errp, "No disk migration is required in COLO mode"); 2459 return false; 2460 } 2461 if (migrate_use_block() || migrate_use_block_incremental()) { 2462 error_setg(errp, "Command options are incompatible with " 2463 "current migration capabilities"); 2464 return false; 2465 } 2466 migrate_set_block_enabled(true, &local_err); 2467 if (local_err) { 2468 error_propagate(errp, local_err); 2469 return false; 2470 } 2471 s->must_remove_block_options = true; 2472 } 2473 2474 if (blk_inc) { 2475 migrate_set_block_incremental(s, true); 2476 } 2477 2478 migrate_init(s); 2479 /* 2480 * set ram_counters compression_counters memory to zero for a 2481 * new migration 2482 */ 2483 memset(&ram_counters, 0, sizeof(ram_counters)); 2484 memset(&compression_counters, 0, sizeof(compression_counters)); 2485 2486 return true; 2487 } 2488 2489 void qmp_migrate(const char *uri, bool has_blk, bool blk, 2490 bool has_inc, bool inc, bool has_detach, bool detach, 2491 bool has_resume, bool resume, Error **errp) 2492 { 2493 Error *local_err = NULL; 2494 MigrationState *s = migrate_get_current(); 2495 const char *p = NULL; 2496 2497 /* URI is not suitable for migration? */ 2498 if (!migration_channels_and_uri_compatible(uri, errp)) { 2499 return; 2500 } 2501 2502 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 2503 has_resume && resume, errp)) { 2504 /* Error detected, put into errp */ 2505 return; 2506 } 2507 2508 if (!(has_resume && resume)) { 2509 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2510 return; 2511 } 2512 } 2513 2514 if (strstart(uri, "tcp:", &p) || 2515 strstart(uri, "unix:", NULL) || 2516 strstart(uri, "vsock:", NULL)) { 2517 socket_start_outgoing_migration(s, p ? 
p : uri, &local_err); 2518 #ifdef CONFIG_RDMA 2519 } else if (strstart(uri, "rdma:", &p)) { 2520 rdma_start_outgoing_migration(s, p, &local_err); 2521 #endif 2522 } else if (strstart(uri, "exec:", &p)) { 2523 exec_start_outgoing_migration(s, p, &local_err); 2524 } else if (strstart(uri, "fd:", &p)) { 2525 fd_start_outgoing_migration(s, p, &local_err); 2526 } else { 2527 if (!(has_resume && resume)) { 2528 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2529 } 2530 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 2531 "a valid migration protocol"); 2532 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2533 MIGRATION_STATUS_FAILED); 2534 block_cleanup_parameters(s); 2535 return; 2536 } 2537 2538 if (local_err) { 2539 if (!(has_resume && resume)) { 2540 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2541 } 2542 migrate_fd_error(s, local_err); 2543 error_propagate(errp, local_err); 2544 return; 2545 } 2546 } 2547 2548 void qmp_migrate_cancel(Error **errp) 2549 { 2550 migration_cancel(NULL); 2551 } 2552 2553 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2554 { 2555 MigrationState *s = migrate_get_current(); 2556 if (s->state != state) { 2557 error_setg(errp, "Migration not in expected state: %s", 2558 MigrationStatus_str(s->state)); 2559 return; 2560 } 2561 qemu_sem_post(&s->pause_sem); 2562 } 2563 2564 bool migrate_release_ram(void) 2565 { 2566 MigrationState *s; 2567 2568 s = migrate_get_current(); 2569 2570 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 2571 } 2572 2573 bool migrate_postcopy_ram(void) 2574 { 2575 MigrationState *s; 2576 2577 s = migrate_get_current(); 2578 2579 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 2580 } 2581 2582 bool migrate_postcopy(void) 2583 { 2584 return migrate_postcopy_ram() || migrate_dirty_bitmaps(); 2585 } 2586 2587 bool migrate_auto_converge(void) 2588 { 2589 MigrationState *s; 2590 2591 s = migrate_get_current(); 2592 2593 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 2594 } 2595 2596 bool migrate_zero_blocks(void) 2597 { 2598 MigrationState *s; 2599 2600 s = migrate_get_current(); 2601 2602 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 2603 } 2604 2605 bool migrate_postcopy_blocktime(void) 2606 { 2607 MigrationState *s; 2608 2609 s = migrate_get_current(); 2610 2611 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; 2612 } 2613 2614 bool migrate_use_compression(void) 2615 { 2616 MigrationState *s; 2617 2618 s = migrate_get_current(); 2619 2620 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 2621 } 2622 2623 int migrate_compress_level(void) 2624 { 2625 MigrationState *s; 2626 2627 s = migrate_get_current(); 2628 2629 return s->parameters.compress_level; 2630 } 2631 2632 int migrate_compress_threads(void) 2633 { 2634 MigrationState *s; 2635 2636 s = migrate_get_current(); 2637 2638 return s->parameters.compress_threads; 2639 } 2640 2641 int migrate_compress_wait_thread(void) 2642 { 2643 MigrationState *s; 2644 2645 s = migrate_get_current(); 2646 2647 return s->parameters.compress_wait_thread; 2648 } 2649 2650 int migrate_decompress_threads(void) 2651 { 2652 MigrationState *s; 2653 2654 s = migrate_get_current(); 2655 2656 return s->parameters.decompress_threads; 2657 } 2658 2659 bool migrate_dirty_bitmaps(void) 2660 { 2661 MigrationState *s; 2662 2663 s = migrate_get_current(); 2664 2665 return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; 2666 } 2667 2668 bool migrate_ignore_shared(void) 
2669 { 2670 MigrationState *s; 2671 2672 s = migrate_get_current(); 2673 2674 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; 2675 } 2676 2677 bool migrate_validate_uuid(void) 2678 { 2679 MigrationState *s; 2680 2681 s = migrate_get_current(); 2682 2683 return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; 2684 } 2685 2686 bool migrate_use_events(void) 2687 { 2688 MigrationState *s; 2689 2690 s = migrate_get_current(); 2691 2692 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 2693 } 2694 2695 bool migrate_use_multifd(void) 2696 { 2697 MigrationState *s; 2698 2699 s = migrate_get_current(); 2700 2701 return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; 2702 } 2703 2704 bool migrate_pause_before_switchover(void) 2705 { 2706 MigrationState *s; 2707 2708 s = migrate_get_current(); 2709 2710 return s->enabled_capabilities[ 2711 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 2712 } 2713 2714 int migrate_multifd_channels(void) 2715 { 2716 MigrationState *s; 2717 2718 s = migrate_get_current(); 2719 2720 return s->parameters.multifd_channels; 2721 } 2722 2723 MultiFDCompression migrate_multifd_compression(void) 2724 { 2725 MigrationState *s; 2726 2727 s = migrate_get_current(); 2728 2729 assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); 2730 return s->parameters.multifd_compression; 2731 } 2732 2733 int migrate_multifd_zlib_level(void) 2734 { 2735 MigrationState *s; 2736 2737 s = migrate_get_current(); 2738 2739 return s->parameters.multifd_zlib_level; 2740 } 2741 2742 int migrate_multifd_zstd_level(void) 2743 { 2744 MigrationState *s; 2745 2746 s = migrate_get_current(); 2747 2748 return s->parameters.multifd_zstd_level; 2749 } 2750 2751 #ifdef CONFIG_LINUX 2752 bool migrate_use_zero_copy_send(void) 2753 { 2754 MigrationState *s; 2755 2756 s = migrate_get_current(); 2757 2758 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; 2759 } 2760 #endif 2761 2762 int migrate_use_tls(void) 2763 { 2764 MigrationState *s; 2765 2766 s = migrate_get_current(); 2767 2768 return s->parameters.tls_creds && *s->parameters.tls_creds; 2769 } 2770 2771 int migrate_use_xbzrle(void) 2772 { 2773 MigrationState *s; 2774 2775 s = migrate_get_current(); 2776 2777 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 2778 } 2779 2780 uint64_t migrate_xbzrle_cache_size(void) 2781 { 2782 MigrationState *s; 2783 2784 s = migrate_get_current(); 2785 2786 return s->parameters.xbzrle_cache_size; 2787 } 2788 2789 static int64_t migrate_max_postcopy_bandwidth(void) 2790 { 2791 MigrationState *s; 2792 2793 s = migrate_get_current(); 2794 2795 return s->parameters.max_postcopy_bandwidth; 2796 } 2797 2798 bool migrate_use_block(void) 2799 { 2800 MigrationState *s; 2801 2802 s = migrate_get_current(); 2803 2804 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 2805 } 2806 2807 bool migrate_use_return_path(void) 2808 { 2809 MigrationState *s; 2810 2811 s = migrate_get_current(); 2812 2813 return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 2814 } 2815 2816 bool migrate_use_block_incremental(void) 2817 { 2818 MigrationState *s; 2819 2820 s = migrate_get_current(); 2821 2822 return s->parameters.block_incremental; 2823 } 2824 2825 bool migrate_background_snapshot(void) 2826 { 2827 MigrationState *s; 2828 2829 s = migrate_get_current(); 2830 2831 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; 2832 } 2833 2834 bool migrate_postcopy_preempt(void) 2835 { 2836 MigrationState *s; 2837 2838 s = 
migrate_get_current(); 2839 2840 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; 2841 } 2842 2843 /* migration thread support */ 2844 /* 2845 * Something bad happened to the RP stream, mark an error 2846 * The caller shall print or trace something to indicate why 2847 */ 2848 static void mark_source_rp_bad(MigrationState *s) 2849 { 2850 s->rp_state.error = true; 2851 } 2852 2853 static struct rp_cmd_args { 2854 ssize_t len; /* -1 = variable */ 2855 const char *name; 2856 } rp_cmd_args[] = { 2857 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2858 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2859 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2860 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2861 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2862 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2863 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2864 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2865 }; 2866 2867 /* 2868 * Process a request for pages received on the return path, 2869 * We're allowed to send more than requested (e.g. to round to our page size) 2870 * and we don't need to send pages that have already been sent. 2871 */ 2872 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 2873 ram_addr_t start, size_t len) 2874 { 2875 long our_host_ps = qemu_real_host_page_size(); 2876 2877 trace_migrate_handle_rp_req_pages(rbname, start, len); 2878 2879 /* 2880 * Since we currently insist on matching page sizes, just sanity check 2881 * we're being asked for whole host pages. 2882 */ 2883 if (!QEMU_IS_ALIGNED(start, our_host_ps) || 2884 !QEMU_IS_ALIGNED(len, our_host_ps)) { 2885 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 2886 " len: %zd", __func__, start, len); 2887 mark_source_rp_bad(ms); 2888 return; 2889 } 2890 2891 if (ram_save_queue_pages(rbname, start, len)) { 2892 mark_source_rp_bad(ms); 2893 } 2894 } 2895 2896 /* Return true to retry, false to quit */ 2897 static bool postcopy_pause_return_path_thread(MigrationState *s) 2898 { 2899 trace_postcopy_pause_return_path(); 2900 2901 qemu_sem_wait(&s->postcopy_pause_rp_sem); 2902 2903 trace_postcopy_pause_return_path_continued(); 2904 2905 return true; 2906 } 2907 2908 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 2909 { 2910 RAMBlock *block = qemu_ram_block_by_name(block_name); 2911 2912 if (!block) { 2913 error_report("%s: invalid block name '%s'", __func__, block_name); 2914 return -EINVAL; 2915 } 2916 2917 /* Fetch the received bitmap and refresh the dirty bitmap */ 2918 return ram_dirty_bitmap_reload(s, block); 2919 } 2920 2921 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 2922 { 2923 trace_source_return_path_thread_resume_ack(value); 2924 2925 if (value != MIGRATION_RESUME_ACK_VALUE) { 2926 error_report("%s: illegal resume_ack value %"PRIu32, 2927 __func__, value); 2928 return -1; 2929 } 2930 2931 /* Now both sides are active. */ 2932 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2933 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2934 2935 /* Notify send thread that time to continue send pages */ 2936 qemu_sem_post(&s->rp_state.rp_sem); 2937 2938 return 0; 2939 } 2940 2941 /* 2942 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if 2943 * existed) in a safe way. 
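 * 'Safe' means: clear the pointer while holding qemu_file_lock, then do
 * the potentially-blocking close outside of the lock (see the body below).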
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within the lock section
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same for the postcopy fast path socket, if there is one.  No
     * locking needed because this qemufile should only be managed by the
     * return path thread.
     */
    if (ms->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
        qemu_file_shutdown(ms->postcopy_qemufile_src);
        qemu_fclose(ms->postcopy_qemufile_src);
        ms->postcopy_qemufile_src = NULL;
    }

    qemu_fclose(file);
}

/*
 * Handles messages sent on the return path towards the source VM
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    rcu_register_thread();

retry:
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            mark_source_rp_bad(ms);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with "
                         "incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type,
                         header_len, (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP;
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
3045 */ 3046 goto out; 3047 3048 case MIG_RP_MSG_PONG: 3049 tmp32 = ldl_be_p(buf); 3050 trace_source_return_path_thread_pong(tmp32); 3051 qemu_sem_post(&ms->rp_state.rp_pong_acks); 3052 break; 3053 3054 case MIG_RP_MSG_REQ_PAGES: 3055 start = ldq_be_p(buf); 3056 len = ldl_be_p(buf + 8); 3057 migrate_handle_rp_req_pages(ms, NULL, start, len); 3058 break; 3059 3060 case MIG_RP_MSG_REQ_PAGES_ID: 3061 expected_len = 12 + 1; /* header + termination */ 3062 3063 if (header_len >= expected_len) { 3064 start = ldq_be_p(buf); 3065 len = ldl_be_p(buf + 8); 3066 /* Now we expect an idstr */ 3067 tmp32 = buf[12]; /* Length of the following idstr */ 3068 buf[13 + tmp32] = '\0'; 3069 expected_len += tmp32; 3070 } 3071 if (header_len != expected_len) { 3072 error_report("RP: Req_Page_id with length %d expecting %zd", 3073 header_len, expected_len); 3074 mark_source_rp_bad(ms); 3075 goto out; 3076 } 3077 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 3078 break; 3079 3080 case MIG_RP_MSG_RECV_BITMAP: 3081 if (header_len < 1) { 3082 error_report("%s: missing block name", __func__); 3083 mark_source_rp_bad(ms); 3084 goto out; 3085 } 3086 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 3087 buf[buf[0] + 1] = '\0'; 3088 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 3089 mark_source_rp_bad(ms); 3090 goto out; 3091 } 3092 break; 3093 3094 case MIG_RP_MSG_RESUME_ACK: 3095 tmp32 = ldl_be_p(buf); 3096 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 3097 mark_source_rp_bad(ms); 3098 goto out; 3099 } 3100 break; 3101 3102 default: 3103 break; 3104 } 3105 } 3106 3107 out: 3108 res = qemu_file_get_error(rp); 3109 if (res) { 3110 if (res && migration_in_postcopy()) { 3111 /* 3112 * Maybe there is something we can do: it looks like a 3113 * network down issue, and we pause for a recovery. 3114 */ 3115 migration_release_dst_files(ms); 3116 rp = NULL; 3117 if (postcopy_pause_return_path_thread(ms)) { 3118 /* 3119 * Reload rp, reset the rest. Referencing it is safe since 3120 * it's reset only by us above, or when migration completes 3121 */ 3122 rp = ms->rp_state.from_dst_file; 3123 ms->rp_state.error = false; 3124 goto retry; 3125 } 3126 } 3127 3128 trace_source_return_path_thread_bad_end(); 3129 mark_source_rp_bad(ms); 3130 } 3131 3132 trace_source_return_path_thread_end(); 3133 migration_release_dst_files(ms); 3134 rcu_unregister_thread(); 3135 return NULL; 3136 } 3137 3138 static int open_return_path_on_source(MigrationState *ms, 3139 bool create_thread) 3140 { 3141 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 3142 if (!ms->rp_state.from_dst_file) { 3143 return -1; 3144 } 3145 3146 trace_open_return_path_on_source(); 3147 3148 if (!create_thread) { 3149 /* We're done */ 3150 return 0; 3151 } 3152 3153 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 3154 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 3155 ms->rp_state.rp_thread_created = true; 3156 3157 trace_open_return_path_on_source_continue(); 3158 3159 return 0; 3160 } 3161 3162 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 3163 static int await_return_path_close_on_source(MigrationState *ms) 3164 { 3165 /* 3166 * If this is a normal exit then the destination will send a SHUT and the 3167 * rp_thread will exit, however if there's an error we need to cause 3168 * it to exit. 
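 * qemu_file_shutdown() on from_dst_file closes the underlying channel, so
 * any read the rp_thread is blocked in returns an error and the thread
 * can exit.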
3169 */ 3170 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 3171 /* 3172 * shutdown(2), if we have it, will cause it to unblock if it's stuck 3173 * waiting for the destination. 3174 */ 3175 qemu_file_shutdown(ms->rp_state.from_dst_file); 3176 mark_source_rp_bad(ms); 3177 } 3178 trace_await_return_path_close_on_source_joining(); 3179 qemu_thread_join(&ms->rp_state.rp_thread); 3180 ms->rp_state.rp_thread_created = false; 3181 trace_await_return_path_close_on_source_close(); 3182 return ms->rp_state.error; 3183 } 3184 3185 static inline void 3186 migration_wait_main_channel(MigrationState *ms) 3187 { 3188 /* Wait until one PONG message received */ 3189 qemu_sem_wait(&ms->rp_state.rp_pong_acks); 3190 } 3191 3192 /* 3193 * Switch from normal iteration to postcopy 3194 * Returns non-0 on error 3195 */ 3196 static int postcopy_start(MigrationState *ms) 3197 { 3198 int ret; 3199 QIOChannelBuffer *bioc; 3200 QEMUFile *fb; 3201 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3202 int64_t bandwidth = migrate_max_postcopy_bandwidth(); 3203 bool restart_block = false; 3204 int cur_state = MIGRATION_STATUS_ACTIVE; 3205 3206 if (migrate_postcopy_preempt()) { 3207 migration_wait_main_channel(ms); 3208 if (postcopy_preempt_establish_channel(ms)) { 3209 migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); 3210 return -1; 3211 } 3212 } 3213 3214 if (!migrate_pause_before_switchover()) { 3215 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 3216 MIGRATION_STATUS_POSTCOPY_ACTIVE); 3217 } 3218 3219 trace_postcopy_start(); 3220 qemu_mutex_lock_iothread(); 3221 trace_postcopy_start_set_run(); 3222 3223 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3224 global_state_store(); 3225 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 3226 if (ret < 0) { 3227 goto fail; 3228 } 3229 3230 ret = migration_maybe_pause(ms, &cur_state, 3231 MIGRATION_STATUS_POSTCOPY_ACTIVE); 3232 if (ret < 0) { 3233 goto fail; 3234 } 3235 3236 ret = bdrv_inactivate_all(); 3237 if (ret < 0) { 3238 goto fail; 3239 } 3240 restart_block = true; 3241 3242 /* 3243 * Cause any non-postcopiable, but iterative devices to 3244 * send out their final data. 3245 */ 3246 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 3247 3248 /* 3249 * in Finish migrate and with the io-lock held everything should 3250 * be quiet, but we've potentially still got dirty pages and we 3251 * need to tell the destination to throw any pages it's already received 3252 * that are dirty 3253 */ 3254 if (migrate_postcopy_ram()) { 3255 ram_postcopy_send_discard_bitmap(ms); 3256 } 3257 3258 /* 3259 * send rest of state - note things that are doing postcopy 3260 * will notice we're in POSTCOPY_ACTIVE and not actually 3261 * wrap their state up here 3262 */ 3263 /* 0 max-postcopy-bandwidth means unlimited */ 3264 if (!bandwidth) { 3265 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 3266 } else { 3267 qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); 3268 } 3269 if (migrate_postcopy_ram()) { 3270 /* Ping just for debugging, helps line traces up */ 3271 qemu_savevm_send_ping(ms->to_dst_file, 2); 3272 } 3273 3274 /* 3275 * While loading the device state we may trigger page transfer 3276 * requests and the fd must be free to process those, and thus 3277 * the destination must read the whole device state off the fd before 3278 * it starts processing it. 
Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each device's load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /*
     * Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again that we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /*
     * Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    }

    trace_postcopy_preempt_enabled(migrate_postcopy_preempt());

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /*
         * A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
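         * 'Recover' here concretely means re-activating the block devices
         * with bdrv_activate_all() just below.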
3365 */ 3366 Error *local_err = NULL; 3367 3368 bdrv_activate_all(&local_err); 3369 if (local_err) { 3370 error_report_err(local_err); 3371 } 3372 } 3373 qemu_mutex_unlock_iothread(); 3374 return -1; 3375 } 3376 3377 /** 3378 * migration_maybe_pause: Pause if required to by 3379 * migrate_pause_before_switchover called with the iothread locked 3380 * Returns: 0 on success 3381 */ 3382 static int migration_maybe_pause(MigrationState *s, 3383 int *current_active_state, 3384 int new_state) 3385 { 3386 if (!migrate_pause_before_switchover()) { 3387 return 0; 3388 } 3389 3390 /* Since leaving this state is not atomic with posting the semaphore 3391 * it's possible that someone could have issued multiple migrate_continue 3392 * and the semaphore is incorrectly positive at this point; 3393 * the docs say it's undefined to reinit a semaphore that's already 3394 * init'd, so use timedwait to eat up any existing posts. 3395 */ 3396 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 3397 /* This block intentionally left blank */ 3398 } 3399 3400 /* 3401 * If the migration is cancelled when it is in the completion phase, 3402 * the migration state is set to MIGRATION_STATUS_CANCELLING. 3403 * So we don't need to wait a semaphore, otherwise we would always 3404 * wait for the 'pause_sem' semaphore. 3405 */ 3406 if (s->state != MIGRATION_STATUS_CANCELLING) { 3407 qemu_mutex_unlock_iothread(); 3408 migrate_set_state(&s->state, *current_active_state, 3409 MIGRATION_STATUS_PRE_SWITCHOVER); 3410 qemu_sem_wait(&s->pause_sem); 3411 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 3412 new_state); 3413 *current_active_state = new_state; 3414 qemu_mutex_lock_iothread(); 3415 } 3416 3417 return s->state == new_state ? 0 : -EINVAL; 3418 } 3419 3420 /** 3421 * migration_completion: Used by migration_thread when there's not much left. 3422 * The caller 'breaks' the loop when this returns. 
 *
 * @s: Current migration state
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
        s->vm_was_running = runstate_is_running();
        ret = global_state_store();

        if (!ret) {
            bool inactivate = !migrate_colo_enabled();
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            trace_migration_completion_vm_stop(ret);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         inactivate);
            }
            if (inactivate && ret >= 0) {
                s->block_inactive = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_mutex_lock_iothread();
        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        qemu_mutex_unlock_iothread();

        /* Shutdown the postcopy fast path thread */
        if (migrate_postcopy_preempt()) {
            postcopy_preempt_shutdown_file(s);
        }

        trace_migration_completion_postcopy_end_after_complete();
    } else {
        goto fail;
    }

    /*
     * If the rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.rp_thread_created) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
        /* COLO does not support postcopy */
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_COLO);
    } else {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /*
     * If not doing postcopy, vm_start() will be called: let's regain
     * control on the images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_DEVICE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}

/**
 * bg_migration_completion: Used by bg_migration_thread after all the
 * RAM has been saved.  The caller 'breaks' the loop when this returns.
3535 * 3536 * @s: Current migration state 3537 */ 3538 static void bg_migration_completion(MigrationState *s) 3539 { 3540 int current_active_state = s->state; 3541 3542 /* 3543 * Stop tracking RAM writes - un-protect memory, un-register UFFD 3544 * memory ranges, flush kernel wait queues and wake up threads 3545 * waiting for write fault to be resolved. 3546 */ 3547 ram_write_tracking_stop(); 3548 3549 if (s->state == MIGRATION_STATUS_ACTIVE) { 3550 /* 3551 * By this moment we have RAM content saved into the migration stream. 3552 * The next step is to flush the non-RAM content (device state) 3553 * right after the ram content. The device state has been stored into 3554 * the temporary buffer before RAM saving started. 3555 */ 3556 qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); 3557 qemu_fflush(s->to_dst_file); 3558 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3559 goto fail; 3560 } 3561 3562 if (qemu_file_get_error(s->to_dst_file)) { 3563 trace_migration_completion_file_err(); 3564 goto fail; 3565 } 3566 3567 migrate_set_state(&s->state, current_active_state, 3568 MIGRATION_STATUS_COMPLETED); 3569 return; 3570 3571 fail: 3572 migrate_set_state(&s->state, current_active_state, 3573 MIGRATION_STATUS_FAILED); 3574 } 3575 3576 bool migrate_colo_enabled(void) 3577 { 3578 MigrationState *s = migrate_get_current(); 3579 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 3580 } 3581 3582 typedef enum MigThrError { 3583 /* No error detected */ 3584 MIG_THR_ERR_NONE = 0, 3585 /* Detected error, but resumed successfully */ 3586 MIG_THR_ERR_RECOVERED = 1, 3587 /* Detected fatal error, need to exit */ 3588 MIG_THR_ERR_FATAL = 2, 3589 } MigThrError; 3590 3591 static int postcopy_resume_handshake(MigrationState *s) 3592 { 3593 qemu_savevm_send_postcopy_resume(s->to_dst_file); 3594 3595 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3596 qemu_sem_wait(&s->rp_state.rp_sem); 3597 } 3598 3599 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3600 return 0; 3601 } 3602 3603 return -1; 3604 } 3605 3606 /* Return zero if success, or <0 for error */ 3607 static int postcopy_do_resume(MigrationState *s) 3608 { 3609 int ret; 3610 3611 /* 3612 * Call all the resume_prepare() hooks, so that modules can be 3613 * ready for the migration resume. 3614 */ 3615 ret = qemu_savevm_state_resume_prepare(s); 3616 if (ret) { 3617 error_report("%s: resume_prepare() failure detected: %d", 3618 __func__, ret); 3619 return ret; 3620 } 3621 3622 /* 3623 * If preempt is enabled, re-establish the preempt channel. Note that 3624 * we do it after resume prepare to make sure the main channel will be 3625 * created before the preempt channel. E.g. with weak network, the 3626 * dest QEMU may get messed up with the preempt and main channels on 3627 * the order of connection setup. This guarantees the correct order. 3628 */ 3629 ret = postcopy_preempt_establish_channel(s); 3630 if (ret) { 3631 error_report("%s: postcopy_preempt_establish_channel(): %d", 3632 __func__, ret); 3633 return ret; 3634 } 3635 3636 /* 3637 * Last handshake with destination on the resume (destination will 3638 * switch to postcopy-active afterwards) 3639 */ 3640 ret = postcopy_resume_handshake(s); 3641 if (ret) { 3642 error_report("%s: handshake failed: %d", __func__, ret); 3643 return ret; 3644 } 3645 3646 return 0; 3647 } 3648 3649 /* 3650 * We don't return until we are in a safe state to continue current 3651 * postcopy migration. 
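 * While paused, the thread blocks on s->postcopy_pause_sem until the
 * migration state leaves POSTCOPY_PAUSED; a recovery attempt flips the
 * state to POSTCOPY_RECOVER and we retry via postcopy_do_resume().
 *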
 * Returns MIG_THR_ERR_RECOVERED if recovered, or
 * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
 */
static MigThrError postcopy_pause(MigrationState *s)
{
    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

    while (true) {
        QEMUFile *file;

        /*
         * Current channel is possibly broken. Release it. Note that this is
         * guaranteed even without lock because to_dst_file should only be
         * modified by the migration thread. That also guarantees that the
         * unregister of yank is safe too without the lock. It should be safe
         * even to be within the qemu_file_lock, but we didn't do that to avoid
         * taking another mutex (yank_lock) within qemu_file_lock. TL;DR: we
         * make the qemu_file_lock critical section as small as possible.
         */
        assert(s->to_dst_file);
        migration_ioc_unregister_yank_from_file(s->to_dst_file);
        qemu_mutex_lock(&s->qemu_file_lock);
        file = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);

        qemu_file_shutdown(file);
        qemu_fclose(file);

        migrate_set_state(&s->state, s->state,
                          MIGRATION_STATUS_POSTCOPY_PAUSED);

        error_report("Detected IO failure for postcopy. "
                     "Migration paused.");

        /*
         * We wait until things are fixed up. Then someone will set the
         * status back for us.
         */
        while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
            qemu_sem_wait(&s->postcopy_pause_sem);
        }

        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
            /* Woken up by a recover procedure. Give it a shot */

            /*
             * Firstly, let's wake up the return path now, with a new
             * return path channel.
             */
            qemu_sem_post(&s->postcopy_pause_rp_sem);

            /* Do the resume logic */
            if (postcopy_do_resume(s) == 0) {
                /* Let's continue! */
                trace_postcopy_pause_continued();
                return MIG_THR_ERR_RECOVERED;
            } else {
                /*
                 * Something went wrong during the recovery, let's
                 * pause again. Pause is always better than throwing
                 * data away.
                 */
                continue;
            }
        } else {
            /* This is not right... Time to quit. */
            return MIG_THR_ERR_FATAL;
        }
    }
}

static MigThrError migration_detect_error(MigrationState *s)
{
    int ret;
    int state = s->state;
    Error *local_error = NULL;

    if (state == MIGRATION_STATUS_CANCELLING ||
        state == MIGRATION_STATUS_CANCELLED) {
        /* End the migration, but don't set the state to failed */
        return MIG_THR_ERR_FATAL;
    }

    /*
     * Try to detect any file errors. Note that postcopy_qemufile_src will
     * be NULL when postcopy preempt is not enabled.
     */
    ret = qemu_file_get_error_obj_any(s->to_dst_file,
                                      s->postcopy_qemufile_src,
                                      &local_error);
    if (!ret) {
        /* Everything is fine */
        assert(!local_error);
        return MIG_THR_ERR_NONE;
    }

    if (local_error) {
        migrate_set_error(s, local_error);
        error_free(local_error);
    }

    if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
        /*
         * For postcopy, we allow the network to be down for a
         * while. After that, it can be continued by a
         * recovery phase.
         */
        return postcopy_pause(s);
    } else {
        /*
         * For precopy (or postcopy with an error outside IO), we fail
         * immediately.
         */
        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
        trace_migration_thread_file_err();

        /* Time to stop the migration, now. */
        return MIG_THR_ERR_FATAL;
    }
}

/* How many bytes have we transferred since the beginning of the migration */
static uint64_t migration_total_bytes(MigrationState *s)
{
    return qemu_file_total_transferred(s->to_dst_file) +
        ram_counters.multifd_bytes;
}

static void migration_calculate_complete(MigrationState *s)
{
    uint64_t bytes = migration_total_bytes(s);
    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t transfer_time;

    s->total_time = end_time - s->start_time;
    if (!s->downtime) {
        /*
         * It's still not set, so this is a precopy migration.  For
         * postcopy, downtime is calculated during postcopy_start().
         */
        s->downtime = end_time - s->downtime_start;
    }

    transfer_time = s->total_time - s->setup_time;
    if (transfer_time) {
        s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
    }
}

static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields at the same time to avoid mismatched info
     * leading to a wrong speed calculation.
     */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->iteration_initial_bytes = migration_total_bytes(s);
    s->iteration_initial_pages = ram_get_total_transferred_pages();
}

static void migration_update_counters(MigrationState *s,
                                      int64_t current_time)
{
    uint64_t transferred, transferred_pages, time_spent;
    uint64_t current_bytes; /* bytes transferred since the beginning */
    double bandwidth;

    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
        return;
    }

    current_bytes = migration_total_bytes(s);
    transferred = current_bytes - s->iteration_initial_bytes;
    time_spent = current_time - s->iteration_start_time;
    bandwidth = (double)transferred / time_spent;
    s->threshold_size = bandwidth * s->parameters.downtime_limit;

    s->mbps = (((double) transferred * 8.0) /
               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;

    transferred_pages = ram_get_total_transferred_pages() -
                        s->iteration_initial_pages;
    s->pages_per_second = (double) transferred_pages /
                          (((double) time_spent / 1000.0));

    /*
     * If we haven't sent anything, we don't want to
     * recalculate. 10000 is a small enough number for our purposes
     */
    if (ram_counters.dirty_pages_rate && transferred > 10000) {
        s->expected_downtime = ram_counters.remaining / bandwidth;
    }

    qemu_file_reset_rate_limit(s->to_dst_file);

    update_iteration_initial_status(s);

    trace_migrate_transferred(transferred, time_spent,
                              bandwidth, s->threshold_size);
}

/* Migration thread iteration status */
typedef enum {
    MIG_ITERATE_RESUME, /* Resume current iteration */
    MIG_ITERATE_SKIP,   /* Skip current iteration */
    MIG_ITERATE_BREAK,  /* Break the loop */
} MigIterateState;

/*
 * Return true if we should continue to the next iteration directly, false
 * otherwise.
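 *
 * (In practice the return value is a MigIterateState: RESUME keeps
 * iterating, SKIP is returned right after switching over to postcopy,
 * and BREAK once migration_completion() has run.  The switchover test
 * below compares the pending data against s->threshold_size, i.e. whether
 * the remainder is expected to fit into downtime_limit at the measured
 * bandwidth: e.g. roughly 300 MiB at ~1 GiB/s with a 300ms downtime-limit.
 * Illustrative arithmetic; see migration_update_counters().)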
/* Migration thread iteration status */
typedef enum {
    MIG_ITERATE_RESUME, /* Resume current iteration */
    MIG_ITERATE_SKIP,   /* Skip current iteration */
    MIG_ITERATE_BREAK,  /* Break the loop */
} MigIterateState;

/*
 * Run one iteration of the live migration.  Returns the MigIterateState
 * telling the caller whether to resume, skip, or break the loop.
 */
static MigIterateState migration_iteration_run(MigrationState *s)
{
    uint64_t must_precopy, can_postcopy;
    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;

    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
    uint64_t pending_size = must_precopy + can_postcopy;

    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);

    if (must_precopy <= s->threshold_size) {
        qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
        pending_size = must_precopy + can_postcopy;
        trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
    }

    if (!pending_size || pending_size < s->threshold_size) {
        trace_migration_thread_low_pending(pending_size);
        migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    /* Still a significant amount to transfer */
    if (!in_postcopy && must_precopy <= s->threshold_size &&
        qatomic_read(&s->start_postcopy)) {
        if (postcopy_start(s)) {
            error_report("%s: postcopy failed to start", __func__);
        }
        return MIG_ITERATE_SKIP;
    }

    /* Just another iteration step */
    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
    return MIG_ITERATE_RESUME;
}

static void migration_iteration_finish(MigrationState *s)
{
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
        break;
    case MIGRATION_STATUS_COLO:
        if (!migrate_colo_enabled()) {
            error_report("%s: critical error: calling COLO code without "
                         "COLO enabled", __func__);
        }
        migrate_start_colo_process(s);
        s->vm_was_running = true;
        /* Fallthrough */
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        if (s->vm_was_running) {
            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                vm_start();
            }
        } else {
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }
    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}

static void bg_migration_iteration_finish(MigrationState *s)
{
    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        break;

    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }

    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
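/*
 * Reading aid for the two-step pending check in migration_iteration_run()
 * above: the estimate is the cheap call (it may reuse stale numbers, e.g.
 * RAM's figures from the last dirty bitmap sync), so it runs on every
 * iteration.  Only when the estimated must_precopy part already fits
 * under threshold_size do we pay for the exact variant, which may resync
 * state and push the numbers back up.  E.g. with threshold_size = 96 MiB
 * (illustrative), an estimate of 80 MiB triggers the exact check; if that
 * then reports 120 MiB, iteration simply continues.
 */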
/*
 * Run one iteration of the background snapshot migration.  Returns the
 * MigIterateState telling the caller whether to resume or break the loop.
 */
static MigIterateState bg_migration_iteration_run(MigrationState *s)
{
    int res;

    res = qemu_savevm_state_iterate(s->to_dst_file, false);
    if (res > 0) {
        bg_migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}

void migration_make_urgent_request(void)
{
    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
}

void migration_consume_urgent_request(void)
{
    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
}

/* Returns true if the rate limiting was broken by an urgent request */
bool migration_rate_limit(void)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    MigrationState *s = migrate_get_current();

    bool urgent = false;
    migration_update_counters(s, now);
    if (qemu_file_rate_limit(s->to_dst_file)) {

        if (qemu_file_get_error(s->to_dst_file)) {
            return false;
        }
        /*
         * Wait for a delay to do rate limiting OR
         * something urgent to post the semaphore.
         */
        int ms = s->iteration_start_time + BUFFER_DELAY - now;
        trace_migration_rate_limit_pre(ms);
        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
            /*
             * We were woken by one or more urgent things but
             * the timedwait will have consumed one of them.
             * The service routine for the urgent wake will dec
             * the semaphore itself for each item it consumes,
             * so add back the one we just ate.
             */
            qemu_sem_post(&s->rate_limit_sem);
            urgent = true;
        }
        trace_migration_rate_limit_post(urgent);
    }
    return urgent;
}
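/*
 * Illustrative use of the urgent-request pair above (a sketch, not a call
 * site in this file; the real callers are elsewhere, e.g. when servicing
 * postcopy page requests):
 *
 *   migration_make_urgent_request();     // wakes the timedwait above
 *   ... the migration thread runs an extra iteration right away ...
 *   migration_consume_urgent_request();  // taken when the work is serviced
 *
 * migration_rate_limit() re-posts the token its qemu_sem_timedwait()
 * consumed, so the semaphore count stays balanced no matter how many
 * urgent requests were queued.
 */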
/*
 * If failover devices are present, wait until they are completely
 * unplugged.
 */
static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                    int new_state)
{
    if (qemu_savevm_state_guest_unplug_pending()) {
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);

        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
               qemu_savevm_state_guest_unplug_pending()) {
            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
        }
        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
            int timeout = 120; /* 30 seconds */
            /*
             * Migration has been canceled, but as we have started an
             * unplug we must wait for it to finish in order to be able
             * to plug the card back in.
             */
            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
            }
            if (qemu_savevm_state_guest_unplug_pending() &&
                !qtest_enabled()) {
                warn_report("migration: partially unplugged device on "
                            "failure");
            }
        }

        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
    } else {
        migrate_set_state(&s->state, old_state, new_state);
    }
}

/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    MigrationThread *thread = NULL;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    MigThrError thr_error;
    bool urgent = false;

    thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());

    rcu_register_thread();

    object_ref(OBJECT(s));
    update_iteration_initial_status(s);

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure dst has it
     * opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo_enabled()) {
        /* Notify migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from e.g. a network failure; reset all
             * the local variables.  This is important to avoid
             * breaking the transferred_bytes and bandwidth calculation.
             */
            update_iteration_initial_status(s);
        }

        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    MigrationThreadDel(thread);
    return NULL;
}

static void bg_migration_vm_start_bh(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->vm_start_bh);
    s->vm_start_bh = NULL;

    vm_start();
    s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
}
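/*
 * Reading aid: for a background snapshot, the reported downtime is the
 * wall-clock span from s->downtime_start (taken in bg_migration_thread()
 * below, just before the VM is stopped) to the vm_start() completing in
 * the bottom half above, i.e. the period during which the vCPUs were
 * paused so the device state could be saved.
 */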
4180 * 4181 * Another key point is that generated vmstate stream reflects machine state 4182 * 'frozen' at the beginning of snapshot creation compared to dirty page logging 4183 * mechanism, which effectively results in that saved snapshot is the state of VM 4184 * at the end of the process. 4185 */ 4186 static void *bg_migration_thread(void *opaque) 4187 { 4188 MigrationState *s = opaque; 4189 int64_t setup_start; 4190 MigThrError thr_error; 4191 QEMUFile *fb; 4192 bool early_fail = true; 4193 4194 rcu_register_thread(); 4195 object_ref(OBJECT(s)); 4196 4197 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 4198 4199 setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 4200 /* 4201 * We want to save vmstate for the moment when migration has been 4202 * initiated but also we want to save RAM content while VM is running. 4203 * The RAM content should appear first in the vmstate. So, we first 4204 * stash the non-RAM part of the vmstate to the temporary buffer, 4205 * then write RAM part of the vmstate to the migration stream 4206 * with vCPUs running and, finally, write stashed non-RAM part of 4207 * the vmstate from the buffer to the migration stream. 4208 */ 4209 s->bioc = qio_channel_buffer_new(512 * 1024); 4210 qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer"); 4211 fb = qemu_file_new_output(QIO_CHANNEL(s->bioc)); 4212 object_unref(OBJECT(s->bioc)); 4213 4214 update_iteration_initial_status(s); 4215 4216 /* 4217 * Prepare for tracking memory writes with UFFD-WP - populate 4218 * RAM pages before protecting. 4219 */ 4220 #ifdef __linux__ 4221 ram_write_tracking_prepare(); 4222 #endif 4223 4224 qemu_savevm_state_header(s->to_dst_file); 4225 qemu_savevm_state_setup(s->to_dst_file); 4226 4227 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, 4228 MIGRATION_STATUS_ACTIVE); 4229 4230 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 4231 4232 trace_migration_thread_setup_complete(); 4233 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 4234 4235 qemu_mutex_lock_iothread(); 4236 4237 /* 4238 * If VM is currently in suspended state, then, to make a valid runstate 4239 * transition in vm_stop_force_state() we need to wakeup it up. 4240 */ 4241 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 4242 s->vm_was_running = runstate_is_running(); 4243 4244 if (global_state_store()) { 4245 goto fail; 4246 } 4247 /* Forcibly stop VM before saving state of vCPUs and devices */ 4248 if (vm_stop_force_state(RUN_STATE_PAUSED)) { 4249 goto fail; 4250 } 4251 /* 4252 * Put vCPUs in sync with shadow context structures, then 4253 * save their state to channel-buffer along with devices. 4254 */ 4255 cpu_synchronize_all_states(); 4256 if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) { 4257 goto fail; 4258 } 4259 /* 4260 * Since we are going to get non-iterable state data directly 4261 * from s->bioc->data, explicit flush is needed here. 4262 */ 4263 qemu_fflush(fb); 4264 4265 /* Now initialize UFFD context and start tracking RAM writes */ 4266 if (ram_write_tracking_start()) { 4267 goto fail; 4268 } 4269 early_fail = false; 4270 4271 /* 4272 * Start VM from BH handler to avoid write-fault lock here. 4273 * UFFD-WP protection for the whole RAM is already enabled so 4274 * calling VM state change notifiers from vm_start() would initiate 4275 * writes to virtio VQs memory which is in write-protected region. 
4276 */ 4277 s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); 4278 qemu_bh_schedule(s->vm_start_bh); 4279 4280 qemu_mutex_unlock_iothread(); 4281 4282 while (migration_is_active(s)) { 4283 MigIterateState iter_state = bg_migration_iteration_run(s); 4284 if (iter_state == MIG_ITERATE_SKIP) { 4285 continue; 4286 } else if (iter_state == MIG_ITERATE_BREAK) { 4287 break; 4288 } 4289 4290 /* 4291 * Try to detect any kind of failures, and see whether we 4292 * should stop the migration now. 4293 */ 4294 thr_error = migration_detect_error(s); 4295 if (thr_error == MIG_THR_ERR_FATAL) { 4296 /* Stop migration */ 4297 break; 4298 } 4299 4300 migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); 4301 } 4302 4303 trace_migration_thread_after_loop(); 4304 4305 fail: 4306 if (early_fail) { 4307 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, 4308 MIGRATION_STATUS_FAILED); 4309 qemu_mutex_unlock_iothread(); 4310 } 4311 4312 bg_migration_iteration_finish(s); 4313 4314 qemu_fclose(fb); 4315 object_unref(OBJECT(s)); 4316 rcu_unregister_thread(); 4317 4318 return NULL; 4319 } 4320 4321 void migrate_fd_connect(MigrationState *s, Error *error_in) 4322 { 4323 Error *local_err = NULL; 4324 int64_t rate_limit; 4325 bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; 4326 4327 /* 4328 * If there's a previous error, free it and prepare for another one. 4329 * Meanwhile if migration completes successfully, there won't have an error 4330 * dumped when calling migrate_fd_cleanup(). 4331 */ 4332 migrate_error_free(s); 4333 4334 s->expected_downtime = s->parameters.downtime_limit; 4335 if (resume) { 4336 assert(s->cleanup_bh); 4337 } else { 4338 assert(!s->cleanup_bh); 4339 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); 4340 } 4341 if (error_in) { 4342 migrate_fd_error(s, error_in); 4343 if (resume) { 4344 /* 4345 * Don't do cleanup for resume if channel is invalid, but only dump 4346 * the error. We wait for another channel connect from the user. 4347 * The error_report still gives HMP user a hint on what failed. 4348 * It's normally done in migrate_fd_cleanup(), but call it here 4349 * explicitly. 4350 */ 4351 error_report_err(error_copy(s->error)); 4352 } else { 4353 migrate_fd_cleanup(s); 4354 } 4355 return; 4356 } 4357 4358 if (resume) { 4359 /* This is a resumed migration */ 4360 rate_limit = s->parameters.max_postcopy_bandwidth / 4361 XFER_LIMIT_RATIO; 4362 } else { 4363 /* This is a fresh new migration */ 4364 rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; 4365 4366 /* Notify before starting migration thread */ 4367 notifier_list_notify(&migration_state_notifiers, s); 4368 } 4369 4370 qemu_file_set_rate_limit(s->to_dst_file, rate_limit); 4371 qemu_file_set_blocking(s->to_dst_file, true); 4372 4373 /* 4374 * Open the return path. For postcopy, it is used exclusively. For 4375 * precopy, only if user specified "return-path" capability would 4376 * QEMU uses the return path. 
4377 */ 4378 if (migrate_postcopy_ram() || migrate_use_return_path()) { 4379 if (open_return_path_on_source(s, !resume)) { 4380 error_report("Unable to open return-path for postcopy"); 4381 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); 4382 migrate_fd_cleanup(s); 4383 return; 4384 } 4385 } 4386 4387 if (resume) { 4388 /* Wakeup the main migration thread to do the recovery */ 4389 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 4390 MIGRATION_STATUS_POSTCOPY_RECOVER); 4391 qemu_sem_post(&s->postcopy_pause_sem); 4392 return; 4393 } 4394 4395 if (multifd_save_setup(&local_err) != 0) { 4396 error_report_err(local_err); 4397 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 4398 MIGRATION_STATUS_FAILED); 4399 migrate_fd_cleanup(s); 4400 return; 4401 } 4402 4403 if (migrate_background_snapshot()) { 4404 qemu_thread_create(&s->thread, "bg_snapshot", 4405 bg_migration_thread, s, QEMU_THREAD_JOINABLE); 4406 } else { 4407 qemu_thread_create(&s->thread, "live_migration", 4408 migration_thread, s, QEMU_THREAD_JOINABLE); 4409 } 4410 s->migration_thread_running = true; 4411 } 4412 4413 void migration_global_dump(Monitor *mon) 4414 { 4415 MigrationState *ms = migrate_get_current(); 4416 4417 monitor_printf(mon, "globals:\n"); 4418 monitor_printf(mon, "store-global-state: %s\n", 4419 ms->store_global_state ? "on" : "off"); 4420 monitor_printf(mon, "only-migratable: %s\n", 4421 only_migratable ? "on" : "off"); 4422 monitor_printf(mon, "send-configuration: %s\n", 4423 ms->send_configuration ? "on" : "off"); 4424 monitor_printf(mon, "send-section-footer: %s\n", 4425 ms->send_section_footer ? "on" : "off"); 4426 monitor_printf(mon, "decompress-error-check: %s\n", 4427 ms->decompress_error_check ? "on" : "off"); 4428 monitor_printf(mon, "clear-bitmap-shift: %u\n", 4429 ms->clear_bitmap_shift); 4430 } 4431 4432 #define DEFINE_PROP_MIG_CAP(name, x) \ 4433 DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) 4434 4435 static Property migration_properties[] = { 4436 DEFINE_PROP_BOOL("store-global-state", MigrationState, 4437 store_global_state, true), 4438 DEFINE_PROP_BOOL("send-configuration", MigrationState, 4439 send_configuration, true), 4440 DEFINE_PROP_BOOL("send-section-footer", MigrationState, 4441 send_section_footer, true), 4442 DEFINE_PROP_BOOL("decompress-error-check", MigrationState, 4443 decompress_error_check, true), 4444 DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, 4445 clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), 4446 4447 /* Migration parameters */ 4448 DEFINE_PROP_UINT8("x-compress-level", MigrationState, 4449 parameters.compress_level, 4450 DEFAULT_MIGRATE_COMPRESS_LEVEL), 4451 DEFINE_PROP_UINT8("x-compress-threads", MigrationState, 4452 parameters.compress_threads, 4453 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), 4454 DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, 4455 parameters.compress_wait_thread, true), 4456 DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, 4457 parameters.decompress_threads, 4458 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), 4459 DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, 4460 parameters.throttle_trigger_threshold, 4461 DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), 4462 DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, 4463 parameters.cpu_throttle_initial, 4464 DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), 4465 DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, 4466 parameters.cpu_throttle_increment, 4467 DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), 4468 
DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, 4469 parameters.cpu_throttle_tailslow, false), 4470 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, 4471 parameters.max_bandwidth, MAX_THROTTLE), 4472 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, 4473 parameters.downtime_limit, 4474 DEFAULT_MIGRATE_SET_DOWNTIME), 4475 DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, 4476 parameters.x_checkpoint_delay, 4477 DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), 4478 DEFINE_PROP_UINT8("multifd-channels", MigrationState, 4479 parameters.multifd_channels, 4480 DEFAULT_MIGRATE_MULTIFD_CHANNELS), 4481 DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, 4482 parameters.multifd_compression, 4483 DEFAULT_MIGRATE_MULTIFD_COMPRESSION), 4484 DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, 4485 parameters.multifd_zlib_level, 4486 DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), 4487 DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, 4488 parameters.multifd_zstd_level, 4489 DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), 4490 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, 4491 parameters.xbzrle_cache_size, 4492 DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), 4493 DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, 4494 parameters.max_postcopy_bandwidth, 4495 DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), 4496 DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, 4497 parameters.max_cpu_throttle, 4498 DEFAULT_MIGRATE_MAX_CPU_THROTTLE), 4499 DEFINE_PROP_SIZE("announce-initial", MigrationState, 4500 parameters.announce_initial, 4501 DEFAULT_MIGRATE_ANNOUNCE_INITIAL), 4502 DEFINE_PROP_SIZE("announce-max", MigrationState, 4503 parameters.announce_max, 4504 DEFAULT_MIGRATE_ANNOUNCE_MAX), 4505 DEFINE_PROP_SIZE("announce-rounds", MigrationState, 4506 parameters.announce_rounds, 4507 DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), 4508 DEFINE_PROP_SIZE("announce-step", MigrationState, 4509 parameters.announce_step, 4510 DEFAULT_MIGRATE_ANNOUNCE_STEP), 4511 DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), 4512 DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), 4513 DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), 4514 4515 /* Migration capabilities */ 4516 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), 4517 DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), 4518 DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), 4519 DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), 4520 DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), 4521 DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), 4522 DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), 4523 DEFINE_PROP_MIG_CAP("x-postcopy-preempt", 4524 MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), 4525 DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), 4526 DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), 4527 DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), 4528 DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), 4529 DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), 4530 DEFINE_PROP_MIG_CAP("x-background-snapshot", 4531 MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), 4532 #ifdef CONFIG_LINUX 4533 DEFINE_PROP_MIG_CAP("x-zero-copy-send", 4534 MIGRATION_CAPABILITY_ZERO_COPY_SEND), 4535 #endif 4536 4537 DEFINE_PROP_END_OF_LIST(), 4538 }; 4539 4540 static void migration_class_init(ObjectClass *klass, void *data) 4541 
static void migration_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    device_class_set_props(dc, migration_properties);
}

static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_sem_destroy(&ms->pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
    error_free(ms->error);
}

static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_sem_init(&ms->pause_sem, 0);
    qemu_mutex_init(&ms->error_mutex);

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_compress_wait_thread = true;
    params->has_decompress_threads = true;
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_multifd_channels = true;
    params->has_multifd_compression = true;
    params->has_multifd_zlib_level = true;
    params->has_multifd_zstd_level = true;
    params->has_xbzrle_cache_size = true;
    params->has_max_postcopy_bandwidth = true;
    params->has_max_cpu_throttle = true;
    params->has_announce_initial = true;
    params->has_announce_max = true;
    params->has_announce_rounds = true;
    params->has_announce_step = true;

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
}

/*
 * Return true if the check passes, false otherwise.  Error will be put
 * inside errp if provided.
4617 */ 4618 static bool migration_object_check(MigrationState *ms, Error **errp) 4619 { 4620 MigrationCapabilityStatusList *head = NULL; 4621 /* Assuming all off */ 4622 bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; 4623 int i; 4624 4625 if (!migrate_params_check(&ms->parameters, errp)) { 4626 return false; 4627 } 4628 4629 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 4630 if (ms->enabled_capabilities[i]) { 4631 QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); 4632 } 4633 } 4634 4635 ret = migrate_caps_check(cap_list, head, errp); 4636 4637 /* It works with head == NULL */ 4638 qapi_free_MigrationCapabilityStatusList(head); 4639 4640 return ret; 4641 } 4642 4643 static const TypeInfo migration_type = { 4644 .name = TYPE_MIGRATION, 4645 /* 4646 * NOTE: TYPE_MIGRATION is not really a device, as the object is 4647 * not created using qdev_new(), it is not attached to the qdev 4648 * device tree, and it is never realized. 4649 * 4650 * TODO: Make this TYPE_OBJECT once QOM provides something like 4651 * TYPE_DEVICE's "-global" properties. 4652 */ 4653 .parent = TYPE_DEVICE, 4654 .class_init = migration_class_init, 4655 .class_size = sizeof(MigrationClass), 4656 .instance_size = sizeof(MigrationState), 4657 .instance_init = migration_instance_init, 4658 .instance_finalize = migration_instance_finalize, 4659 }; 4660 4661 static void register_migration_types(void) 4662 { 4663 type_register_static(&migration_type); 4664 } 4665 4666 type_init(register_migration_types); 4667