1 /* 2 * QEMU live migration 3 * 4 * Copyright IBM, Corp. 2008 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qemu/cutils.h" 18 #include "qemu/error-report.h" 19 #include "qemu/main-loop.h" 20 #include "migration/blocker.h" 21 #include "exec.h" 22 #include "fd.h" 23 #include "socket.h" 24 #include "sysemu/runstate.h" 25 #include "sysemu/sysemu.h" 26 #include "sysemu/cpu-throttle.h" 27 #include "rdma.h" 28 #include "ram.h" 29 #include "migration/global_state.h" 30 #include "migration/misc.h" 31 #include "migration.h" 32 #include "savevm.h" 33 #include "qemu-file-channel.h" 34 #include "qemu-file.h" 35 #include "migration/vmstate.h" 36 #include "block/block.h" 37 #include "qapi/error.h" 38 #include "qapi/clone-visitor.h" 39 #include "qapi/qapi-visit-migration.h" 40 #include "qapi/qapi-visit-sockets.h" 41 #include "qapi/qapi-commands-migration.h" 42 #include "qapi/qapi-events-migration.h" 43 #include "qapi/qmp/qerror.h" 44 #include "qapi/qmp/qnull.h" 45 #include "qemu/rcu.h" 46 #include "block.h" 47 #include "postcopy-ram.h" 48 #include "qemu/thread.h" 49 #include "trace.h" 50 #include "exec/target_page.h" 51 #include "io/channel-buffer.h" 52 #include "migration/colo.h" 53 #include "hw/boards.h" 54 #include "hw/qdev-properties.h" 55 #include "hw/qdev-properties-system.h" 56 #include "monitor/monitor.h" 57 #include "net/announce.h" 58 #include "qemu/queue.h" 59 #include "multifd.h" 60 #include "qemu/yank.h" 61 62 #ifdef CONFIG_VFIO 63 #include "hw/vfio/vfio-common.h" 64 #endif 65 66 #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ 67 68 /* Amount of time to allocate to each "chunk" of bandwidth-throttled 69 * data. 
 */
#define BUFFER_DELAY     100
/* Number of BUFFER_DELAY-sized chunks that fit into 1000 units of time */
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/*
 * Time in milliseconds we are allowed to stop the source,
 * for sending the last part
 */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds (expressed below in ms) */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/*
 * Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.
 */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size (64 MiB) */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
/* Multifd defaults: channel count and compression method */
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/*
 * Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
114 */ 115 #define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 116 #define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 117 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 118 #define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 119 120 static NotifierList migration_state_notifiers = 121 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); 122 123 /* Messages sent on the return path from destination to source */ 124 enum mig_rp_message_type { 125 MIG_RP_MSG_INVALID = 0, /* Must be 0 */ 126 MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */ 127 MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */ 128 129 MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */ 130 MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ 131 MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ 132 MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ 133 134 MIG_RP_MSG_MAX 135 }; 136 137 /* When we add fault tolerance, we could have several 138 migrations at once. For now we don't need to add 139 dynamic creation of migration */ 140 141 static MigrationState *current_migration; 142 static MigrationIncomingState *current_incoming; 143 144 static bool migration_object_check(MigrationState *ms, Error **errp); 145 static int migration_maybe_pause(MigrationState *s, 146 int *current_active_state, 147 int new_state); 148 static void migrate_fd_cancel(MigrationState *s); 149 150 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) 151 { 152 uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; 153 154 return (a > b) - (a < b); 155 } 156 157 void migration_object_init(void) 158 { 159 Error *err = NULL; 160 161 /* This can only be called once. */ 162 assert(!current_migration); 163 current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION)); 164 165 /* 166 * Init the migrate incoming object as well no matter whether 167 * we'll use it or not. 
168 */ 169 assert(!current_incoming); 170 current_incoming = g_new0(MigrationIncomingState, 1); 171 current_incoming->state = MIGRATION_STATUS_NONE; 172 current_incoming->postcopy_remote_fds = 173 g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD)); 174 qemu_mutex_init(¤t_incoming->rp_mutex); 175 qemu_event_init(¤t_incoming->main_thread_load_event, false); 176 qemu_sem_init(¤t_incoming->postcopy_pause_sem_dst, 0); 177 qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0); 178 qemu_mutex_init(¤t_incoming->page_request_mutex); 179 current_incoming->page_requested = g_tree_new(page_request_addr_cmp); 180 181 if (!migration_object_check(current_migration, &err)) { 182 error_report_err(err); 183 exit(1); 184 } 185 186 blk_mig_init(); 187 ram_mig_init(); 188 dirty_bitmap_mig_init(); 189 } 190 191 void migration_shutdown(void) 192 { 193 /* 194 * Cancel the current migration - that will (eventually) 195 * stop the migration using this structure 196 */ 197 migrate_fd_cancel(current_migration); 198 object_unref(OBJECT(current_migration)); 199 200 /* 201 * Cancel outgoing migration of dirty bitmaps. It should 202 * at least unref used block nodes. 203 */ 204 dirty_bitmap_mig_cancel_outgoing(); 205 206 /* 207 * Cancel incoming migration of dirty bitmaps. Dirty bitmaps 208 * are non-critical data, and their loss never considered as 209 * something serious. 210 */ 211 dirty_bitmap_mig_cancel_incoming(); 212 } 213 214 /* For outgoing */ 215 MigrationState *migrate_get_current(void) 216 { 217 /* This can only be called after the object created. 
*/ 218 assert(current_migration); 219 return current_migration; 220 } 221 222 MigrationIncomingState *migration_incoming_get_current(void) 223 { 224 assert(current_incoming); 225 return current_incoming; 226 } 227 228 void migration_incoming_state_destroy(void) 229 { 230 struct MigrationIncomingState *mis = migration_incoming_get_current(); 231 232 if (mis->to_src_file) { 233 /* Tell source that we are done */ 234 migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); 235 qemu_fclose(mis->to_src_file); 236 mis->to_src_file = NULL; 237 } 238 239 if (mis->from_src_file) { 240 qemu_fclose(mis->from_src_file); 241 mis->from_src_file = NULL; 242 } 243 if (mis->postcopy_remote_fds) { 244 g_array_free(mis->postcopy_remote_fds, TRUE); 245 mis->postcopy_remote_fds = NULL; 246 } 247 248 qemu_event_reset(&mis->main_thread_load_event); 249 250 if (mis->page_requested) { 251 g_tree_destroy(mis->page_requested); 252 mis->page_requested = NULL; 253 } 254 255 if (mis->socket_address_list) { 256 qapi_free_SocketAddressList(mis->socket_address_list); 257 mis->socket_address_list = NULL; 258 } 259 260 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 261 } 262 263 static void migrate_generate_event(int new_state) 264 { 265 if (migrate_use_events()) { 266 qapi_event_send_migration(new_state); 267 } 268 } 269 270 static bool migrate_late_block_activate(void) 271 { 272 MigrationState *s; 273 274 s = migrate_get_current(); 275 276 return s->enabled_capabilities[ 277 MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; 278 } 279 280 /* 281 * Send a message on the return channel back to the source 282 * of the migration. 283 */ 284 static int migrate_send_rp_message(MigrationIncomingState *mis, 285 enum mig_rp_message_type message_type, 286 uint16_t len, void *data) 287 { 288 int ret = 0; 289 290 trace_migrate_send_rp_message((int)message_type, len); 291 qemu_mutex_lock(&mis->rp_mutex); 292 293 /* 294 * It's possible that the file handle got lost due to network 295 * failures. 
296 */ 297 if (!mis->to_src_file) { 298 ret = -EIO; 299 goto error; 300 } 301 302 qemu_put_be16(mis->to_src_file, (unsigned int)message_type); 303 qemu_put_be16(mis->to_src_file, len); 304 qemu_put_buffer(mis->to_src_file, data, len); 305 qemu_fflush(mis->to_src_file); 306 307 /* It's possible that qemu file got error during sending */ 308 ret = qemu_file_get_error(mis->to_src_file); 309 310 error: 311 qemu_mutex_unlock(&mis->rp_mutex); 312 return ret; 313 } 314 315 /* Request one page from the source VM at the given start address. 316 * rb: the RAMBlock to request the page in 317 * Start: Address offset within the RB 318 * Len: Length in bytes required - must be a multiple of pagesize 319 */ 320 int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, 321 RAMBlock *rb, ram_addr_t start) 322 { 323 uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ 324 size_t msglen = 12; /* start + len */ 325 size_t len = qemu_ram_pagesize(rb); 326 enum mig_rp_message_type msg_type; 327 const char *rbname; 328 int rbname_len; 329 330 *(uint64_t *)bufc = cpu_to_be64((uint64_t)start); 331 *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len); 332 333 /* 334 * We maintain the last ramblock that we requested for page. Note that we 335 * don't need locking because this function will only be called within the 336 * postcopy ram fault thread. 
337 */ 338 if (rb != mis->last_rb) { 339 mis->last_rb = rb; 340 341 rbname = qemu_ram_get_idstr(rb); 342 rbname_len = strlen(rbname); 343 344 assert(rbname_len < 256); 345 346 bufc[msglen++] = rbname_len; 347 memcpy(bufc + msglen, rbname, rbname_len); 348 msglen += rbname_len; 349 msg_type = MIG_RP_MSG_REQ_PAGES_ID; 350 } else { 351 msg_type = MIG_RP_MSG_REQ_PAGES; 352 } 353 354 return migrate_send_rp_message(mis, msg_type, msglen, bufc); 355 } 356 357 int migrate_send_rp_req_pages(MigrationIncomingState *mis, 358 RAMBlock *rb, ram_addr_t start, uint64_t haddr) 359 { 360 void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb))); 361 bool received = false; 362 363 WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { 364 received = ramblock_recv_bitmap_test_byte_offset(rb, start); 365 if (!received && !g_tree_lookup(mis->page_requested, aligned)) { 366 /* 367 * The page has not been received, and it's not yet in the page 368 * request list. Queue it. Set the value of element to 1, so that 369 * things like g_tree_lookup() will return TRUE (1) when found. 370 */ 371 g_tree_insert(mis->page_requested, aligned, (gpointer)1); 372 mis->page_requested_count++; 373 trace_postcopy_page_req_add(aligned, mis->page_requested_count); 374 } 375 } 376 377 /* 378 * If the page is there, skip sending the message. We don't even need the 379 * lock because as long as the page arrived, it'll be there forever. 
380 */ 381 if (received) { 382 return 0; 383 } 384 385 return migrate_send_rp_message_req_pages(mis, rb, start); 386 } 387 388 static bool migration_colo_enabled; 389 bool migration_incoming_colo_enabled(void) 390 { 391 return migration_colo_enabled; 392 } 393 394 void migration_incoming_disable_colo(void) 395 { 396 ram_block_discard_disable(false); 397 migration_colo_enabled = false; 398 } 399 400 int migration_incoming_enable_colo(void) 401 { 402 if (ram_block_discard_disable(true)) { 403 error_report("COLO: cannot disable RAM discard"); 404 return -EBUSY; 405 } 406 migration_colo_enabled = true; 407 return 0; 408 } 409 410 void migrate_add_address(SocketAddress *address) 411 { 412 MigrationIncomingState *mis = migration_incoming_get_current(); 413 414 QAPI_LIST_PREPEND(mis->socket_address_list, 415 QAPI_CLONE(SocketAddress, address)); 416 } 417 418 static void qemu_start_incoming_migration(const char *uri, Error **errp) 419 { 420 const char *p = NULL; 421 422 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 423 return; 424 } 425 426 qapi_event_send_migration(MIGRATION_STATUS_SETUP); 427 if (strstart(uri, "tcp:", &p) || 428 strstart(uri, "unix:", NULL) || 429 strstart(uri, "vsock:", NULL)) { 430 socket_start_incoming_migration(p ? 
p : uri, errp); 431 #ifdef CONFIG_RDMA 432 } else if (strstart(uri, "rdma:", &p)) { 433 rdma_start_incoming_migration(p, errp); 434 #endif 435 } else if (strstart(uri, "exec:", &p)) { 436 exec_start_incoming_migration(p, errp); 437 } else if (strstart(uri, "fd:", &p)) { 438 fd_start_incoming_migration(p, errp); 439 } else { 440 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 441 error_setg(errp, "unknown migration protocol: %s", uri); 442 } 443 } 444 445 static void process_incoming_migration_bh(void *opaque) 446 { 447 Error *local_err = NULL; 448 MigrationIncomingState *mis = opaque; 449 450 /* If capability late_block_activate is set: 451 * Only fire up the block code now if we're going to restart the 452 * VM, else 'cont' will do it. 453 * This causes file locking to happen; so we don't want it to happen 454 * unless we really are starting the VM. 455 */ 456 if (!migrate_late_block_activate() || 457 (autostart && (!global_state_received() || 458 global_state_get_runstate() == RUN_STATE_RUNNING))) { 459 /* Make sure all file formats flush their mutable metadata. 460 * If we get an error here, just don't restart the VM yet. */ 461 bdrv_invalidate_cache_all(&local_err); 462 if (local_err) { 463 error_report_err(local_err); 464 local_err = NULL; 465 autostart = false; 466 } 467 } 468 469 /* 470 * This must happen after all error conditions are dealt with and 471 * we're sure the VM is going to be running on this host. 472 */ 473 qemu_announce_self(&mis->announce_timer, migrate_announce_params()); 474 475 if (multifd_load_cleanup(&local_err) != 0) { 476 error_report_err(local_err); 477 autostart = false; 478 } 479 /* If global state section was not received or we are in running 480 state, we need to obey autostart. Any other state is set with 481 runstate_set. 
*/ 482 483 dirty_bitmap_mig_before_vm_start(); 484 485 if (!global_state_received() || 486 global_state_get_runstate() == RUN_STATE_RUNNING) { 487 if (autostart) { 488 vm_start(); 489 } else { 490 runstate_set(RUN_STATE_PAUSED); 491 } 492 } else if (migration_incoming_colo_enabled()) { 493 migration_incoming_disable_colo(); 494 vm_start(); 495 } else { 496 runstate_set(global_state_get_runstate()); 497 } 498 /* 499 * This must happen after any state changes since as soon as an external 500 * observer sees this event they might start to prod at the VM assuming 501 * it's ready to use. 502 */ 503 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 504 MIGRATION_STATUS_COMPLETED); 505 qemu_bh_delete(mis->bh); 506 migration_incoming_state_destroy(); 507 } 508 509 static void process_incoming_migration_co(void *opaque) 510 { 511 MigrationIncomingState *mis = migration_incoming_get_current(); 512 PostcopyState ps; 513 int ret; 514 Error *local_err = NULL; 515 516 assert(mis->from_src_file); 517 mis->migration_incoming_co = qemu_coroutine_self(); 518 mis->largest_page_size = qemu_ram_pagesize_largest(); 519 postcopy_state_set(POSTCOPY_INCOMING_NONE); 520 migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, 521 MIGRATION_STATUS_ACTIVE); 522 ret = qemu_loadvm_state(mis->from_src_file); 523 524 ps = postcopy_state_get(); 525 trace_process_incoming_migration_co_end(ret, ps); 526 if (ps != POSTCOPY_INCOMING_NONE) { 527 if (ps == POSTCOPY_INCOMING_ADVISE) { 528 /* 529 * Where a migration had postcopy enabled (and thus went to advise) 530 * but managed to complete within the precopy period, we can use 531 * the normal exit. 532 */ 533 postcopy_ram_incoming_cleanup(mis); 534 } else if (ret >= 0) { 535 /* 536 * Postcopy was started, cleanup should happen at the end of the 537 * postcopy thread. 
538 */ 539 trace_process_incoming_migration_co_postcopy_end_main(); 540 return; 541 } 542 /* Else if something went wrong then just fall out of the normal exit */ 543 } 544 545 /* we get COLO info, and know if we are in COLO mode */ 546 if (!ret && migration_incoming_colo_enabled()) { 547 /* Make sure all file formats flush their mutable metadata */ 548 bdrv_invalidate_cache_all(&local_err); 549 if (local_err) { 550 error_report_err(local_err); 551 goto fail; 552 } 553 554 qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", 555 colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); 556 mis->have_colo_incoming_thread = true; 557 qemu_coroutine_yield(); 558 559 /* Wait checkpoint incoming thread exit before free resource */ 560 qemu_thread_join(&mis->colo_incoming_thread); 561 /* We hold the global iothread lock, so it is safe here */ 562 colo_release_ram_cache(); 563 } 564 565 if (ret < 0) { 566 error_report("load of migration failed: %s", strerror(-ret)); 567 goto fail; 568 } 569 mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); 570 qemu_bh_schedule(mis->bh); 571 mis->migration_incoming_co = NULL; 572 return; 573 fail: 574 local_err = NULL; 575 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 576 MIGRATION_STATUS_FAILED); 577 qemu_fclose(mis->from_src_file); 578 if (multifd_load_cleanup(&local_err) != 0) { 579 error_report_err(local_err); 580 } 581 exit(EXIT_FAILURE); 582 } 583 584 /** 585 * @migration_incoming_setup: Setup incoming migration 586 * 587 * Returns 0 for no error or 1 for error 588 * 589 * @f: file for main migration channel 590 * @errp: where to put errors 591 */ 592 static int migration_incoming_setup(QEMUFile *f, Error **errp) 593 { 594 MigrationIncomingState *mis = migration_incoming_get_current(); 595 Error *local_err = NULL; 596 597 if (multifd_load_setup(&local_err) != 0) { 598 /* We haven't been able to create multifd threads 599 nothing better to do */ 600 error_report_err(local_err); 601 exit(EXIT_FAILURE); 602 
} 603 604 if (!mis->from_src_file) { 605 mis->from_src_file = f; 606 } 607 qemu_file_set_blocking(f, false); 608 return 0; 609 } 610 611 void migration_incoming_process(void) 612 { 613 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); 614 qemu_coroutine_enter(co); 615 } 616 617 /* Returns true if recovered from a paused migration, otherwise false */ 618 static bool postcopy_try_recover(QEMUFile *f) 619 { 620 MigrationIncomingState *mis = migration_incoming_get_current(); 621 622 if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 623 /* Resumed from a paused postcopy migration */ 624 625 mis->from_src_file = f; 626 /* Postcopy has standalone thread to do vm load */ 627 qemu_file_set_blocking(f, true); 628 629 /* Re-configure the return path */ 630 mis->to_src_file = qemu_file_get_return_path(f); 631 632 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 633 MIGRATION_STATUS_POSTCOPY_RECOVER); 634 635 /* 636 * Here, we only wake up the main loading thread (while the 637 * fault thread will still be waiting), so that we can receive 638 * commands from source now, and answer it if needed. The 639 * fault thread will be woken up afterwards until we are sure 640 * that source is ready to reply to page requests. 
641 */ 642 qemu_sem_post(&mis->postcopy_pause_sem_dst); 643 return true; 644 } 645 646 return false; 647 } 648 649 void migration_fd_process_incoming(QEMUFile *f, Error **errp) 650 { 651 Error *local_err = NULL; 652 653 if (postcopy_try_recover(f)) { 654 return; 655 } 656 657 if (migration_incoming_setup(f, &local_err)) { 658 error_propagate(errp, local_err); 659 return; 660 } 661 migration_incoming_process(); 662 } 663 664 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) 665 { 666 MigrationIncomingState *mis = migration_incoming_get_current(); 667 Error *local_err = NULL; 668 bool start_migration; 669 670 if (!mis->from_src_file) { 671 /* The first connection (multifd may have multiple) */ 672 QEMUFile *f = qemu_fopen_channel_input(ioc); 673 674 /* If it's a recovery, we're done */ 675 if (postcopy_try_recover(f)) { 676 return; 677 } 678 679 if (migration_incoming_setup(f, &local_err)) { 680 error_propagate(errp, local_err); 681 return; 682 } 683 684 /* 685 * Common migration only needs one channel, so we can start 686 * right now. Multifd needs more than one channel, we wait. 687 */ 688 start_migration = !migrate_use_multifd(); 689 } else { 690 /* Multiple connections */ 691 assert(migrate_use_multifd()); 692 start_migration = multifd_recv_new_channel(ioc, &local_err); 693 if (local_err) { 694 error_propagate(errp, local_err); 695 return; 696 } 697 } 698 699 if (start_migration) { 700 migration_incoming_process(); 701 } 702 } 703 704 /** 705 * @migration_has_all_channels: We have received all channels that we need 706 * 707 * Returns true when we have got connections to all the channels that 708 * we need for migration. 
709 */ 710 bool migration_has_all_channels(void) 711 { 712 MigrationIncomingState *mis = migration_incoming_get_current(); 713 bool all_channels; 714 715 all_channels = multifd_recv_all_channels_created(); 716 717 return all_channels && mis->from_src_file != NULL; 718 } 719 720 /* 721 * Send a 'SHUT' message on the return channel with the given value 722 * to indicate that we've finished with the RP. Non-0 value indicates 723 * error. 724 */ 725 void migrate_send_rp_shut(MigrationIncomingState *mis, 726 uint32_t value) 727 { 728 uint32_t buf; 729 730 buf = cpu_to_be32(value); 731 migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf); 732 } 733 734 /* 735 * Send a 'PONG' message on the return channel with the given value 736 * (normally in response to a 'PING') 737 */ 738 void migrate_send_rp_pong(MigrationIncomingState *mis, 739 uint32_t value) 740 { 741 uint32_t buf; 742 743 buf = cpu_to_be32(value); 744 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); 745 } 746 747 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, 748 char *block_name) 749 { 750 char buf[512]; 751 int len; 752 int64_t res; 753 754 /* 755 * First, we send the header part. It contains only the len of 756 * idstr, and the idstr itself. 757 */ 758 len = strlen(block_name); 759 buf[0] = len; 760 memcpy(buf + 1, block_name, len); 761 762 if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { 763 error_report("%s: MSG_RP_RECV_BITMAP only used for recovery", 764 __func__); 765 return; 766 } 767 768 migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf); 769 770 /* 771 * Next, we dump the received bitmap to the stream. 772 * 773 * TODO: currently we are safe since we are the only one that is 774 * using the to_src_file handle (fault thread is still paused), 775 * and it's ok even not taking the mutex. However the best way is 776 * to take the lock before sending the message header, and release 777 * the lock after sending the bitmap. 
778 */ 779 qemu_mutex_lock(&mis->rp_mutex); 780 res = ramblock_recv_bitmap_send(mis->to_src_file, block_name); 781 qemu_mutex_unlock(&mis->rp_mutex); 782 783 trace_migrate_send_rp_recv_bitmap(block_name, res); 784 } 785 786 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) 787 { 788 uint32_t buf; 789 790 buf = cpu_to_be32(value); 791 migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); 792 } 793 794 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) 795 { 796 MigrationCapabilityStatusList *head = NULL; 797 MigrationCapabilityStatusList *caps; 798 MigrationState *s = migrate_get_current(); 799 int i; 800 801 caps = NULL; /* silence compiler warning */ 802 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 803 #ifndef CONFIG_LIVE_BLOCK_MIGRATION 804 if (i == MIGRATION_CAPABILITY_BLOCK) { 805 continue; 806 } 807 #endif 808 if (head == NULL) { 809 head = g_malloc0(sizeof(*caps)); 810 caps = head; 811 } else { 812 caps->next = g_malloc0(sizeof(*caps)); 813 caps = caps->next; 814 } 815 caps->value = 816 g_malloc(sizeof(*caps->value)); 817 caps->value->capability = i; 818 caps->value->state = s->enabled_capabilities[i]; 819 } 820 821 return head; 822 } 823 824 MigrationParameters *qmp_query_migrate_parameters(Error **errp) 825 { 826 MigrationParameters *params; 827 MigrationState *s = migrate_get_current(); 828 829 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 830 params = g_malloc0(sizeof(*params)); 831 params->has_compress_level = true; 832 params->compress_level = s->parameters.compress_level; 833 params->has_compress_threads = true; 834 params->compress_threads = s->parameters.compress_threads; 835 params->has_compress_wait_thread = true; 836 params->compress_wait_thread = s->parameters.compress_wait_thread; 837 params->has_decompress_threads = true; 838 params->decompress_threads = s->parameters.decompress_threads; 839 params->has_throttle_trigger_threshold = true; 840 
params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; 841 params->has_cpu_throttle_initial = true; 842 params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; 843 params->has_cpu_throttle_increment = true; 844 params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; 845 params->has_cpu_throttle_tailslow = true; 846 params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; 847 params->has_tls_creds = true; 848 params->tls_creds = g_strdup(s->parameters.tls_creds); 849 params->has_tls_hostname = true; 850 params->tls_hostname = g_strdup(s->parameters.tls_hostname); 851 params->has_tls_authz = true; 852 params->tls_authz = g_strdup(s->parameters.tls_authz ? 853 s->parameters.tls_authz : ""); 854 params->has_max_bandwidth = true; 855 params->max_bandwidth = s->parameters.max_bandwidth; 856 params->has_downtime_limit = true; 857 params->downtime_limit = s->parameters.downtime_limit; 858 params->has_x_checkpoint_delay = true; 859 params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; 860 params->has_block_incremental = true; 861 params->block_incremental = s->parameters.block_incremental; 862 params->has_multifd_channels = true; 863 params->multifd_channels = s->parameters.multifd_channels; 864 params->has_multifd_compression = true; 865 params->multifd_compression = s->parameters.multifd_compression; 866 params->has_multifd_zlib_level = true; 867 params->multifd_zlib_level = s->parameters.multifd_zlib_level; 868 params->has_multifd_zstd_level = true; 869 params->multifd_zstd_level = s->parameters.multifd_zstd_level; 870 params->has_xbzrle_cache_size = true; 871 params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; 872 params->has_max_postcopy_bandwidth = true; 873 params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; 874 params->has_max_cpu_throttle = true; 875 params->max_cpu_throttle = s->parameters.max_cpu_throttle; 876 params->has_announce_initial = true; 877 
params->announce_initial = s->parameters.announce_initial; 878 params->has_announce_max = true; 879 params->announce_max = s->parameters.announce_max; 880 params->has_announce_rounds = true; 881 params->announce_rounds = s->parameters.announce_rounds; 882 params->has_announce_step = true; 883 params->announce_step = s->parameters.announce_step; 884 885 if (s->parameters.has_block_bitmap_mapping) { 886 params->has_block_bitmap_mapping = true; 887 params->block_bitmap_mapping = 888 QAPI_CLONE(BitmapMigrationNodeAliasList, 889 s->parameters.block_bitmap_mapping); 890 } 891 892 return params; 893 } 894 895 AnnounceParameters *migrate_announce_params(void) 896 { 897 static AnnounceParameters ap; 898 899 MigrationState *s = migrate_get_current(); 900 901 ap.initial = s->parameters.announce_initial; 902 ap.max = s->parameters.announce_max; 903 ap.rounds = s->parameters.announce_rounds; 904 ap.step = s->parameters.announce_step; 905 906 return ≈ 907 } 908 909 /* 910 * Return true if we're already in the middle of a migration 911 * (i.e. 
any of the active or setup states) 912 */ 913 bool migration_is_setup_or_active(int state) 914 { 915 switch (state) { 916 case MIGRATION_STATUS_ACTIVE: 917 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 918 case MIGRATION_STATUS_POSTCOPY_PAUSED: 919 case MIGRATION_STATUS_POSTCOPY_RECOVER: 920 case MIGRATION_STATUS_SETUP: 921 case MIGRATION_STATUS_PRE_SWITCHOVER: 922 case MIGRATION_STATUS_DEVICE: 923 case MIGRATION_STATUS_WAIT_UNPLUG: 924 case MIGRATION_STATUS_COLO: 925 return true; 926 927 default: 928 return false; 929 930 } 931 } 932 933 bool migration_is_running(int state) 934 { 935 switch (state) { 936 case MIGRATION_STATUS_ACTIVE: 937 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 938 case MIGRATION_STATUS_POSTCOPY_PAUSED: 939 case MIGRATION_STATUS_POSTCOPY_RECOVER: 940 case MIGRATION_STATUS_SETUP: 941 case MIGRATION_STATUS_PRE_SWITCHOVER: 942 case MIGRATION_STATUS_DEVICE: 943 case MIGRATION_STATUS_WAIT_UNPLUG: 944 case MIGRATION_STATUS_CANCELLING: 945 return true; 946 947 default: 948 return false; 949 950 } 951 } 952 953 static void populate_time_info(MigrationInfo *info, MigrationState *s) 954 { 955 info->has_status = true; 956 info->has_setup_time = true; 957 info->setup_time = s->setup_time; 958 if (s->state == MIGRATION_STATUS_COMPLETED) { 959 info->has_total_time = true; 960 info->total_time = s->total_time; 961 info->has_downtime = true; 962 info->downtime = s->downtime; 963 } else { 964 info->has_total_time = true; 965 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - 966 s->start_time; 967 info->has_expected_downtime = true; 968 info->expected_downtime = s->expected_downtime; 969 } 970 } 971 972 static void populate_ram_info(MigrationInfo *info, MigrationState *s) 973 { 974 info->has_ram = true; 975 info->ram = g_malloc0(sizeof(*info->ram)); 976 info->ram->transferred = ram_counters.transferred; 977 info->ram->total = ram_bytes_total(); 978 info->ram->duplicate = ram_counters.duplicate; 979 /* legacy value. 
It is not used anymore */ 980 info->ram->skipped = 0; 981 info->ram->normal = ram_counters.normal; 982 info->ram->normal_bytes = ram_counters.normal * 983 qemu_target_page_size(); 984 info->ram->mbps = s->mbps; 985 info->ram->dirty_sync_count = ram_counters.dirty_sync_count; 986 info->ram->postcopy_requests = ram_counters.postcopy_requests; 987 info->ram->page_size = qemu_target_page_size(); 988 info->ram->multifd_bytes = ram_counters.multifd_bytes; 989 info->ram->pages_per_second = s->pages_per_second; 990 991 if (migrate_use_xbzrle()) { 992 info->has_xbzrle_cache = true; 993 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); 994 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); 995 info->xbzrle_cache->bytes = xbzrle_counters.bytes; 996 info->xbzrle_cache->pages = xbzrle_counters.pages; 997 info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss; 998 info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate; 999 info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate; 1000 info->xbzrle_cache->overflow = xbzrle_counters.overflow; 1001 } 1002 1003 if (migrate_use_compression()) { 1004 info->has_compression = true; 1005 info->compression = g_malloc0(sizeof(*info->compression)); 1006 info->compression->pages = compression_counters.pages; 1007 info->compression->busy = compression_counters.busy; 1008 info->compression->busy_rate = compression_counters.busy_rate; 1009 info->compression->compressed_size = 1010 compression_counters.compressed_size; 1011 info->compression->compression_rate = 1012 compression_counters.compression_rate; 1013 } 1014 1015 if (cpu_throttle_active()) { 1016 info->has_cpu_throttle_percentage = true; 1017 info->cpu_throttle_percentage = cpu_throttle_get_percentage(); 1018 } 1019 1020 if (s->state != MIGRATION_STATUS_COMPLETED) { 1021 info->ram->remaining = ram_bytes_remaining(); 1022 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate; 1023 } 1024 } 1025 1026 static void 
populate_disk_info(MigrationInfo *info) 1027 { 1028 if (blk_mig_active()) { 1029 info->has_disk = true; 1030 info->disk = g_malloc0(sizeof(*info->disk)); 1031 info->disk->transferred = blk_mig_bytes_transferred(); 1032 info->disk->remaining = blk_mig_bytes_remaining(); 1033 info->disk->total = blk_mig_bytes_total(); 1034 } 1035 } 1036 1037 static void populate_vfio_info(MigrationInfo *info) 1038 { 1039 #ifdef CONFIG_VFIO 1040 if (vfio_mig_active()) { 1041 info->has_vfio = true; 1042 info->vfio = g_malloc0(sizeof(*info->vfio)); 1043 info->vfio->transferred = vfio_mig_bytes_transferred(); 1044 } 1045 #endif 1046 } 1047 1048 static void fill_source_migration_info(MigrationInfo *info) 1049 { 1050 MigrationState *s = migrate_get_current(); 1051 1052 switch (s->state) { 1053 case MIGRATION_STATUS_NONE: 1054 /* no migration has happened ever */ 1055 /* do not overwrite destination migration status */ 1056 return; 1057 case MIGRATION_STATUS_SETUP: 1058 info->has_status = true; 1059 info->has_total_time = false; 1060 break; 1061 case MIGRATION_STATUS_ACTIVE: 1062 case MIGRATION_STATUS_CANCELLING: 1063 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1064 case MIGRATION_STATUS_PRE_SWITCHOVER: 1065 case MIGRATION_STATUS_DEVICE: 1066 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1067 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1068 /* TODO add some postcopy stats */ 1069 populate_time_info(info, s); 1070 populate_ram_info(info, s); 1071 populate_disk_info(info); 1072 populate_vfio_info(info); 1073 break; 1074 case MIGRATION_STATUS_COLO: 1075 info->has_status = true; 1076 /* TODO: display COLO specific information (checkpoint info etc.) 
*/ 1077 break; 1078 case MIGRATION_STATUS_COMPLETED: 1079 populate_time_info(info, s); 1080 populate_ram_info(info, s); 1081 populate_vfio_info(info); 1082 break; 1083 case MIGRATION_STATUS_FAILED: 1084 info->has_status = true; 1085 if (s->error) { 1086 info->has_error_desc = true; 1087 info->error_desc = g_strdup(error_get_pretty(s->error)); 1088 } 1089 break; 1090 case MIGRATION_STATUS_CANCELLED: 1091 info->has_status = true; 1092 break; 1093 case MIGRATION_STATUS_WAIT_UNPLUG: 1094 info->has_status = true; 1095 break; 1096 } 1097 info->status = s->state; 1098 } 1099 1100 /** 1101 * @migration_caps_check - check capability validity 1102 * 1103 * @cap_list: old capability list, array of bool 1104 * @params: new capabilities to be applied soon 1105 * @errp: set *errp if the check failed, with reason 1106 * 1107 * Returns true if check passed, otherwise false. 1108 */ 1109 static bool migrate_caps_check(bool *cap_list, 1110 MigrationCapabilityStatusList *params, 1111 Error **errp) 1112 { 1113 MigrationCapabilityStatusList *cap; 1114 bool old_postcopy_cap; 1115 MigrationIncomingState *mis = migration_incoming_get_current(); 1116 1117 old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 1118 1119 for (cap = params; cap; cap = cap->next) { 1120 cap_list[cap->value->capability] = cap->value->state; 1121 } 1122 1123 #ifndef CONFIG_LIVE_BLOCK_MIGRATION 1124 if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { 1125 error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " 1126 "block migration"); 1127 error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); 1128 return false; 1129 } 1130 #endif 1131 1132 #ifndef CONFIG_REPLICATION 1133 if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { 1134 error_setg(errp, "QEMU compiled without replication module" 1135 " can't enable COLO"); 1136 error_append_hint(errp, "Please enable replication before COLO.\n"); 1137 return false; 1138 } 1139 #endif 1140 1141 if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { 1142 /* This 
check is reasonably expensive, so only when it's being 1143 * set the first time, also it's only the destination that needs 1144 * special support. 1145 */ 1146 if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && 1147 !postcopy_ram_supported_by_host(mis)) { 1148 /* postcopy_ram_supported_by_host will have emitted a more 1149 * detailed message 1150 */ 1151 error_setg(errp, "Postcopy is not supported"); 1152 return false; 1153 } 1154 1155 if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { 1156 error_setg(errp, "Postcopy is not compatible with ignore-shared"); 1157 return false; 1158 } 1159 } 1160 1161 return true; 1162 } 1163 1164 static void fill_destination_migration_info(MigrationInfo *info) 1165 { 1166 MigrationIncomingState *mis = migration_incoming_get_current(); 1167 1168 if (mis->socket_address_list) { 1169 info->has_socket_address = true; 1170 info->socket_address = 1171 QAPI_CLONE(SocketAddressList, mis->socket_address_list); 1172 } 1173 1174 switch (mis->state) { 1175 case MIGRATION_STATUS_NONE: 1176 return; 1177 case MIGRATION_STATUS_SETUP: 1178 case MIGRATION_STATUS_CANCELLING: 1179 case MIGRATION_STATUS_CANCELLED: 1180 case MIGRATION_STATUS_ACTIVE: 1181 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1182 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1183 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1184 case MIGRATION_STATUS_FAILED: 1185 case MIGRATION_STATUS_COLO: 1186 info->has_status = true; 1187 break; 1188 case MIGRATION_STATUS_COMPLETED: 1189 info->has_status = true; 1190 fill_destination_postcopy_migration_info(info); 1191 break; 1192 } 1193 info->status = mis->state; 1194 } 1195 1196 MigrationInfo *qmp_query_migrate(Error **errp) 1197 { 1198 MigrationInfo *info = g_malloc0(sizeof(*info)); 1199 1200 fill_destination_migration_info(info); 1201 fill_source_migration_info(info); 1202 1203 return info; 1204 } 1205 1206 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, 1207 Error **errp) 1208 { 1209 MigrationState *s = 
migrate_get_current(); 1210 MigrationCapabilityStatusList *cap; 1211 bool cap_list[MIGRATION_CAPABILITY__MAX]; 1212 1213 if (migration_is_running(s->state)) { 1214 error_setg(errp, QERR_MIGRATION_ACTIVE); 1215 return; 1216 } 1217 1218 memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); 1219 if (!migrate_caps_check(cap_list, params, errp)) { 1220 return; 1221 } 1222 1223 for (cap = params; cap; cap = cap->next) { 1224 s->enabled_capabilities[cap->value->capability] = cap->value->state; 1225 } 1226 } 1227 1228 /* 1229 * Check whether the parameters are valid. Error will be put into errp 1230 * (if provided). Return true if valid, otherwise false. 1231 */ 1232 static bool migrate_params_check(MigrationParameters *params, Error **errp) 1233 { 1234 if (params->has_compress_level && 1235 (params->compress_level > 9)) { 1236 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", 1237 "is invalid, it should be in the range of 0 to 9"); 1238 return false; 1239 } 1240 1241 if (params->has_compress_threads && (params->compress_threads < 1)) { 1242 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1243 "compress_threads", 1244 "is invalid, it should be in the range of 1 to 255"); 1245 return false; 1246 } 1247 1248 if (params->has_decompress_threads && (params->decompress_threads < 1)) { 1249 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1250 "decompress_threads", 1251 "is invalid, it should be in the range of 1 to 255"); 1252 return false; 1253 } 1254 1255 if (params->has_throttle_trigger_threshold && 1256 (params->throttle_trigger_threshold < 1 || 1257 params->throttle_trigger_threshold > 100)) { 1258 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1259 "throttle_trigger_threshold", 1260 "an integer in the range of 1 to 100"); 1261 return false; 1262 } 1263 1264 if (params->has_cpu_throttle_initial && 1265 (params->cpu_throttle_initial < 1 || 1266 params->cpu_throttle_initial > 99)) { 1267 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1268 
"cpu_throttle_initial", 1269 "an integer in the range of 1 to 99"); 1270 return false; 1271 } 1272 1273 if (params->has_cpu_throttle_increment && 1274 (params->cpu_throttle_increment < 1 || 1275 params->cpu_throttle_increment > 99)) { 1276 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1277 "cpu_throttle_increment", 1278 "an integer in the range of 1 to 99"); 1279 return false; 1280 } 1281 1282 if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { 1283 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1284 "max_bandwidth", 1285 "an integer in the range of 0 to "stringify(SIZE_MAX) 1286 " bytes/second"); 1287 return false; 1288 } 1289 1290 if (params->has_downtime_limit && 1291 (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { 1292 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1293 "downtime_limit", 1294 "an integer in the range of 0 to " 1295 stringify(MAX_MIGRATE_DOWNTIME)" ms"); 1296 return false; 1297 } 1298 1299 /* x_checkpoint_delay is now always positive */ 1300 1301 if (params->has_multifd_channels && (params->multifd_channels < 1)) { 1302 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1303 "multifd_channels", 1304 "is invalid, it should be in the range of 1 to 255"); 1305 return false; 1306 } 1307 1308 if (params->has_multifd_zlib_level && 1309 (params->multifd_zlib_level > 9)) { 1310 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", 1311 "is invalid, it should be in the range of 0 to 9"); 1312 return false; 1313 } 1314 1315 if (params->has_multifd_zstd_level && 1316 (params->multifd_zstd_level > 20)) { 1317 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", 1318 "is invalid, it should be in the range of 0 to 20"); 1319 return false; 1320 } 1321 1322 if (params->has_xbzrle_cache_size && 1323 (params->xbzrle_cache_size < qemu_target_page_size() || 1324 !is_power_of_2(params->xbzrle_cache_size))) { 1325 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1326 "xbzrle_cache_size", 1327 "is invalid, it should be 
bigger than target page size" 1328 " and a power of 2"); 1329 return false; 1330 } 1331 1332 if (params->has_max_cpu_throttle && 1333 (params->max_cpu_throttle < params->cpu_throttle_initial || 1334 params->max_cpu_throttle > 99)) { 1335 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1336 "max_cpu_throttle", 1337 "an integer in the range of cpu_throttle_initial to 99"); 1338 return false; 1339 } 1340 1341 if (params->has_announce_initial && 1342 params->announce_initial > 100000) { 1343 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1344 "announce_initial", 1345 "is invalid, it must be less than 100000 ms"); 1346 return false; 1347 } 1348 if (params->has_announce_max && 1349 params->announce_max > 100000) { 1350 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1351 "announce_max", 1352 "is invalid, it must be less than 100000 ms"); 1353 return false; 1354 } 1355 if (params->has_announce_rounds && 1356 params->announce_rounds > 1000) { 1357 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1358 "announce_rounds", 1359 "is invalid, it must be in the range of 0 to 1000"); 1360 return false; 1361 } 1362 if (params->has_announce_step && 1363 (params->announce_step < 1 || 1364 params->announce_step > 10000)) { 1365 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1366 "announce_step", 1367 "is invalid, it must be in the range of 1 to 10000 ms"); 1368 return false; 1369 } 1370 1371 if (params->has_block_bitmap_mapping && 1372 !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { 1373 error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); 1374 return false; 1375 } 1376 1377 return true; 1378 } 1379 1380 static void migrate_params_test_apply(MigrateSetParameters *params, 1381 MigrationParameters *dest) 1382 { 1383 *dest = migrate_get_current()->parameters; 1384 1385 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 1386 1387 if (params->has_compress_level) { 1388 dest->compress_level = params->compress_level; 1389 } 1390 1391 if 
(params->has_compress_threads) { 1392 dest->compress_threads = params->compress_threads; 1393 } 1394 1395 if (params->has_compress_wait_thread) { 1396 dest->compress_wait_thread = params->compress_wait_thread; 1397 } 1398 1399 if (params->has_decompress_threads) { 1400 dest->decompress_threads = params->decompress_threads; 1401 } 1402 1403 if (params->has_throttle_trigger_threshold) { 1404 dest->throttle_trigger_threshold = params->throttle_trigger_threshold; 1405 } 1406 1407 if (params->has_cpu_throttle_initial) { 1408 dest->cpu_throttle_initial = params->cpu_throttle_initial; 1409 } 1410 1411 if (params->has_cpu_throttle_increment) { 1412 dest->cpu_throttle_increment = params->cpu_throttle_increment; 1413 } 1414 1415 if (params->has_cpu_throttle_tailslow) { 1416 dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; 1417 } 1418 1419 if (params->has_tls_creds) { 1420 assert(params->tls_creds->type == QTYPE_QSTRING); 1421 dest->tls_creds = params->tls_creds->u.s; 1422 } 1423 1424 if (params->has_tls_hostname) { 1425 assert(params->tls_hostname->type == QTYPE_QSTRING); 1426 dest->tls_hostname = params->tls_hostname->u.s; 1427 } 1428 1429 if (params->has_max_bandwidth) { 1430 dest->max_bandwidth = params->max_bandwidth; 1431 } 1432 1433 if (params->has_downtime_limit) { 1434 dest->downtime_limit = params->downtime_limit; 1435 } 1436 1437 if (params->has_x_checkpoint_delay) { 1438 dest->x_checkpoint_delay = params->x_checkpoint_delay; 1439 } 1440 1441 if (params->has_block_incremental) { 1442 dest->block_incremental = params->block_incremental; 1443 } 1444 if (params->has_multifd_channels) { 1445 dest->multifd_channels = params->multifd_channels; 1446 } 1447 if (params->has_multifd_compression) { 1448 dest->multifd_compression = params->multifd_compression; 1449 } 1450 if (params->has_xbzrle_cache_size) { 1451 dest->xbzrle_cache_size = params->xbzrle_cache_size; 1452 } 1453 if (params->has_max_postcopy_bandwidth) { 1454 dest->max_postcopy_bandwidth = 
params->max_postcopy_bandwidth; 1455 } 1456 if (params->has_max_cpu_throttle) { 1457 dest->max_cpu_throttle = params->max_cpu_throttle; 1458 } 1459 if (params->has_announce_initial) { 1460 dest->announce_initial = params->announce_initial; 1461 } 1462 if (params->has_announce_max) { 1463 dest->announce_max = params->announce_max; 1464 } 1465 if (params->has_announce_rounds) { 1466 dest->announce_rounds = params->announce_rounds; 1467 } 1468 if (params->has_announce_step) { 1469 dest->announce_step = params->announce_step; 1470 } 1471 1472 if (params->has_block_bitmap_mapping) { 1473 dest->has_block_bitmap_mapping = true; 1474 dest->block_bitmap_mapping = params->block_bitmap_mapping; 1475 } 1476 } 1477 1478 static void migrate_params_apply(MigrateSetParameters *params, Error **errp) 1479 { 1480 MigrationState *s = migrate_get_current(); 1481 1482 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 1483 1484 if (params->has_compress_level) { 1485 s->parameters.compress_level = params->compress_level; 1486 } 1487 1488 if (params->has_compress_threads) { 1489 s->parameters.compress_threads = params->compress_threads; 1490 } 1491 1492 if (params->has_compress_wait_thread) { 1493 s->parameters.compress_wait_thread = params->compress_wait_thread; 1494 } 1495 1496 if (params->has_decompress_threads) { 1497 s->parameters.decompress_threads = params->decompress_threads; 1498 } 1499 1500 if (params->has_throttle_trigger_threshold) { 1501 s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; 1502 } 1503 1504 if (params->has_cpu_throttle_initial) { 1505 s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; 1506 } 1507 1508 if (params->has_cpu_throttle_increment) { 1509 s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; 1510 } 1511 1512 if (params->has_cpu_throttle_tailslow) { 1513 s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; 1514 } 1515 1516 if (params->has_tls_creds) { 1517 
g_free(s->parameters.tls_creds); 1518 assert(params->tls_creds->type == QTYPE_QSTRING); 1519 s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); 1520 } 1521 1522 if (params->has_tls_hostname) { 1523 g_free(s->parameters.tls_hostname); 1524 assert(params->tls_hostname->type == QTYPE_QSTRING); 1525 s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); 1526 } 1527 1528 if (params->has_tls_authz) { 1529 g_free(s->parameters.tls_authz); 1530 assert(params->tls_authz->type == QTYPE_QSTRING); 1531 s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); 1532 } 1533 1534 if (params->has_max_bandwidth) { 1535 s->parameters.max_bandwidth = params->max_bandwidth; 1536 if (s->to_dst_file && !migration_in_postcopy()) { 1537 qemu_file_set_rate_limit(s->to_dst_file, 1538 s->parameters.max_bandwidth / XFER_LIMIT_RATIO); 1539 } 1540 } 1541 1542 if (params->has_downtime_limit) { 1543 s->parameters.downtime_limit = params->downtime_limit; 1544 } 1545 1546 if (params->has_x_checkpoint_delay) { 1547 s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; 1548 if (migration_in_colo_state()) { 1549 colo_checkpoint_notify(s); 1550 } 1551 } 1552 1553 if (params->has_block_incremental) { 1554 s->parameters.block_incremental = params->block_incremental; 1555 } 1556 if (params->has_multifd_channels) { 1557 s->parameters.multifd_channels = params->multifd_channels; 1558 } 1559 if (params->has_multifd_compression) { 1560 s->parameters.multifd_compression = params->multifd_compression; 1561 } 1562 if (params->has_xbzrle_cache_size) { 1563 s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; 1564 xbzrle_cache_resize(params->xbzrle_cache_size, errp); 1565 } 1566 if (params->has_max_postcopy_bandwidth) { 1567 s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; 1568 if (s->to_dst_file && migration_in_postcopy()) { 1569 qemu_file_set_rate_limit(s->to_dst_file, 1570 s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); 1571 } 1572 } 1573 if 
(params->has_max_cpu_throttle) { 1574 s->parameters.max_cpu_throttle = params->max_cpu_throttle; 1575 } 1576 if (params->has_announce_initial) { 1577 s->parameters.announce_initial = params->announce_initial; 1578 } 1579 if (params->has_announce_max) { 1580 s->parameters.announce_max = params->announce_max; 1581 } 1582 if (params->has_announce_rounds) { 1583 s->parameters.announce_rounds = params->announce_rounds; 1584 } 1585 if (params->has_announce_step) { 1586 s->parameters.announce_step = params->announce_step; 1587 } 1588 1589 if (params->has_block_bitmap_mapping) { 1590 qapi_free_BitmapMigrationNodeAliasList( 1591 s->parameters.block_bitmap_mapping); 1592 1593 s->parameters.has_block_bitmap_mapping = true; 1594 s->parameters.block_bitmap_mapping = 1595 QAPI_CLONE(BitmapMigrationNodeAliasList, 1596 params->block_bitmap_mapping); 1597 } 1598 } 1599 1600 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) 1601 { 1602 MigrationParameters tmp; 1603 1604 /* TODO Rewrite "" to null instead */ 1605 if (params->has_tls_creds 1606 && params->tls_creds->type == QTYPE_QNULL) { 1607 qobject_unref(params->tls_creds->u.n); 1608 params->tls_creds->type = QTYPE_QSTRING; 1609 params->tls_creds->u.s = strdup(""); 1610 } 1611 /* TODO Rewrite "" to null instead */ 1612 if (params->has_tls_hostname 1613 && params->tls_hostname->type == QTYPE_QNULL) { 1614 qobject_unref(params->tls_hostname->u.n); 1615 params->tls_hostname->type = QTYPE_QSTRING; 1616 params->tls_hostname->u.s = strdup(""); 1617 } 1618 1619 migrate_params_test_apply(params, &tmp); 1620 1621 if (!migrate_params_check(&tmp, errp)) { 1622 /* Invalid parameter */ 1623 return; 1624 } 1625 1626 migrate_params_apply(params, errp); 1627 } 1628 1629 1630 void qmp_migrate_start_postcopy(Error **errp) 1631 { 1632 MigrationState *s = migrate_get_current(); 1633 1634 if (!migrate_postcopy()) { 1635 error_setg(errp, "Enable postcopy with migrate_set_capability before" 1636 " the start of migration"); 1637 
return; 1638 } 1639 1640 if (s->state == MIGRATION_STATUS_NONE) { 1641 error_setg(errp, "Postcopy must be started after migration has been" 1642 " started"); 1643 return; 1644 } 1645 /* 1646 * we don't error if migration has finished since that would be racy 1647 * with issuing this command. 1648 */ 1649 qatomic_set(&s->start_postcopy, true); 1650 } 1651 1652 /* shared migration helpers */ 1653 1654 void migrate_set_state(int *state, int old_state, int new_state) 1655 { 1656 assert(new_state < MIGRATION_STATUS__MAX); 1657 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { 1658 trace_migrate_set_state(MigrationStatus_str(new_state)); 1659 migrate_generate_event(new_state); 1660 } 1661 } 1662 1663 static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, 1664 bool state) 1665 { 1666 MigrationCapabilityStatus *cap; 1667 1668 cap = g_new0(MigrationCapabilityStatus, 1); 1669 cap->capability = index; 1670 cap->state = state; 1671 1672 return cap; 1673 } 1674 1675 void migrate_set_block_enabled(bool value, Error **errp) 1676 { 1677 MigrationCapabilityStatusList *cap = NULL; 1678 1679 QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); 1680 qmp_migrate_set_capabilities(cap, errp); 1681 qapi_free_MigrationCapabilityStatusList(cap); 1682 } 1683 1684 static void migrate_set_block_incremental(MigrationState *s, bool value) 1685 { 1686 s->parameters.block_incremental = value; 1687 } 1688 1689 static void block_cleanup_parameters(MigrationState *s) 1690 { 1691 if (s->must_remove_block_options) { 1692 /* setting to false can never fail */ 1693 migrate_set_block_enabled(false, &error_abort); 1694 migrate_set_block_incremental(s, false); 1695 s->must_remove_block_options = false; 1696 } 1697 } 1698 1699 static void migrate_fd_cleanup(MigrationState *s) 1700 { 1701 qemu_bh_delete(s->cleanup_bh); 1702 s->cleanup_bh = NULL; 1703 1704 qemu_savevm_state_cleanup(); 1705 1706 if (s->to_dst_file) { 1707 QEMUFile *tmp; 1708 1709 
trace_migrate_fd_cleanup(); 1710 qemu_mutex_unlock_iothread(); 1711 if (s->migration_thread_running) { 1712 qemu_thread_join(&s->thread); 1713 s->migration_thread_running = false; 1714 } 1715 qemu_mutex_lock_iothread(); 1716 1717 multifd_save_cleanup(); 1718 qemu_mutex_lock(&s->qemu_file_lock); 1719 tmp = s->to_dst_file; 1720 s->to_dst_file = NULL; 1721 qemu_mutex_unlock(&s->qemu_file_lock); 1722 /* 1723 * Close the file handle without the lock to make sure the 1724 * critical section won't block for long. 1725 */ 1726 qemu_fclose(tmp); 1727 } 1728 1729 assert(!migration_is_active(s)); 1730 1731 if (s->state == MIGRATION_STATUS_CANCELLING) { 1732 migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, 1733 MIGRATION_STATUS_CANCELLED); 1734 } 1735 1736 if (s->error) { 1737 /* It is used on info migrate. We can't free it */ 1738 error_report_err(error_copy(s->error)); 1739 } 1740 notifier_list_notify(&migration_state_notifiers, s); 1741 block_cleanup_parameters(s); 1742 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 1743 } 1744 1745 static void migrate_fd_cleanup_schedule(MigrationState *s) 1746 { 1747 /* 1748 * Ref the state for bh, because it may be called when 1749 * there're already no other refs 1750 */ 1751 object_ref(OBJECT(s)); 1752 qemu_bh_schedule(s->cleanup_bh); 1753 } 1754 1755 static void migrate_fd_cleanup_bh(void *opaque) 1756 { 1757 MigrationState *s = opaque; 1758 migrate_fd_cleanup(s); 1759 object_unref(OBJECT(s)); 1760 } 1761 1762 void migrate_set_error(MigrationState *s, const Error *error) 1763 { 1764 QEMU_LOCK_GUARD(&s->error_mutex); 1765 if (!s->error) { 1766 s->error = error_copy(error); 1767 } 1768 } 1769 1770 void migrate_fd_error(MigrationState *s, const Error *error) 1771 { 1772 trace_migrate_fd_error(error_get_pretty(error)); 1773 assert(s->to_dst_file == NULL); 1774 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1775 MIGRATION_STATUS_FAILED); 1776 migrate_set_error(s, error); 1777 } 1778 1779 static void 
migrate_fd_cancel(MigrationState *s) 1780 { 1781 int old_state ; 1782 QEMUFile *f = migrate_get_current()->to_dst_file; 1783 trace_migrate_fd_cancel(); 1784 1785 if (s->rp_state.from_dst_file) { 1786 /* shutdown the rp socket, so causing the rp thread to shutdown */ 1787 qemu_file_shutdown(s->rp_state.from_dst_file); 1788 } 1789 1790 do { 1791 old_state = s->state; 1792 if (!migration_is_running(old_state)) { 1793 break; 1794 } 1795 /* If the migration is paused, kick it out of the pause */ 1796 if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { 1797 qemu_sem_post(&s->pause_sem); 1798 } 1799 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); 1800 } while (s->state != MIGRATION_STATUS_CANCELLING); 1801 1802 /* 1803 * If we're unlucky the migration code might be stuck somewhere in a 1804 * send/write while the network has failed and is waiting to timeout; 1805 * if we've got shutdown(2) available then we can force it to quit. 1806 * The outgoing qemu file gets closed in migrate_fd_cleanup that is 1807 * called in a bh, so there is no race against this cancel. 
1808 */ 1809 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 1810 qemu_file_shutdown(f); 1811 } 1812 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1813 Error *local_err = NULL; 1814 1815 bdrv_invalidate_cache_all(&local_err); 1816 if (local_err) { 1817 error_report_err(local_err); 1818 } else { 1819 s->block_inactive = false; 1820 } 1821 } 1822 } 1823 1824 void add_migration_state_change_notifier(Notifier *notify) 1825 { 1826 notifier_list_add(&migration_state_notifiers, notify); 1827 } 1828 1829 void remove_migration_state_change_notifier(Notifier *notify) 1830 { 1831 notifier_remove(notify); 1832 } 1833 1834 bool migration_in_setup(MigrationState *s) 1835 { 1836 return s->state == MIGRATION_STATUS_SETUP; 1837 } 1838 1839 bool migration_has_finished(MigrationState *s) 1840 { 1841 return s->state == MIGRATION_STATUS_COMPLETED; 1842 } 1843 1844 bool migration_has_failed(MigrationState *s) 1845 { 1846 return (s->state == MIGRATION_STATUS_CANCELLED || 1847 s->state == MIGRATION_STATUS_FAILED); 1848 } 1849 1850 bool migration_in_postcopy(void) 1851 { 1852 MigrationState *s = migrate_get_current(); 1853 1854 switch (s->state) { 1855 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1856 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1857 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1858 return true; 1859 default: 1860 return false; 1861 } 1862 } 1863 1864 bool migration_in_postcopy_after_devices(MigrationState *s) 1865 { 1866 return migration_in_postcopy() && s->postcopy_after_devices; 1867 } 1868 1869 bool migration_in_incoming_postcopy(void) 1870 { 1871 PostcopyState ps = postcopy_state_get(); 1872 1873 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 1874 } 1875 1876 bool migration_is_idle(void) 1877 { 1878 MigrationState *s = current_migration; 1879 1880 if (!s) { 1881 return true; 1882 } 1883 1884 switch (s->state) { 1885 case MIGRATION_STATUS_NONE: 1886 case MIGRATION_STATUS_CANCELLED: 1887 case MIGRATION_STATUS_COMPLETED: 1888 case 
MIGRATION_STATUS_FAILED: 1889 return true; 1890 case MIGRATION_STATUS_SETUP: 1891 case MIGRATION_STATUS_CANCELLING: 1892 case MIGRATION_STATUS_ACTIVE: 1893 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1894 case MIGRATION_STATUS_COLO: 1895 case MIGRATION_STATUS_PRE_SWITCHOVER: 1896 case MIGRATION_STATUS_DEVICE: 1897 case MIGRATION_STATUS_WAIT_UNPLUG: 1898 return false; 1899 case MIGRATION_STATUS__MAX: 1900 g_assert_not_reached(); 1901 } 1902 1903 return false; 1904 } 1905 1906 bool migration_is_active(MigrationState *s) 1907 { 1908 return (s->state == MIGRATION_STATUS_ACTIVE || 1909 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 1910 } 1911 1912 void migrate_init(MigrationState *s) 1913 { 1914 /* 1915 * Reinitialise all migration state, except 1916 * parameters/capabilities that the user set, and 1917 * locks. 1918 */ 1919 s->cleanup_bh = 0; 1920 s->to_dst_file = NULL; 1921 s->state = MIGRATION_STATUS_NONE; 1922 s->rp_state.from_dst_file = NULL; 1923 s->rp_state.error = false; 1924 s->mbps = 0.0; 1925 s->pages_per_second = 0.0; 1926 s->downtime = 0; 1927 s->expected_downtime = 0; 1928 s->setup_time = 0; 1929 s->start_postcopy = false; 1930 s->postcopy_after_devices = false; 1931 s->migration_thread_running = false; 1932 error_free(s->error); 1933 s->error = NULL; 1934 s->hostname = NULL; 1935 1936 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 1937 1938 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1939 s->total_time = 0; 1940 s->vm_was_running = false; 1941 s->iteration_initial_bytes = 0; 1942 s->threshold_size = 0; 1943 } 1944 1945 static GSList *migration_blockers; 1946 1947 int migrate_add_blocker(Error *reason, Error **errp) 1948 { 1949 if (only_migratable) { 1950 error_propagate_prepend(errp, error_copy(reason), 1951 "disallowing migration blocker " 1952 "(--only-migratable) for: "); 1953 return -EACCES; 1954 } 1955 1956 if (migration_is_idle()) { 1957 migration_blockers = g_slist_prepend(migration_blockers, reason); 1958 
return 0; 1959 } 1960 1961 error_propagate_prepend(errp, error_copy(reason), 1962 "disallowing migration blocker " 1963 "(migration in progress) for: "); 1964 return -EBUSY; 1965 } 1966 1967 void migrate_del_blocker(Error *reason) 1968 { 1969 migration_blockers = g_slist_remove(migration_blockers, reason); 1970 } 1971 1972 void qmp_migrate_incoming(const char *uri, Error **errp) 1973 { 1974 Error *local_err = NULL; 1975 static bool once = true; 1976 1977 if (!once) { 1978 error_setg(errp, "The incoming migration has already been started"); 1979 return; 1980 } 1981 if (!runstate_check(RUN_STATE_INMIGRATE)) { 1982 error_setg(errp, "'-incoming' was not specified on the command line"); 1983 return; 1984 } 1985 1986 qemu_start_incoming_migration(uri, &local_err); 1987 1988 if (local_err) { 1989 error_propagate(errp, local_err); 1990 return; 1991 } 1992 1993 once = false; 1994 } 1995 1996 void qmp_migrate_recover(const char *uri, Error **errp) 1997 { 1998 MigrationIncomingState *mis = migration_incoming_get_current(); 1999 2000 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2001 error_setg(errp, "Migrate recover can only be run " 2002 "when postcopy is paused."); 2003 return; 2004 } 2005 2006 if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, 2007 false, true) == true) { 2008 error_setg(errp, "Migrate recovery is triggered already"); 2009 return; 2010 } 2011 2012 /* 2013 * Note that this call will never start a real migration; it will 2014 * only re-setup the migration stream and poke existing migration 2015 * to continue using that newly established channel. 
2016 */ 2017 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2018 qemu_start_incoming_migration(uri, errp); 2019 } 2020 2021 void qmp_migrate_pause(Error **errp) 2022 { 2023 MigrationState *ms = migrate_get_current(); 2024 MigrationIncomingState *mis = migration_incoming_get_current(); 2025 int ret; 2026 2027 if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2028 /* Source side, during postcopy */ 2029 qemu_mutex_lock(&ms->qemu_file_lock); 2030 ret = qemu_file_shutdown(ms->to_dst_file); 2031 qemu_mutex_unlock(&ms->qemu_file_lock); 2032 if (ret) { 2033 error_setg(errp, "Failed to pause source migration"); 2034 } 2035 return; 2036 } 2037 2038 if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2039 ret = qemu_file_shutdown(mis->from_src_file); 2040 if (ret) { 2041 error_setg(errp, "Failed to pause destination migration"); 2042 } 2043 return; 2044 } 2045 2046 error_setg(errp, "migrate-pause is currently only supported " 2047 "during postcopy-active state"); 2048 } 2049 2050 bool migration_is_blocked(Error **errp) 2051 { 2052 if (qemu_savevm_state_blocked(errp)) { 2053 return true; 2054 } 2055 2056 if (migration_blockers) { 2057 error_propagate(errp, error_copy(migration_blockers->data)); 2058 return true; 2059 } 2060 2061 return false; 2062 } 2063 2064 /* Returns true if continue to migrate, or false if error detected */ 2065 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, 2066 bool resume, Error **errp) 2067 { 2068 Error *local_err = NULL; 2069 2070 if (resume) { 2071 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2072 error_setg(errp, "Cannot resume if there is no " 2073 "paused migration"); 2074 return false; 2075 } 2076 2077 /* 2078 * Postcopy recovery won't work well with release-ram 2079 * capability since release-ram will drop the page buffer as 2080 * long as the page is put into the send buffer. 
So if there 2081 * is a network failure happened, any page buffers that have 2082 * not yet reached the destination VM but have already been 2083 * sent from the source VM will be lost forever. Let's refuse 2084 * the client from resuming such a postcopy migration. 2085 * Luckily release-ram was designed to only be used when src 2086 * and destination VMs are on the same host, so it should be 2087 * fine. 2088 */ 2089 if (migrate_release_ram()) { 2090 error_setg(errp, "Postcopy recovery cannot work " 2091 "when release-ram capability is set"); 2092 return false; 2093 } 2094 2095 /* This is a resume, skip init status */ 2096 return true; 2097 } 2098 2099 if (migration_is_running(s->state)) { 2100 error_setg(errp, QERR_MIGRATION_ACTIVE); 2101 return false; 2102 } 2103 2104 if (runstate_check(RUN_STATE_INMIGRATE)) { 2105 error_setg(errp, "Guest is waiting for an incoming migration"); 2106 return false; 2107 } 2108 2109 if (runstate_check(RUN_STATE_POSTMIGRATE)) { 2110 error_setg(errp, "Can't migrate the vm that was paused due to " 2111 "previous migration"); 2112 return false; 2113 } 2114 2115 if (migration_is_blocked(errp)) { 2116 return false; 2117 } 2118 2119 if (blk || blk_inc) { 2120 if (migrate_use_block() || migrate_use_block_incremental()) { 2121 error_setg(errp, "Command options are incompatible with " 2122 "current migration capabilities"); 2123 return false; 2124 } 2125 migrate_set_block_enabled(true, &local_err); 2126 if (local_err) { 2127 error_propagate(errp, local_err); 2128 return false; 2129 } 2130 s->must_remove_block_options = true; 2131 } 2132 2133 if (blk_inc) { 2134 migrate_set_block_incremental(s, true); 2135 } 2136 2137 migrate_init(s); 2138 /* 2139 * set ram_counters memory to zero for a 2140 * new migration 2141 */ 2142 memset(&ram_counters, 0, sizeof(ram_counters)); 2143 2144 return true; 2145 } 2146 2147 void qmp_migrate(const char *uri, bool has_blk, bool blk, 2148 bool has_inc, bool inc, bool has_detach, bool detach, 2149 bool has_resume, 
bool resume, Error **errp) 2150 { 2151 Error *local_err = NULL; 2152 MigrationState *s = migrate_get_current(); 2153 const char *p = NULL; 2154 2155 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 2156 has_resume && resume, errp)) { 2157 /* Error detected, put into errp */ 2158 return; 2159 } 2160 2161 if (!(has_resume && resume)) { 2162 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2163 return; 2164 } 2165 } 2166 2167 if (strstart(uri, "tcp:", &p) || 2168 strstart(uri, "unix:", NULL) || 2169 strstart(uri, "vsock:", NULL)) { 2170 socket_start_outgoing_migration(s, p ? p : uri, &local_err); 2171 #ifdef CONFIG_RDMA 2172 } else if (strstart(uri, "rdma:", &p)) { 2173 rdma_start_outgoing_migration(s, p, &local_err); 2174 #endif 2175 } else if (strstart(uri, "exec:", &p)) { 2176 exec_start_outgoing_migration(s, p, &local_err); 2177 } else if (strstart(uri, "fd:", &p)) { 2178 fd_start_outgoing_migration(s, p, &local_err); 2179 } else { 2180 if (!(has_resume && resume)) { 2181 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2182 } 2183 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 2184 "a valid migration protocol"); 2185 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2186 MIGRATION_STATUS_FAILED); 2187 block_cleanup_parameters(s); 2188 return; 2189 } 2190 2191 if (local_err) { 2192 if (!(has_resume && resume)) { 2193 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2194 } 2195 migrate_fd_error(s, local_err); 2196 error_propagate(errp, local_err); 2197 return; 2198 } 2199 } 2200 2201 void qmp_migrate_cancel(Error **errp) 2202 { 2203 migrate_fd_cancel(migrate_get_current()); 2204 } 2205 2206 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2207 { 2208 MigrationState *s = migrate_get_current(); 2209 if (s->state != state) { 2210 error_setg(errp, "Migration not in expected state: %s", 2211 MigrationStatus_str(s->state)); 2212 return; 2213 } 2214 qemu_sem_post(&s->pause_sem); 2215 } 2216 2217 void 
qmp_migrate_set_cache_size(int64_t value, Error **errp) 2218 { 2219 MigrateSetParameters p = { 2220 .has_xbzrle_cache_size = true, 2221 .xbzrle_cache_size = value, 2222 }; 2223 2224 qmp_migrate_set_parameters(&p, errp); 2225 } 2226 2227 int64_t qmp_query_migrate_cache_size(Error **errp) 2228 { 2229 return migrate_xbzrle_cache_size(); 2230 } 2231 2232 void qmp_migrate_set_speed(int64_t value, Error **errp) 2233 { 2234 MigrateSetParameters p = { 2235 .has_max_bandwidth = true, 2236 .max_bandwidth = value, 2237 }; 2238 2239 qmp_migrate_set_parameters(&p, errp); 2240 } 2241 2242 void qmp_migrate_set_downtime(double value, Error **errp) 2243 { 2244 if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { 2245 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 2246 "downtime_limit", 2247 "an integer in the range of 0 to " 2248 stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds"); 2249 return; 2250 } 2251 2252 value *= 1000; /* Convert to milliseconds */ 2253 2254 MigrateSetParameters p = { 2255 .has_downtime_limit = true, 2256 .downtime_limit = (int64_t)value, 2257 }; 2258 2259 qmp_migrate_set_parameters(&p, errp); 2260 } 2261 2262 bool migrate_release_ram(void) 2263 { 2264 MigrationState *s; 2265 2266 s = migrate_get_current(); 2267 2268 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 2269 } 2270 2271 bool migrate_postcopy_ram(void) 2272 { 2273 MigrationState *s; 2274 2275 s = migrate_get_current(); 2276 2277 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 2278 } 2279 2280 bool migrate_postcopy(void) 2281 { 2282 return migrate_postcopy_ram() || migrate_dirty_bitmaps(); 2283 } 2284 2285 bool migrate_auto_converge(void) 2286 { 2287 MigrationState *s; 2288 2289 s = migrate_get_current(); 2290 2291 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 2292 } 2293 2294 bool migrate_zero_blocks(void) 2295 { 2296 MigrationState *s; 2297 2298 s = migrate_get_current(); 2299 2300 return 
s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 2301 } 2302 2303 bool migrate_postcopy_blocktime(void) 2304 { 2305 MigrationState *s; 2306 2307 s = migrate_get_current(); 2308 2309 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; 2310 } 2311 2312 bool migrate_use_compression(void) 2313 { 2314 MigrationState *s; 2315 2316 s = migrate_get_current(); 2317 2318 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 2319 } 2320 2321 int migrate_compress_level(void) 2322 { 2323 MigrationState *s; 2324 2325 s = migrate_get_current(); 2326 2327 return s->parameters.compress_level; 2328 } 2329 2330 int migrate_compress_threads(void) 2331 { 2332 MigrationState *s; 2333 2334 s = migrate_get_current(); 2335 2336 return s->parameters.compress_threads; 2337 } 2338 2339 int migrate_compress_wait_thread(void) 2340 { 2341 MigrationState *s; 2342 2343 s = migrate_get_current(); 2344 2345 return s->parameters.compress_wait_thread; 2346 } 2347 2348 int migrate_decompress_threads(void) 2349 { 2350 MigrationState *s; 2351 2352 s = migrate_get_current(); 2353 2354 return s->parameters.decompress_threads; 2355 } 2356 2357 bool migrate_dirty_bitmaps(void) 2358 { 2359 MigrationState *s; 2360 2361 s = migrate_get_current(); 2362 2363 return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; 2364 } 2365 2366 bool migrate_ignore_shared(void) 2367 { 2368 MigrationState *s; 2369 2370 s = migrate_get_current(); 2371 2372 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; 2373 } 2374 2375 bool migrate_validate_uuid(void) 2376 { 2377 MigrationState *s; 2378 2379 s = migrate_get_current(); 2380 2381 return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; 2382 } 2383 2384 bool migrate_use_events(void) 2385 { 2386 MigrationState *s; 2387 2388 s = migrate_get_current(); 2389 2390 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 2391 } 2392 2393 bool migrate_use_multifd(void) 2394 { 2395 MigrationState *s; 
2396 2397 s = migrate_get_current(); 2398 2399 return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; 2400 } 2401 2402 bool migrate_pause_before_switchover(void) 2403 { 2404 MigrationState *s; 2405 2406 s = migrate_get_current(); 2407 2408 return s->enabled_capabilities[ 2409 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 2410 } 2411 2412 int migrate_multifd_channels(void) 2413 { 2414 MigrationState *s; 2415 2416 s = migrate_get_current(); 2417 2418 return s->parameters.multifd_channels; 2419 } 2420 2421 MultiFDCompression migrate_multifd_compression(void) 2422 { 2423 MigrationState *s; 2424 2425 s = migrate_get_current(); 2426 2427 return s->parameters.multifd_compression; 2428 } 2429 2430 int migrate_multifd_zlib_level(void) 2431 { 2432 MigrationState *s; 2433 2434 s = migrate_get_current(); 2435 2436 return s->parameters.multifd_zlib_level; 2437 } 2438 2439 int migrate_multifd_zstd_level(void) 2440 { 2441 MigrationState *s; 2442 2443 s = migrate_get_current(); 2444 2445 return s->parameters.multifd_zstd_level; 2446 } 2447 2448 int migrate_use_xbzrle(void) 2449 { 2450 MigrationState *s; 2451 2452 s = migrate_get_current(); 2453 2454 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 2455 } 2456 2457 int64_t migrate_xbzrle_cache_size(void) 2458 { 2459 MigrationState *s; 2460 2461 s = migrate_get_current(); 2462 2463 return s->parameters.xbzrle_cache_size; 2464 } 2465 2466 static int64_t migrate_max_postcopy_bandwidth(void) 2467 { 2468 MigrationState *s; 2469 2470 s = migrate_get_current(); 2471 2472 return s->parameters.max_postcopy_bandwidth; 2473 } 2474 2475 bool migrate_use_block(void) 2476 { 2477 MigrationState *s; 2478 2479 s = migrate_get_current(); 2480 2481 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 2482 } 2483 2484 bool migrate_use_return_path(void) 2485 { 2486 MigrationState *s; 2487 2488 s = migrate_get_current(); 2489 2490 return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 2491 } 2492 2493 bool 
migrate_use_block_incremental(void) 2494 { 2495 MigrationState *s; 2496 2497 s = migrate_get_current(); 2498 2499 return s->parameters.block_incremental; 2500 } 2501 2502 /* migration thread support */ 2503 /* 2504 * Something bad happened to the RP stream, mark an error 2505 * The caller shall print or trace something to indicate why 2506 */ 2507 static void mark_source_rp_bad(MigrationState *s) 2508 { 2509 s->rp_state.error = true; 2510 } 2511 2512 static struct rp_cmd_args { 2513 ssize_t len; /* -1 = variable */ 2514 const char *name; 2515 } rp_cmd_args[] = { 2516 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2517 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2518 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2519 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2520 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2521 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2522 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2523 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2524 }; 2525 2526 /* 2527 * Process a request for pages received on the return path, 2528 * We're allowed to send more than requested (e.g. to round to our page size) 2529 * and we don't need to send pages that have already been sent. 2530 */ 2531 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 2532 ram_addr_t start, size_t len) 2533 { 2534 long our_host_ps = qemu_real_host_page_size; 2535 2536 trace_migrate_handle_rp_req_pages(rbname, start, len); 2537 2538 /* 2539 * Since we currently insist on matching page sizes, just sanity check 2540 * we're being asked for whole host pages. 
2541 */ 2542 if (start & (our_host_ps - 1) || 2543 (len & (our_host_ps - 1))) { 2544 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 2545 " len: %zd", __func__, start, len); 2546 mark_source_rp_bad(ms); 2547 return; 2548 } 2549 2550 if (ram_save_queue_pages(rbname, start, len)) { 2551 mark_source_rp_bad(ms); 2552 } 2553 } 2554 2555 /* Return true to retry, false to quit */ 2556 static bool postcopy_pause_return_path_thread(MigrationState *s) 2557 { 2558 trace_postcopy_pause_return_path(); 2559 2560 qemu_sem_wait(&s->postcopy_pause_rp_sem); 2561 2562 trace_postcopy_pause_return_path_continued(); 2563 2564 return true; 2565 } 2566 2567 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 2568 { 2569 RAMBlock *block = qemu_ram_block_by_name(block_name); 2570 2571 if (!block) { 2572 error_report("%s: invalid block name '%s'", __func__, block_name); 2573 return -EINVAL; 2574 } 2575 2576 /* Fetch the received bitmap and refresh the dirty bitmap */ 2577 return ram_dirty_bitmap_reload(s, block); 2578 } 2579 2580 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 2581 { 2582 trace_source_return_path_thread_resume_ack(value); 2583 2584 if (value != MIGRATION_RESUME_ACK_VALUE) { 2585 error_report("%s: illegal resume_ack value %"PRIu32, 2586 __func__, value); 2587 return -1; 2588 } 2589 2590 /* Now both sides are active. 
*/ 2591 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2592 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2593 2594 /* Notify send thread that time to continue send pages */ 2595 qemu_sem_post(&s->rp_state.rp_sem); 2596 2597 return 0; 2598 } 2599 2600 /* 2601 * Handles messages sent on the return path towards the source VM 2602 * 2603 */ 2604 static void *source_return_path_thread(void *opaque) 2605 { 2606 MigrationState *ms = opaque; 2607 QEMUFile *rp = ms->rp_state.from_dst_file; 2608 uint16_t header_len, header_type; 2609 uint8_t buf[512]; 2610 uint32_t tmp32, sibling_error; 2611 ram_addr_t start = 0; /* =0 to silence warning */ 2612 size_t len = 0, expected_len; 2613 int res; 2614 2615 trace_source_return_path_thread_entry(); 2616 rcu_register_thread(); 2617 2618 retry: 2619 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 2620 migration_is_setup_or_active(ms->state)) { 2621 trace_source_return_path_thread_loop_top(); 2622 header_type = qemu_get_be16(rp); 2623 header_len = qemu_get_be16(rp); 2624 2625 if (qemu_file_get_error(rp)) { 2626 mark_source_rp_bad(ms); 2627 goto out; 2628 } 2629 2630 if (header_type >= MIG_RP_MSG_MAX || 2631 header_type == MIG_RP_MSG_INVALID) { 2632 error_report("RP: Received invalid message 0x%04x length 0x%04x", 2633 header_type, header_len); 2634 mark_source_rp_bad(ms); 2635 goto out; 2636 } 2637 2638 if ((rp_cmd_args[header_type].len != -1 && 2639 header_len != rp_cmd_args[header_type].len) || 2640 header_len > sizeof(buf)) { 2641 error_report("RP: Received '%s' message (0x%04x) with" 2642 "incorrect length %d expecting %zu", 2643 rp_cmd_args[header_type].name, header_type, header_len, 2644 (size_t)rp_cmd_args[header_type].len); 2645 mark_source_rp_bad(ms); 2646 goto out; 2647 } 2648 2649 /* We know we've got a valid header by this point */ 2650 res = qemu_get_buffer(rp, buf, header_len); 2651 if (res != header_len) { 2652 error_report("RP: Failed reading data for message 0x%04x" 2653 " read %d expected %d", 2654 
header_type, res, header_len); 2655 mark_source_rp_bad(ms); 2656 goto out; 2657 } 2658 2659 /* OK, we have the message and the data */ 2660 switch (header_type) { 2661 case MIG_RP_MSG_SHUT: 2662 sibling_error = ldl_be_p(buf); 2663 trace_source_return_path_thread_shut(sibling_error); 2664 if (sibling_error) { 2665 error_report("RP: Sibling indicated error %d", sibling_error); 2666 mark_source_rp_bad(ms); 2667 } 2668 /* 2669 * We'll let the main thread deal with closing the RP 2670 * we could do a shutdown(2) on it, but we're the only user 2671 * anyway, so there's nothing gained. 2672 */ 2673 goto out; 2674 2675 case MIG_RP_MSG_PONG: 2676 tmp32 = ldl_be_p(buf); 2677 trace_source_return_path_thread_pong(tmp32); 2678 break; 2679 2680 case MIG_RP_MSG_REQ_PAGES: 2681 start = ldq_be_p(buf); 2682 len = ldl_be_p(buf + 8); 2683 migrate_handle_rp_req_pages(ms, NULL, start, len); 2684 break; 2685 2686 case MIG_RP_MSG_REQ_PAGES_ID: 2687 expected_len = 12 + 1; /* header + termination */ 2688 2689 if (header_len >= expected_len) { 2690 start = ldq_be_p(buf); 2691 len = ldl_be_p(buf + 8); 2692 /* Now we expect an idstr */ 2693 tmp32 = buf[12]; /* Length of the following idstr */ 2694 buf[13 + tmp32] = '\0'; 2695 expected_len += tmp32; 2696 } 2697 if (header_len != expected_len) { 2698 error_report("RP: Req_Page_id with length %d expecting %zd", 2699 header_len, expected_len); 2700 mark_source_rp_bad(ms); 2701 goto out; 2702 } 2703 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 2704 break; 2705 2706 case MIG_RP_MSG_RECV_BITMAP: 2707 if (header_len < 1) { 2708 error_report("%s: missing block name", __func__); 2709 mark_source_rp_bad(ms); 2710 goto out; 2711 } 2712 /* Format: len (1B) + idstr (<255B). This ends the idstr. 
*/ 2713 buf[buf[0] + 1] = '\0'; 2714 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 2715 mark_source_rp_bad(ms); 2716 goto out; 2717 } 2718 break; 2719 2720 case MIG_RP_MSG_RESUME_ACK: 2721 tmp32 = ldl_be_p(buf); 2722 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 2723 mark_source_rp_bad(ms); 2724 goto out; 2725 } 2726 break; 2727 2728 default: 2729 break; 2730 } 2731 } 2732 2733 out: 2734 res = qemu_file_get_error(rp); 2735 if (res) { 2736 if (res == -EIO && migration_in_postcopy()) { 2737 /* 2738 * Maybe there is something we can do: it looks like a 2739 * network down issue, and we pause for a recovery. 2740 */ 2741 if (postcopy_pause_return_path_thread(ms)) { 2742 /* Reload rp, reset the rest */ 2743 if (rp != ms->rp_state.from_dst_file) { 2744 qemu_fclose(rp); 2745 rp = ms->rp_state.from_dst_file; 2746 } 2747 ms->rp_state.error = false; 2748 goto retry; 2749 } 2750 } 2751 2752 trace_source_return_path_thread_bad_end(); 2753 mark_source_rp_bad(ms); 2754 } 2755 2756 trace_source_return_path_thread_end(); 2757 ms->rp_state.from_dst_file = NULL; 2758 qemu_fclose(rp); 2759 rcu_unregister_thread(); 2760 return NULL; 2761 } 2762 2763 static int open_return_path_on_source(MigrationState *ms, 2764 bool create_thread) 2765 { 2766 2767 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2768 if (!ms->rp_state.from_dst_file) { 2769 return -1; 2770 } 2771 2772 trace_open_return_path_on_source(); 2773 2774 if (!create_thread) { 2775 /* We're done */ 2776 return 0; 2777 } 2778 2779 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 2780 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2781 2782 trace_open_return_path_on_source_continue(); 2783 2784 return 0; 2785 } 2786 2787 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 2788 static int await_return_path_close_on_source(MigrationState *ms) 2789 { 2790 /* 2791 * If this is a normal exit then the destination will send a SHUT and the 2792 * rp_thread will 
exit, however if there's an error we need to cause 2793 * it to exit. 2794 */ 2795 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 2796 /* 2797 * shutdown(2), if we have it, will cause it to unblock if it's stuck 2798 * waiting for the destination. 2799 */ 2800 qemu_file_shutdown(ms->rp_state.from_dst_file); 2801 mark_source_rp_bad(ms); 2802 } 2803 trace_await_return_path_close_on_source_joining(); 2804 qemu_thread_join(&ms->rp_state.rp_thread); 2805 trace_await_return_path_close_on_source_close(); 2806 return ms->rp_state.error; 2807 } 2808 2809 /* 2810 * Switch from normal iteration to postcopy 2811 * Returns non-0 on error 2812 */ 2813 static int postcopy_start(MigrationState *ms) 2814 { 2815 int ret; 2816 QIOChannelBuffer *bioc; 2817 QEMUFile *fb; 2818 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2819 int64_t bandwidth = migrate_max_postcopy_bandwidth(); 2820 bool restart_block = false; 2821 int cur_state = MIGRATION_STATUS_ACTIVE; 2822 if (!migrate_pause_before_switchover()) { 2823 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 2824 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2825 } 2826 2827 trace_postcopy_start(); 2828 qemu_mutex_lock_iothread(); 2829 trace_postcopy_start_set_run(); 2830 2831 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2832 global_state_store(); 2833 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2834 if (ret < 0) { 2835 goto fail; 2836 } 2837 2838 ret = migration_maybe_pause(ms, &cur_state, 2839 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2840 if (ret < 0) { 2841 goto fail; 2842 } 2843 2844 ret = bdrv_inactivate_all(); 2845 if (ret < 0) { 2846 goto fail; 2847 } 2848 restart_block = true; 2849 2850 /* 2851 * Cause any non-postcopiable, but iterative devices to 2852 * send out their final data. 
2853 */ 2854 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 2855 2856 /* 2857 * in Finish migrate and with the io-lock held everything should 2858 * be quiet, but we've potentially still got dirty pages and we 2859 * need to tell the destination to throw any pages it's already received 2860 * that are dirty 2861 */ 2862 if (migrate_postcopy_ram()) { 2863 if (ram_postcopy_send_discard_bitmap(ms)) { 2864 error_report("postcopy send discard bitmap failed"); 2865 goto fail; 2866 } 2867 } 2868 2869 /* 2870 * send rest of state - note things that are doing postcopy 2871 * will notice we're in POSTCOPY_ACTIVE and not actually 2872 * wrap their state up here 2873 */ 2874 /* 0 max-postcopy-bandwidth means unlimited */ 2875 if (!bandwidth) { 2876 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 2877 } else { 2878 qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); 2879 } 2880 if (migrate_postcopy_ram()) { 2881 /* Ping just for debugging, helps line traces up */ 2882 qemu_savevm_send_ping(ms->to_dst_file, 2); 2883 } 2884 2885 /* 2886 * While loading the device state we may trigger page transfer 2887 * requests and the fd must be free to process those, and thus 2888 * the destination must read the whole device state off the fd before 2889 * it starts processing it. Unfortunately the ad-hoc migration format 2890 * doesn't allow the destination to know the size to read without fully 2891 * parsing it through each devices load-state code (especially the open 2892 * coded devices that use get/put). 2893 * So we wrap the device state up in a package with a length at the start; 2894 * to do this we use a qemu_buf to hold the whole of the device state. 
2895 */ 2896 bioc = qio_channel_buffer_new(4096); 2897 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 2898 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 2899 object_unref(OBJECT(bioc)); 2900 2901 /* 2902 * Make sure the receiver can get incoming pages before we send the rest 2903 * of the state 2904 */ 2905 qemu_savevm_send_postcopy_listen(fb); 2906 2907 qemu_savevm_state_complete_precopy(fb, false, false); 2908 if (migrate_postcopy_ram()) { 2909 qemu_savevm_send_ping(fb, 3); 2910 } 2911 2912 qemu_savevm_send_postcopy_run(fb); 2913 2914 /* <><> end of stuff going into the package */ 2915 2916 /* Last point of recovery; as soon as we send the package the destination 2917 * can open devices and potentially start running. 2918 * Lets just check again we've not got any errors. 2919 */ 2920 ret = qemu_file_get_error(ms->to_dst_file); 2921 if (ret) { 2922 error_report("postcopy_start: Migration stream errored (pre package)"); 2923 goto fail_closefb; 2924 } 2925 2926 restart_block = false; 2927 2928 /* Now send that blob */ 2929 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 2930 goto fail_closefb; 2931 } 2932 qemu_fclose(fb); 2933 2934 /* Send a notify to give a chance for anything that needs to happen 2935 * at the transition to postcopy and after the device state; in particular 2936 * spice needs to trigger a transition now 2937 */ 2938 ms->postcopy_after_devices = true; 2939 notifier_list_notify(&migration_state_notifiers, ms); 2940 2941 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 2942 2943 qemu_mutex_unlock_iothread(); 2944 2945 if (migrate_postcopy_ram()) { 2946 /* 2947 * Although this ping is just for debug, it could potentially be 2948 * used for getting a better measurement of downtime at the source. 
2949 */ 2950 qemu_savevm_send_ping(ms->to_dst_file, 4); 2951 } 2952 2953 if (migrate_release_ram()) { 2954 ram_postcopy_migrated_memory_release(ms); 2955 } 2956 2957 ret = qemu_file_get_error(ms->to_dst_file); 2958 if (ret) { 2959 error_report("postcopy_start: Migration stream errored"); 2960 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2961 MIGRATION_STATUS_FAILED); 2962 } 2963 2964 return ret; 2965 2966 fail_closefb: 2967 qemu_fclose(fb); 2968 fail: 2969 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2970 MIGRATION_STATUS_FAILED); 2971 if (restart_block) { 2972 /* A failure happened early enough that we know the destination hasn't 2973 * accessed block devices, so we're safe to recover. 2974 */ 2975 Error *local_err = NULL; 2976 2977 bdrv_invalidate_cache_all(&local_err); 2978 if (local_err) { 2979 error_report_err(local_err); 2980 } 2981 } 2982 qemu_mutex_unlock_iothread(); 2983 return -1; 2984 } 2985 2986 /** 2987 * migration_maybe_pause: Pause if required to by 2988 * migrate_pause_before_switchover called with the iothread locked 2989 * Returns: 0 on success 2990 */ 2991 static int migration_maybe_pause(MigrationState *s, 2992 int *current_active_state, 2993 int new_state) 2994 { 2995 if (!migrate_pause_before_switchover()) { 2996 return 0; 2997 } 2998 2999 /* Since leaving this state is not atomic with posting the semaphore 3000 * it's possible that someone could have issued multiple migrate_continue 3001 * and the semaphore is incorrectly positive at this point; 3002 * the docs say it's undefined to reinit a semaphore that's already 3003 * init'd, so use timedwait to eat up any existing posts. 3004 */ 3005 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 3006 /* This block intentionally left blank */ 3007 } 3008 3009 /* 3010 * If the migration is cancelled when it is in the completion phase, 3011 * the migration state is set to MIGRATION_STATUS_CANCELLING. 
3012 * So we don't need to wait a semaphore, otherwise we would always 3013 * wait for the 'pause_sem' semaphore. 3014 */ 3015 if (s->state != MIGRATION_STATUS_CANCELLING) { 3016 qemu_mutex_unlock_iothread(); 3017 migrate_set_state(&s->state, *current_active_state, 3018 MIGRATION_STATUS_PRE_SWITCHOVER); 3019 qemu_sem_wait(&s->pause_sem); 3020 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 3021 new_state); 3022 *current_active_state = new_state; 3023 qemu_mutex_lock_iothread(); 3024 } 3025 3026 return s->state == new_state ? 0 : -EINVAL; 3027 } 3028 3029 /** 3030 * migration_completion: Used by migration_thread when there's not much left. 3031 * The caller 'breaks' the loop when this returns. 3032 * 3033 * @s: Current migration state 3034 */ 3035 static void migration_completion(MigrationState *s) 3036 { 3037 int ret; 3038 int current_active_state = s->state; 3039 3040 if (s->state == MIGRATION_STATUS_ACTIVE) { 3041 qemu_mutex_lock_iothread(); 3042 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3043 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3044 s->vm_was_running = runstate_is_running(); 3045 ret = global_state_store(); 3046 3047 if (!ret) { 3048 bool inactivate = !migrate_colo_enabled(); 3049 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 3050 if (ret >= 0) { 3051 ret = migration_maybe_pause(s, ¤t_active_state, 3052 MIGRATION_STATUS_DEVICE); 3053 } 3054 if (ret >= 0) { 3055 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 3056 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 3057 inactivate); 3058 } 3059 if (inactivate && ret >= 0) { 3060 s->block_inactive = true; 3061 } 3062 } 3063 qemu_mutex_unlock_iothread(); 3064 3065 if (ret < 0) { 3066 goto fail; 3067 } 3068 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3069 trace_migration_completion_postcopy_end(); 3070 3071 qemu_savevm_state_complete_postcopy(s->to_dst_file); 3072 trace_migration_completion_postcopy_end_after_complete(); 
3073 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3074 goto fail; 3075 } 3076 3077 /* 3078 * If rp was opened we must clean up the thread before 3079 * cleaning everything else up (since if there are no failures 3080 * it will wait for the destination to send it's status in 3081 * a SHUT command). 3082 */ 3083 if (s->rp_state.from_dst_file) { 3084 int rp_error; 3085 trace_migration_return_path_end_before(); 3086 rp_error = await_return_path_close_on_source(s); 3087 trace_migration_return_path_end_after(rp_error); 3088 if (rp_error) { 3089 goto fail_invalidate; 3090 } 3091 } 3092 3093 if (qemu_file_get_error(s->to_dst_file)) { 3094 trace_migration_completion_file_err(); 3095 goto fail_invalidate; 3096 } 3097 3098 if (!migrate_colo_enabled()) { 3099 migrate_set_state(&s->state, current_active_state, 3100 MIGRATION_STATUS_COMPLETED); 3101 } 3102 3103 return; 3104 3105 fail_invalidate: 3106 /* If not doing postcopy, vm_start() will be called: let's regain 3107 * control on images. 3108 */ 3109 if (s->state == MIGRATION_STATUS_ACTIVE || 3110 s->state == MIGRATION_STATUS_DEVICE) { 3111 Error *local_err = NULL; 3112 3113 qemu_mutex_lock_iothread(); 3114 bdrv_invalidate_cache_all(&local_err); 3115 if (local_err) { 3116 error_report_err(local_err); 3117 } else { 3118 s->block_inactive = false; 3119 } 3120 qemu_mutex_unlock_iothread(); 3121 } 3122 3123 fail: 3124 migrate_set_state(&s->state, current_active_state, 3125 MIGRATION_STATUS_FAILED); 3126 } 3127 3128 bool migrate_colo_enabled(void) 3129 { 3130 MigrationState *s = migrate_get_current(); 3131 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 3132 } 3133 3134 typedef enum MigThrError { 3135 /* No error detected */ 3136 MIG_THR_ERR_NONE = 0, 3137 /* Detected error, but resumed successfully */ 3138 MIG_THR_ERR_RECOVERED = 1, 3139 /* Detected fatal error, need to exit */ 3140 MIG_THR_ERR_FATAL = 2, 3141 } MigThrError; 3142 3143 static int postcopy_resume_handshake(MigrationState *s) 3144 { 3145 
qemu_savevm_send_postcopy_resume(s->to_dst_file); 3146 3147 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3148 qemu_sem_wait(&s->rp_state.rp_sem); 3149 } 3150 3151 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3152 return 0; 3153 } 3154 3155 return -1; 3156 } 3157 3158 /* Return zero if success, or <0 for error */ 3159 static int postcopy_do_resume(MigrationState *s) 3160 { 3161 int ret; 3162 3163 /* 3164 * Call all the resume_prepare() hooks, so that modules can be 3165 * ready for the migration resume. 3166 */ 3167 ret = qemu_savevm_state_resume_prepare(s); 3168 if (ret) { 3169 error_report("%s: resume_prepare() failure detected: %d", 3170 __func__, ret); 3171 return ret; 3172 } 3173 3174 /* 3175 * Last handshake with destination on the resume (destination will 3176 * switch to postcopy-active afterwards) 3177 */ 3178 ret = postcopy_resume_handshake(s); 3179 if (ret) { 3180 error_report("%s: handshake failed: %d", __func__, ret); 3181 return ret; 3182 } 3183 3184 return 0; 3185 } 3186 3187 /* 3188 * We don't return until we are in a safe state to continue current 3189 * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or 3190 * MIG_THR_ERR_FATAL if unrecovery failure happened. 3191 */ 3192 static MigThrError postcopy_pause(MigrationState *s) 3193 { 3194 assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 3195 3196 while (true) { 3197 QEMUFile *file; 3198 3199 /* Current channel is possibly broken. Release it. */ 3200 assert(s->to_dst_file); 3201 qemu_mutex_lock(&s->qemu_file_lock); 3202 file = s->to_dst_file; 3203 s->to_dst_file = NULL; 3204 qemu_mutex_unlock(&s->qemu_file_lock); 3205 3206 qemu_file_shutdown(file); 3207 qemu_fclose(file); 3208 3209 migrate_set_state(&s->state, s->state, 3210 MIGRATION_STATUS_POSTCOPY_PAUSED); 3211 3212 error_report("Detected IO failure for postcopy. " 3213 "Migration paused."); 3214 3215 /* 3216 * We wait until things fixed up. Then someone will setup the 3217 * status back for us. 
3218 */ 3219 while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 3220 qemu_sem_wait(&s->postcopy_pause_sem); 3221 } 3222 3223 if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3224 /* Woken up by a recover procedure. Give it a shot */ 3225 3226 /* 3227 * Firstly, let's wake up the return path now, with a new 3228 * return path channel. 3229 */ 3230 qemu_sem_post(&s->postcopy_pause_rp_sem); 3231 3232 /* Do the resume logic */ 3233 if (postcopy_do_resume(s) == 0) { 3234 /* Let's continue! */ 3235 trace_postcopy_pause_continued(); 3236 return MIG_THR_ERR_RECOVERED; 3237 } else { 3238 /* 3239 * Something wrong happened during the recovery, let's 3240 * pause again. Pause is always better than throwing 3241 * data away. 3242 */ 3243 continue; 3244 } 3245 } else { 3246 /* This is not right... Time to quit. */ 3247 return MIG_THR_ERR_FATAL; 3248 } 3249 } 3250 } 3251 3252 static MigThrError migration_detect_error(MigrationState *s) 3253 { 3254 int ret; 3255 int state = s->state; 3256 Error *local_error = NULL; 3257 3258 if (state == MIGRATION_STATUS_CANCELLING || 3259 state == MIGRATION_STATUS_CANCELLED) { 3260 /* End the migration, but don't set the state to failed */ 3261 return MIG_THR_ERR_FATAL; 3262 } 3263 3264 /* Try to detect any file errors */ 3265 ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); 3266 if (!ret) { 3267 /* Everything is fine */ 3268 assert(!local_error); 3269 return MIG_THR_ERR_NONE; 3270 } 3271 3272 if (local_error) { 3273 migrate_set_error(s, local_error); 3274 error_free(local_error); 3275 } 3276 3277 if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { 3278 /* 3279 * For postcopy, we allow the network to be down for a 3280 * while. After that, it can be continued by a 3281 * recovery phase. 3282 */ 3283 return postcopy_pause(s); 3284 } else { 3285 /* 3286 * For precopy (or postcopy with error outside IO), we fail 3287 * with no time. 
3288 */ 3289 migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); 3290 trace_migration_thread_file_err(); 3291 3292 /* Time to stop the migration, now. */ 3293 return MIG_THR_ERR_FATAL; 3294 } 3295 } 3296 3297 /* How many bytes have we transferred since the beginning of the migration */ 3298 static uint64_t migration_total_bytes(MigrationState *s) 3299 { 3300 return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; 3301 } 3302 3303 static void migration_calculate_complete(MigrationState *s) 3304 { 3305 uint64_t bytes = migration_total_bytes(s); 3306 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3307 int64_t transfer_time; 3308 3309 s->total_time = end_time - s->start_time; 3310 if (!s->downtime) { 3311 /* 3312 * It's still not set, so we are precopy migration. For 3313 * postcopy, downtime is calculated during postcopy_start(). 3314 */ 3315 s->downtime = end_time - s->downtime_start; 3316 } 3317 3318 transfer_time = s->total_time - s->setup_time; 3319 if (transfer_time) { 3320 s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; 3321 } 3322 } 3323 3324 static void update_iteration_initial_status(MigrationState *s) 3325 { 3326 /* 3327 * Update these three fields at the same time to avoid mismatch info lead 3328 * wrong speed calculation. 
3329 */ 3330 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3331 s->iteration_initial_bytes = migration_total_bytes(s); 3332 s->iteration_initial_pages = ram_get_total_transferred_pages(); 3333 } 3334 3335 static void migration_update_counters(MigrationState *s, 3336 int64_t current_time) 3337 { 3338 uint64_t transferred, transferred_pages, time_spent; 3339 uint64_t current_bytes; /* bytes transferred since the beginning */ 3340 double bandwidth; 3341 3342 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 3343 return; 3344 } 3345 3346 current_bytes = migration_total_bytes(s); 3347 transferred = current_bytes - s->iteration_initial_bytes; 3348 time_spent = current_time - s->iteration_start_time; 3349 bandwidth = (double)transferred / time_spent; 3350 s->threshold_size = bandwidth * s->parameters.downtime_limit; 3351 3352 s->mbps = (((double) transferred * 8.0) / 3353 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 3354 3355 transferred_pages = ram_get_total_transferred_pages() - 3356 s->iteration_initial_pages; 3357 s->pages_per_second = (double) transferred_pages / 3358 (((double) time_spent / 1000.0)); 3359 3360 /* 3361 * if we haven't sent anything, we don't want to 3362 * recalculate. 10000 is a small enough number for our purposes 3363 */ 3364 if (ram_counters.dirty_pages_rate && transferred > 10000) { 3365 s->expected_downtime = ram_counters.remaining / bandwidth; 3366 } 3367 3368 qemu_file_reset_rate_limit(s->to_dst_file); 3369 3370 update_iteration_initial_status(s); 3371 3372 trace_migrate_transferred(transferred, time_spent, 3373 bandwidth, s->threshold_size); 3374 } 3375 3376 /* Migration thread iteration status */ 3377 typedef enum { 3378 MIG_ITERATE_RESUME, /* Resume current iteration */ 3379 MIG_ITERATE_SKIP, /* Skip current iteration */ 3380 MIG_ITERATE_BREAK, /* Break the loop */ 3381 } MigIterateState; 3382 3383 /* 3384 * Return true if continue to the next iteration directly, false 3385 * otherwise. 
3386 */ 3387 static MigIterateState migration_iteration_run(MigrationState *s) 3388 { 3389 uint64_t pending_size, pend_pre, pend_compat, pend_post; 3390 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 3391 3392 qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, 3393 &pend_compat, &pend_post); 3394 pending_size = pend_pre + pend_compat + pend_post; 3395 3396 trace_migrate_pending(pending_size, s->threshold_size, 3397 pend_pre, pend_compat, pend_post); 3398 3399 if (pending_size && pending_size >= s->threshold_size) { 3400 /* Still a significant amount to transfer */ 3401 if (!in_postcopy && pend_pre <= s->threshold_size && 3402 qatomic_read(&s->start_postcopy)) { 3403 if (postcopy_start(s)) { 3404 error_report("%s: postcopy failed to start", __func__); 3405 } 3406 return MIG_ITERATE_SKIP; 3407 } 3408 /* Just another iteration step */ 3409 qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); 3410 } else { 3411 trace_migration_thread_low_pending(pending_size); 3412 migration_completion(s); 3413 return MIG_ITERATE_BREAK; 3414 } 3415 3416 return MIG_ITERATE_RESUME; 3417 } 3418 3419 static void migration_iteration_finish(MigrationState *s) 3420 { 3421 /* If we enabled cpu throttling for auto-converge, turn it off. */ 3422 cpu_throttle_stop(); 3423 3424 qemu_mutex_lock_iothread(); 3425 switch (s->state) { 3426 case MIGRATION_STATUS_COMPLETED: 3427 migration_calculate_complete(s); 3428 runstate_set(RUN_STATE_POSTMIGRATE); 3429 break; 3430 3431 case MIGRATION_STATUS_ACTIVE: 3432 /* 3433 * We should really assert here, but since it's during 3434 * migration, let's try to reduce the usage of assertions. 3435 */ 3436 if (!migrate_colo_enabled()) { 3437 error_report("%s: critical error: calling COLO code without " 3438 "COLO enabled", __func__); 3439 } 3440 migrate_start_colo_process(s); 3441 /* 3442 * Fixme: we will run VM in COLO no matter its old running state. 3443 * After exited COLO, we will keep running. 
3444 */ 3445 s->vm_was_running = true; 3446 /* Fallthrough */ 3447 case MIGRATION_STATUS_FAILED: 3448 case MIGRATION_STATUS_CANCELLED: 3449 case MIGRATION_STATUS_CANCELLING: 3450 if (s->vm_was_running) { 3451 vm_start(); 3452 } else { 3453 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 3454 runstate_set(RUN_STATE_POSTMIGRATE); 3455 } 3456 } 3457 break; 3458 3459 default: 3460 /* Should not reach here, but if so, forgive the VM. */ 3461 error_report("%s: Unknown ending state %d", __func__, s->state); 3462 break; 3463 } 3464 migrate_fd_cleanup_schedule(s); 3465 qemu_mutex_unlock_iothread(); 3466 } 3467 3468 void migration_make_urgent_request(void) 3469 { 3470 qemu_sem_post(&migrate_get_current()->rate_limit_sem); 3471 } 3472 3473 void migration_consume_urgent_request(void) 3474 { 3475 qemu_sem_wait(&migrate_get_current()->rate_limit_sem); 3476 } 3477 3478 /* Returns true if the rate limiting was broken by an urgent request */ 3479 bool migration_rate_limit(void) 3480 { 3481 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3482 MigrationState *s = migrate_get_current(); 3483 3484 bool urgent = false; 3485 migration_update_counters(s, now); 3486 if (qemu_file_rate_limit(s->to_dst_file)) { 3487 3488 if (qemu_file_get_error(s->to_dst_file)) { 3489 return false; 3490 } 3491 /* 3492 * Wait for a delay to do rate limiting OR 3493 * something urgent to post the semaphore. 3494 */ 3495 int ms = s->iteration_start_time + BUFFER_DELAY - now; 3496 trace_migration_rate_limit_pre(ms); 3497 if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { 3498 /* 3499 * We were woken by one or more urgent things but 3500 * the timedwait will have consumed one of them. 3501 * The service routine for the urgent wake will dec 3502 * the semaphore itself for each item it consumes, 3503 * so add this one we just eat back. 
3504 */ 3505 qemu_sem_post(&s->rate_limit_sem); 3506 urgent = true; 3507 } 3508 trace_migration_rate_limit_post(urgent); 3509 } 3510 return urgent; 3511 } 3512 3513 /* 3514 * Master migration thread on the source VM. 3515 * It drives the migration and pumps the data down the outgoing channel. 3516 */ 3517 static void *migration_thread(void *opaque) 3518 { 3519 MigrationState *s = opaque; 3520 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3521 MigThrError thr_error; 3522 bool urgent = false; 3523 3524 rcu_register_thread(); 3525 3526 object_ref(OBJECT(s)); 3527 update_iteration_initial_status(s); 3528 3529 qemu_savevm_state_header(s->to_dst_file); 3530 3531 /* 3532 * If we opened the return path, we need to make sure dst has it 3533 * opened as well. 3534 */ 3535 if (s->rp_state.from_dst_file) { 3536 /* Now tell the dest that it should open its end so it can reply */ 3537 qemu_savevm_send_open_return_path(s->to_dst_file); 3538 3539 /* And do a ping that will make stuff easier to debug */ 3540 qemu_savevm_send_ping(s->to_dst_file, 1); 3541 } 3542 3543 if (migrate_postcopy()) { 3544 /* 3545 * Tell the destination that we *might* want to do postcopy later; 3546 * if the other end can't do postcopy it should fail now, nice and 3547 * early. 
3548 */ 3549 qemu_savevm_send_postcopy_advise(s->to_dst_file); 3550 } 3551 3552 if (migrate_colo_enabled()) { 3553 /* Notify migration destination that we enable COLO */ 3554 qemu_savevm_send_colo_enable(s->to_dst_file); 3555 } 3556 3557 qemu_savevm_state_setup(s->to_dst_file); 3558 3559 if (qemu_savevm_state_guest_unplug_pending()) { 3560 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3561 MIGRATION_STATUS_WAIT_UNPLUG); 3562 3563 while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && 3564 qemu_savevm_state_guest_unplug_pending()) { 3565 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3566 } 3567 3568 migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, 3569 MIGRATION_STATUS_ACTIVE); 3570 } 3571 3572 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 3573 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3574 MIGRATION_STATUS_ACTIVE); 3575 3576 trace_migration_thread_setup_complete(); 3577 3578 while (migration_is_active(s)) { 3579 if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { 3580 MigIterateState iter_state = migration_iteration_run(s); 3581 if (iter_state == MIG_ITERATE_SKIP) { 3582 continue; 3583 } else if (iter_state == MIG_ITERATE_BREAK) { 3584 break; 3585 } 3586 } 3587 3588 /* 3589 * Try to detect any kind of failures, and see whether we 3590 * should stop the migration now. 3591 */ 3592 thr_error = migration_detect_error(s); 3593 if (thr_error == MIG_THR_ERR_FATAL) { 3594 /* Stop migration */ 3595 break; 3596 } else if (thr_error == MIG_THR_ERR_RECOVERED) { 3597 /* 3598 * Just recovered from a e.g. network failure, reset all 3599 * the local variables. 
This is important to avoid 3600 * breaking transferred_bytes and bandwidth calculation 3601 */ 3602 update_iteration_initial_status(s); 3603 } 3604 3605 urgent = migration_rate_limit(); 3606 } 3607 3608 trace_migration_thread_after_loop(); 3609 migration_iteration_finish(s); 3610 object_unref(OBJECT(s)); 3611 rcu_unregister_thread(); 3612 return NULL; 3613 } 3614 3615 void migrate_fd_connect(MigrationState *s, Error *error_in) 3616 { 3617 Error *local_err = NULL; 3618 int64_t rate_limit; 3619 bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; 3620 3621 s->expected_downtime = s->parameters.downtime_limit; 3622 if (resume) { 3623 assert(s->cleanup_bh); 3624 } else { 3625 assert(!s->cleanup_bh); 3626 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); 3627 } 3628 if (error_in) { 3629 migrate_fd_error(s, error_in); 3630 migrate_fd_cleanup(s); 3631 return; 3632 } 3633 3634 if (resume) { 3635 /* This is a resumed migration */ 3636 rate_limit = s->parameters.max_postcopy_bandwidth / 3637 XFER_LIMIT_RATIO; 3638 } else { 3639 /* This is a fresh new migration */ 3640 rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; 3641 3642 /* Notify before starting migration thread */ 3643 notifier_list_notify(&migration_state_notifiers, s); 3644 } 3645 3646 qemu_file_set_rate_limit(s->to_dst_file, rate_limit); 3647 qemu_file_set_blocking(s->to_dst_file, true); 3648 3649 /* 3650 * Open the return path. For postcopy, it is used exclusively. For 3651 * precopy, only if user specified "return-path" capability would 3652 * QEMU uses the return path. 
3653 */ 3654 if (migrate_postcopy_ram() || migrate_use_return_path()) { 3655 if (open_return_path_on_source(s, !resume)) { 3656 error_report("Unable to open return-path for postcopy"); 3657 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); 3658 migrate_fd_cleanup(s); 3659 return; 3660 } 3661 } 3662 3663 if (resume) { 3664 /* Wakeup the main migration thread to do the recovery */ 3665 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 3666 MIGRATION_STATUS_POSTCOPY_RECOVER); 3667 qemu_sem_post(&s->postcopy_pause_sem); 3668 return; 3669 } 3670 3671 if (multifd_save_setup(&local_err) != 0) { 3672 error_report_err(local_err); 3673 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3674 MIGRATION_STATUS_FAILED); 3675 migrate_fd_cleanup(s); 3676 return; 3677 } 3678 qemu_thread_create(&s->thread, "live_migration", migration_thread, s, 3679 QEMU_THREAD_JOINABLE); 3680 s->migration_thread_running = true; 3681 } 3682 3683 void migration_global_dump(Monitor *mon) 3684 { 3685 MigrationState *ms = migrate_get_current(); 3686 3687 monitor_printf(mon, "globals:\n"); 3688 monitor_printf(mon, "store-global-state: %s\n", 3689 ms->store_global_state ? "on" : "off"); 3690 monitor_printf(mon, "only-migratable: %s\n", 3691 only_migratable ? "on" : "off"); 3692 monitor_printf(mon, "send-configuration: %s\n", 3693 ms->send_configuration ? "on" : "off"); 3694 monitor_printf(mon, "send-section-footer: %s\n", 3695 ms->send_section_footer ? "on" : "off"); 3696 monitor_printf(mon, "decompress-error-check: %s\n", 3697 ms->decompress_error_check ? 
"on" : "off"); 3698 monitor_printf(mon, "clear-bitmap-shift: %u\n", 3699 ms->clear_bitmap_shift); 3700 } 3701 3702 #define DEFINE_PROP_MIG_CAP(name, x) \ 3703 DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) 3704 3705 static Property migration_properties[] = { 3706 DEFINE_PROP_BOOL("store-global-state", MigrationState, 3707 store_global_state, true), 3708 DEFINE_PROP_BOOL("send-configuration", MigrationState, 3709 send_configuration, true), 3710 DEFINE_PROP_BOOL("send-section-footer", MigrationState, 3711 send_section_footer, true), 3712 DEFINE_PROP_BOOL("decompress-error-check", MigrationState, 3713 decompress_error_check, true), 3714 DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, 3715 clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), 3716 3717 /* Migration parameters */ 3718 DEFINE_PROP_UINT8("x-compress-level", MigrationState, 3719 parameters.compress_level, 3720 DEFAULT_MIGRATE_COMPRESS_LEVEL), 3721 DEFINE_PROP_UINT8("x-compress-threads", MigrationState, 3722 parameters.compress_threads, 3723 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), 3724 DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, 3725 parameters.compress_wait_thread, true), 3726 DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, 3727 parameters.decompress_threads, 3728 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), 3729 DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, 3730 parameters.throttle_trigger_threshold, 3731 DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), 3732 DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, 3733 parameters.cpu_throttle_initial, 3734 DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), 3735 DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, 3736 parameters.cpu_throttle_increment, 3737 DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), 3738 DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, 3739 parameters.cpu_throttle_tailslow, false), 3740 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, 3741 
parameters.max_bandwidth, MAX_THROTTLE), 3742 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, 3743 parameters.downtime_limit, 3744 DEFAULT_MIGRATE_SET_DOWNTIME), 3745 DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, 3746 parameters.x_checkpoint_delay, 3747 DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), 3748 DEFINE_PROP_UINT8("multifd-channels", MigrationState, 3749 parameters.multifd_channels, 3750 DEFAULT_MIGRATE_MULTIFD_CHANNELS), 3751 DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, 3752 parameters.multifd_compression, 3753 DEFAULT_MIGRATE_MULTIFD_COMPRESSION), 3754 DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, 3755 parameters.multifd_zlib_level, 3756 DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), 3757 DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, 3758 parameters.multifd_zstd_level, 3759 DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), 3760 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, 3761 parameters.xbzrle_cache_size, 3762 DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), 3763 DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, 3764 parameters.max_postcopy_bandwidth, 3765 DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), 3766 DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, 3767 parameters.max_cpu_throttle, 3768 DEFAULT_MIGRATE_MAX_CPU_THROTTLE), 3769 DEFINE_PROP_SIZE("announce-initial", MigrationState, 3770 parameters.announce_initial, 3771 DEFAULT_MIGRATE_ANNOUNCE_INITIAL), 3772 DEFINE_PROP_SIZE("announce-max", MigrationState, 3773 parameters.announce_max, 3774 DEFAULT_MIGRATE_ANNOUNCE_MAX), 3775 DEFINE_PROP_SIZE("announce-rounds", MigrationState, 3776 parameters.announce_rounds, 3777 DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), 3778 DEFINE_PROP_SIZE("announce-step", MigrationState, 3779 parameters.announce_step, 3780 DEFAULT_MIGRATE_ANNOUNCE_STEP), 3781 3782 /* Migration capabilities */ 3783 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), 3784 DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), 3785 
DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), 3786 DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), 3787 DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), 3788 DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), 3789 DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), 3790 DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), 3791 DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), 3792 DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), 3793 DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), 3794 DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), 3795 3796 DEFINE_PROP_END_OF_LIST(), 3797 }; 3798 3799 static void migration_class_init(ObjectClass *klass, void *data) 3800 { 3801 DeviceClass *dc = DEVICE_CLASS(klass); 3802 3803 dc->user_creatable = false; 3804 device_class_set_props(dc, migration_properties); 3805 } 3806 3807 static void migration_instance_finalize(Object *obj) 3808 { 3809 MigrationState *ms = MIGRATION_OBJ(obj); 3810 MigrationParameters *params = &ms->parameters; 3811 3812 qemu_mutex_destroy(&ms->error_mutex); 3813 qemu_mutex_destroy(&ms->qemu_file_lock); 3814 g_free(params->tls_hostname); 3815 g_free(params->tls_creds); 3816 qemu_sem_destroy(&ms->wait_unplug_sem); 3817 qemu_sem_destroy(&ms->rate_limit_sem); 3818 qemu_sem_destroy(&ms->pause_sem); 3819 qemu_sem_destroy(&ms->postcopy_pause_sem); 3820 qemu_sem_destroy(&ms->postcopy_pause_rp_sem); 3821 qemu_sem_destroy(&ms->rp_state.rp_sem); 3822 error_free(ms->error); 3823 } 3824 3825 static void migration_instance_init(Object *obj) 3826 { 3827 MigrationState *ms = MIGRATION_OBJ(obj); 3828 MigrationParameters *params = &ms->parameters; 3829 3830 ms->state = MIGRATION_STATUS_NONE; 3831 ms->mbps = -1; 3832 ms->pages_per_second = -1; 3833 qemu_sem_init(&ms->pause_sem, 0); 3834 qemu_mutex_init(&ms->error_mutex); 3835 3836 
params->tls_hostname = g_strdup(""); 3837 params->tls_creds = g_strdup(""); 3838 3839 /* Set has_* up only for parameter checks */ 3840 params->has_compress_level = true; 3841 params->has_compress_threads = true; 3842 params->has_decompress_threads = true; 3843 params->has_throttle_trigger_threshold = true; 3844 params->has_cpu_throttle_initial = true; 3845 params->has_cpu_throttle_increment = true; 3846 params->has_cpu_throttle_tailslow = true; 3847 params->has_max_bandwidth = true; 3848 params->has_downtime_limit = true; 3849 params->has_x_checkpoint_delay = true; 3850 params->has_block_incremental = true; 3851 params->has_multifd_channels = true; 3852 params->has_multifd_compression = true; 3853 params->has_multifd_zlib_level = true; 3854 params->has_multifd_zstd_level = true; 3855 params->has_xbzrle_cache_size = true; 3856 params->has_max_postcopy_bandwidth = true; 3857 params->has_max_cpu_throttle = true; 3858 params->has_announce_initial = true; 3859 params->has_announce_max = true; 3860 params->has_announce_rounds = true; 3861 params->has_announce_step = true; 3862 3863 qemu_sem_init(&ms->postcopy_pause_sem, 0); 3864 qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); 3865 qemu_sem_init(&ms->rp_state.rp_sem, 0); 3866 qemu_sem_init(&ms->rate_limit_sem, 0); 3867 qemu_sem_init(&ms->wait_unplug_sem, 0); 3868 qemu_mutex_init(&ms->qemu_file_lock); 3869 } 3870 3871 /* 3872 * Return true if check pass, false otherwise. Error will be put 3873 * inside errp if provided. 
3874 */ 3875 static bool migration_object_check(MigrationState *ms, Error **errp) 3876 { 3877 MigrationCapabilityStatusList *head = NULL; 3878 /* Assuming all off */ 3879 bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; 3880 int i; 3881 3882 if (!migrate_params_check(&ms->parameters, errp)) { 3883 return false; 3884 } 3885 3886 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 3887 if (ms->enabled_capabilities[i]) { 3888 QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); 3889 } 3890 } 3891 3892 ret = migrate_caps_check(cap_list, head, errp); 3893 3894 /* It works with head == NULL */ 3895 qapi_free_MigrationCapabilityStatusList(head); 3896 3897 return ret; 3898 } 3899 3900 static const TypeInfo migration_type = { 3901 .name = TYPE_MIGRATION, 3902 /* 3903 * NOTE: TYPE_MIGRATION is not really a device, as the object is 3904 * not created using qdev_new(), it is not attached to the qdev 3905 * device tree, and it is never realized. 3906 * 3907 * TODO: Make this TYPE_OBJECT once QOM provides something like 3908 * TYPE_DEVICE's "-global" properties. 3909 */ 3910 .parent = TYPE_DEVICE, 3911 .class_init = migration_class_init, 3912 .class_size = sizeof(MigrationClass), 3913 .instance_size = sizeof(MigrationState), 3914 .instance_init = migration_instance_init, 3915 .instance_finalize = migration_instance_finalize, 3916 }; 3917 3918 static void register_migration_types(void) 3919 { 3920 type_register_static(&migration_type); 3921 } 3922 3923 type_init(register_migration_types); 3924