1 /* 2 * QEMU live migration 3 * 4 * Copyright IBM, Corp. 2008 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu/osdep.h" 17 #include "qemu/cutils.h" 18 #include "qemu/error-report.h" 19 #include "qemu/main-loop.h" 20 #include "migration/blocker.h" 21 #include "exec.h" 22 #include "fd.h" 23 #include "socket.h" 24 #include "sysemu/runstate.h" 25 #include "sysemu/sysemu.h" 26 #include "sysemu/cpu-throttle.h" 27 #include "rdma.h" 28 #include "ram.h" 29 #include "migration/global_state.h" 30 #include "migration/misc.h" 31 #include "migration.h" 32 #include "savevm.h" 33 #include "qemu-file-channel.h" 34 #include "qemu-file.h" 35 #include "migration/vmstate.h" 36 #include "block/block.h" 37 #include "qapi/error.h" 38 #include "qapi/clone-visitor.h" 39 #include "qapi/qapi-visit-migration.h" 40 #include "qapi/qapi-visit-sockets.h" 41 #include "qapi/qapi-commands-migration.h" 42 #include "qapi/qapi-events-migration.h" 43 #include "qapi/qmp/qerror.h" 44 #include "qapi/qmp/qnull.h" 45 #include "qemu/rcu.h" 46 #include "block.h" 47 #include "postcopy-ram.h" 48 #include "qemu/thread.h" 49 #include "trace.h" 50 #include "exec/target_page.h" 51 #include "io/channel-buffer.h" 52 #include "migration/colo.h" 53 #include "hw/boards.h" 54 #include "hw/qdev-properties.h" 55 #include "monitor/monitor.h" 56 #include "net/announce.h" 57 #include "qemu/queue.h" 58 #include "multifd.h" 59 60 #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ 61 62 /* Amount of time to allocate to each "chunk" of bandwidth-throttled 63 * data. */ 64 #define BUFFER_DELAY 100 65 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) 66 67 /* Time in milliseconds we are allowed to stop the source, 68 * for sending the last part */ 69 #define DEFAULT_MIGRATE_SET_DOWNTIME 300 70 71 /* Maximum migrate downtime set to 2000 seconds */ 72 #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 73 #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) 74 75 /* Default compression thread count */ 76 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 77 /* Default decompression thread count, usually decompression is at 78 * least 4 times as fast as compression.*/ 79 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 80 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ 81 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 82 /* Define default autoconverge cpu throttle migration parameters */ 83 #define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 84 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 85 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 86 #define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 87 88 /* Migration XBZRLE default cache size */ 89 #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) 90 91 /* The delay time (in ms) between two COLO checkpoints */ 92 #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) 93 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 94 #define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE 95 /* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ 96 #define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 97 /* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ 98 #define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 99 100 /* Background transfer rate for postcopy, 0 means unlimited, note 101 * that page requests can still exceed this limit. 102 */ 103 #define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 104 105 /* 106 * Parameters for self_announce_delay giving a stream of RARP/ARP 107 * packets after migration. 108 */ 109 #define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 110 #define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 111 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 112 #define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 113 114 static NotifierList migration_state_notifiers = 115 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); 116 117 static bool deferred_incoming; 118 119 /* Messages sent on the return path from destination to source */ 120 enum mig_rp_message_type { 121 MIG_RP_MSG_INVALID = 0, /* Must be 0 */ 122 MIG_RP_MSG_SHUT, /* sibling will not send any more RP messages */ 123 MIG_RP_MSG_PONG, /* Response to a PING; data (seq: be32 ) */ 124 125 MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */ 126 MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ 127 MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ 128 MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ 129 130 MIG_RP_MSG_MAX 131 }; 132 133 /* When we add fault tolerance, we could have several 134 migrations at once. For now we don't need to add 135 dynamic creation of migration */ 136 137 static MigrationState *current_migration; 138 static MigrationIncomingState *current_incoming; 139 140 static bool migration_object_check(MigrationState *ms, Error **errp); 141 static int migration_maybe_pause(MigrationState *s, 142 int *current_active_state, 143 int new_state); 144 static void migrate_fd_cancel(MigrationState *s); 145 146 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) 147 { 148 uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; 149 150 return (a > b) - (a < b); 151 } 152 153 void migration_object_init(void) 154 { 155 Error *err = NULL; 156 157 /* This can only be called once. */ 158 assert(!current_migration); 159 current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION)); 160 161 /* 162 * Init the migrate incoming object as well no matter whether 163 * we'll use it or not. 164 */ 165 assert(!current_incoming); 166 current_incoming = g_new0(MigrationIncomingState, 1); 167 current_incoming->state = MIGRATION_STATUS_NONE; 168 current_incoming->postcopy_remote_fds = 169 g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD)); 170 qemu_mutex_init(¤t_incoming->rp_mutex); 171 qemu_event_init(¤t_incoming->main_thread_load_event, false); 172 qemu_sem_init(¤t_incoming->postcopy_pause_sem_dst, 0); 173 qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0); 174 qemu_mutex_init(¤t_incoming->page_request_mutex); 175 current_incoming->page_requested = g_tree_new(page_request_addr_cmp); 176 177 if (!migration_object_check(current_migration, &err)) { 178 error_report_err(err); 179 exit(1); 180 } 181 } 182 183 void migration_shutdown(void) 184 { 185 /* 186 * Cancel the current migration - that will (eventually) 187 * stop the migration using this structure 188 */ 189 migrate_fd_cancel(current_migration); 190 object_unref(OBJECT(current_migration)); 191 192 /* 193 * Cancel outgoing migration of dirty bitmaps. It should 194 * at least unref used block nodes. 195 */ 196 dirty_bitmap_mig_cancel_outgoing(); 197 198 /* 199 * Cancel incoming migration of dirty bitmaps. Dirty bitmaps 200 * are non-critical data, and their loss never considered as 201 * something serious. 202 */ 203 dirty_bitmap_mig_cancel_incoming(); 204 } 205 206 /* For outgoing */ 207 MigrationState *migrate_get_current(void) 208 { 209 /* This can only be called after the object created. */ 210 assert(current_migration); 211 return current_migration; 212 } 213 214 MigrationIncomingState *migration_incoming_get_current(void) 215 { 216 assert(current_incoming); 217 return current_incoming; 218 } 219 220 void migration_incoming_state_destroy(void) 221 { 222 struct MigrationIncomingState *mis = migration_incoming_get_current(); 223 224 if (mis->to_src_file) { 225 /* Tell source that we are done */ 226 migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); 227 qemu_fclose(mis->to_src_file); 228 mis->to_src_file = NULL; 229 } 230 231 if (mis->from_src_file) { 232 qemu_fclose(mis->from_src_file); 233 mis->from_src_file = NULL; 234 } 235 if (mis->postcopy_remote_fds) { 236 g_array_free(mis->postcopy_remote_fds, TRUE); 237 mis->postcopy_remote_fds = NULL; 238 } 239 240 qemu_event_reset(&mis->main_thread_load_event); 241 242 if (mis->page_requested) { 243 g_tree_destroy(mis->page_requested); 244 mis->page_requested = NULL; 245 } 246 247 if (mis->socket_address_list) { 248 qapi_free_SocketAddressList(mis->socket_address_list); 249 mis->socket_address_list = NULL; 250 } 251 } 252 253 static void migrate_generate_event(int new_state) 254 { 255 if (migrate_use_events()) { 256 qapi_event_send_migration(new_state); 257 } 258 } 259 260 static bool migrate_late_block_activate(void) 261 { 262 MigrationState *s; 263 264 s = migrate_get_current(); 265 266 return s->enabled_capabilities[ 267 MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; 268 } 269 270 /* 271 * Called on -incoming with a defer: uri. 272 * The migration can be started later after any parameters have been 273 * changed. 274 */ 275 static void deferred_incoming_migration(Error **errp) 276 { 277 if (deferred_incoming) { 278 error_setg(errp, "Incoming migration already deferred"); 279 } 280 deferred_incoming = true; 281 } 282 283 /* 284 * Send a message on the return channel back to the source 285 * of the migration. 286 */ 287 static int migrate_send_rp_message(MigrationIncomingState *mis, 288 enum mig_rp_message_type message_type, 289 uint16_t len, void *data) 290 { 291 int ret = 0; 292 293 trace_migrate_send_rp_message((int)message_type, len); 294 qemu_mutex_lock(&mis->rp_mutex); 295 296 /* 297 * It's possible that the file handle got lost due to network 298 * failures. 299 */ 300 if (!mis->to_src_file) { 301 ret = -EIO; 302 goto error; 303 } 304 305 qemu_put_be16(mis->to_src_file, (unsigned int)message_type); 306 qemu_put_be16(mis->to_src_file, len); 307 qemu_put_buffer(mis->to_src_file, data, len); 308 qemu_fflush(mis->to_src_file); 309 310 /* It's possible that qemu file got error during sending */ 311 ret = qemu_file_get_error(mis->to_src_file); 312 313 error: 314 qemu_mutex_unlock(&mis->rp_mutex); 315 return ret; 316 } 317 318 /* Request one page from the source VM at the given start address. 319 * rb: the RAMBlock to request the page in 320 * Start: Address offset within the RB 321 * Len: Length in bytes required - must be a multiple of pagesize 322 */ 323 int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, 324 RAMBlock *rb, ram_addr_t start) 325 { 326 uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ 327 size_t msglen = 12; /* start + len */ 328 size_t len = qemu_ram_pagesize(rb); 329 enum mig_rp_message_type msg_type; 330 const char *rbname; 331 int rbname_len; 332 333 *(uint64_t *)bufc = cpu_to_be64((uint64_t)start); 334 *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len); 335 336 /* 337 * We maintain the last ramblock that we requested for page. Note that we 338 * don't need locking because this function will only be called within the 339 * postcopy ram fault thread. 340 */ 341 if (rb != mis->last_rb) { 342 mis->last_rb = rb; 343 344 rbname = qemu_ram_get_idstr(rb); 345 rbname_len = strlen(rbname); 346 347 assert(rbname_len < 256); 348 349 bufc[msglen++] = rbname_len; 350 memcpy(bufc + msglen, rbname, rbname_len); 351 msglen += rbname_len; 352 msg_type = MIG_RP_MSG_REQ_PAGES_ID; 353 } else { 354 msg_type = MIG_RP_MSG_REQ_PAGES; 355 } 356 357 return migrate_send_rp_message(mis, msg_type, msglen, bufc); 358 } 359 360 int migrate_send_rp_req_pages(MigrationIncomingState *mis, 361 RAMBlock *rb, ram_addr_t start, uint64_t haddr) 362 { 363 void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb))); 364 bool received; 365 366 WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { 367 received = ramblock_recv_bitmap_test_byte_offset(rb, start); 368 if (!received && !g_tree_lookup(mis->page_requested, aligned)) { 369 /* 370 * The page has not been received, and it's not yet in the page 371 * request list. Queue it. Set the value of element to 1, so that 372 * things like g_tree_lookup() will return TRUE (1) when found. 373 */ 374 g_tree_insert(mis->page_requested, aligned, (gpointer)1); 375 mis->page_requested_count++; 376 trace_postcopy_page_req_add(aligned, mis->page_requested_count); 377 } 378 } 379 380 /* 381 * If the page is there, skip sending the message. We don't even need the 382 * lock because as long as the page arrived, it'll be there forever. 383 */ 384 if (received) { 385 return 0; 386 } 387 388 return migrate_send_rp_message_req_pages(mis, rb, start); 389 } 390 391 static bool migration_colo_enabled; 392 bool migration_incoming_colo_enabled(void) 393 { 394 return migration_colo_enabled; 395 } 396 397 void migration_incoming_disable_colo(void) 398 { 399 ram_block_discard_disable(false); 400 migration_colo_enabled = false; 401 } 402 403 int migration_incoming_enable_colo(void) 404 { 405 if (ram_block_discard_disable(true)) { 406 error_report("COLO: cannot disable RAM discard"); 407 return -EBUSY; 408 } 409 migration_colo_enabled = true; 410 return 0; 411 } 412 413 void migrate_add_address(SocketAddress *address) 414 { 415 MigrationIncomingState *mis = migration_incoming_get_current(); 416 SocketAddressList *addrs; 417 418 addrs = g_new0(SocketAddressList, 1); 419 addrs->next = mis->socket_address_list; 420 mis->socket_address_list = addrs; 421 addrs->value = QAPI_CLONE(SocketAddress, address); 422 } 423 424 void qemu_start_incoming_migration(const char *uri, Error **errp) 425 { 426 const char *p = NULL; 427 428 qapi_event_send_migration(MIGRATION_STATUS_SETUP); 429 if (!strcmp(uri, "defer")) { 430 deferred_incoming_migration(errp); 431 } else if (strstart(uri, "tcp:", &p) || 432 strstart(uri, "unix:", NULL) || 433 strstart(uri, "vsock:", NULL)) { 434 socket_start_incoming_migration(p ? p : uri, errp); 435 #ifdef CONFIG_RDMA 436 } else if (strstart(uri, "rdma:", &p)) { 437 rdma_start_incoming_migration(p, errp); 438 #endif 439 } else if (strstart(uri, "exec:", &p)) { 440 exec_start_incoming_migration(p, errp); 441 } else if (strstart(uri, "fd:", &p)) { 442 fd_start_incoming_migration(p, errp); 443 } else { 444 error_setg(errp, "unknown migration protocol: %s", uri); 445 } 446 } 447 448 static void process_incoming_migration_bh(void *opaque) 449 { 450 Error *local_err = NULL; 451 MigrationIncomingState *mis = opaque; 452 453 /* If capability late_block_activate is set: 454 * Only fire up the block code now if we're going to restart the 455 * VM, else 'cont' will do it. 456 * This causes file locking to happen; so we don't want it to happen 457 * unless we really are starting the VM. 458 */ 459 if (!migrate_late_block_activate() || 460 (autostart && (!global_state_received() || 461 global_state_get_runstate() == RUN_STATE_RUNNING))) { 462 /* Make sure all file formats flush their mutable metadata. 463 * If we get an error here, just don't restart the VM yet. */ 464 bdrv_invalidate_cache_all(&local_err); 465 if (local_err) { 466 error_report_err(local_err); 467 local_err = NULL; 468 autostart = false; 469 } 470 } 471 472 /* 473 * This must happen after all error conditions are dealt with and 474 * we're sure the VM is going to be running on this host. 475 */ 476 qemu_announce_self(&mis->announce_timer, migrate_announce_params()); 477 478 if (multifd_load_cleanup(&local_err) != 0) { 479 error_report_err(local_err); 480 autostart = false; 481 } 482 /* If global state section was not received or we are in running 483 state, we need to obey autostart. Any other state is set with 484 runstate_set. */ 485 486 dirty_bitmap_mig_before_vm_start(); 487 488 if (!global_state_received() || 489 global_state_get_runstate() == RUN_STATE_RUNNING) { 490 if (autostart) { 491 vm_start(); 492 } else { 493 runstate_set(RUN_STATE_PAUSED); 494 } 495 } else if (migration_incoming_colo_enabled()) { 496 migration_incoming_disable_colo(); 497 vm_start(); 498 } else { 499 runstate_set(global_state_get_runstate()); 500 } 501 /* 502 * This must happen after any state changes since as soon as an external 503 * observer sees this event they might start to prod at the VM assuming 504 * it's ready to use. 505 */ 506 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 507 MIGRATION_STATUS_COMPLETED); 508 qemu_bh_delete(mis->bh); 509 migration_incoming_state_destroy(); 510 } 511 512 static void process_incoming_migration_co(void *opaque) 513 { 514 MigrationIncomingState *mis = migration_incoming_get_current(); 515 PostcopyState ps; 516 int ret; 517 Error *local_err = NULL; 518 519 assert(mis->from_src_file); 520 mis->migration_incoming_co = qemu_coroutine_self(); 521 mis->largest_page_size = qemu_ram_pagesize_largest(); 522 postcopy_state_set(POSTCOPY_INCOMING_NONE); 523 migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, 524 MIGRATION_STATUS_ACTIVE); 525 ret = qemu_loadvm_state(mis->from_src_file); 526 527 ps = postcopy_state_get(); 528 trace_process_incoming_migration_co_end(ret, ps); 529 if (ps != POSTCOPY_INCOMING_NONE) { 530 if (ps == POSTCOPY_INCOMING_ADVISE) { 531 /* 532 * Where a migration had postcopy enabled (and thus went to advise) 533 * but managed to complete within the precopy period, we can use 534 * the normal exit. 535 */ 536 postcopy_ram_incoming_cleanup(mis); 537 } else if (ret >= 0) { 538 /* 539 * Postcopy was started, cleanup should happen at the end of the 540 * postcopy thread. 541 */ 542 trace_process_incoming_migration_co_postcopy_end_main(); 543 return; 544 } 545 /* Else if something went wrong then just fall out of the normal exit */ 546 } 547 548 /* we get COLO info, and know if we are in COLO mode */ 549 if (!ret && migration_incoming_colo_enabled()) { 550 /* Make sure all file formats flush their mutable metadata */ 551 bdrv_invalidate_cache_all(&local_err); 552 if (local_err) { 553 error_report_err(local_err); 554 goto fail; 555 } 556 557 qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", 558 colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); 559 mis->have_colo_incoming_thread = true; 560 qemu_coroutine_yield(); 561 562 /* Wait checkpoint incoming thread exit before free resource */ 563 qemu_thread_join(&mis->colo_incoming_thread); 564 /* We hold the global iothread lock, so it is safe here */ 565 colo_release_ram_cache(); 566 } 567 568 if (ret < 0) { 569 error_report("load of migration failed: %s", strerror(-ret)); 570 goto fail; 571 } 572 mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); 573 qemu_bh_schedule(mis->bh); 574 mis->migration_incoming_co = NULL; 575 return; 576 fail: 577 local_err = NULL; 578 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 579 MIGRATION_STATUS_FAILED); 580 qemu_fclose(mis->from_src_file); 581 if (multifd_load_cleanup(&local_err) != 0) { 582 error_report_err(local_err); 583 } 584 exit(EXIT_FAILURE); 585 } 586 587 /** 588 * @migration_incoming_setup: Setup incoming migration 589 * 590 * Returns 0 for no error or 1 for error 591 * 592 * @f: file for main migration channel 593 * @errp: where to put errors 594 */ 595 static int migration_incoming_setup(QEMUFile *f, Error **errp) 596 { 597 MigrationIncomingState *mis = migration_incoming_get_current(); 598 Error *local_err = NULL; 599 600 if (multifd_load_setup(&local_err) != 0) { 601 /* We haven't been able to create multifd threads 602 nothing better to do */ 603 error_report_err(local_err); 604 exit(EXIT_FAILURE); 605 } 606 607 if (!mis->from_src_file) { 608 mis->from_src_file = f; 609 } 610 qemu_file_set_blocking(f, false); 611 return 0; 612 } 613 614 void migration_incoming_process(void) 615 { 616 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL); 617 qemu_coroutine_enter(co); 618 } 619 620 /* Returns true if recovered from a paused migration, otherwise false */ 621 static bool postcopy_try_recover(QEMUFile *f) 622 { 623 MigrationIncomingState *mis = migration_incoming_get_current(); 624 625 if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 626 /* Resumed from a paused postcopy migration */ 627 628 mis->from_src_file = f; 629 /* Postcopy has standalone thread to do vm load */ 630 qemu_file_set_blocking(f, true); 631 632 /* Re-configure the return path */ 633 mis->to_src_file = qemu_file_get_return_path(f); 634 635 migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 636 MIGRATION_STATUS_POSTCOPY_RECOVER); 637 638 /* 639 * Here, we only wake up the main loading thread (while the 640 * fault thread will still be waiting), so that we can receive 641 * commands from source now, and answer it if needed. The 642 * fault thread will be woken up afterwards until we are sure 643 * that source is ready to reply to page requests. 644 */ 645 qemu_sem_post(&mis->postcopy_pause_sem_dst); 646 return true; 647 } 648 649 return false; 650 } 651 652 void migration_fd_process_incoming(QEMUFile *f, Error **errp) 653 { 654 Error *local_err = NULL; 655 656 if (postcopy_try_recover(f)) { 657 return; 658 } 659 660 if (migration_incoming_setup(f, &local_err)) { 661 error_propagate(errp, local_err); 662 return; 663 } 664 migration_incoming_process(); 665 } 666 667 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) 668 { 669 MigrationIncomingState *mis = migration_incoming_get_current(); 670 Error *local_err = NULL; 671 bool start_migration; 672 673 if (!mis->from_src_file) { 674 /* The first connection (multifd may have multiple) */ 675 QEMUFile *f = qemu_fopen_channel_input(ioc); 676 677 /* If it's a recovery, we're done */ 678 if (postcopy_try_recover(f)) { 679 return; 680 } 681 682 if (migration_incoming_setup(f, &local_err)) { 683 error_propagate(errp, local_err); 684 return; 685 } 686 687 /* 688 * Common migration only needs one channel, so we can start 689 * right now. Multifd needs more than one channel, we wait. 690 */ 691 start_migration = !migrate_use_multifd(); 692 } else { 693 /* Multiple connections */ 694 assert(migrate_use_multifd()); 695 start_migration = multifd_recv_new_channel(ioc, &local_err); 696 if (local_err) { 697 error_propagate(errp, local_err); 698 return; 699 } 700 } 701 702 if (start_migration) { 703 migration_incoming_process(); 704 } 705 } 706 707 /** 708 * @migration_has_all_channels: We have received all channels that we need 709 * 710 * Returns true when we have got connections to all the channels that 711 * we need for migration. 712 */ 713 bool migration_has_all_channels(void) 714 { 715 MigrationIncomingState *mis = migration_incoming_get_current(); 716 bool all_channels; 717 718 all_channels = multifd_recv_all_channels_created(); 719 720 return all_channels && mis->from_src_file != NULL; 721 } 722 723 /* 724 * Send a 'SHUT' message on the return channel with the given value 725 * to indicate that we've finished with the RP. Non-0 value indicates 726 * error. 727 */ 728 void migrate_send_rp_shut(MigrationIncomingState *mis, 729 uint32_t value) 730 { 731 uint32_t buf; 732 733 buf = cpu_to_be32(value); 734 migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf); 735 } 736 737 /* 738 * Send a 'PONG' message on the return channel with the given value 739 * (normally in response to a 'PING') 740 */ 741 void migrate_send_rp_pong(MigrationIncomingState *mis, 742 uint32_t value) 743 { 744 uint32_t buf; 745 746 buf = cpu_to_be32(value); 747 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); 748 } 749 750 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, 751 char *block_name) 752 { 753 char buf[512]; 754 int len; 755 int64_t res; 756 757 /* 758 * First, we send the header part. It contains only the len of 759 * idstr, and the idstr itself. 760 */ 761 len = strlen(block_name); 762 buf[0] = len; 763 memcpy(buf + 1, block_name, len); 764 765 if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { 766 error_report("%s: MSG_RP_RECV_BITMAP only used for recovery", 767 __func__); 768 return; 769 } 770 771 migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf); 772 773 /* 774 * Next, we dump the received bitmap to the stream. 775 * 776 * TODO: currently we are safe since we are the only one that is 777 * using the to_src_file handle (fault thread is still paused), 778 * and it's ok even not taking the mutex. However the best way is 779 * to take the lock before sending the message header, and release 780 * the lock after sending the bitmap. 781 */ 782 qemu_mutex_lock(&mis->rp_mutex); 783 res = ramblock_recv_bitmap_send(mis->to_src_file, block_name); 784 qemu_mutex_unlock(&mis->rp_mutex); 785 786 trace_migrate_send_rp_recv_bitmap(block_name, res); 787 } 788 789 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) 790 { 791 uint32_t buf; 792 793 buf = cpu_to_be32(value); 794 migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); 795 } 796 797 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) 798 { 799 MigrationCapabilityStatusList *head = NULL; 800 MigrationCapabilityStatusList *caps; 801 MigrationState *s = migrate_get_current(); 802 int i; 803 804 caps = NULL; /* silence compiler warning */ 805 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 806 #ifndef CONFIG_LIVE_BLOCK_MIGRATION 807 if (i == MIGRATION_CAPABILITY_BLOCK) { 808 continue; 809 } 810 #endif 811 if (head == NULL) { 812 head = g_malloc0(sizeof(*caps)); 813 caps = head; 814 } else { 815 caps->next = g_malloc0(sizeof(*caps)); 816 caps = caps->next; 817 } 818 caps->value = 819 g_malloc(sizeof(*caps->value)); 820 caps->value->capability = i; 821 caps->value->state = s->enabled_capabilities[i]; 822 } 823 824 return head; 825 } 826 827 MigrationParameters *qmp_query_migrate_parameters(Error **errp) 828 { 829 MigrationParameters *params; 830 MigrationState *s = migrate_get_current(); 831 832 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 833 params = g_malloc0(sizeof(*params)); 834 params->has_compress_level = true; 835 params->compress_level = s->parameters.compress_level; 836 params->has_compress_threads = true; 837 params->compress_threads = s->parameters.compress_threads; 838 params->has_compress_wait_thread = true; 839 params->compress_wait_thread = s->parameters.compress_wait_thread; 840 params->has_decompress_threads = true; 841 params->decompress_threads = s->parameters.decompress_threads; 842 params->has_throttle_trigger_threshold = true; 843 params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; 844 params->has_cpu_throttle_initial = true; 845 params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; 846 params->has_cpu_throttle_increment = true; 847 params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; 848 params->has_cpu_throttle_tailslow = true; 849 params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; 850 params->has_tls_creds = true; 851 params->tls_creds = g_strdup(s->parameters.tls_creds); 852 params->has_tls_hostname = true; 853 params->tls_hostname = g_strdup(s->parameters.tls_hostname); 854 params->has_tls_authz = true; 855 params->tls_authz = g_strdup(s->parameters.tls_authz ? 856 s->parameters.tls_authz : ""); 857 params->has_max_bandwidth = true; 858 params->max_bandwidth = s->parameters.max_bandwidth; 859 params->has_downtime_limit = true; 860 params->downtime_limit = s->parameters.downtime_limit; 861 params->has_x_checkpoint_delay = true; 862 params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; 863 params->has_block_incremental = true; 864 params->block_incremental = s->parameters.block_incremental; 865 params->has_multifd_channels = true; 866 params->multifd_channels = s->parameters.multifd_channels; 867 params->has_multifd_compression = true; 868 params->multifd_compression = s->parameters.multifd_compression; 869 params->has_multifd_zlib_level = true; 870 params->multifd_zlib_level = s->parameters.multifd_zlib_level; 871 params->has_multifd_zstd_level = true; 872 params->multifd_zstd_level = s->parameters.multifd_zstd_level; 873 params->has_xbzrle_cache_size = true; 874 params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; 875 params->has_max_postcopy_bandwidth = true; 876 params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; 877 params->has_max_cpu_throttle = true; 878 params->max_cpu_throttle = s->parameters.max_cpu_throttle; 879 params->has_announce_initial = true; 880 params->announce_initial = s->parameters.announce_initial; 881 params->has_announce_max = true; 882 params->announce_max = s->parameters.announce_max; 883 params->has_announce_rounds = true; 884 params->announce_rounds = s->parameters.announce_rounds; 885 params->has_announce_step = true; 886 params->announce_step = s->parameters.announce_step; 887 888 if (s->parameters.has_block_bitmap_mapping) { 889 params->has_block_bitmap_mapping = true; 890 params->block_bitmap_mapping = 891 QAPI_CLONE(BitmapMigrationNodeAliasList, 892 s->parameters.block_bitmap_mapping); 893 } 894 895 return params; 896 } 897 898 AnnounceParameters *migrate_announce_params(void) 899 { 900 static AnnounceParameters ap; 901 902 MigrationState *s = migrate_get_current(); 903 904 ap.initial = s->parameters.announce_initial; 905 ap.max = s->parameters.announce_max; 906 ap.rounds = s->parameters.announce_rounds; 907 ap.step = s->parameters.announce_step; 908 909 return ≈ 910 } 911 912 /* 913 * Return true if we're already in the middle of a migration 914 * (i.e. any of the active or setup states) 915 */ 916 bool migration_is_setup_or_active(int state) 917 { 918 switch (state) { 919 case MIGRATION_STATUS_ACTIVE: 920 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 921 case MIGRATION_STATUS_POSTCOPY_PAUSED: 922 case MIGRATION_STATUS_POSTCOPY_RECOVER: 923 case MIGRATION_STATUS_SETUP: 924 case MIGRATION_STATUS_PRE_SWITCHOVER: 925 case MIGRATION_STATUS_DEVICE: 926 case MIGRATION_STATUS_WAIT_UNPLUG: 927 case MIGRATION_STATUS_COLO: 928 return true; 929 930 default: 931 return false; 932 933 } 934 } 935 936 bool migration_is_running(int state) 937 { 938 switch (state) { 939 case MIGRATION_STATUS_ACTIVE: 940 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 941 case MIGRATION_STATUS_POSTCOPY_PAUSED: 942 case MIGRATION_STATUS_POSTCOPY_RECOVER: 943 case MIGRATION_STATUS_SETUP: 944 case MIGRATION_STATUS_PRE_SWITCHOVER: 945 case MIGRATION_STATUS_DEVICE: 946 case MIGRATION_STATUS_WAIT_UNPLUG: 947 case MIGRATION_STATUS_CANCELLING: 948 return true; 949 950 default: 951 return false; 952 953 } 954 } 955 956 static void populate_time_info(MigrationInfo *info, MigrationState *s) 957 { 958 info->has_status = true; 959 info->has_setup_time = true; 960 info->setup_time = s->setup_time; 961 if (s->state == MIGRATION_STATUS_COMPLETED) { 962 info->has_total_time = true; 963 info->total_time = s->total_time; 964 info->has_downtime = true; 965 info->downtime = s->downtime; 966 } else { 967 info->has_total_time = true; 968 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - 969 s->start_time; 970 info->has_expected_downtime = true; 971 info->expected_downtime = s->expected_downtime; 972 } 973 } 974 975 static void populate_ram_info(MigrationInfo *info, MigrationState *s) 976 { 977 info->has_ram = true; 978 info->ram = g_malloc0(sizeof(*info->ram)); 979 info->ram->transferred = ram_counters.transferred; 980 info->ram->total = ram_bytes_total(); 981 info->ram->duplicate = ram_counters.duplicate; 982 /* legacy value. It is not used anymore */ 983 info->ram->skipped = 0; 984 info->ram->normal = ram_counters.normal; 985 info->ram->normal_bytes = ram_counters.normal * 986 qemu_target_page_size(); 987 info->ram->mbps = s->mbps; 988 info->ram->dirty_sync_count = ram_counters.dirty_sync_count; 989 info->ram->postcopy_requests = ram_counters.postcopy_requests; 990 info->ram->page_size = qemu_target_page_size(); 991 info->ram->multifd_bytes = ram_counters.multifd_bytes; 992 info->ram->pages_per_second = s->pages_per_second; 993 994 if (migrate_use_xbzrle()) { 995 info->has_xbzrle_cache = true; 996 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); 997 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); 998 info->xbzrle_cache->bytes = xbzrle_counters.bytes; 999 info->xbzrle_cache->pages = xbzrle_counters.pages; 1000 info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss; 1001 info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate; 1002 info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate; 1003 info->xbzrle_cache->overflow = xbzrle_counters.overflow; 1004 } 1005 1006 if (migrate_use_compression()) { 1007 info->has_compression = true; 1008 info->compression = g_malloc0(sizeof(*info->compression)); 1009 info->compression->pages = compression_counters.pages; 1010 info->compression->busy = compression_counters.busy; 1011 info->compression->busy_rate = compression_counters.busy_rate; 1012 info->compression->compressed_size = 1013 compression_counters.compressed_size; 1014 info->compression->compression_rate = 1015 compression_counters.compression_rate; 1016 } 1017 1018 if (cpu_throttle_active()) { 1019 info->has_cpu_throttle_percentage = true; 1020 info->cpu_throttle_percentage = cpu_throttle_get_percentage(); 1021 } 1022 1023 if (s->state != MIGRATION_STATUS_COMPLETED) { 1024 info->ram->remaining = ram_bytes_remaining(); 1025 info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate; 1026 } 1027 } 1028 1029 static void populate_disk_info(MigrationInfo *info) 1030 { 1031 if (blk_mig_active()) { 1032 info->has_disk = true; 1033 info->disk = g_malloc0(sizeof(*info->disk)); 1034 info->disk->transferred = blk_mig_bytes_transferred(); 1035 info->disk->remaining = blk_mig_bytes_remaining(); 1036 info->disk->total = blk_mig_bytes_total(); 1037 } 1038 } 1039 1040 static void fill_source_migration_info(MigrationInfo *info) 1041 { 1042 MigrationState *s = migrate_get_current(); 1043 1044 switch (s->state) { 1045 case MIGRATION_STATUS_NONE: 1046 /* no migration has happened ever */ 1047 /* do not overwrite destination migration status */ 1048 return; 1049 case MIGRATION_STATUS_SETUP: 1050 info->has_status = true; 1051 info->has_total_time = false; 1052 break; 1053 case MIGRATION_STATUS_ACTIVE: 1054 case MIGRATION_STATUS_CANCELLING: 1055 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1056 case MIGRATION_STATUS_PRE_SWITCHOVER: 1057 case MIGRATION_STATUS_DEVICE: 1058 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1059 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1060 /* TODO add some postcopy stats */ 1061 populate_time_info(info, s); 1062 populate_ram_info(info, s); 1063 populate_disk_info(info); 1064 break; 1065 case MIGRATION_STATUS_COLO: 1066 info->has_status = true; 1067 /* TODO: display COLO specific information (checkpoint info etc.) */ 1068 break; 1069 case MIGRATION_STATUS_COMPLETED: 1070 populate_time_info(info, s); 1071 populate_ram_info(info, s); 1072 break; 1073 case MIGRATION_STATUS_FAILED: 1074 info->has_status = true; 1075 if (s->error) { 1076 info->has_error_desc = true; 1077 info->error_desc = g_strdup(error_get_pretty(s->error)); 1078 } 1079 break; 1080 case MIGRATION_STATUS_CANCELLED: 1081 info->has_status = true; 1082 break; 1083 case MIGRATION_STATUS_WAIT_UNPLUG: 1084 info->has_status = true; 1085 break; 1086 } 1087 info->status = s->state; 1088 } 1089 1090 /** 1091 * @migration_caps_check - check capability validity 1092 * 1093 * @cap_list: old capability list, array of bool 1094 * @params: new capabilities to be applied soon 1095 * @errp: set *errp if the check failed, with reason 1096 * 1097 * Returns true if check passed, otherwise false. 1098 */ 1099 static bool migrate_caps_check(bool *cap_list, 1100 MigrationCapabilityStatusList *params, 1101 Error **errp) 1102 { 1103 MigrationCapabilityStatusList *cap; 1104 bool old_postcopy_cap; 1105 MigrationIncomingState *mis = migration_incoming_get_current(); 1106 1107 old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 1108 1109 for (cap = params; cap; cap = cap->next) { 1110 cap_list[cap->value->capability] = cap->value->state; 1111 } 1112 1113 #ifndef CONFIG_LIVE_BLOCK_MIGRATION 1114 if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { 1115 error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " 1116 "block migration"); 1117 error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); 1118 return false; 1119 } 1120 #endif 1121 1122 #ifndef CONFIG_REPLICATION 1123 if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { 1124 error_setg(errp, "QEMU compiled without replication module" 1125 " can't enable COLO"); 1126 error_append_hint(errp, "Please enable replication before COLO.\n"); 1127 return false; 1128 } 1129 #endif 1130 1131 if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { 1132 /* This check is reasonably expensive, so only when it's being 1133 * set the first time, also it's only the destination that needs 1134 * special support. 1135 */ 1136 if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && 1137 !postcopy_ram_supported_by_host(mis)) { 1138 /* postcopy_ram_supported_by_host will have emitted a more 1139 * detailed message 1140 */ 1141 error_setg(errp, "Postcopy is not supported"); 1142 return false; 1143 } 1144 1145 if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { 1146 error_setg(errp, "Postcopy is not compatible with ignore-shared"); 1147 return false; 1148 } 1149 } 1150 1151 return true; 1152 } 1153 1154 static void fill_destination_migration_info(MigrationInfo *info) 1155 { 1156 MigrationIncomingState *mis = migration_incoming_get_current(); 1157 1158 if (mis->socket_address_list) { 1159 info->has_socket_address = true; 1160 info->socket_address = 1161 QAPI_CLONE(SocketAddressList, mis->socket_address_list); 1162 } 1163 1164 switch (mis->state) { 1165 case MIGRATION_STATUS_NONE: 1166 return; 1167 case MIGRATION_STATUS_SETUP: 1168 case MIGRATION_STATUS_CANCELLING: 1169 case MIGRATION_STATUS_CANCELLED: 1170 case MIGRATION_STATUS_ACTIVE: 1171 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1172 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1173 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1174 case MIGRATION_STATUS_FAILED: 1175 case MIGRATION_STATUS_COLO: 1176 info->has_status = true; 1177 break; 1178 case MIGRATION_STATUS_COMPLETED: 1179 info->has_status = true; 1180 fill_destination_postcopy_migration_info(info); 1181 break; 1182 } 1183 info->status = mis->state; 1184 } 1185 1186 MigrationInfo *qmp_query_migrate(Error **errp) 1187 { 1188 MigrationInfo *info = g_malloc0(sizeof(*info)); 1189 1190 fill_destination_migration_info(info); 1191 fill_source_migration_info(info); 1192 1193 return info; 1194 } 1195 1196 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, 1197 Error **errp) 1198 { 1199 MigrationState *s = migrate_get_current(); 1200 MigrationCapabilityStatusList *cap; 1201 bool cap_list[MIGRATION_CAPABILITY__MAX]; 1202 1203 if (migration_is_running(s->state)) { 1204 error_setg(errp, QERR_MIGRATION_ACTIVE); 1205 return; 1206 } 1207 1208 memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); 1209 if (!migrate_caps_check(cap_list, params, errp)) { 1210 return; 1211 } 1212 1213 for (cap = params; cap; cap = cap->next) { 1214 s->enabled_capabilities[cap->value->capability] = cap->value->state; 1215 } 1216 } 1217 1218 /* 1219 * Check whether the parameters are valid. Error will be put into errp 1220 * (if provided). Return true if valid, otherwise false. 1221 */ 1222 static bool migrate_params_check(MigrationParameters *params, Error **errp) 1223 { 1224 if (params->has_compress_level && 1225 (params->compress_level > 9)) { 1226 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", 1227 "is invalid, it should be in the range of 0 to 9"); 1228 return false; 1229 } 1230 1231 if (params->has_compress_threads && (params->compress_threads < 1)) { 1232 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1233 "compress_threads", 1234 "is invalid, it should be in the range of 1 to 255"); 1235 return false; 1236 } 1237 1238 if (params->has_decompress_threads && (params->decompress_threads < 1)) { 1239 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1240 "decompress_threads", 1241 "is invalid, it should be in the range of 1 to 255"); 1242 return false; 1243 } 1244 1245 if (params->has_throttle_trigger_threshold && 1246 (params->throttle_trigger_threshold < 1 || 1247 params->throttle_trigger_threshold > 100)) { 1248 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1249 "throttle_trigger_threshold", 1250 "an integer in the range of 1 to 100"); 1251 return false; 1252 } 1253 1254 if (params->has_cpu_throttle_initial && 1255 (params->cpu_throttle_initial < 1 || 1256 params->cpu_throttle_initial > 99)) { 1257 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1258 "cpu_throttle_initial", 1259 "an integer in the range of 1 to 99"); 1260 return false; 1261 } 1262 1263 if (params->has_cpu_throttle_increment && 1264 (params->cpu_throttle_increment < 1 || 1265 params->cpu_throttle_increment > 99)) { 1266 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1267 "cpu_throttle_increment", 1268 "an integer in the range of 1 to 99"); 1269 return false; 1270 } 1271 1272 if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { 1273 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1274 "max_bandwidth", 1275 "an integer in the range of 0 to "stringify(SIZE_MAX) 1276 " bytes/second"); 1277 return false; 1278 } 1279 1280 if (params->has_downtime_limit && 1281 (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { 1282 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1283 "downtime_limit", 1284 "an integer in the range of 0 to " 1285 stringify(MAX_MIGRATE_DOWNTIME)" ms"); 1286 return false; 1287 } 1288 1289 /* x_checkpoint_delay is now always positive */ 1290 1291 if (params->has_multifd_channels && (params->multifd_channels < 1)) { 1292 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1293 "multifd_channels", 1294 "is invalid, it should be in the range of 1 to 255"); 1295 return false; 1296 } 1297 1298 if (params->has_multifd_zlib_level && 1299 (params->multifd_zlib_level > 9)) { 1300 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", 1301 "is invalid, it should be in the range of 0 to 9"); 1302 return false; 1303 } 1304 1305 if (params->has_multifd_zstd_level && 1306 (params->multifd_zstd_level > 20)) { 1307 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", 1308 "is invalid, it should be in the range of 0 to 20"); 1309 return false; 1310 } 1311 1312 if (params->has_xbzrle_cache_size && 1313 (params->xbzrle_cache_size < qemu_target_page_size() || 1314 !is_power_of_2(params->xbzrle_cache_size))) { 1315 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1316 "xbzrle_cache_size", 1317 "is invalid, it should be bigger than target page size" 1318 " and a power of 2"); 1319 return false; 1320 } 1321 1322 if (params->has_max_cpu_throttle && 1323 (params->max_cpu_throttle < params->cpu_throttle_initial || 1324 params->max_cpu_throttle > 99)) { 1325 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1326 "max_cpu_throttle", 1327 "an integer in the range of cpu_throttle_initial to 99"); 1328 return false; 1329 } 1330 1331 if (params->has_announce_initial && 1332 params->announce_initial > 100000) { 1333 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1334 "announce_initial", 1335 "is invalid, it must be less than 100000 ms"); 1336 return false; 1337 } 1338 if (params->has_announce_max && 1339 params->announce_max > 100000) { 1340 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1341 "announce_max", 1342 "is invalid, it must be less than 100000 ms"); 1343 return false; 1344 } 1345 if (params->has_announce_rounds && 1346 params->announce_rounds > 1000) { 1347 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1348 "announce_rounds", 1349 "is invalid, it must be in the range of 0 to 1000"); 1350 return false; 1351 } 1352 if (params->has_announce_step && 1353 (params->announce_step < 1 || 1354 params->announce_step > 10000)) { 1355 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 1356 "announce_step", 1357 "is invalid, it must be in the range of 1 to 10000 ms"); 1358 return false; 1359 } 1360 1361 if (params->has_block_bitmap_mapping && 1362 !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { 1363 error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); 1364 return false; 1365 } 1366 1367 return true; 1368 } 1369 1370 static void migrate_params_test_apply(MigrateSetParameters *params, 1371 MigrationParameters *dest) 1372 { 1373 *dest = migrate_get_current()->parameters; 1374 1375 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 1376 1377 if (params->has_compress_level) { 1378 dest->compress_level = params->compress_level; 1379 } 1380 1381 if (params->has_compress_threads) { 1382 dest->compress_threads = params->compress_threads; 1383 } 1384 1385 if (params->has_compress_wait_thread) { 1386 dest->compress_wait_thread = params->compress_wait_thread; 1387 } 1388 1389 if (params->has_decompress_threads) { 1390 dest->decompress_threads = params->decompress_threads; 1391 } 1392 1393 if (params->has_throttle_trigger_threshold) { 1394 dest->throttle_trigger_threshold = params->throttle_trigger_threshold; 1395 } 1396 1397 if (params->has_cpu_throttle_initial) { 1398 dest->cpu_throttle_initial = params->cpu_throttle_initial; 1399 } 1400 1401 if (params->has_cpu_throttle_increment) { 1402 dest->cpu_throttle_increment = params->cpu_throttle_increment; 1403 } 1404 1405 if (params->has_cpu_throttle_tailslow) { 1406 dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; 1407 } 1408 1409 if (params->has_tls_creds) { 1410 assert(params->tls_creds->type == QTYPE_QSTRING); 1411 dest->tls_creds = params->tls_creds->u.s; 1412 } 1413 1414 if (params->has_tls_hostname) { 1415 assert(params->tls_hostname->type == QTYPE_QSTRING); 1416 dest->tls_hostname = params->tls_hostname->u.s; 1417 } 1418 1419 if (params->has_max_bandwidth) { 1420 dest->max_bandwidth = params->max_bandwidth; 1421 } 1422 1423 if (params->has_downtime_limit) { 1424 dest->downtime_limit = params->downtime_limit; 1425 } 1426 1427 if (params->has_x_checkpoint_delay) { 1428 dest->x_checkpoint_delay = params->x_checkpoint_delay; 1429 } 1430 1431 if (params->has_block_incremental) { 1432 dest->block_incremental = params->block_incremental; 1433 } 1434 if (params->has_multifd_channels) { 1435 dest->multifd_channels = params->multifd_channels; 1436 } 1437 if (params->has_multifd_compression) { 1438 dest->multifd_compression = params->multifd_compression; 1439 } 1440 if (params->has_xbzrle_cache_size) { 1441 dest->xbzrle_cache_size = params->xbzrle_cache_size; 1442 } 1443 if (params->has_max_postcopy_bandwidth) { 1444 dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; 1445 } 1446 if (params->has_max_cpu_throttle) { 1447 dest->max_cpu_throttle = params->max_cpu_throttle; 1448 } 1449 if (params->has_announce_initial) { 1450 dest->announce_initial = params->announce_initial; 1451 } 1452 if (params->has_announce_max) { 1453 dest->announce_max = params->announce_max; 1454 } 1455 if (params->has_announce_rounds) { 1456 dest->announce_rounds = params->announce_rounds; 1457 } 1458 if (params->has_announce_step) { 1459 dest->announce_step = params->announce_step; 1460 } 1461 1462 if (params->has_block_bitmap_mapping) { 1463 dest->has_block_bitmap_mapping = true; 1464 dest->block_bitmap_mapping = params->block_bitmap_mapping; 1465 } 1466 } 1467 1468 static void migrate_params_apply(MigrateSetParameters *params, Error **errp) 1469 { 1470 MigrationState *s = migrate_get_current(); 1471 1472 /* TODO use QAPI_CLONE() instead of duplicating it inline */ 1473 1474 if (params->has_compress_level) { 1475 s->parameters.compress_level = params->compress_level; 1476 } 1477 1478 if (params->has_compress_threads) { 1479 s->parameters.compress_threads = params->compress_threads; 1480 } 1481 1482 if (params->has_compress_wait_thread) { 1483 s->parameters.compress_wait_thread = params->compress_wait_thread; 1484 } 1485 1486 if (params->has_decompress_threads) { 1487 s->parameters.decompress_threads = params->decompress_threads; 1488 } 1489 1490 if (params->has_throttle_trigger_threshold) { 1491 s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; 1492 } 1493 1494 if (params->has_cpu_throttle_initial) { 1495 s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; 1496 } 1497 1498 if (params->has_cpu_throttle_increment) { 1499 s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; 1500 } 1501 1502 if (params->has_cpu_throttle_tailslow) { 1503 s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; 1504 } 1505 1506 if (params->has_tls_creds) { 1507 g_free(s->parameters.tls_creds); 1508 assert(params->tls_creds->type == QTYPE_QSTRING); 1509 s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); 1510 } 1511 1512 if (params->has_tls_hostname) { 1513 g_free(s->parameters.tls_hostname); 1514 assert(params->tls_hostname->type == QTYPE_QSTRING); 1515 s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); 1516 } 1517 1518 if (params->has_tls_authz) { 1519 g_free(s->parameters.tls_authz); 1520 assert(params->tls_authz->type == QTYPE_QSTRING); 1521 s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); 1522 } 1523 1524 if (params->has_max_bandwidth) { 1525 s->parameters.max_bandwidth = params->max_bandwidth; 1526 if (s->to_dst_file && !migration_in_postcopy()) { 1527 qemu_file_set_rate_limit(s->to_dst_file, 1528 s->parameters.max_bandwidth / XFER_LIMIT_RATIO); 1529 } 1530 } 1531 1532 if (params->has_downtime_limit) { 1533 s->parameters.downtime_limit = params->downtime_limit; 1534 } 1535 1536 if (params->has_x_checkpoint_delay) { 1537 s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; 1538 if (migration_in_colo_state()) { 1539 colo_checkpoint_notify(s); 1540 } 1541 } 1542 1543 if (params->has_block_incremental) { 1544 s->parameters.block_incremental = params->block_incremental; 1545 } 1546 if (params->has_multifd_channels) { 1547 s->parameters.multifd_channels = params->multifd_channels; 1548 } 1549 if (params->has_multifd_compression) { 1550 s->parameters.multifd_compression = params->multifd_compression; 1551 } 1552 if (params->has_xbzrle_cache_size) { 1553 s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; 1554 xbzrle_cache_resize(params->xbzrle_cache_size, errp); 1555 } 1556 if (params->has_max_postcopy_bandwidth) { 1557 s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; 1558 if (s->to_dst_file && migration_in_postcopy()) { 1559 qemu_file_set_rate_limit(s->to_dst_file, 1560 s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); 1561 } 1562 } 1563 if (params->has_max_cpu_throttle) { 1564 s->parameters.max_cpu_throttle = params->max_cpu_throttle; 1565 } 1566 if (params->has_announce_initial) { 1567 s->parameters.announce_initial = params->announce_initial; 1568 } 1569 if (params->has_announce_max) { 1570 s->parameters.announce_max = params->announce_max; 1571 } 1572 if (params->has_announce_rounds) { 1573 s->parameters.announce_rounds = params->announce_rounds; 1574 } 1575 if (params->has_announce_step) { 1576 s->parameters.announce_step = params->announce_step; 1577 } 1578 1579 if (params->has_block_bitmap_mapping) { 1580 qapi_free_BitmapMigrationNodeAliasList( 1581 s->parameters.block_bitmap_mapping); 1582 1583 s->parameters.has_block_bitmap_mapping = true; 1584 s->parameters.block_bitmap_mapping = 1585 QAPI_CLONE(BitmapMigrationNodeAliasList, 1586 params->block_bitmap_mapping); 1587 } 1588 } 1589 1590 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) 1591 { 1592 MigrationParameters tmp; 1593 1594 /* TODO Rewrite "" to null instead */ 1595 if (params->has_tls_creds 1596 && params->tls_creds->type == QTYPE_QNULL) { 1597 qobject_unref(params->tls_creds->u.n); 1598 params->tls_creds->type = QTYPE_QSTRING; 1599 params->tls_creds->u.s = strdup(""); 1600 } 1601 /* TODO Rewrite "" to null instead */ 1602 if (params->has_tls_hostname 1603 && params->tls_hostname->type == QTYPE_QNULL) { 1604 qobject_unref(params->tls_hostname->u.n); 1605 params->tls_hostname->type = QTYPE_QSTRING; 1606 params->tls_hostname->u.s = strdup(""); 1607 } 1608 1609 migrate_params_test_apply(params, &tmp); 1610 1611 if (!migrate_params_check(&tmp, errp)) { 1612 /* Invalid parameter */ 1613 return; 1614 } 1615 1616 migrate_params_apply(params, errp); 1617 } 1618 1619 1620 void qmp_migrate_start_postcopy(Error **errp) 1621 { 1622 MigrationState *s = migrate_get_current(); 1623 1624 if (!migrate_postcopy()) { 1625 error_setg(errp, "Enable postcopy with migrate_set_capability before" 1626 " the start of migration"); 1627 return; 1628 } 1629 1630 if (s->state == MIGRATION_STATUS_NONE) { 1631 error_setg(errp, "Postcopy must be started after migration has been" 1632 " started"); 1633 return; 1634 } 1635 /* 1636 * we don't error if migration has finished since that would be racy 1637 * with issuing this command. 1638 */ 1639 qatomic_set(&s->start_postcopy, true); 1640 } 1641 1642 /* shared migration helpers */ 1643 1644 void migrate_set_state(int *state, int old_state, int new_state) 1645 { 1646 assert(new_state < MIGRATION_STATUS__MAX); 1647 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { 1648 trace_migrate_set_state(MigrationStatus_str(new_state)); 1649 migrate_generate_event(new_state); 1650 } 1651 } 1652 1653 static MigrationCapabilityStatusList *migrate_cap_add( 1654 MigrationCapabilityStatusList *list, 1655 MigrationCapability index, 1656 bool state) 1657 { 1658 MigrationCapabilityStatusList *cap; 1659 1660 cap = g_new0(MigrationCapabilityStatusList, 1); 1661 cap->value = g_new0(MigrationCapabilityStatus, 1); 1662 cap->value->capability = index; 1663 cap->value->state = state; 1664 cap->next = list; 1665 1666 return cap; 1667 } 1668 1669 void migrate_set_block_enabled(bool value, Error **errp) 1670 { 1671 MigrationCapabilityStatusList *cap; 1672 1673 cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value); 1674 qmp_migrate_set_capabilities(cap, errp); 1675 qapi_free_MigrationCapabilityStatusList(cap); 1676 } 1677 1678 static void migrate_set_block_incremental(MigrationState *s, bool value) 1679 { 1680 s->parameters.block_incremental = value; 1681 } 1682 1683 static void block_cleanup_parameters(MigrationState *s) 1684 { 1685 if (s->must_remove_block_options) { 1686 /* setting to false can never fail */ 1687 migrate_set_block_enabled(false, &error_abort); 1688 migrate_set_block_incremental(s, false); 1689 s->must_remove_block_options = false; 1690 } 1691 } 1692 1693 static void migrate_fd_cleanup(MigrationState *s) 1694 { 1695 qemu_bh_delete(s->cleanup_bh); 1696 s->cleanup_bh = NULL; 1697 1698 qemu_savevm_state_cleanup(); 1699 1700 if (s->to_dst_file) { 1701 QEMUFile *tmp; 1702 1703 trace_migrate_fd_cleanup(); 1704 qemu_mutex_unlock_iothread(); 1705 if (s->migration_thread_running) { 1706 qemu_thread_join(&s->thread); 1707 s->migration_thread_running = false; 1708 } 1709 qemu_mutex_lock_iothread(); 1710 1711 multifd_save_cleanup(); 1712 qemu_mutex_lock(&s->qemu_file_lock); 1713 tmp = s->to_dst_file; 1714 s->to_dst_file = NULL; 1715 qemu_mutex_unlock(&s->qemu_file_lock); 1716 /* 1717 * Close the file handle without the lock to make sure the 1718 * critical section won't block for long. 1719 */ 1720 qemu_fclose(tmp); 1721 } 1722 1723 assert(!migration_is_active(s)); 1724 1725 if (s->state == MIGRATION_STATUS_CANCELLING) { 1726 migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, 1727 MIGRATION_STATUS_CANCELLED); 1728 } 1729 1730 if (s->error) { 1731 /* It is used on info migrate. We can't free it */ 1732 error_report_err(error_copy(s->error)); 1733 } 1734 notifier_list_notify(&migration_state_notifiers, s); 1735 block_cleanup_parameters(s); 1736 } 1737 1738 static void migrate_fd_cleanup_schedule(MigrationState *s) 1739 { 1740 /* 1741 * Ref the state for bh, because it may be called when 1742 * there're already no other refs 1743 */ 1744 object_ref(OBJECT(s)); 1745 qemu_bh_schedule(s->cleanup_bh); 1746 } 1747 1748 static void migrate_fd_cleanup_bh(void *opaque) 1749 { 1750 MigrationState *s = opaque; 1751 migrate_fd_cleanup(s); 1752 object_unref(OBJECT(s)); 1753 } 1754 1755 void migrate_set_error(MigrationState *s, const Error *error) 1756 { 1757 QEMU_LOCK_GUARD(&s->error_mutex); 1758 if (!s->error) { 1759 s->error = error_copy(error); 1760 } 1761 } 1762 1763 void migrate_fd_error(MigrationState *s, const Error *error) 1764 { 1765 trace_migrate_fd_error(error_get_pretty(error)); 1766 assert(s->to_dst_file == NULL); 1767 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 1768 MIGRATION_STATUS_FAILED); 1769 migrate_set_error(s, error); 1770 } 1771 1772 static void migrate_fd_cancel(MigrationState *s) 1773 { 1774 int old_state ; 1775 QEMUFile *f = migrate_get_current()->to_dst_file; 1776 trace_migrate_fd_cancel(); 1777 1778 if (s->rp_state.from_dst_file) { 1779 /* shutdown the rp socket, so causing the rp thread to shutdown */ 1780 qemu_file_shutdown(s->rp_state.from_dst_file); 1781 } 1782 1783 do { 1784 old_state = s->state; 1785 if (!migration_is_running(old_state)) { 1786 break; 1787 } 1788 /* If the migration is paused, kick it out of the pause */ 1789 if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { 1790 qemu_sem_post(&s->pause_sem); 1791 } 1792 migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); 1793 } while (s->state != MIGRATION_STATUS_CANCELLING); 1794 1795 /* 1796 * If we're unlucky the migration code might be stuck somewhere in a 1797 * send/write while the network has failed and is waiting to timeout; 1798 * if we've got shutdown(2) available then we can force it to quit. 1799 * The outgoing qemu file gets closed in migrate_fd_cleanup that is 1800 * called in a bh, so there is no race against this cancel. 1801 */ 1802 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 1803 qemu_file_shutdown(f); 1804 } 1805 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1806 Error *local_err = NULL; 1807 1808 bdrv_invalidate_cache_all(&local_err); 1809 if (local_err) { 1810 error_report_err(local_err); 1811 } else { 1812 s->block_inactive = false; 1813 } 1814 } 1815 } 1816 1817 void add_migration_state_change_notifier(Notifier *notify) 1818 { 1819 notifier_list_add(&migration_state_notifiers, notify); 1820 } 1821 1822 void remove_migration_state_change_notifier(Notifier *notify) 1823 { 1824 notifier_remove(notify); 1825 } 1826 1827 bool migration_in_setup(MigrationState *s) 1828 { 1829 return s->state == MIGRATION_STATUS_SETUP; 1830 } 1831 1832 bool migration_has_finished(MigrationState *s) 1833 { 1834 return s->state == MIGRATION_STATUS_COMPLETED; 1835 } 1836 1837 bool migration_has_failed(MigrationState *s) 1838 { 1839 return (s->state == MIGRATION_STATUS_CANCELLED || 1840 s->state == MIGRATION_STATUS_FAILED); 1841 } 1842 1843 bool migration_in_postcopy(void) 1844 { 1845 MigrationState *s = migrate_get_current(); 1846 1847 switch (s->state) { 1848 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1849 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1850 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1851 return true; 1852 default: 1853 return false; 1854 } 1855 } 1856 1857 bool migration_in_postcopy_after_devices(MigrationState *s) 1858 { 1859 return migration_in_postcopy() && s->postcopy_after_devices; 1860 } 1861 1862 bool migration_in_incoming_postcopy(void) 1863 { 1864 PostcopyState ps = postcopy_state_get(); 1865 1866 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 1867 } 1868 1869 bool migration_is_idle(void) 1870 { 1871 MigrationState *s = current_migration; 1872 1873 if (!s) { 1874 return true; 1875 } 1876 1877 switch (s->state) { 1878 case MIGRATION_STATUS_NONE: 1879 case MIGRATION_STATUS_CANCELLED: 1880 case MIGRATION_STATUS_COMPLETED: 1881 case MIGRATION_STATUS_FAILED: 1882 return true; 1883 case MIGRATION_STATUS_SETUP: 1884 case MIGRATION_STATUS_CANCELLING: 1885 case MIGRATION_STATUS_ACTIVE: 1886 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1887 case MIGRATION_STATUS_COLO: 1888 case MIGRATION_STATUS_PRE_SWITCHOVER: 1889 case MIGRATION_STATUS_DEVICE: 1890 case MIGRATION_STATUS_WAIT_UNPLUG: 1891 return false; 1892 case MIGRATION_STATUS__MAX: 1893 g_assert_not_reached(); 1894 } 1895 1896 return false; 1897 } 1898 1899 bool migration_is_active(MigrationState *s) 1900 { 1901 return (s->state == MIGRATION_STATUS_ACTIVE || 1902 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 1903 } 1904 1905 void migrate_init(MigrationState *s) 1906 { 1907 /* 1908 * Reinitialise all migration state, except 1909 * parameters/capabilities that the user set, and 1910 * locks. 1911 */ 1912 s->cleanup_bh = 0; 1913 s->to_dst_file = NULL; 1914 s->state = MIGRATION_STATUS_NONE; 1915 s->rp_state.from_dst_file = NULL; 1916 s->rp_state.error = false; 1917 s->mbps = 0.0; 1918 s->pages_per_second = 0.0; 1919 s->downtime = 0; 1920 s->expected_downtime = 0; 1921 s->setup_time = 0; 1922 s->start_postcopy = false; 1923 s->postcopy_after_devices = false; 1924 s->migration_thread_running = false; 1925 error_free(s->error); 1926 s->error = NULL; 1927 s->hostname = NULL; 1928 1929 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 1930 1931 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1932 s->total_time = 0; 1933 s->vm_was_running = false; 1934 s->iteration_initial_bytes = 0; 1935 s->threshold_size = 0; 1936 } 1937 1938 static GSList *migration_blockers; 1939 1940 int migrate_add_blocker(Error *reason, Error **errp) 1941 { 1942 if (only_migratable) { 1943 error_propagate_prepend(errp, error_copy(reason), 1944 "disallowing migration blocker " 1945 "(--only-migratable) for: "); 1946 return -EACCES; 1947 } 1948 1949 if (migration_is_idle()) { 1950 migration_blockers = g_slist_prepend(migration_blockers, reason); 1951 return 0; 1952 } 1953 1954 error_propagate_prepend(errp, error_copy(reason), 1955 "disallowing migration blocker " 1956 "(migration in progress) for: "); 1957 return -EBUSY; 1958 } 1959 1960 void migrate_del_blocker(Error *reason) 1961 { 1962 migration_blockers = g_slist_remove(migration_blockers, reason); 1963 } 1964 1965 void qmp_migrate_incoming(const char *uri, Error **errp) 1966 { 1967 Error *local_err = NULL; 1968 static bool once = true; 1969 1970 if (!deferred_incoming) { 1971 error_setg(errp, "For use with '-incoming defer'"); 1972 return; 1973 } 1974 if (!once) { 1975 error_setg(errp, "The incoming migration has already been started"); 1976 return; 1977 } 1978 1979 qemu_start_incoming_migration(uri, &local_err); 1980 1981 if (local_err) { 1982 error_propagate(errp, local_err); 1983 return; 1984 } 1985 1986 once = false; 1987 } 1988 1989 void qmp_migrate_recover(const char *uri, Error **errp) 1990 { 1991 MigrationIncomingState *mis = migration_incoming_get_current(); 1992 1993 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 1994 error_setg(errp, "Migrate recover can only be run " 1995 "when postcopy is paused."); 1996 return; 1997 } 1998 1999 if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, 2000 false, true) == true) { 2001 error_setg(errp, "Migrate recovery is triggered already"); 2002 return; 2003 } 2004 2005 /* 2006 * Note that this call will never start a real migration; it will 2007 * only re-setup the migration stream and poke existing migration 2008 * to continue using that newly established channel. 2009 */ 2010 qemu_start_incoming_migration(uri, errp); 2011 } 2012 2013 void qmp_migrate_pause(Error **errp) 2014 { 2015 MigrationState *ms = migrate_get_current(); 2016 MigrationIncomingState *mis = migration_incoming_get_current(); 2017 int ret; 2018 2019 if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2020 /* Source side, during postcopy */ 2021 qemu_mutex_lock(&ms->qemu_file_lock); 2022 ret = qemu_file_shutdown(ms->to_dst_file); 2023 qemu_mutex_unlock(&ms->qemu_file_lock); 2024 if (ret) { 2025 error_setg(errp, "Failed to pause source migration"); 2026 } 2027 return; 2028 } 2029 2030 if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2031 ret = qemu_file_shutdown(mis->from_src_file); 2032 if (ret) { 2033 error_setg(errp, "Failed to pause destination migration"); 2034 } 2035 return; 2036 } 2037 2038 error_setg(errp, "migrate-pause is currently only supported " 2039 "during postcopy-active state"); 2040 } 2041 2042 bool migration_is_blocked(Error **errp) 2043 { 2044 if (qemu_savevm_state_blocked(errp)) { 2045 return true; 2046 } 2047 2048 if (migration_blockers) { 2049 error_propagate(errp, error_copy(migration_blockers->data)); 2050 return true; 2051 } 2052 2053 return false; 2054 } 2055 2056 /* Returns true if continue to migrate, or false if error detected */ 2057 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, 2058 bool resume, Error **errp) 2059 { 2060 Error *local_err = NULL; 2061 2062 if (resume) { 2063 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2064 error_setg(errp, "Cannot resume if there is no " 2065 "paused migration"); 2066 return false; 2067 } 2068 2069 /* 2070 * Postcopy recovery won't work well with release-ram 2071 * capability since release-ram will drop the page buffer as 2072 * long as the page is put into the send buffer. So if there 2073 * is a network failure happened, any page buffers that have 2074 * not yet reached the destination VM but have already been 2075 * sent from the source VM will be lost forever. Let's refuse 2076 * the client from resuming such a postcopy migration. 2077 * Luckily release-ram was designed to only be used when src 2078 * and destination VMs are on the same host, so it should be 2079 * fine. 2080 */ 2081 if (migrate_release_ram()) { 2082 error_setg(errp, "Postcopy recovery cannot work " 2083 "when release-ram capability is set"); 2084 return false; 2085 } 2086 2087 /* This is a resume, skip init status */ 2088 return true; 2089 } 2090 2091 if (migration_is_running(s->state)) { 2092 error_setg(errp, QERR_MIGRATION_ACTIVE); 2093 return false; 2094 } 2095 2096 if (runstate_check(RUN_STATE_INMIGRATE)) { 2097 error_setg(errp, "Guest is waiting for an incoming migration"); 2098 return false; 2099 } 2100 2101 if (migration_is_blocked(errp)) { 2102 return false; 2103 } 2104 2105 if (blk || blk_inc) { 2106 if (migrate_use_block() || migrate_use_block_incremental()) { 2107 error_setg(errp, "Command options are incompatible with " 2108 "current migration capabilities"); 2109 return false; 2110 } 2111 migrate_set_block_enabled(true, &local_err); 2112 if (local_err) { 2113 error_propagate(errp, local_err); 2114 return false; 2115 } 2116 s->must_remove_block_options = true; 2117 } 2118 2119 if (blk_inc) { 2120 migrate_set_block_incremental(s, true); 2121 } 2122 2123 migrate_init(s); 2124 /* 2125 * set ram_counters memory to zero for a 2126 * new migration 2127 */ 2128 memset(&ram_counters, 0, sizeof(ram_counters)); 2129 2130 return true; 2131 } 2132 2133 void qmp_migrate(const char *uri, bool has_blk, bool blk, 2134 bool has_inc, bool inc, bool has_detach, bool detach, 2135 bool has_resume, bool resume, Error **errp) 2136 { 2137 Error *local_err = NULL; 2138 MigrationState *s = migrate_get_current(); 2139 const char *p = NULL; 2140 2141 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 2142 has_resume && resume, errp)) { 2143 /* Error detected, put into errp */ 2144 return; 2145 } 2146 2147 if (strstart(uri, "tcp:", &p) || 2148 strstart(uri, "unix:", NULL) || 2149 strstart(uri, "vsock:", NULL)) { 2150 socket_start_outgoing_migration(s, p ? p : uri, &local_err); 2151 #ifdef CONFIG_RDMA 2152 } else if (strstart(uri, "rdma:", &p)) { 2153 rdma_start_outgoing_migration(s, p, &local_err); 2154 #endif 2155 } else if (strstart(uri, "exec:", &p)) { 2156 exec_start_outgoing_migration(s, p, &local_err); 2157 } else if (strstart(uri, "fd:", &p)) { 2158 fd_start_outgoing_migration(s, p, &local_err); 2159 } else { 2160 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 2161 "a valid migration protocol"); 2162 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2163 MIGRATION_STATUS_FAILED); 2164 block_cleanup_parameters(s); 2165 return; 2166 } 2167 2168 if (local_err) { 2169 migrate_fd_error(s, local_err); 2170 error_propagate(errp, local_err); 2171 return; 2172 } 2173 } 2174 2175 void qmp_migrate_cancel(Error **errp) 2176 { 2177 migrate_fd_cancel(migrate_get_current()); 2178 } 2179 2180 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2181 { 2182 MigrationState *s = migrate_get_current(); 2183 if (s->state != state) { 2184 error_setg(errp, "Migration not in expected state: %s", 2185 MigrationStatus_str(s->state)); 2186 return; 2187 } 2188 qemu_sem_post(&s->pause_sem); 2189 } 2190 2191 void qmp_migrate_set_cache_size(int64_t value, Error **errp) 2192 { 2193 MigrateSetParameters p = { 2194 .has_xbzrle_cache_size = true, 2195 .xbzrle_cache_size = value, 2196 }; 2197 2198 qmp_migrate_set_parameters(&p, errp); 2199 } 2200 2201 int64_t qmp_query_migrate_cache_size(Error **errp) 2202 { 2203 return migrate_xbzrle_cache_size(); 2204 } 2205 2206 void qmp_migrate_set_speed(int64_t value, Error **errp) 2207 { 2208 MigrateSetParameters p = { 2209 .has_max_bandwidth = true, 2210 .max_bandwidth = value, 2211 }; 2212 2213 qmp_migrate_set_parameters(&p, errp); 2214 } 2215 2216 void qmp_migrate_set_downtime(double value, Error **errp) 2217 { 2218 if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { 2219 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 2220 "downtime_limit", 2221 "an integer in the range of 0 to " 2222 stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds"); 2223 return; 2224 } 2225 2226 value *= 1000; /* Convert to milliseconds */ 2227 2228 MigrateSetParameters p = { 2229 .has_downtime_limit = true, 2230 .downtime_limit = (int64_t)value, 2231 }; 2232 2233 qmp_migrate_set_parameters(&p, errp); 2234 } 2235 2236 bool migrate_release_ram(void) 2237 { 2238 MigrationState *s; 2239 2240 s = migrate_get_current(); 2241 2242 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 2243 } 2244 2245 bool migrate_postcopy_ram(void) 2246 { 2247 MigrationState *s; 2248 2249 s = migrate_get_current(); 2250 2251 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 2252 } 2253 2254 bool migrate_postcopy(void) 2255 { 2256 return migrate_postcopy_ram() || migrate_dirty_bitmaps(); 2257 } 2258 2259 bool migrate_auto_converge(void) 2260 { 2261 MigrationState *s; 2262 2263 s = migrate_get_current(); 2264 2265 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 2266 } 2267 2268 bool migrate_zero_blocks(void) 2269 { 2270 MigrationState *s; 2271 2272 s = migrate_get_current(); 2273 2274 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 2275 } 2276 2277 bool migrate_postcopy_blocktime(void) 2278 { 2279 MigrationState *s; 2280 2281 s = migrate_get_current(); 2282 2283 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; 2284 } 2285 2286 bool migrate_use_compression(void) 2287 { 2288 MigrationState *s; 2289 2290 s = migrate_get_current(); 2291 2292 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 2293 } 2294 2295 int migrate_compress_level(void) 2296 { 2297 MigrationState *s; 2298 2299 s = migrate_get_current(); 2300 2301 return s->parameters.compress_level; 2302 } 2303 2304 int migrate_compress_threads(void) 2305 { 2306 MigrationState *s; 2307 2308 s = migrate_get_current(); 2309 2310 return s->parameters.compress_threads; 2311 } 2312 2313 int migrate_compress_wait_thread(void) 2314 { 2315 MigrationState *s; 2316 2317 s = migrate_get_current(); 2318 2319 return s->parameters.compress_wait_thread; 2320 } 2321 2322 int migrate_decompress_threads(void) 2323 { 2324 MigrationState *s; 2325 2326 s = migrate_get_current(); 2327 2328 return s->parameters.decompress_threads; 2329 } 2330 2331 bool migrate_dirty_bitmaps(void) 2332 { 2333 MigrationState *s; 2334 2335 s = migrate_get_current(); 2336 2337 return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; 2338 } 2339 2340 bool migrate_ignore_shared(void) 2341 { 2342 MigrationState *s; 2343 2344 s = migrate_get_current(); 2345 2346 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; 2347 } 2348 2349 bool migrate_validate_uuid(void) 2350 { 2351 MigrationState *s; 2352 2353 s = migrate_get_current(); 2354 2355 return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; 2356 } 2357 2358 bool migrate_use_events(void) 2359 { 2360 MigrationState *s; 2361 2362 s = migrate_get_current(); 2363 2364 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 2365 } 2366 2367 bool migrate_use_multifd(void) 2368 { 2369 MigrationState *s; 2370 2371 s = migrate_get_current(); 2372 2373 return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; 2374 } 2375 2376 bool migrate_pause_before_switchover(void) 2377 { 2378 MigrationState *s; 2379 2380 s = migrate_get_current(); 2381 2382 return s->enabled_capabilities[ 2383 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 2384 } 2385 2386 int migrate_multifd_channels(void) 2387 { 2388 MigrationState *s; 2389 2390 s = migrate_get_current(); 2391 2392 return s->parameters.multifd_channels; 2393 } 2394 2395 MultiFDCompression migrate_multifd_compression(void) 2396 { 2397 MigrationState *s; 2398 2399 s = migrate_get_current(); 2400 2401 return s->parameters.multifd_compression; 2402 } 2403 2404 int migrate_multifd_zlib_level(void) 2405 { 2406 MigrationState *s; 2407 2408 s = migrate_get_current(); 2409 2410 return s->parameters.multifd_zlib_level; 2411 } 2412 2413 int migrate_multifd_zstd_level(void) 2414 { 2415 MigrationState *s; 2416 2417 s = migrate_get_current(); 2418 2419 return s->parameters.multifd_zstd_level; 2420 } 2421 2422 int migrate_use_xbzrle(void) 2423 { 2424 MigrationState *s; 2425 2426 s = migrate_get_current(); 2427 2428 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 2429 } 2430 2431 int64_t migrate_xbzrle_cache_size(void) 2432 { 2433 MigrationState *s; 2434 2435 s = migrate_get_current(); 2436 2437 return s->parameters.xbzrle_cache_size; 2438 } 2439 2440 static int64_t migrate_max_postcopy_bandwidth(void) 2441 { 2442 MigrationState *s; 2443 2444 s = migrate_get_current(); 2445 2446 return s->parameters.max_postcopy_bandwidth; 2447 } 2448 2449 bool migrate_use_block(void) 2450 { 2451 MigrationState *s; 2452 2453 s = migrate_get_current(); 2454 2455 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 2456 } 2457 2458 bool migrate_use_return_path(void) 2459 { 2460 MigrationState *s; 2461 2462 s = migrate_get_current(); 2463 2464 return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 2465 } 2466 2467 bool migrate_use_block_incremental(void) 2468 { 2469 MigrationState *s; 2470 2471 s = migrate_get_current(); 2472 2473 return s->parameters.block_incremental; 2474 } 2475 2476 /* migration thread support */ 2477 /* 2478 * Something bad happened to the RP stream, mark an error 2479 * The caller shall print or trace something to indicate why 2480 */ 2481 static void mark_source_rp_bad(MigrationState *s) 2482 { 2483 s->rp_state.error = true; 2484 } 2485 2486 static struct rp_cmd_args { 2487 ssize_t len; /* -1 = variable */ 2488 const char *name; 2489 } rp_cmd_args[] = { 2490 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2491 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2492 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2493 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2494 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2495 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2496 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2497 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2498 }; 2499 2500 /* 2501 * Process a request for pages received on the return path, 2502 * We're allowed to send more than requested (e.g. to round to our page size) 2503 * and we don't need to send pages that have already been sent. 2504 */ 2505 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 2506 ram_addr_t start, size_t len) 2507 { 2508 long our_host_ps = qemu_real_host_page_size; 2509 2510 trace_migrate_handle_rp_req_pages(rbname, start, len); 2511 2512 /* 2513 * Since we currently insist on matching page sizes, just sanity check 2514 * we're being asked for whole host pages. 2515 */ 2516 if (start & (our_host_ps - 1) || 2517 (len & (our_host_ps - 1))) { 2518 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 2519 " len: %zd", __func__, start, len); 2520 mark_source_rp_bad(ms); 2521 return; 2522 } 2523 2524 if (ram_save_queue_pages(rbname, start, len)) { 2525 mark_source_rp_bad(ms); 2526 } 2527 } 2528 2529 /* Return true to retry, false to quit */ 2530 static bool postcopy_pause_return_path_thread(MigrationState *s) 2531 { 2532 trace_postcopy_pause_return_path(); 2533 2534 qemu_sem_wait(&s->postcopy_pause_rp_sem); 2535 2536 trace_postcopy_pause_return_path_continued(); 2537 2538 return true; 2539 } 2540 2541 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 2542 { 2543 RAMBlock *block = qemu_ram_block_by_name(block_name); 2544 2545 if (!block) { 2546 error_report("%s: invalid block name '%s'", __func__, block_name); 2547 return -EINVAL; 2548 } 2549 2550 /* Fetch the received bitmap and refresh the dirty bitmap */ 2551 return ram_dirty_bitmap_reload(s, block); 2552 } 2553 2554 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 2555 { 2556 trace_source_return_path_thread_resume_ack(value); 2557 2558 if (value != MIGRATION_RESUME_ACK_VALUE) { 2559 error_report("%s: illegal resume_ack value %"PRIu32, 2560 __func__, value); 2561 return -1; 2562 } 2563 2564 /* Now both sides are active. */ 2565 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2566 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2567 2568 /* Notify send thread that time to continue send pages */ 2569 qemu_sem_post(&s->rp_state.rp_sem); 2570 2571 return 0; 2572 } 2573 2574 /* 2575 * Handles messages sent on the return path towards the source VM 2576 * 2577 */ 2578 static void *source_return_path_thread(void *opaque) 2579 { 2580 MigrationState *ms = opaque; 2581 QEMUFile *rp = ms->rp_state.from_dst_file; 2582 uint16_t header_len, header_type; 2583 uint8_t buf[512]; 2584 uint32_t tmp32, sibling_error; 2585 ram_addr_t start = 0; /* =0 to silence warning */ 2586 size_t len = 0, expected_len; 2587 int res; 2588 2589 trace_source_return_path_thread_entry(); 2590 rcu_register_thread(); 2591 2592 retry: 2593 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 2594 migration_is_setup_or_active(ms->state)) { 2595 trace_source_return_path_thread_loop_top(); 2596 header_type = qemu_get_be16(rp); 2597 header_len = qemu_get_be16(rp); 2598 2599 if (qemu_file_get_error(rp)) { 2600 mark_source_rp_bad(ms); 2601 goto out; 2602 } 2603 2604 if (header_type >= MIG_RP_MSG_MAX || 2605 header_type == MIG_RP_MSG_INVALID) { 2606 error_report("RP: Received invalid message 0x%04x length 0x%04x", 2607 header_type, header_len); 2608 mark_source_rp_bad(ms); 2609 goto out; 2610 } 2611 2612 if ((rp_cmd_args[header_type].len != -1 && 2613 header_len != rp_cmd_args[header_type].len) || 2614 header_len > sizeof(buf)) { 2615 error_report("RP: Received '%s' message (0x%04x) with" 2616 "incorrect length %d expecting %zu", 2617 rp_cmd_args[header_type].name, header_type, header_len, 2618 (size_t)rp_cmd_args[header_type].len); 2619 mark_source_rp_bad(ms); 2620 goto out; 2621 } 2622 2623 /* We know we've got a valid header by this point */ 2624 res = qemu_get_buffer(rp, buf, header_len); 2625 if (res != header_len) { 2626 error_report("RP: Failed reading data for message 0x%04x" 2627 " read %d expected %d", 2628 header_type, res, header_len); 2629 mark_source_rp_bad(ms); 2630 goto out; 2631 } 2632 2633 /* OK, we have the message and the data */ 2634 switch (header_type) { 2635 case MIG_RP_MSG_SHUT: 2636 sibling_error = ldl_be_p(buf); 2637 trace_source_return_path_thread_shut(sibling_error); 2638 if (sibling_error) { 2639 error_report("RP: Sibling indicated error %d", sibling_error); 2640 mark_source_rp_bad(ms); 2641 } 2642 /* 2643 * We'll let the main thread deal with closing the RP 2644 * we could do a shutdown(2) on it, but we're the only user 2645 * anyway, so there's nothing gained. 2646 */ 2647 goto out; 2648 2649 case MIG_RP_MSG_PONG: 2650 tmp32 = ldl_be_p(buf); 2651 trace_source_return_path_thread_pong(tmp32); 2652 break; 2653 2654 case MIG_RP_MSG_REQ_PAGES: 2655 start = ldq_be_p(buf); 2656 len = ldl_be_p(buf + 8); 2657 migrate_handle_rp_req_pages(ms, NULL, start, len); 2658 break; 2659 2660 case MIG_RP_MSG_REQ_PAGES_ID: 2661 expected_len = 12 + 1; /* header + termination */ 2662 2663 if (header_len >= expected_len) { 2664 start = ldq_be_p(buf); 2665 len = ldl_be_p(buf + 8); 2666 /* Now we expect an idstr */ 2667 tmp32 = buf[12]; /* Length of the following idstr */ 2668 buf[13 + tmp32] = '\0'; 2669 expected_len += tmp32; 2670 } 2671 if (header_len != expected_len) { 2672 error_report("RP: Req_Page_id with length %d expecting %zd", 2673 header_len, expected_len); 2674 mark_source_rp_bad(ms); 2675 goto out; 2676 } 2677 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 2678 break; 2679 2680 case MIG_RP_MSG_RECV_BITMAP: 2681 if (header_len < 1) { 2682 error_report("%s: missing block name", __func__); 2683 mark_source_rp_bad(ms); 2684 goto out; 2685 } 2686 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 2687 buf[buf[0] + 1] = '\0'; 2688 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 2689 mark_source_rp_bad(ms); 2690 goto out; 2691 } 2692 break; 2693 2694 case MIG_RP_MSG_RESUME_ACK: 2695 tmp32 = ldl_be_p(buf); 2696 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 2697 mark_source_rp_bad(ms); 2698 goto out; 2699 } 2700 break; 2701 2702 default: 2703 break; 2704 } 2705 } 2706 2707 out: 2708 res = qemu_file_get_error(rp); 2709 if (res) { 2710 if (res == -EIO && migration_in_postcopy()) { 2711 /* 2712 * Maybe there is something we can do: it looks like a 2713 * network down issue, and we pause for a recovery. 2714 */ 2715 if (postcopy_pause_return_path_thread(ms)) { 2716 /* Reload rp, reset the rest */ 2717 if (rp != ms->rp_state.from_dst_file) { 2718 qemu_fclose(rp); 2719 rp = ms->rp_state.from_dst_file; 2720 } 2721 ms->rp_state.error = false; 2722 goto retry; 2723 } 2724 } 2725 2726 trace_source_return_path_thread_bad_end(); 2727 mark_source_rp_bad(ms); 2728 } 2729 2730 trace_source_return_path_thread_end(); 2731 ms->rp_state.from_dst_file = NULL; 2732 qemu_fclose(rp); 2733 rcu_unregister_thread(); 2734 return NULL; 2735 } 2736 2737 static int open_return_path_on_source(MigrationState *ms, 2738 bool create_thread) 2739 { 2740 2741 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2742 if (!ms->rp_state.from_dst_file) { 2743 return -1; 2744 } 2745 2746 trace_open_return_path_on_source(); 2747 2748 if (!create_thread) { 2749 /* We're done */ 2750 return 0; 2751 } 2752 2753 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 2754 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2755 2756 trace_open_return_path_on_source_continue(); 2757 2758 return 0; 2759 } 2760 2761 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 2762 static int await_return_path_close_on_source(MigrationState *ms) 2763 { 2764 /* 2765 * If this is a normal exit then the destination will send a SHUT and the 2766 * rp_thread will exit, however if there's an error we need to cause 2767 * it to exit. 2768 */ 2769 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 2770 /* 2771 * shutdown(2), if we have it, will cause it to unblock if it's stuck 2772 * waiting for the destination. 2773 */ 2774 qemu_file_shutdown(ms->rp_state.from_dst_file); 2775 mark_source_rp_bad(ms); 2776 } 2777 trace_await_return_path_close_on_source_joining(); 2778 qemu_thread_join(&ms->rp_state.rp_thread); 2779 trace_await_return_path_close_on_source_close(); 2780 return ms->rp_state.error; 2781 } 2782 2783 /* 2784 * Switch from normal iteration to postcopy 2785 * Returns non-0 on error 2786 */ 2787 static int postcopy_start(MigrationState *ms) 2788 { 2789 int ret; 2790 QIOChannelBuffer *bioc; 2791 QEMUFile *fb; 2792 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2793 int64_t bandwidth = migrate_max_postcopy_bandwidth(); 2794 bool restart_block = false; 2795 int cur_state = MIGRATION_STATUS_ACTIVE; 2796 if (!migrate_pause_before_switchover()) { 2797 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 2798 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2799 } 2800 2801 trace_postcopy_start(); 2802 qemu_mutex_lock_iothread(); 2803 trace_postcopy_start_set_run(); 2804 2805 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2806 global_state_store(); 2807 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2808 if (ret < 0) { 2809 goto fail; 2810 } 2811 2812 ret = migration_maybe_pause(ms, &cur_state, 2813 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2814 if (ret < 0) { 2815 goto fail; 2816 } 2817 2818 ret = bdrv_inactivate_all(); 2819 if (ret < 0) { 2820 goto fail; 2821 } 2822 restart_block = true; 2823 2824 /* 2825 * Cause any non-postcopiable, but iterative devices to 2826 * send out their final data. 2827 */ 2828 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 2829 2830 /* 2831 * in Finish migrate and with the io-lock held everything should 2832 * be quiet, but we've potentially still got dirty pages and we 2833 * need to tell the destination to throw any pages it's already received 2834 * that are dirty 2835 */ 2836 if (migrate_postcopy_ram()) { 2837 if (ram_postcopy_send_discard_bitmap(ms)) { 2838 error_report("postcopy send discard bitmap failed"); 2839 goto fail; 2840 } 2841 } 2842 2843 /* 2844 * send rest of state - note things that are doing postcopy 2845 * will notice we're in POSTCOPY_ACTIVE and not actually 2846 * wrap their state up here 2847 */ 2848 /* 0 max-postcopy-bandwidth means unlimited */ 2849 if (!bandwidth) { 2850 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 2851 } else { 2852 qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); 2853 } 2854 if (migrate_postcopy_ram()) { 2855 /* Ping just for debugging, helps line traces up */ 2856 qemu_savevm_send_ping(ms->to_dst_file, 2); 2857 } 2858 2859 /* 2860 * While loading the device state we may trigger page transfer 2861 * requests and the fd must be free to process those, and thus 2862 * the destination must read the whole device state off the fd before 2863 * it starts processing it. Unfortunately the ad-hoc migration format 2864 * doesn't allow the destination to know the size to read without fully 2865 * parsing it through each devices load-state code (especially the open 2866 * coded devices that use get/put). 2867 * So we wrap the device state up in a package with a length at the start; 2868 * to do this we use a qemu_buf to hold the whole of the device state. 2869 */ 2870 bioc = qio_channel_buffer_new(4096); 2871 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 2872 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 2873 object_unref(OBJECT(bioc)); 2874 2875 /* 2876 * Make sure the receiver can get incoming pages before we send the rest 2877 * of the state 2878 */ 2879 qemu_savevm_send_postcopy_listen(fb); 2880 2881 qemu_savevm_state_complete_precopy(fb, false, false); 2882 if (migrate_postcopy_ram()) { 2883 qemu_savevm_send_ping(fb, 3); 2884 } 2885 2886 qemu_savevm_send_postcopy_run(fb); 2887 2888 /* <><> end of stuff going into the package */ 2889 2890 /* Last point of recovery; as soon as we send the package the destination 2891 * can open devices and potentially start running. 2892 * Lets just check again we've not got any errors. 2893 */ 2894 ret = qemu_file_get_error(ms->to_dst_file); 2895 if (ret) { 2896 error_report("postcopy_start: Migration stream errored (pre package)"); 2897 goto fail_closefb; 2898 } 2899 2900 restart_block = false; 2901 2902 /* Now send that blob */ 2903 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 2904 goto fail_closefb; 2905 } 2906 qemu_fclose(fb); 2907 2908 /* Send a notify to give a chance for anything that needs to happen 2909 * at the transition to postcopy and after the device state; in particular 2910 * spice needs to trigger a transition now 2911 */ 2912 ms->postcopy_after_devices = true; 2913 notifier_list_notify(&migration_state_notifiers, ms); 2914 2915 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 2916 2917 qemu_mutex_unlock_iothread(); 2918 2919 if (migrate_postcopy_ram()) { 2920 /* 2921 * Although this ping is just for debug, it could potentially be 2922 * used for getting a better measurement of downtime at the source. 2923 */ 2924 qemu_savevm_send_ping(ms->to_dst_file, 4); 2925 } 2926 2927 if (migrate_release_ram()) { 2928 ram_postcopy_migrated_memory_release(ms); 2929 } 2930 2931 ret = qemu_file_get_error(ms->to_dst_file); 2932 if (ret) { 2933 error_report("postcopy_start: Migration stream errored"); 2934 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2935 MIGRATION_STATUS_FAILED); 2936 } 2937 2938 return ret; 2939 2940 fail_closefb: 2941 qemu_fclose(fb); 2942 fail: 2943 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2944 MIGRATION_STATUS_FAILED); 2945 if (restart_block) { 2946 /* A failure happened early enough that we know the destination hasn't 2947 * accessed block devices, so we're safe to recover. 2948 */ 2949 Error *local_err = NULL; 2950 2951 bdrv_invalidate_cache_all(&local_err); 2952 if (local_err) { 2953 error_report_err(local_err); 2954 } 2955 } 2956 qemu_mutex_unlock_iothread(); 2957 return -1; 2958 } 2959 2960 /** 2961 * migration_maybe_pause: Pause if required to by 2962 * migrate_pause_before_switchover called with the iothread locked 2963 * Returns: 0 on success 2964 */ 2965 static int migration_maybe_pause(MigrationState *s, 2966 int *current_active_state, 2967 int new_state) 2968 { 2969 if (!migrate_pause_before_switchover()) { 2970 return 0; 2971 } 2972 2973 /* Since leaving this state is not atomic with posting the semaphore 2974 * it's possible that someone could have issued multiple migrate_continue 2975 * and the semaphore is incorrectly positive at this point; 2976 * the docs say it's undefined to reinit a semaphore that's already 2977 * init'd, so use timedwait to eat up any existing posts. 2978 */ 2979 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 2980 /* This block intentionally left blank */ 2981 } 2982 2983 /* 2984 * If the migration is cancelled when it is in the completion phase, 2985 * the migration state is set to MIGRATION_STATUS_CANCELLING. 2986 * So we don't need to wait a semaphore, otherwise we would always 2987 * wait for the 'pause_sem' semaphore. 2988 */ 2989 if (s->state != MIGRATION_STATUS_CANCELLING) { 2990 qemu_mutex_unlock_iothread(); 2991 migrate_set_state(&s->state, *current_active_state, 2992 MIGRATION_STATUS_PRE_SWITCHOVER); 2993 qemu_sem_wait(&s->pause_sem); 2994 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 2995 new_state); 2996 *current_active_state = new_state; 2997 qemu_mutex_lock_iothread(); 2998 } 2999 3000 return s->state == new_state ? 0 : -EINVAL; 3001 } 3002 3003 /** 3004 * migration_completion: Used by migration_thread when there's not much left. 3005 * The caller 'breaks' the loop when this returns. 3006 * 3007 * @s: Current migration state 3008 */ 3009 static void migration_completion(MigrationState *s) 3010 { 3011 int ret; 3012 int current_active_state = s->state; 3013 3014 if (s->state == MIGRATION_STATUS_ACTIVE) { 3015 qemu_mutex_lock_iothread(); 3016 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3017 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3018 s->vm_was_running = runstate_is_running(); 3019 ret = global_state_store(); 3020 3021 if (!ret) { 3022 bool inactivate = !migrate_colo_enabled(); 3023 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 3024 if (ret >= 0) { 3025 ret = migration_maybe_pause(s, ¤t_active_state, 3026 MIGRATION_STATUS_DEVICE); 3027 } 3028 if (ret >= 0) { 3029 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 3030 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 3031 inactivate); 3032 } 3033 if (inactivate && ret >= 0) { 3034 s->block_inactive = true; 3035 } 3036 } 3037 qemu_mutex_unlock_iothread(); 3038 3039 if (ret < 0) { 3040 goto fail; 3041 } 3042 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3043 trace_migration_completion_postcopy_end(); 3044 3045 qemu_savevm_state_complete_postcopy(s->to_dst_file); 3046 trace_migration_completion_postcopy_end_after_complete(); 3047 } 3048 3049 /* 3050 * If rp was opened we must clean up the thread before 3051 * cleaning everything else up (since if there are no failures 3052 * it will wait for the destination to send it's status in 3053 * a SHUT command). 3054 */ 3055 if (s->rp_state.from_dst_file) { 3056 int rp_error; 3057 trace_migration_return_path_end_before(); 3058 rp_error = await_return_path_close_on_source(s); 3059 trace_migration_return_path_end_after(rp_error); 3060 if (rp_error) { 3061 goto fail_invalidate; 3062 } 3063 } 3064 3065 if (qemu_file_get_error(s->to_dst_file)) { 3066 trace_migration_completion_file_err(); 3067 goto fail_invalidate; 3068 } 3069 3070 if (!migrate_colo_enabled()) { 3071 migrate_set_state(&s->state, current_active_state, 3072 MIGRATION_STATUS_COMPLETED); 3073 } 3074 3075 return; 3076 3077 fail_invalidate: 3078 /* If not doing postcopy, vm_start() will be called: let's regain 3079 * control on images. 3080 */ 3081 if (s->state == MIGRATION_STATUS_ACTIVE || 3082 s->state == MIGRATION_STATUS_DEVICE) { 3083 Error *local_err = NULL; 3084 3085 qemu_mutex_lock_iothread(); 3086 bdrv_invalidate_cache_all(&local_err); 3087 if (local_err) { 3088 error_report_err(local_err); 3089 } else { 3090 s->block_inactive = false; 3091 } 3092 qemu_mutex_unlock_iothread(); 3093 } 3094 3095 fail: 3096 migrate_set_state(&s->state, current_active_state, 3097 MIGRATION_STATUS_FAILED); 3098 } 3099 3100 bool migrate_colo_enabled(void) 3101 { 3102 MigrationState *s = migrate_get_current(); 3103 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 3104 } 3105 3106 typedef enum MigThrError { 3107 /* No error detected */ 3108 MIG_THR_ERR_NONE = 0, 3109 /* Detected error, but resumed successfully */ 3110 MIG_THR_ERR_RECOVERED = 1, 3111 /* Detected fatal error, need to exit */ 3112 MIG_THR_ERR_FATAL = 2, 3113 } MigThrError; 3114 3115 static int postcopy_resume_handshake(MigrationState *s) 3116 { 3117 qemu_savevm_send_postcopy_resume(s->to_dst_file); 3118 3119 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3120 qemu_sem_wait(&s->rp_state.rp_sem); 3121 } 3122 3123 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3124 return 0; 3125 } 3126 3127 return -1; 3128 } 3129 3130 /* Return zero if success, or <0 for error */ 3131 static int postcopy_do_resume(MigrationState *s) 3132 { 3133 int ret; 3134 3135 /* 3136 * Call all the resume_prepare() hooks, so that modules can be 3137 * ready for the migration resume. 3138 */ 3139 ret = qemu_savevm_state_resume_prepare(s); 3140 if (ret) { 3141 error_report("%s: resume_prepare() failure detected: %d", 3142 __func__, ret); 3143 return ret; 3144 } 3145 3146 /* 3147 * Last handshake with destination on the resume (destination will 3148 * switch to postcopy-active afterwards) 3149 */ 3150 ret = postcopy_resume_handshake(s); 3151 if (ret) { 3152 error_report("%s: handshake failed: %d", __func__, ret); 3153 return ret; 3154 } 3155 3156 return 0; 3157 } 3158 3159 /* 3160 * We don't return until we are in a safe state to continue current 3161 * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or 3162 * MIG_THR_ERR_FATAL if unrecovery failure happened. 3163 */ 3164 static MigThrError postcopy_pause(MigrationState *s) 3165 { 3166 assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 3167 3168 while (true) { 3169 QEMUFile *file; 3170 3171 /* Current channel is possibly broken. Release it. */ 3172 assert(s->to_dst_file); 3173 qemu_mutex_lock(&s->qemu_file_lock); 3174 file = s->to_dst_file; 3175 s->to_dst_file = NULL; 3176 qemu_mutex_unlock(&s->qemu_file_lock); 3177 3178 qemu_file_shutdown(file); 3179 qemu_fclose(file); 3180 3181 migrate_set_state(&s->state, s->state, 3182 MIGRATION_STATUS_POSTCOPY_PAUSED); 3183 3184 error_report("Detected IO failure for postcopy. " 3185 "Migration paused."); 3186 3187 /* 3188 * We wait until things fixed up. Then someone will setup the 3189 * status back for us. 3190 */ 3191 while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 3192 qemu_sem_wait(&s->postcopy_pause_sem); 3193 } 3194 3195 if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3196 /* Woken up by a recover procedure. Give it a shot */ 3197 3198 /* 3199 * Firstly, let's wake up the return path now, with a new 3200 * return path channel. 3201 */ 3202 qemu_sem_post(&s->postcopy_pause_rp_sem); 3203 3204 /* Do the resume logic */ 3205 if (postcopy_do_resume(s) == 0) { 3206 /* Let's continue! */ 3207 trace_postcopy_pause_continued(); 3208 return MIG_THR_ERR_RECOVERED; 3209 } else { 3210 /* 3211 * Something wrong happened during the recovery, let's 3212 * pause again. Pause is always better than throwing 3213 * data away. 3214 */ 3215 continue; 3216 } 3217 } else { 3218 /* This is not right... Time to quit. */ 3219 return MIG_THR_ERR_FATAL; 3220 } 3221 } 3222 } 3223 3224 static MigThrError migration_detect_error(MigrationState *s) 3225 { 3226 int ret; 3227 int state = s->state; 3228 Error *local_error = NULL; 3229 3230 if (state == MIGRATION_STATUS_CANCELLING || 3231 state == MIGRATION_STATUS_CANCELLED) { 3232 /* End the migration, but don't set the state to failed */ 3233 return MIG_THR_ERR_FATAL; 3234 } 3235 3236 /* Try to detect any file errors */ 3237 ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); 3238 if (!ret) { 3239 /* Everything is fine */ 3240 assert(!local_error); 3241 return MIG_THR_ERR_NONE; 3242 } 3243 3244 if (local_error) { 3245 migrate_set_error(s, local_error); 3246 error_free(local_error); 3247 } 3248 3249 if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { 3250 /* 3251 * For postcopy, we allow the network to be down for a 3252 * while. After that, it can be continued by a 3253 * recovery phase. 3254 */ 3255 return postcopy_pause(s); 3256 } else { 3257 /* 3258 * For precopy (or postcopy with error outside IO), we fail 3259 * with no time. 3260 */ 3261 migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); 3262 trace_migration_thread_file_err(); 3263 3264 /* Time to stop the migration, now. */ 3265 return MIG_THR_ERR_FATAL; 3266 } 3267 } 3268 3269 /* How many bytes have we transferred since the beginning of the migration */ 3270 static uint64_t migration_total_bytes(MigrationState *s) 3271 { 3272 return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; 3273 } 3274 3275 static void migration_calculate_complete(MigrationState *s) 3276 { 3277 uint64_t bytes = migration_total_bytes(s); 3278 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3279 int64_t transfer_time; 3280 3281 s->total_time = end_time - s->start_time; 3282 if (!s->downtime) { 3283 /* 3284 * It's still not set, so we are precopy migration. For 3285 * postcopy, downtime is calculated during postcopy_start(). 3286 */ 3287 s->downtime = end_time - s->downtime_start; 3288 } 3289 3290 transfer_time = s->total_time - s->setup_time; 3291 if (transfer_time) { 3292 s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; 3293 } 3294 } 3295 3296 static void update_iteration_initial_status(MigrationState *s) 3297 { 3298 /* 3299 * Update these three fields at the same time to avoid mismatch info lead 3300 * wrong speed calculation. 3301 */ 3302 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3303 s->iteration_initial_bytes = migration_total_bytes(s); 3304 s->iteration_initial_pages = ram_get_total_transferred_pages(); 3305 } 3306 3307 static void migration_update_counters(MigrationState *s, 3308 int64_t current_time) 3309 { 3310 uint64_t transferred, transferred_pages, time_spent; 3311 uint64_t current_bytes; /* bytes transferred since the beginning */ 3312 double bandwidth; 3313 3314 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 3315 return; 3316 } 3317 3318 current_bytes = migration_total_bytes(s); 3319 transferred = current_bytes - s->iteration_initial_bytes; 3320 time_spent = current_time - s->iteration_start_time; 3321 bandwidth = (double)transferred / time_spent; 3322 s->threshold_size = bandwidth * s->parameters.downtime_limit; 3323 3324 s->mbps = (((double) transferred * 8.0) / 3325 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 3326 3327 transferred_pages = ram_get_total_transferred_pages() - 3328 s->iteration_initial_pages; 3329 s->pages_per_second = (double) transferred_pages / 3330 (((double) time_spent / 1000.0)); 3331 3332 /* 3333 * if we haven't sent anything, we don't want to 3334 * recalculate. 10000 is a small enough number for our purposes 3335 */ 3336 if (ram_counters.dirty_pages_rate && transferred > 10000) { 3337 s->expected_downtime = ram_counters.remaining / bandwidth; 3338 } 3339 3340 qemu_file_reset_rate_limit(s->to_dst_file); 3341 3342 update_iteration_initial_status(s); 3343 3344 trace_migrate_transferred(transferred, time_spent, 3345 bandwidth, s->threshold_size); 3346 } 3347 3348 /* Migration thread iteration status */ 3349 typedef enum { 3350 MIG_ITERATE_RESUME, /* Resume current iteration */ 3351 MIG_ITERATE_SKIP, /* Skip current iteration */ 3352 MIG_ITERATE_BREAK, /* Break the loop */ 3353 } MigIterateState; 3354 3355 /* 3356 * Return true if continue to the next iteration directly, false 3357 * otherwise. 3358 */ 3359 static MigIterateState migration_iteration_run(MigrationState *s) 3360 { 3361 uint64_t pending_size, pend_pre, pend_compat, pend_post; 3362 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 3363 3364 qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, 3365 &pend_compat, &pend_post); 3366 pending_size = pend_pre + pend_compat + pend_post; 3367 3368 trace_migrate_pending(pending_size, s->threshold_size, 3369 pend_pre, pend_compat, pend_post); 3370 3371 if (pending_size && pending_size >= s->threshold_size) { 3372 /* Still a significant amount to transfer */ 3373 if (!in_postcopy && pend_pre <= s->threshold_size && 3374 qatomic_read(&s->start_postcopy)) { 3375 if (postcopy_start(s)) { 3376 error_report("%s: postcopy failed to start", __func__); 3377 } 3378 return MIG_ITERATE_SKIP; 3379 } 3380 /* Just another iteration step */ 3381 qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); 3382 } else { 3383 trace_migration_thread_low_pending(pending_size); 3384 migration_completion(s); 3385 return MIG_ITERATE_BREAK; 3386 } 3387 3388 return MIG_ITERATE_RESUME; 3389 } 3390 3391 static void migration_iteration_finish(MigrationState *s) 3392 { 3393 /* If we enabled cpu throttling for auto-converge, turn it off. */ 3394 cpu_throttle_stop(); 3395 3396 qemu_mutex_lock_iothread(); 3397 switch (s->state) { 3398 case MIGRATION_STATUS_COMPLETED: 3399 migration_calculate_complete(s); 3400 runstate_set(RUN_STATE_POSTMIGRATE); 3401 break; 3402 3403 case MIGRATION_STATUS_ACTIVE: 3404 /* 3405 * We should really assert here, but since it's during 3406 * migration, let's try to reduce the usage of assertions. 3407 */ 3408 if (!migrate_colo_enabled()) { 3409 error_report("%s: critical error: calling COLO code without " 3410 "COLO enabled", __func__); 3411 } 3412 migrate_start_colo_process(s); 3413 /* 3414 * Fixme: we will run VM in COLO no matter its old running state. 3415 * After exited COLO, we will keep running. 3416 */ 3417 s->vm_was_running = true; 3418 /* Fallthrough */ 3419 case MIGRATION_STATUS_FAILED: 3420 case MIGRATION_STATUS_CANCELLED: 3421 case MIGRATION_STATUS_CANCELLING: 3422 if (s->vm_was_running) { 3423 vm_start(); 3424 } else { 3425 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 3426 runstate_set(RUN_STATE_POSTMIGRATE); 3427 } 3428 } 3429 break; 3430 3431 default: 3432 /* Should not reach here, but if so, forgive the VM. */ 3433 error_report("%s: Unknown ending state %d", __func__, s->state); 3434 break; 3435 } 3436 migrate_fd_cleanup_schedule(s); 3437 qemu_mutex_unlock_iothread(); 3438 } 3439 3440 void migration_make_urgent_request(void) 3441 { 3442 qemu_sem_post(&migrate_get_current()->rate_limit_sem); 3443 } 3444 3445 void migration_consume_urgent_request(void) 3446 { 3447 qemu_sem_wait(&migrate_get_current()->rate_limit_sem); 3448 } 3449 3450 /* Returns true if the rate limiting was broken by an urgent request */ 3451 bool migration_rate_limit(void) 3452 { 3453 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3454 MigrationState *s = migrate_get_current(); 3455 3456 bool urgent = false; 3457 migration_update_counters(s, now); 3458 if (qemu_file_rate_limit(s->to_dst_file)) { 3459 3460 if (qemu_file_get_error(s->to_dst_file)) { 3461 return false; 3462 } 3463 /* 3464 * Wait for a delay to do rate limiting OR 3465 * something urgent to post the semaphore. 3466 */ 3467 int ms = s->iteration_start_time + BUFFER_DELAY - now; 3468 trace_migration_rate_limit_pre(ms); 3469 if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { 3470 /* 3471 * We were woken by one or more urgent things but 3472 * the timedwait will have consumed one of them. 3473 * The service routine for the urgent wake will dec 3474 * the semaphore itself for each item it consumes, 3475 * so add this one we just eat back. 3476 */ 3477 qemu_sem_post(&s->rate_limit_sem); 3478 urgent = true; 3479 } 3480 trace_migration_rate_limit_post(urgent); 3481 } 3482 return urgent; 3483 } 3484 3485 /* 3486 * Master migration thread on the source VM. 3487 * It drives the migration and pumps the data down the outgoing channel. 3488 */ 3489 static void *migration_thread(void *opaque) 3490 { 3491 MigrationState *s = opaque; 3492 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3493 MigThrError thr_error; 3494 bool urgent = false; 3495 3496 rcu_register_thread(); 3497 3498 object_ref(OBJECT(s)); 3499 update_iteration_initial_status(s); 3500 3501 qemu_savevm_state_header(s->to_dst_file); 3502 3503 /* 3504 * If we opened the return path, we need to make sure dst has it 3505 * opened as well. 3506 */ 3507 if (s->rp_state.from_dst_file) { 3508 /* Now tell the dest that it should open its end so it can reply */ 3509 qemu_savevm_send_open_return_path(s->to_dst_file); 3510 3511 /* And do a ping that will make stuff easier to debug */ 3512 qemu_savevm_send_ping(s->to_dst_file, 1); 3513 } 3514 3515 if (migrate_postcopy()) { 3516 /* 3517 * Tell the destination that we *might* want to do postcopy later; 3518 * if the other end can't do postcopy it should fail now, nice and 3519 * early. 3520 */ 3521 qemu_savevm_send_postcopy_advise(s->to_dst_file); 3522 } 3523 3524 if (migrate_colo_enabled()) { 3525 /* Notify migration destination that we enable COLO */ 3526 qemu_savevm_send_colo_enable(s->to_dst_file); 3527 } 3528 3529 qemu_savevm_state_setup(s->to_dst_file); 3530 3531 if (qemu_savevm_state_guest_unplug_pending()) { 3532 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3533 MIGRATION_STATUS_WAIT_UNPLUG); 3534 3535 while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && 3536 qemu_savevm_state_guest_unplug_pending()) { 3537 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3538 } 3539 3540 migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, 3541 MIGRATION_STATUS_ACTIVE); 3542 } 3543 3544 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 3545 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3546 MIGRATION_STATUS_ACTIVE); 3547 3548 trace_migration_thread_setup_complete(); 3549 3550 while (migration_is_active(s)) { 3551 if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { 3552 MigIterateState iter_state = migration_iteration_run(s); 3553 if (iter_state == MIG_ITERATE_SKIP) { 3554 continue; 3555 } else if (iter_state == MIG_ITERATE_BREAK) { 3556 break; 3557 } 3558 } 3559 3560 /* 3561 * Try to detect any kind of failures, and see whether we 3562 * should stop the migration now. 3563 */ 3564 thr_error = migration_detect_error(s); 3565 if (thr_error == MIG_THR_ERR_FATAL) { 3566 /* Stop migration */ 3567 break; 3568 } else if (thr_error == MIG_THR_ERR_RECOVERED) { 3569 /* 3570 * Just recovered from a e.g. network failure, reset all 3571 * the local variables. This is important to avoid 3572 * breaking transferred_bytes and bandwidth calculation 3573 */ 3574 update_iteration_initial_status(s); 3575 } 3576 3577 urgent = migration_rate_limit(); 3578 } 3579 3580 trace_migration_thread_after_loop(); 3581 migration_iteration_finish(s); 3582 object_unref(OBJECT(s)); 3583 rcu_unregister_thread(); 3584 return NULL; 3585 } 3586 3587 void migrate_fd_connect(MigrationState *s, Error *error_in) 3588 { 3589 Error *local_err = NULL; 3590 int64_t rate_limit; 3591 bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; 3592 3593 s->expected_downtime = s->parameters.downtime_limit; 3594 if (resume) { 3595 assert(s->cleanup_bh); 3596 } else { 3597 assert(!s->cleanup_bh); 3598 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); 3599 } 3600 if (error_in) { 3601 migrate_fd_error(s, error_in); 3602 migrate_fd_cleanup(s); 3603 return; 3604 } 3605 3606 if (resume) { 3607 /* This is a resumed migration */ 3608 rate_limit = s->parameters.max_postcopy_bandwidth / 3609 XFER_LIMIT_RATIO; 3610 } else { 3611 /* This is a fresh new migration */ 3612 rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; 3613 3614 /* Notify before starting migration thread */ 3615 notifier_list_notify(&migration_state_notifiers, s); 3616 } 3617 3618 qemu_file_set_rate_limit(s->to_dst_file, rate_limit); 3619 qemu_file_set_blocking(s->to_dst_file, true); 3620 3621 /* 3622 * Open the return path. For postcopy, it is used exclusively. For 3623 * precopy, only if user specified "return-path" capability would 3624 * QEMU uses the return path. 3625 */ 3626 if (migrate_postcopy_ram() || migrate_use_return_path()) { 3627 if (open_return_path_on_source(s, !resume)) { 3628 error_report("Unable to open return-path for postcopy"); 3629 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); 3630 migrate_fd_cleanup(s); 3631 return; 3632 } 3633 } 3634 3635 if (resume) { 3636 /* Wakeup the main migration thread to do the recovery */ 3637 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 3638 MIGRATION_STATUS_POSTCOPY_RECOVER); 3639 qemu_sem_post(&s->postcopy_pause_sem); 3640 return; 3641 } 3642 3643 if (multifd_save_setup(&local_err) != 0) { 3644 error_report_err(local_err); 3645 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3646 MIGRATION_STATUS_FAILED); 3647 migrate_fd_cleanup(s); 3648 return; 3649 } 3650 qemu_thread_create(&s->thread, "live_migration", migration_thread, s, 3651 QEMU_THREAD_JOINABLE); 3652 s->migration_thread_running = true; 3653 } 3654 3655 void migration_global_dump(Monitor *mon) 3656 { 3657 MigrationState *ms = migrate_get_current(); 3658 3659 monitor_printf(mon, "globals:\n"); 3660 monitor_printf(mon, "store-global-state: %s\n", 3661 ms->store_global_state ? "on" : "off"); 3662 monitor_printf(mon, "only-migratable: %s\n", 3663 only_migratable ? "on" : "off"); 3664 monitor_printf(mon, "send-configuration: %s\n", 3665 ms->send_configuration ? "on" : "off"); 3666 monitor_printf(mon, "send-section-footer: %s\n", 3667 ms->send_section_footer ? "on" : "off"); 3668 monitor_printf(mon, "decompress-error-check: %s\n", 3669 ms->decompress_error_check ? "on" : "off"); 3670 monitor_printf(mon, "clear-bitmap-shift: %u\n", 3671 ms->clear_bitmap_shift); 3672 } 3673 3674 #define DEFINE_PROP_MIG_CAP(name, x) \ 3675 DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) 3676 3677 static Property migration_properties[] = { 3678 DEFINE_PROP_BOOL("store-global-state", MigrationState, 3679 store_global_state, true), 3680 DEFINE_PROP_BOOL("send-configuration", MigrationState, 3681 send_configuration, true), 3682 DEFINE_PROP_BOOL("send-section-footer", MigrationState, 3683 send_section_footer, true), 3684 DEFINE_PROP_BOOL("decompress-error-check", MigrationState, 3685 decompress_error_check, true), 3686 DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, 3687 clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), 3688 3689 /* Migration parameters */ 3690 DEFINE_PROP_UINT8("x-compress-level", MigrationState, 3691 parameters.compress_level, 3692 DEFAULT_MIGRATE_COMPRESS_LEVEL), 3693 DEFINE_PROP_UINT8("x-compress-threads", MigrationState, 3694 parameters.compress_threads, 3695 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), 3696 DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, 3697 parameters.compress_wait_thread, true), 3698 DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, 3699 parameters.decompress_threads, 3700 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), 3701 DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, 3702 parameters.throttle_trigger_threshold, 3703 DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), 3704 DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, 3705 parameters.cpu_throttle_initial, 3706 DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), 3707 DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, 3708 parameters.cpu_throttle_increment, 3709 DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), 3710 DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, 3711 parameters.cpu_throttle_tailslow, false), 3712 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, 3713 parameters.max_bandwidth, MAX_THROTTLE), 3714 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, 3715 parameters.downtime_limit, 3716 DEFAULT_MIGRATE_SET_DOWNTIME), 3717 DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, 3718 parameters.x_checkpoint_delay, 3719 DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), 3720 DEFINE_PROP_UINT8("multifd-channels", MigrationState, 3721 parameters.multifd_channels, 3722 DEFAULT_MIGRATE_MULTIFD_CHANNELS), 3723 DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, 3724 parameters.multifd_compression, 3725 DEFAULT_MIGRATE_MULTIFD_COMPRESSION), 3726 DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, 3727 parameters.multifd_zlib_level, 3728 DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), 3729 DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, 3730 parameters.multifd_zstd_level, 3731 DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), 3732 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, 3733 parameters.xbzrle_cache_size, 3734 DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), 3735 DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, 3736 parameters.max_postcopy_bandwidth, 3737 DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), 3738 DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, 3739 parameters.max_cpu_throttle, 3740 DEFAULT_MIGRATE_MAX_CPU_THROTTLE), 3741 DEFINE_PROP_SIZE("announce-initial", MigrationState, 3742 parameters.announce_initial, 3743 DEFAULT_MIGRATE_ANNOUNCE_INITIAL), 3744 DEFINE_PROP_SIZE("announce-max", MigrationState, 3745 parameters.announce_max, 3746 DEFAULT_MIGRATE_ANNOUNCE_MAX), 3747 DEFINE_PROP_SIZE("announce-rounds", MigrationState, 3748 parameters.announce_rounds, 3749 DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), 3750 DEFINE_PROP_SIZE("announce-step", MigrationState, 3751 parameters.announce_step, 3752 DEFAULT_MIGRATE_ANNOUNCE_STEP), 3753 3754 /* Migration capabilities */ 3755 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), 3756 DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), 3757 DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), 3758 DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), 3759 DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), 3760 DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), 3761 DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), 3762 DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), 3763 DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), 3764 DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), 3765 DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), 3766 DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), 3767 3768 DEFINE_PROP_END_OF_LIST(), 3769 }; 3770 3771 static void migration_class_init(ObjectClass *klass, void *data) 3772 { 3773 DeviceClass *dc = DEVICE_CLASS(klass); 3774 3775 dc->user_creatable = false; 3776 device_class_set_props(dc, migration_properties); 3777 } 3778 3779 static void migration_instance_finalize(Object *obj) 3780 { 3781 MigrationState *ms = MIGRATION_OBJ(obj); 3782 MigrationParameters *params = &ms->parameters; 3783 3784 qemu_mutex_destroy(&ms->error_mutex); 3785 qemu_mutex_destroy(&ms->qemu_file_lock); 3786 g_free(params->tls_hostname); 3787 g_free(params->tls_creds); 3788 qemu_sem_destroy(&ms->wait_unplug_sem); 3789 qemu_sem_destroy(&ms->rate_limit_sem); 3790 qemu_sem_destroy(&ms->pause_sem); 3791 qemu_sem_destroy(&ms->postcopy_pause_sem); 3792 qemu_sem_destroy(&ms->postcopy_pause_rp_sem); 3793 qemu_sem_destroy(&ms->rp_state.rp_sem); 3794 error_free(ms->error); 3795 } 3796 3797 static void migration_instance_init(Object *obj) 3798 { 3799 MigrationState *ms = MIGRATION_OBJ(obj); 3800 MigrationParameters *params = &ms->parameters; 3801 3802 ms->state = MIGRATION_STATUS_NONE; 3803 ms->mbps = -1; 3804 ms->pages_per_second = -1; 3805 qemu_sem_init(&ms->pause_sem, 0); 3806 qemu_mutex_init(&ms->error_mutex); 3807 3808 params->tls_hostname = g_strdup(""); 3809 params->tls_creds = g_strdup(""); 3810 3811 /* Set has_* up only for parameter checks */ 3812 params->has_compress_level = true; 3813 params->has_compress_threads = true; 3814 params->has_decompress_threads = true; 3815 params->has_throttle_trigger_threshold = true; 3816 params->has_cpu_throttle_initial = true; 3817 params->has_cpu_throttle_increment = true; 3818 params->has_cpu_throttle_tailslow = true; 3819 params->has_max_bandwidth = true; 3820 params->has_downtime_limit = true; 3821 params->has_x_checkpoint_delay = true; 3822 params->has_block_incremental = true; 3823 params->has_multifd_channels = true; 3824 params->has_multifd_compression = true; 3825 params->has_multifd_zlib_level = true; 3826 params->has_multifd_zstd_level = true; 3827 params->has_xbzrle_cache_size = true; 3828 params->has_max_postcopy_bandwidth = true; 3829 params->has_max_cpu_throttle = true; 3830 params->has_announce_initial = true; 3831 params->has_announce_max = true; 3832 params->has_announce_rounds = true; 3833 params->has_announce_step = true; 3834 3835 qemu_sem_init(&ms->postcopy_pause_sem, 0); 3836 qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); 3837 qemu_sem_init(&ms->rp_state.rp_sem, 0); 3838 qemu_sem_init(&ms->rate_limit_sem, 0); 3839 qemu_sem_init(&ms->wait_unplug_sem, 0); 3840 qemu_mutex_init(&ms->qemu_file_lock); 3841 } 3842 3843 /* 3844 * Return true if check pass, false otherwise. Error will be put 3845 * inside errp if provided. 3846 */ 3847 static bool migration_object_check(MigrationState *ms, Error **errp) 3848 { 3849 MigrationCapabilityStatusList *head = NULL; 3850 /* Assuming all off */ 3851 bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; 3852 int i; 3853 3854 if (!migrate_params_check(&ms->parameters, errp)) { 3855 return false; 3856 } 3857 3858 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 3859 if (ms->enabled_capabilities[i]) { 3860 head = migrate_cap_add(head, i, true); 3861 } 3862 } 3863 3864 ret = migrate_caps_check(cap_list, head, errp); 3865 3866 /* It works with head == NULL */ 3867 qapi_free_MigrationCapabilityStatusList(head); 3868 3869 return ret; 3870 } 3871 3872 static const TypeInfo migration_type = { 3873 .name = TYPE_MIGRATION, 3874 /* 3875 * NOTE: TYPE_MIGRATION is not really a device, as the object is 3876 * not created using qdev_new(), it is not attached to the qdev 3877 * device tree, and it is never realized. 3878 * 3879 * TODO: Make this TYPE_OBJECT once QOM provides something like 3880 * TYPE_DEVICE's "-global" properties. 3881 */ 3882 .parent = TYPE_DEVICE, 3883 .class_init = migration_class_init, 3884 .class_size = sizeof(MigrationClass), 3885 .instance_size = sizeof(MigrationState), 3886 .instance_init = migration_instance_init, 3887 .instance_finalize = migration_instance_finalize, 3888 }; 3889 3890 static void register_migration_types(void) 3891 { 3892 type_register_static(&migration_type); 3893 } 3894 3895 type_init(register_migration_types); 3896