/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"

#ifdef CONFIG_VFIO
#include "hw/vfio/vfio-common.h"
#endif

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

static bool deferred_incoming;

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}
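/*
 * Note on the comparator above: (a > b) - (a < b) is the standard C
 * three-way-compare idiom, yielding -1, 0 or 1.  Unlike "return a - b;"
 * it cannot overflow or truncate, which matters here because the keys
 * are full host addresses stored in a GTree (see page_requested below).
 */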
void migration_object_init(void)
{
    Error *err = NULL;

    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    if (!migration_object_check(current_migration, &err)) {
        error_report_err(err);
        exit(1);
    }

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_shutdown(void)
{
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migrate_fd_cancel(current_migration);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps.  It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps.  Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * something serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    qemu_mutex_lock(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        goto error;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the qemu file got an error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

error:
    qemu_mutex_unlock(&mis->rp_mutex);
    return ret;
}
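/*
 * Illustrative only: every return-path message produced above has the
 * same framing on the wire:
 *
 *     be16 message_type | be16 len | len bytes of payload
 *
 * so, for example, a MIG_RP_MSG_PONG carrying one be32 sequence value
 * occupies 2 + 2 + 4 = 8 bytes.
 */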
/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   Start: Address offset within the RB
 *   Len: Length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We maintain the last ramblock that we requested a page for.  Note
     * that we don't need locking because this function will only be
     * called within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb)));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so
             * that things like g_tree_lookup() will return TRUE (1) when
             * found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    SocketAddressList *addrs;

    addrs = g_new0(SocketAddressList, 1);
    addrs->next = mis->socket_address_list;
    mis->socket_address_list = addrs;
    addrs->value = QAPI_CLONE(SocketAddress, address);
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p) ||
               strstart(uri, "unix:", NULL) ||
               strstart(uri, "vsock:", NULL)) {
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
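/*
 * Illustrative only: the URI forms accepted by the dispatcher above
 * look like
 *
 *     tcp:<host>:<port>      e.g. -incoming tcp:0:4444
 *     unix:<path>            e.g. -incoming unix:/tmp/migrate.sock
 *     vsock:<cid>:<port>
 *     rdma:<host>:<port>     (only with CONFIG_RDMA)
 *     exec:<command>         e.g. -incoming "exec:cat migration.img"
 *     fd:<fd number>
 *     defer                  wait for a later migrate-incoming command
 *
 * Note that tcp: passes only the part after the prefix, while unix:
 * and vsock: hand the full URI to socket_start_incoming_migration()
 * (strstart() is called with a NULL result pointer for those two).
 */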
static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats flush their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If global state section was not received or we are in running
       state, we need to obey autostart.  Any other state is set with
       runstate_set. */

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to
             * advise) but managed to complete within the precopy period,
             * we can use the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of
             * the postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we got COLO info, and know whether we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats flush their mutable metadata */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        /* Wait for the checkpoint incoming thread to exit before freeing
         * resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
    }
    exit(EXIT_FAILURE);
}

/**
 * @migration_incoming_setup: Setup incoming migration
 *
 * Returns 0 for no error or 1 for error
 *
 * @f: file for main migration channel
 * @errp: where to put errors
 */
static int migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;

    if (multifd_load_setup(&local_err) != 0) {
        /* We haven't been able to create multifd threads;
           nothing better to do. */
        error_report_err(local_err);
        exit(EXIT_FAILURE);
    }

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return 0;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        mis->from_src_file = f;
        /* Postcopy has a standalone thread to do the vm load */
        qemu_file_set_blocking(f, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(f);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * fault thread will still be waiting), so that we can receive
         * commands from the source now, and answer them if needed.  The
         * fault thread will be woken up afterwards until we are sure
         * that the source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    Error *local_err = NULL;

    if (postcopy_try_recover(f)) {
        return;
    }

    if (migration_incoming_setup(f, &local_err)) {
        error_propagate(errp, local_err);
        return;
    }
    migration_incoming_process();
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    bool start_migration;

    if (!mis->from_src_file) {
        /* The first connection (multifd may have multiple) */
        QEMUFile *f = qemu_fopen_channel_input(ioc);

        /* If it's a recovery, we're done */
        if (postcopy_try_recover(f)) {
            return;
        }

        if (migration_incoming_setup(f, &local_err)) {
            error_propagate(errp, local_err);
            return;
        }

        /*
         * Common migration only needs one channel, so we can start
         * right now.  Multifd needs more than one channel, so we wait.
         */
        start_migration = !migrate_use_multifd();
    } else {
        /* Multiple connections */
        assert(migrate_use_multifd());
        start_migration = multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (start_migration) {
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    bool all_channels;

    all_channels = multifd_recv_all_channels_created();

    return all_channels && mis->from_src_file != NULL;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}
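/*
 * Illustrative only: the PING that migrate_send_rp_pong() answers is a
 * command the source embeds in the forward migration stream (see
 * qemu_savevm_send_ping() in savevm.c); the destination echoes the
 * 32-bit sequence value back so the source can tell how far the
 * destination has progressed through the stream.
 */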
void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part.  It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (fault thread is still paused),
     * and it's ok even not taking the mutex.  However the best way is
     * to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value = g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_tls_authz = true;
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}

AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}
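/*
 * Illustrative only, using the defaults defined at the top of this file:
 * the announce timer emits 'rounds' (5) batches of RARP/ARP packets,
 * with the spacing between successive batches growing from 'initial'
 * (50 ms) in 'step' (100 ms) increments, clamped to 'max' (550 ms).
 */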
/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;
    }
}

bool migration_is_running(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
        return true;

    default:
        return false;
    }
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;
    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_counters.transferred;
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = ram_counters.duplicate;
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = ram_counters.normal;
    info->ram->normal_bytes = ram_counters.normal *
        qemu_target_page_size();
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = qemu_target_page_size();
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;

    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->has_compression = true;
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->has_disk = true;
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

static void populate_vfio_info(MigrationInfo *info)
{
#ifdef CONFIG_VFIO
    if (vfio_mig_active()) {
        info->has_vfio = true;
        info->vfio = g_malloc0(sizeof(*info->vfio));
        info->vfio->transferred = vfio_mig_bytes_transferred();
    }
#endif
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = s->state;
}
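/*
 * Illustrative only: the info assembled above is what the QMP
 * 'query-migrate' command (qmp_query_migrate() below) returns, e.g.
 *
 *     -> { "execute": "query-migrate" }
 *     <- { "return": { "status": "active",
 *                      "ram": { "transferred": 123456, ... } } }
 */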
/**
 * @migration_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /* This check is reasonably expensive, so only do it when it's being
         * set for the first time; also, it's only the destination that needs
         * special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    return true;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
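/*
 * Illustrative only: a typical QMP use of the command above is
 *
 *     -> { "execute": "migrate-set-capabilities",
 *          "arguments": { "capabilities": [
 *              { "capability": "postcopy-ram", "state": true } ] } }
 *     <- { "return": {} }
 */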
/*
 * Check whether the parameters are valid.  Error will be put into errp
 * (if provided).  Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                    stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "is invalid, it should be in the range of 0 to 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "is invalid, it should be in the range of 0 to 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "is invalid, it should be bigger than target page size"
                   " and a power of 2");
        return false;
    }
    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "is invalid, it must be less than 100000 ms");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "is invalid, it must be less than 100000 ms");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "is invalid, it must be in the range of 0 to 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "is invalid, it must be in the range of 1 to 10000 ms");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

    return true;
}
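/*
 * Illustrative only: migrate_params_test_apply() below copies the current
 * parameters into a scratch MigrationParameters and overlays the requested
 * changes, so that migrate_params_check() can validate the combined result
 * before migrate_params_apply() mutates the real state.  See
 * qmp_migrate_set_parameters() for the sequence:
 *
 *     migrate_params_test_apply(params, &tmp);
 *     if (!migrate_params_check(&tmp, errp)) {
 *         return;
 *     }
 *     migrate_params_apply(params, errp);
 */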
static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }

    if (params->has_tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}

static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}

void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->has_tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        qobject_unref(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->has_tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        qobject_unref(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params, errp);
}
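/*
 * Illustrative only: setting parameters over QMP looks like
 *
 *     -> { "execute": "migrate-set-parameters",
 *          "arguments": { "max-bandwidth": 33554432,
 *                         "downtime-limit": 500 } }
 *     <- { "return": {} }
 *
 * where the values are just examples (32 MiB/s, 500 ms).  The rate limit
 * actually applied per 100 ms window is max-bandwidth / XFER_LIMIT_RATIO
 * (see migrate_params_apply() above).
 */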
void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * we don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static MigrationCapabilityStatusList *migrate_cap_add(
    MigrationCapabilityStatusList *list,
    MigrationCapability index,
    bool state)
{
    MigrationCapabilityStatusList *cap;

    cap = g_new0(MigrationCapabilityStatusList, 1);
    cap->value = g_new0(MigrationCapabilityStatus, 1);
    cap->value->capability = index;
    cap->value->state = state;
    cap->next = list;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap;

    cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}
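/*
 * Note on migrate_fd_cleanup() below: it temporarily drops the iothread
 * lock around qemu_thread_join() so that a migration thread which is
 * trying to take the BQL cannot deadlock against us, and it detaches
 * to_dst_file under qemu_file_lock but closes it outside the lock to
 * keep the critical section short.
 */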
static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        qemu_fclose(tmp);
    }

    assert(!migration_is_active(s));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for bh, because it may be called when
     * there're already no other refs
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
    MigrationState *s = opaque;
    migrate_fd_cleanup(s);
    object_unref(OBJECT(s));
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (!s->error) {
        s->error = error_copy(error);
    }
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}

static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    if (s->rp_state.from_dst_file) {
        /* shutdown the rp socket, causing the rp thread to shut down */
        qemu_file_shutdown(s->rp_state.from_dst_file);
    }

    do {
        old_state = s->state;
        if (!migration_is_running(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
1822 */ 1823 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 1824 qemu_file_shutdown(f); 1825 } 1826 if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { 1827 Error *local_err = NULL; 1828 1829 bdrv_invalidate_cache_all(&local_err); 1830 if (local_err) { 1831 error_report_err(local_err); 1832 } else { 1833 s->block_inactive = false; 1834 } 1835 } 1836 } 1837 1838 void add_migration_state_change_notifier(Notifier *notify) 1839 { 1840 notifier_list_add(&migration_state_notifiers, notify); 1841 } 1842 1843 void remove_migration_state_change_notifier(Notifier *notify) 1844 { 1845 notifier_remove(notify); 1846 } 1847 1848 bool migration_in_setup(MigrationState *s) 1849 { 1850 return s->state == MIGRATION_STATUS_SETUP; 1851 } 1852 1853 bool migration_has_finished(MigrationState *s) 1854 { 1855 return s->state == MIGRATION_STATUS_COMPLETED; 1856 } 1857 1858 bool migration_has_failed(MigrationState *s) 1859 { 1860 return (s->state == MIGRATION_STATUS_CANCELLED || 1861 s->state == MIGRATION_STATUS_FAILED); 1862 } 1863 1864 bool migration_in_postcopy(void) 1865 { 1866 MigrationState *s = migrate_get_current(); 1867 1868 switch (s->state) { 1869 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1870 case MIGRATION_STATUS_POSTCOPY_PAUSED: 1871 case MIGRATION_STATUS_POSTCOPY_RECOVER: 1872 return true; 1873 default: 1874 return false; 1875 } 1876 } 1877 1878 bool migration_in_postcopy_after_devices(MigrationState *s) 1879 { 1880 return migration_in_postcopy() && s->postcopy_after_devices; 1881 } 1882 1883 bool migration_in_incoming_postcopy(void) 1884 { 1885 PostcopyState ps = postcopy_state_get(); 1886 1887 return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END; 1888 } 1889 1890 bool migration_is_idle(void) 1891 { 1892 MigrationState *s = current_migration; 1893 1894 if (!s) { 1895 return true; 1896 } 1897 1898 switch (s->state) { 1899 case MIGRATION_STATUS_NONE: 1900 case MIGRATION_STATUS_CANCELLED: 1901 case MIGRATION_STATUS_COMPLETED: 1902 case MIGRATION_STATUS_FAILED: 1903 return true; 1904 case MIGRATION_STATUS_SETUP: 1905 case MIGRATION_STATUS_CANCELLING: 1906 case MIGRATION_STATUS_ACTIVE: 1907 case MIGRATION_STATUS_POSTCOPY_ACTIVE: 1908 case MIGRATION_STATUS_COLO: 1909 case MIGRATION_STATUS_PRE_SWITCHOVER: 1910 case MIGRATION_STATUS_DEVICE: 1911 case MIGRATION_STATUS_WAIT_UNPLUG: 1912 return false; 1913 case MIGRATION_STATUS__MAX: 1914 g_assert_not_reached(); 1915 } 1916 1917 return false; 1918 } 1919 1920 bool migration_is_active(MigrationState *s) 1921 { 1922 return (s->state == MIGRATION_STATUS_ACTIVE || 1923 s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 1924 } 1925 1926 void migrate_init(MigrationState *s) 1927 { 1928 /* 1929 * Reinitialise all migration state, except 1930 * parameters/capabilities that the user set, and 1931 * locks. 
1932 */ 1933 s->cleanup_bh = 0; 1934 s->to_dst_file = NULL; 1935 s->state = MIGRATION_STATUS_NONE; 1936 s->rp_state.from_dst_file = NULL; 1937 s->rp_state.error = false; 1938 s->mbps = 0.0; 1939 s->pages_per_second = 0.0; 1940 s->downtime = 0; 1941 s->expected_downtime = 0; 1942 s->setup_time = 0; 1943 s->start_postcopy = false; 1944 s->postcopy_after_devices = false; 1945 s->migration_thread_running = false; 1946 error_free(s->error); 1947 s->error = NULL; 1948 s->hostname = NULL; 1949 1950 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 1951 1952 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1953 s->total_time = 0; 1954 s->vm_was_running = false; 1955 s->iteration_initial_bytes = 0; 1956 s->threshold_size = 0; 1957 } 1958 1959 static GSList *migration_blockers; 1960 1961 int migrate_add_blocker(Error *reason, Error **errp) 1962 { 1963 if (only_migratable) { 1964 error_propagate_prepend(errp, error_copy(reason), 1965 "disallowing migration blocker " 1966 "(--only-migratable) for: "); 1967 return -EACCES; 1968 } 1969 1970 if (migration_is_idle()) { 1971 migration_blockers = g_slist_prepend(migration_blockers, reason); 1972 return 0; 1973 } 1974 1975 error_propagate_prepend(errp, error_copy(reason), 1976 "disallowing migration blocker " 1977 "(migration in progress) for: "); 1978 return -EBUSY; 1979 } 1980 1981 void migrate_del_blocker(Error *reason) 1982 { 1983 migration_blockers = g_slist_remove(migration_blockers, reason); 1984 } 1985 1986 void qmp_migrate_incoming(const char *uri, Error **errp) 1987 { 1988 Error *local_err = NULL; 1989 static bool once = true; 1990 1991 if (!deferred_incoming) { 1992 error_setg(errp, "For use with '-incoming defer'"); 1993 return; 1994 } 1995 if (!once) { 1996 error_setg(errp, "The incoming migration has already been started"); 1997 return; 1998 } 1999 2000 qemu_start_incoming_migration(uri, &local_err); 2001 2002 if (local_err) { 2003 error_propagate(errp, local_err); 2004 return; 2005 } 2006 2007 once = false; 2008 } 2009 2010 void qmp_migrate_recover(const char *uri, Error **errp) 2011 { 2012 MigrationIncomingState *mis = migration_incoming_get_current(); 2013 2014 if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2015 error_setg(errp, "Migrate recover can only be run " 2016 "when postcopy is paused."); 2017 return; 2018 } 2019 2020 if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, 2021 false, true) == true) { 2022 error_setg(errp, "Migrate recovery is triggered already"); 2023 return; 2024 } 2025 2026 /* 2027 * Note that this call will never start a real migration; it will 2028 * only re-setup the migration stream and poke existing migration 2029 * to continue using that newly established channel. 
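 *
 * Illustrative QMP flow for a postcopy recovery (a sketch: the commands
 * and arguments are the interface implemented here, but the addresses
 * are hypothetical):
 *
 *   destination: { "execute": "migrate-recover",
 *                  "arguments": { "uri": "tcp:0:4444" } }
 *   source:      { "execute": "migrate",
 *                  "arguments": { "uri": "tcp:dst-host:4444",
 *                                 "resume": true } }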
*/ 2031 qemu_start_incoming_migration(uri, errp); 2032 } 2033 2034 void qmp_migrate_pause(Error **errp) 2035 { 2036 MigrationState *ms = migrate_get_current(); 2037 MigrationIncomingState *mis = migration_incoming_get_current(); 2038 int ret; 2039 2040 if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2041 /* Source side, during postcopy */ 2042 qemu_mutex_lock(&ms->qemu_file_lock); 2043 ret = qemu_file_shutdown(ms->to_dst_file); 2044 qemu_mutex_unlock(&ms->qemu_file_lock); 2045 if (ret) { 2046 error_setg(errp, "Failed to pause source migration"); 2047 } 2048 return; 2049 } 2050 2051 if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 2052 ret = qemu_file_shutdown(mis->from_src_file); 2053 if (ret) { 2054 error_setg(errp, "Failed to pause destination migration"); 2055 } 2056 return; 2057 } 2058 2059 error_setg(errp, "migrate-pause is currently only supported " 2060 "during postcopy-active state"); 2061 } 2062 2063 bool migration_is_blocked(Error **errp) 2064 { 2065 if (qemu_savevm_state_blocked(errp)) { 2066 return true; 2067 } 2068 2069 if (migration_blockers) { 2070 error_propagate(errp, error_copy(migration_blockers->data)); 2071 return true; 2072 } 2073 2074 return false; 2075 } 2076 2077 /* Return true to continue the migration, or false if an error was detected */ 2078 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, 2079 bool resume, Error **errp) 2080 { 2081 Error *local_err = NULL; 2082 2083 if (resume) { 2084 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2085 error_setg(errp, "Cannot resume if there is no " 2086 "paused migration"); 2087 return false; 2088 } 2089 2090 /* 2091 * Postcopy recovery won't work well with the release-ram 2092 * capability, since release-ram drops the page buffer as 2093 * soon as the page is put into the send buffer. So if a 2094 * network failure happens, any page buffers that have 2095 * not yet reached the destination VM but have already been 2096 * sent from the source VM will be lost forever. So refuse 2097 * to resume such a postcopy migration. 2098 * Luckily release-ram was designed to be used only when src 2099 * and destination VMs are on the same host, so in practice it 2100 * should be fine.
2101 */ 2102 if (migrate_release_ram()) { 2103 error_setg(errp, "Postcopy recovery cannot work " 2104 "when release-ram capability is set"); 2105 return false; 2106 } 2107 2108 /* This is a resume, skip init status */ 2109 return true; 2110 } 2111 2112 if (migration_is_running(s->state)) { 2113 error_setg(errp, QERR_MIGRATION_ACTIVE); 2114 return false; 2115 } 2116 2117 if (runstate_check(RUN_STATE_INMIGRATE)) { 2118 error_setg(errp, "Guest is waiting for an incoming migration"); 2119 return false; 2120 } 2121 2122 if (migration_is_blocked(errp)) { 2123 return false; 2124 } 2125 2126 if (blk || blk_inc) { 2127 if (migrate_use_block() || migrate_use_block_incremental()) { 2128 error_setg(errp, "Command options are incompatible with " 2129 "current migration capabilities"); 2130 return false; 2131 } 2132 migrate_set_block_enabled(true, &local_err); 2133 if (local_err) { 2134 error_propagate(errp, local_err); 2135 return false; 2136 } 2137 s->must_remove_block_options = true; 2138 } 2139 2140 if (blk_inc) { 2141 migrate_set_block_incremental(s, true); 2142 } 2143 2144 migrate_init(s); 2145 /* 2146 * set ram_counters memory to zero for a 2147 * new migration 2148 */ 2149 memset(&ram_counters, 0, sizeof(ram_counters)); 2150 2151 return true; 2152 } 2153 2154 void qmp_migrate(const char *uri, bool has_blk, bool blk, 2155 bool has_inc, bool inc, bool has_detach, bool detach, 2156 bool has_resume, bool resume, Error **errp) 2157 { 2158 Error *local_err = NULL; 2159 MigrationState *s = migrate_get_current(); 2160 const char *p = NULL; 2161 2162 if (!migrate_prepare(s, has_blk && blk, has_inc && inc, 2163 has_resume && resume, errp)) { 2164 /* Error detected, put into errp */ 2165 return; 2166 } 2167 2168 if (strstart(uri, "tcp:", &p) || 2169 strstart(uri, "unix:", NULL) || 2170 strstart(uri, "vsock:", NULL)) { 2171 socket_start_outgoing_migration(s, p ? 
p : uri, &local_err); 2172 #ifdef CONFIG_RDMA 2173 } else if (strstart(uri, "rdma:", &p)) { 2174 rdma_start_outgoing_migration(s, p, &local_err); 2175 #endif 2176 } else if (strstart(uri, "exec:", &p)) { 2177 exec_start_outgoing_migration(s, p, &local_err); 2178 } else if (strstart(uri, "fd:", &p)) { 2179 fd_start_outgoing_migration(s, p, &local_err); 2180 } else { 2181 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 2182 "a valid migration protocol"); 2183 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2184 MIGRATION_STATUS_FAILED); 2185 block_cleanup_parameters(s); 2186 return; 2187 } 2188 2189 if (local_err) { 2190 migrate_fd_error(s, local_err); 2191 error_propagate(errp, local_err); 2192 return; 2193 } 2194 } 2195 2196 void qmp_migrate_cancel(Error **errp) 2197 { 2198 migrate_fd_cancel(migrate_get_current()); 2199 } 2200 2201 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2202 { 2203 MigrationState *s = migrate_get_current(); 2204 if (s->state != state) { 2205 error_setg(errp, "Migration not in expected state: %s", 2206 MigrationStatus_str(s->state)); 2207 return; 2208 } 2209 qemu_sem_post(&s->pause_sem); 2210 } 2211 2212 void qmp_migrate_set_cache_size(int64_t value, Error **errp) 2213 { 2214 MigrateSetParameters p = { 2215 .has_xbzrle_cache_size = true, 2216 .xbzrle_cache_size = value, 2217 }; 2218 2219 qmp_migrate_set_parameters(&p, errp); 2220 } 2221 2222 int64_t qmp_query_migrate_cache_size(Error **errp) 2223 { 2224 return migrate_xbzrle_cache_size(); 2225 } 2226 2227 void qmp_migrate_set_speed(int64_t value, Error **errp) 2228 { 2229 MigrateSetParameters p = { 2230 .has_max_bandwidth = true, 2231 .max_bandwidth = value, 2232 }; 2233 2234 qmp_migrate_set_parameters(&p, errp); 2235 } 2236 2237 void qmp_migrate_set_downtime(double value, Error **errp) 2238 { 2239 if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { 2240 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 2241 "downtime_limit", 2242 "an integer in the range of 0 to " 2243 stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds"); 2244 return; 2245 } 2246 2247 value *= 1000; /* Convert to milliseconds */ 2248 2249 MigrateSetParameters p = { 2250 .has_downtime_limit = true, 2251 .downtime_limit = (int64_t)value, 2252 }; 2253 2254 qmp_migrate_set_parameters(&p, errp); 2255 } 2256 2257 bool migrate_release_ram(void) 2258 { 2259 MigrationState *s; 2260 2261 s = migrate_get_current(); 2262 2263 return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; 2264 } 2265 2266 bool migrate_postcopy_ram(void) 2267 { 2268 MigrationState *s; 2269 2270 s = migrate_get_current(); 2271 2272 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; 2273 } 2274 2275 bool migrate_postcopy(void) 2276 { 2277 return migrate_postcopy_ram() || migrate_dirty_bitmaps(); 2278 } 2279 2280 bool migrate_auto_converge(void) 2281 { 2282 MigrationState *s; 2283 2284 s = migrate_get_current(); 2285 2286 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 2287 } 2288 2289 bool migrate_zero_blocks(void) 2290 { 2291 MigrationState *s; 2292 2293 s = migrate_get_current(); 2294 2295 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 2296 } 2297 2298 bool migrate_postcopy_blocktime(void) 2299 { 2300 MigrationState *s; 2301 2302 s = migrate_get_current(); 2303 2304 return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; 2305 } 2306 2307 bool migrate_use_compression(void) 2308 { 2309 MigrationState *s; 2310 2311 s = migrate_get_current(); 2312 2313 return 
s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 2314 } 2315 2316 int migrate_compress_level(void) 2317 { 2318 MigrationState *s; 2319 2320 s = migrate_get_current(); 2321 2322 return s->parameters.compress_level; 2323 } 2324 2325 int migrate_compress_threads(void) 2326 { 2327 MigrationState *s; 2328 2329 s = migrate_get_current(); 2330 2331 return s->parameters.compress_threads; 2332 } 2333 2334 int migrate_compress_wait_thread(void) 2335 { 2336 MigrationState *s; 2337 2338 s = migrate_get_current(); 2339 2340 return s->parameters.compress_wait_thread; 2341 } 2342 2343 int migrate_decompress_threads(void) 2344 { 2345 MigrationState *s; 2346 2347 s = migrate_get_current(); 2348 2349 return s->parameters.decompress_threads; 2350 } 2351 2352 bool migrate_dirty_bitmaps(void) 2353 { 2354 MigrationState *s; 2355 2356 s = migrate_get_current(); 2357 2358 return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; 2359 } 2360 2361 bool migrate_ignore_shared(void) 2362 { 2363 MigrationState *s; 2364 2365 s = migrate_get_current(); 2366 2367 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; 2368 } 2369 2370 bool migrate_validate_uuid(void) 2371 { 2372 MigrationState *s; 2373 2374 s = migrate_get_current(); 2375 2376 return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; 2377 } 2378 2379 bool migrate_use_events(void) 2380 { 2381 MigrationState *s; 2382 2383 s = migrate_get_current(); 2384 2385 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 2386 } 2387 2388 bool migrate_use_multifd(void) 2389 { 2390 MigrationState *s; 2391 2392 s = migrate_get_current(); 2393 2394 return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; 2395 } 2396 2397 bool migrate_pause_before_switchover(void) 2398 { 2399 MigrationState *s; 2400 2401 s = migrate_get_current(); 2402 2403 return s->enabled_capabilities[ 2404 MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; 2405 } 2406 2407 int migrate_multifd_channels(void) 2408 { 2409 MigrationState *s; 2410 2411 s = migrate_get_current(); 2412 2413 return s->parameters.multifd_channels; 2414 } 2415 2416 MultiFDCompression migrate_multifd_compression(void) 2417 { 2418 MigrationState *s; 2419 2420 s = migrate_get_current(); 2421 2422 return s->parameters.multifd_compression; 2423 } 2424 2425 int migrate_multifd_zlib_level(void) 2426 { 2427 MigrationState *s; 2428 2429 s = migrate_get_current(); 2430 2431 return s->parameters.multifd_zlib_level; 2432 } 2433 2434 int migrate_multifd_zstd_level(void) 2435 { 2436 MigrationState *s; 2437 2438 s = migrate_get_current(); 2439 2440 return s->parameters.multifd_zstd_level; 2441 } 2442 2443 int migrate_use_xbzrle(void) 2444 { 2445 MigrationState *s; 2446 2447 s = migrate_get_current(); 2448 2449 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 2450 } 2451 2452 int64_t migrate_xbzrle_cache_size(void) 2453 { 2454 MigrationState *s; 2455 2456 s = migrate_get_current(); 2457 2458 return s->parameters.xbzrle_cache_size; 2459 } 2460 2461 static int64_t migrate_max_postcopy_bandwidth(void) 2462 { 2463 MigrationState *s; 2464 2465 s = migrate_get_current(); 2466 2467 return s->parameters.max_postcopy_bandwidth; 2468 } 2469 2470 bool migrate_use_block(void) 2471 { 2472 MigrationState *s; 2473 2474 s = migrate_get_current(); 2475 2476 return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; 2477 } 2478 2479 bool migrate_use_return_path(void) 2480 { 2481 MigrationState *s; 2482 2483 s = migrate_get_current(); 2484 2485 return 
s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; 2486 } 2487 2488 bool migrate_use_block_incremental(void) 2489 { 2490 MigrationState *s; 2491 2492 s = migrate_get_current(); 2493 2494 return s->parameters.block_incremental; 2495 } 2496 2497 /* migration thread support */ 2498 /* 2499 * Something bad happened to the RP stream, mark an error 2500 * The caller shall print or trace something to indicate why 2501 */ 2502 static void mark_source_rp_bad(MigrationState *s) 2503 { 2504 s->rp_state.error = true; 2505 } 2506 2507 static struct rp_cmd_args { 2508 ssize_t len; /* -1 = variable */ 2509 const char *name; 2510 } rp_cmd_args[] = { 2511 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2512 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2513 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2514 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2515 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2516 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2517 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2518 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2519 }; 2520 2521 /* 2522 * Process a request for pages received on the return path, 2523 * We're allowed to send more than requested (e.g. to round to our page size) 2524 * and we don't need to send pages that have already been sent. 2525 */ 2526 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 2527 ram_addr_t start, size_t len) 2528 { 2529 long our_host_ps = qemu_real_host_page_size; 2530 2531 trace_migrate_handle_rp_req_pages(rbname, start, len); 2532 2533 /* 2534 * Since we currently insist on matching page sizes, just sanity check 2535 * we're being asked for whole host pages. 2536 */ 2537 if (start & (our_host_ps - 1) || 2538 (len & (our_host_ps - 1))) { 2539 error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT 2540 " len: %zd", __func__, start, len); 2541 mark_source_rp_bad(ms); 2542 return; 2543 } 2544 2545 if (ram_save_queue_pages(rbname, start, len)) { 2546 mark_source_rp_bad(ms); 2547 } 2548 } 2549 2550 /* Return true to retry, false to quit */ 2551 static bool postcopy_pause_return_path_thread(MigrationState *s) 2552 { 2553 trace_postcopy_pause_return_path(); 2554 2555 qemu_sem_wait(&s->postcopy_pause_rp_sem); 2556 2557 trace_postcopy_pause_return_path_continued(); 2558 2559 return true; 2560 } 2561 2562 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) 2563 { 2564 RAMBlock *block = qemu_ram_block_by_name(block_name); 2565 2566 if (!block) { 2567 error_report("%s: invalid block name '%s'", __func__, block_name); 2568 return -EINVAL; 2569 } 2570 2571 /* Fetch the received bitmap and refresh the dirty bitmap */ 2572 return ram_dirty_bitmap_reload(s, block); 2573 } 2574 2575 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) 2576 { 2577 trace_source_return_path_thread_resume_ack(value); 2578 2579 if (value != MIGRATION_RESUME_ACK_VALUE) { 2580 error_report("%s: illegal resume_ack value %"PRIu32, 2581 __func__, value); 2582 return -1; 2583 } 2584 2585 /* Now both sides are active. 
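 * The destination replied with MIG_RP_MSG_RESUME_ACK carrying
 * MIGRATION_RESUME_ACK_VALUE after rewiring its channels, so we can
 * flip back from POSTCOPY_RECOVER to POSTCOPY_ACTIVE below.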
*/ 2586 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, 2587 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2588 2589 /* Notify the send thread that it is time to continue sending pages */ 2590 qemu_sem_post(&s->rp_state.rp_sem); 2591 2592 return 0; 2593 } 2594 2595 /* 2596 * Handles messages sent on the return path towards the source VM 2597 * 2598 */ 2599 static void *source_return_path_thread(void *opaque) 2600 { 2601 MigrationState *ms = opaque; 2602 QEMUFile *rp = ms->rp_state.from_dst_file; 2603 uint16_t header_len, header_type; 2604 uint8_t buf[512]; 2605 uint32_t tmp32, sibling_error; 2606 ram_addr_t start = 0; /* =0 to silence warning */ 2607 size_t len = 0, expected_len; 2608 int res; 2609 2610 trace_source_return_path_thread_entry(); 2611 rcu_register_thread(); 2612 2613 retry: 2614 while (!ms->rp_state.error && !qemu_file_get_error(rp) && 2615 migration_is_setup_or_active(ms->state)) { 2616 trace_source_return_path_thread_loop_top(); 2617 header_type = qemu_get_be16(rp); 2618 header_len = qemu_get_be16(rp); 2619 2620 if (qemu_file_get_error(rp)) { 2621 mark_source_rp_bad(ms); 2622 goto out; 2623 } 2624 2625 if (header_type >= MIG_RP_MSG_MAX || 2626 header_type == MIG_RP_MSG_INVALID) { 2627 error_report("RP: Received invalid message 0x%04x length 0x%04x", 2628 header_type, header_len); 2629 mark_source_rp_bad(ms); 2630 goto out; 2631 } 2632 2633 if ((rp_cmd_args[header_type].len != -1 && 2634 header_len != rp_cmd_args[header_type].len) || 2635 header_len > sizeof(buf)) { 2636 error_report("RP: Received '%s' message (0x%04x) with " 2637 "incorrect length %d expecting %zu", 2638 rp_cmd_args[header_type].name, header_type, header_len, 2639 (size_t)rp_cmd_args[header_type].len); 2640 mark_source_rp_bad(ms); 2641 goto out; 2642 } 2643 2644 /* We know we've got a valid header by this point */ 2645 res = qemu_get_buffer(rp, buf, header_len); 2646 if (res != header_len) { 2647 error_report("RP: Failed reading data for message 0x%04x" 2648 " read %d expected %d", 2649 header_type, res, header_len); 2650 mark_source_rp_bad(ms); 2651 goto out; 2652 } 2653 2654 /* OK, we have the message and the data */ 2655 switch (header_type) { 2656 case MIG_RP_MSG_SHUT: 2657 sibling_error = ldl_be_p(buf); 2658 trace_source_return_path_thread_shut(sibling_error); 2659 if (sibling_error) { 2660 error_report("RP: Sibling indicated error %d", sibling_error); 2661 mark_source_rp_bad(ms); 2662 } 2663 /* 2664 * We'll let the main thread deal with closing the RP; 2665 * we could do a shutdown(2) on it, but we're the only user 2666 * anyway, so there's nothing gained.
2667 */ 2668 goto out; 2669 2670 case MIG_RP_MSG_PONG: 2671 tmp32 = ldl_be_p(buf); 2672 trace_source_return_path_thread_pong(tmp32); 2673 break; 2674 2675 case MIG_RP_MSG_REQ_PAGES: 2676 start = ldq_be_p(buf); 2677 len = ldl_be_p(buf + 8); 2678 migrate_handle_rp_req_pages(ms, NULL, start, len); 2679 break; 2680 2681 case MIG_RP_MSG_REQ_PAGES_ID: 2682 expected_len = 12 + 1; /* header + termination */ 2683 2684 if (header_len >= expected_len) { 2685 start = ldq_be_p(buf); 2686 len = ldl_be_p(buf + 8); 2687 /* Now we expect an idstr */ 2688 tmp32 = buf[12]; /* Length of the following idstr */ 2689 buf[13 + tmp32] = '\0'; 2690 expected_len += tmp32; 2691 } 2692 if (header_len != expected_len) { 2693 error_report("RP: Req_Page_id with length %d expecting %zd", 2694 header_len, expected_len); 2695 mark_source_rp_bad(ms); 2696 goto out; 2697 } 2698 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); 2699 break; 2700 2701 case MIG_RP_MSG_RECV_BITMAP: 2702 if (header_len < 1) { 2703 error_report("%s: missing block name", __func__); 2704 mark_source_rp_bad(ms); 2705 goto out; 2706 } 2707 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 2708 buf[buf[0] + 1] = '\0'; 2709 if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { 2710 mark_source_rp_bad(ms); 2711 goto out; 2712 } 2713 break; 2714 2715 case MIG_RP_MSG_RESUME_ACK: 2716 tmp32 = ldl_be_p(buf); 2717 if (migrate_handle_rp_resume_ack(ms, tmp32)) { 2718 mark_source_rp_bad(ms); 2719 goto out; 2720 } 2721 break; 2722 2723 default: 2724 break; 2725 } 2726 } 2727 2728 out: 2729 res = qemu_file_get_error(rp); 2730 if (res) { 2731 if (res == -EIO && migration_in_postcopy()) { 2732 /* 2733 * Maybe there is something we can do: it looks like a 2734 * network down issue, and we pause for a recovery. 2735 */ 2736 if (postcopy_pause_return_path_thread(ms)) { 2737 /* Reload rp, reset the rest */ 2738 if (rp != ms->rp_state.from_dst_file) { 2739 qemu_fclose(rp); 2740 rp = ms->rp_state.from_dst_file; 2741 } 2742 ms->rp_state.error = false; 2743 goto retry; 2744 } 2745 } 2746 2747 trace_source_return_path_thread_bad_end(); 2748 mark_source_rp_bad(ms); 2749 } 2750 2751 trace_source_return_path_thread_end(); 2752 ms->rp_state.from_dst_file = NULL; 2753 qemu_fclose(rp); 2754 rcu_unregister_thread(); 2755 return NULL; 2756 } 2757 2758 static int open_return_path_on_source(MigrationState *ms, 2759 bool create_thread) 2760 { 2761 2762 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2763 if (!ms->rp_state.from_dst_file) { 2764 return -1; 2765 } 2766 2767 trace_open_return_path_on_source(); 2768 2769 if (!create_thread) { 2770 /* We're done */ 2771 return 0; 2772 } 2773 2774 qemu_thread_create(&ms->rp_state.rp_thread, "return path", 2775 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2776 2777 trace_open_return_path_on_source_continue(); 2778 2779 return 0; 2780 } 2781 2782 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ 2783 static int await_return_path_close_on_source(MigrationState *ms) 2784 { 2785 /* 2786 * If this is a normal exit then the destination will send a SHUT and the 2787 * rp_thread will exit, however if there's an error we need to cause 2788 * it to exit. 2789 */ 2790 if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { 2791 /* 2792 * shutdown(2), if we have it, will cause it to unblock if it's stuck 2793 * waiting for the destination. 
2794 */ 2795 qemu_file_shutdown(ms->rp_state.from_dst_file); 2796 mark_source_rp_bad(ms); 2797 } 2798 trace_await_return_path_close_on_source_joining(); 2799 qemu_thread_join(&ms->rp_state.rp_thread); 2800 trace_await_return_path_close_on_source_close(); 2801 return ms->rp_state.error; 2802 } 2803 2804 /* 2805 * Switch from normal iteration to postcopy 2806 * Returns non-0 on error 2807 */ 2808 static int postcopy_start(MigrationState *ms) 2809 { 2810 int ret; 2811 QIOChannelBuffer *bioc; 2812 QEMUFile *fb; 2813 int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 2814 int64_t bandwidth = migrate_max_postcopy_bandwidth(); 2815 bool restart_block = false; 2816 int cur_state = MIGRATION_STATUS_ACTIVE; 2817 if (!migrate_pause_before_switchover()) { 2818 migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, 2819 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2820 } 2821 2822 trace_postcopy_start(); 2823 qemu_mutex_lock_iothread(); 2824 trace_postcopy_start_set_run(); 2825 2826 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 2827 global_state_store(); 2828 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 2829 if (ret < 0) { 2830 goto fail; 2831 } 2832 2833 ret = migration_maybe_pause(ms, &cur_state, 2834 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2835 if (ret < 0) { 2836 goto fail; 2837 } 2838 2839 ret = bdrv_inactivate_all(); 2840 if (ret < 0) { 2841 goto fail; 2842 } 2843 restart_block = true; 2844 2845 /* 2846 * Cause any non-postcopiable, but iterative devices to 2847 * send out their final data. 2848 */ 2849 qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); 2850 2851 /* 2852 * in Finish migrate and with the io-lock held everything should 2853 * be quiet, but we've potentially still got dirty pages and we 2854 * need to tell the destination to throw any pages it's already received 2855 * that are dirty 2856 */ 2857 if (migrate_postcopy_ram()) { 2858 if (ram_postcopy_send_discard_bitmap(ms)) { 2859 error_report("postcopy send discard bitmap failed"); 2860 goto fail; 2861 } 2862 } 2863 2864 /* 2865 * send rest of state - note things that are doing postcopy 2866 * will notice we're in POSTCOPY_ACTIVE and not actually 2867 * wrap their state up here 2868 */ 2869 /* 0 max-postcopy-bandwidth means unlimited */ 2870 if (!bandwidth) { 2871 qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX); 2872 } else { 2873 qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO); 2874 } 2875 if (migrate_postcopy_ram()) { 2876 /* Ping just for debugging, helps line traces up */ 2877 qemu_savevm_send_ping(ms->to_dst_file, 2); 2878 } 2879 2880 /* 2881 * While loading the device state we may trigger page transfer 2882 * requests and the fd must be free to process those, and thus 2883 * the destination must read the whole device state off the fd before 2884 * it starts processing it. Unfortunately the ad-hoc migration format 2885 * doesn't allow the destination to know the size to read without fully 2886 * parsing it through each devices load-state code (especially the open 2887 * coded devices that use get/put). 2888 * So we wrap the device state up in a package with a length at the start; 2889 * to do this we use a qemu_buf to hold the whole of the device state. 
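 *
 * A sketch of the resulting wire framing, as produced by
 * qemu_savevm_send_packaged() (sizes inferred from the savevm.c
 * code, so treat this as illustrative):
 *
 *   MIG_CMD_PACKAGED command header
 *   be32:         length of the package
 *   byte[length]: buffered device state, ending in the
 *                 LISTEN/RUN commands queued below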
*/ 2891 bioc = qio_channel_buffer_new(4096); 2892 qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); 2893 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 2894 object_unref(OBJECT(bioc)); 2895 2896 /* 2897 * Make sure the receiver can get incoming pages before we send the rest 2898 * of the state 2899 */ 2900 qemu_savevm_send_postcopy_listen(fb); 2901 2902 qemu_savevm_state_complete_precopy(fb, false, false); 2903 if (migrate_postcopy_ram()) { 2904 qemu_savevm_send_ping(fb, 3); 2905 } 2906 2907 qemu_savevm_send_postcopy_run(fb); 2908 2909 /* <><> end of stuff going into the package */ 2910 2911 /* Last point of recovery; as soon as we send the package the destination 2912 * can open devices and potentially start running. 2913 * Let's just check again that we've not got any errors. 2914 */ 2915 ret = qemu_file_get_error(ms->to_dst_file); 2916 if (ret) { 2917 error_report("postcopy_start: Migration stream errored (pre package)"); 2918 goto fail_closefb; 2919 } 2920 2921 restart_block = false; 2922 2923 /* Now send that blob */ 2924 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 2925 goto fail_closefb; 2926 } 2927 qemu_fclose(fb); 2928 2929 /* Send a notification to give anything that needs to act at the 2930 * transition to postcopy, after the device state, a chance to do so; 2931 * in particular spice needs to trigger a transition now 2932 */ 2933 ms->postcopy_after_devices = true; 2934 notifier_list_notify(&migration_state_notifiers, ms); 2935 2936 ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; 2937 2938 qemu_mutex_unlock_iothread(); 2939 2940 if (migrate_postcopy_ram()) { 2941 /* 2942 * Although this ping is just for debug, it could potentially be 2943 * used for getting a better measurement of downtime at the source. 2944 */ 2945 qemu_savevm_send_ping(ms->to_dst_file, 4); 2946 } 2947 2948 if (migrate_release_ram()) { 2949 ram_postcopy_migrated_memory_release(ms); 2950 } 2951 2952 ret = qemu_file_get_error(ms->to_dst_file); 2953 if (ret) { 2954 error_report("postcopy_start: Migration stream errored"); 2955 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2956 MIGRATION_STATUS_FAILED); 2957 } 2958 2959 return ret; 2960 2961 fail_closefb: 2962 qemu_fclose(fb); 2963 fail: 2964 migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, 2965 MIGRATION_STATUS_FAILED); 2966 if (restart_block) { 2967 /* A failure happened early enough that we know the destination hasn't 2968 * accessed block devices, so we're safe to recover. 2969 */ 2970 Error *local_err = NULL; 2971 2972 bdrv_invalidate_cache_all(&local_err); 2973 if (local_err) { 2974 error_report_err(local_err); 2975 } 2976 } 2977 qemu_mutex_unlock_iothread(); 2978 return -1; 2979 } 2980 2981 /** 2982 * migration_maybe_pause: Pause if required by 2983 * migrate_pause_before_switchover; called with the iothread locked. 2984 * Returns: 0 on success 2985 */ 2986 static int migration_maybe_pause(MigrationState *s, 2987 int *current_active_state, 2988 int new_state) 2989 { 2990 if (!migrate_pause_before_switchover()) { 2991 return 0; 2992 } 2993 2994 /* Since leaving this state is not atomic with posting the semaphore, 2995 * it's possible that someone could have issued multiple migrate_continue 2996 * commands and the semaphore is incorrectly positive at this point; 2997 * the docs say it's undefined to reinit a semaphore that's already 2998 * init'd, so use timedwait to eat up any existing posts.
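 *
 * The post normally comes from the user ending the pause; an
 * illustrative QMP exchange (the command and argument are the real
 * interface, the timing is up to the management layer):
 *
 *   { "execute": "migrate-continue",
 *     "arguments": { "state": "pre-switchover" } }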
*/ 3000 while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { 3001 /* This block intentionally left blank */ 3002 } 3003 3004 /* 3005 * If the migration is cancelled when it is in the completion phase, 3006 * the migration state is set to MIGRATION_STATUS_CANCELLING. 3007 * In that case we must not wait on the semaphore, otherwise we 3008 * would block forever on 'pause_sem'. 3009 */ 3010 if (s->state != MIGRATION_STATUS_CANCELLING) { 3011 qemu_mutex_unlock_iothread(); 3012 migrate_set_state(&s->state, *current_active_state, 3013 MIGRATION_STATUS_PRE_SWITCHOVER); 3014 qemu_sem_wait(&s->pause_sem); 3015 migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, 3016 new_state); 3017 *current_active_state = new_state; 3018 qemu_mutex_lock_iothread(); 3019 } 3020 3021 return s->state == new_state ? 0 : -EINVAL; 3022 } 3023 3024 /** 3025 * migration_completion: Used by migration_thread when there's not much left. 3026 * The caller 'breaks' the loop when this returns. 3027 * 3028 * @s: Current migration state 3029 */ 3030 static void migration_completion(MigrationState *s) 3031 { 3032 int ret; 3033 int current_active_state = s->state; 3034 3035 if (s->state == MIGRATION_STATUS_ACTIVE) { 3036 qemu_mutex_lock_iothread(); 3037 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3038 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); 3039 s->vm_was_running = runstate_is_running(); 3040 ret = global_state_store(); 3041 3042 if (!ret) { 3043 bool inactivate = !migrate_colo_enabled(); 3044 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 3045 if (ret >= 0) { 3046 ret = migration_maybe_pause(s, &current_active_state, 3047 MIGRATION_STATUS_DEVICE); 3048 } 3049 if (ret >= 0) { 3050 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); 3051 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, 3052 inactivate); 3053 } 3054 if (inactivate && ret >= 0) { 3055 s->block_inactive = true; 3056 } 3057 } 3058 qemu_mutex_unlock_iothread(); 3059 3060 if (ret < 0) { 3061 goto fail; 3062 } 3063 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3064 trace_migration_completion_postcopy_end(); 3065 3066 qemu_savevm_state_complete_postcopy(s->to_dst_file); 3067 trace_migration_completion_postcopy_end_after_complete(); 3068 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3069 goto fail; 3070 } 3071 3072 /* 3073 * If the rp was opened we must clean up the thread before 3074 * cleaning everything else up (since if there are no failures 3075 * it will wait for the destination to send its status in 3076 * a SHUT command). 3077 */ 3078 if (s->rp_state.from_dst_file) { 3079 int rp_error; 3080 trace_migration_return_path_end_before(); 3081 rp_error = await_return_path_close_on_source(s); 3082 trace_migration_return_path_end_after(rp_error); 3083 if (rp_error) { 3084 goto fail_invalidate; 3085 } 3086 } 3087 3088 if (qemu_file_get_error(s->to_dst_file)) { 3089 trace_migration_completion_file_err(); 3090 goto fail_invalidate; 3091 } 3092 3093 if (!migrate_colo_enabled()) { 3094 migrate_set_state(&s->state, current_active_state, 3095 MIGRATION_STATUS_COMPLETED); 3096 } 3097 3098 return; 3099 3100 fail_invalidate: 3101 /* If not doing postcopy, vm_start() will be called: let's regain 3102 * control on images.
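 * (bdrv_invalidate_cache_all() below re-activates the images that the
 * completion path inactivated, clearing s->block_inactive on success.)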
*/ 3104 if (s->state == MIGRATION_STATUS_ACTIVE || 3105 s->state == MIGRATION_STATUS_DEVICE) { 3106 Error *local_err = NULL; 3107 3108 qemu_mutex_lock_iothread(); 3109 bdrv_invalidate_cache_all(&local_err); 3110 if (local_err) { 3111 error_report_err(local_err); 3112 } else { 3113 s->block_inactive = false; 3114 } 3115 qemu_mutex_unlock_iothread(); 3116 } 3117 3118 fail: 3119 migrate_set_state(&s->state, current_active_state, 3120 MIGRATION_STATUS_FAILED); 3121 } 3122 3123 bool migrate_colo_enabled(void) 3124 { 3125 MigrationState *s = migrate_get_current(); 3126 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; 3127 } 3128 3129 typedef enum MigThrError { 3130 /* No error detected */ 3131 MIG_THR_ERR_NONE = 0, 3132 /* Detected error, but resumed successfully */ 3133 MIG_THR_ERR_RECOVERED = 1, 3134 /* Detected fatal error, need to exit */ 3135 MIG_THR_ERR_FATAL = 2, 3136 } MigThrError; 3137 3138 static int postcopy_resume_handshake(MigrationState *s) 3139 { 3140 qemu_savevm_send_postcopy_resume(s->to_dst_file); 3141 3142 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3143 qemu_sem_wait(&s->rp_state.rp_sem); 3144 } 3145 3146 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3147 return 0; 3148 } 3149 3150 return -1; 3151 } 3152 3153 /* Return zero on success, or <0 for error */ 3154 static int postcopy_do_resume(MigrationState *s) 3155 { 3156 int ret; 3157 3158 /* 3159 * Call all the resume_prepare() hooks, so that modules can be 3160 * ready for the migration resume. 3161 */ 3162 ret = qemu_savevm_state_resume_prepare(s); 3163 if (ret) { 3164 error_report("%s: resume_prepare() failure detected: %d", 3165 __func__, ret); 3166 return ret; 3167 } 3168 3169 /* 3170 * Last handshake with destination on the resume (destination will 3171 * switch to postcopy-active afterwards) 3172 */ 3173 ret = postcopy_resume_handshake(s); 3174 if (ret) { 3175 error_report("%s: handshake failed: %d", __func__, ret); 3176 return ret; 3177 } 3178 3179 return 0; 3180 } 3181 3182 /* 3183 * We don't return until we are in a safe state to continue the current 3184 * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or 3185 * MIG_THR_ERR_FATAL if an unrecoverable failure happened. 3186 */ 3187 static MigThrError postcopy_pause(MigrationState *s) 3188 { 3189 assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); 3190 3191 while (true) { 3192 QEMUFile *file; 3193 3194 /* Current channel is possibly broken. Release it. */ 3195 assert(s->to_dst_file); 3196 qemu_mutex_lock(&s->qemu_file_lock); 3197 file = s->to_dst_file; 3198 s->to_dst_file = NULL; 3199 qemu_mutex_unlock(&s->qemu_file_lock); 3200 3201 qemu_file_shutdown(file); 3202 qemu_fclose(file); 3203 3204 migrate_set_state(&s->state, s->state, 3205 MIGRATION_STATUS_POSTCOPY_PAUSED); 3206 3207 error_report("Detected IO failure for postcopy. " 3208 "Migration paused."); 3209 3210 /* 3211 * Wait until things are fixed up, then someone will set the 3212 * state back for us. 3213 */ 3214 while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { 3215 qemu_sem_wait(&s->postcopy_pause_sem); 3216 } 3217 3218 if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3219 /* Woken up by a recovery procedure. Give it a shot */ 3220 3221 /* 3222 * First, wake up the return path now, with a new 3223 * return path channel. 3224 */ 3225 qemu_sem_post(&s->postcopy_pause_rp_sem); 3226 3227 /* Do the resume logic */ 3228 if (postcopy_do_resume(s) == 0) { 3229 /* Let's continue! */ 3230 trace_postcopy_pause_continued(); 3231 return MIG_THR_ERR_RECOVERED; 3232 } else { 3233 /* 3234 * Something went wrong during the recovery, so pause 3235 * again. Pausing is always better than throwing 3236 * data away. 3237 */ 3238 continue; 3239 } 3240 } else { 3241 /* This is not right... Time to quit. */ 3242 return MIG_THR_ERR_FATAL; 3243 } 3244 } 3245 } 3246 3247 static MigThrError migration_detect_error(MigrationState *s) 3248 { 3249 int ret; 3250 int state = s->state; 3251 Error *local_error = NULL; 3252 3253 if (state == MIGRATION_STATUS_CANCELLING || 3254 state == MIGRATION_STATUS_CANCELLED) { 3255 /* End the migration, but don't set the state to failed */ 3256 return MIG_THR_ERR_FATAL; 3257 } 3258 3259 /* Try to detect any file errors */ 3260 ret = qemu_file_get_error_obj(s->to_dst_file, &local_error); 3261 if (!ret) { 3262 /* Everything is fine */ 3263 assert(!local_error); 3264 return MIG_THR_ERR_NONE; 3265 } 3266 3267 if (local_error) { 3268 migrate_set_error(s, local_error); 3269 error_free(local_error); 3270 } 3271 3272 if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { 3273 /* 3274 * For postcopy, we allow the network to be down for a 3275 * while. After that, it can be continued by a 3276 * recovery phase. 3277 */ 3278 return postcopy_pause(s); 3279 } else { 3280 /* 3281 * For precopy (or postcopy with an error outside IO), we fail 3282 * immediately. 3283 */ 3284 migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); 3285 trace_migration_thread_file_err(); 3286 3287 /* Time to stop the migration, now. */ 3288 return MIG_THR_ERR_FATAL; 3289 } 3290 } 3291 3292 /* How many bytes have we transferred since the beginning of the migration */ 3293 static uint64_t migration_total_bytes(MigrationState *s) 3294 { 3295 return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; 3296 } 3297 3298 static void migration_calculate_complete(MigrationState *s) 3299 { 3300 uint64_t bytes = migration_total_bytes(s); 3301 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3302 int64_t transfer_time; 3303 3304 s->total_time = end_time - s->start_time; 3305 if (!s->downtime) { 3306 /* 3307 * It's still not set, so this is a precopy migration. For 3308 * postcopy, downtime is calculated during postcopy_start(). 3309 */ 3310 s->downtime = end_time - s->downtime_start; 3311 } 3312 3313 transfer_time = s->total_time - s->setup_time; 3314 if (transfer_time) { 3315 s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; 3316 } 3317 } 3318 3319 static void update_iteration_initial_status(MigrationState *s) 3320 { 3321 /* 3322 * Update these three fields at the same time to avoid mismatched info 3323 * leading to a wrong speed calculation.
*/ 3325 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3326 s->iteration_initial_bytes = migration_total_bytes(s); 3327 s->iteration_initial_pages = ram_get_total_transferred_pages(); 3328 } 3329 3330 static void migration_update_counters(MigrationState *s, 3331 int64_t current_time) 3332 { 3333 uint64_t transferred, transferred_pages, time_spent; 3334 uint64_t current_bytes; /* bytes transferred since the beginning */ 3335 double bandwidth; 3336 3337 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 3338 return; 3339 } 3340 3341 current_bytes = migration_total_bytes(s); 3342 transferred = current_bytes - s->iteration_initial_bytes; 3343 time_spent = current_time - s->iteration_start_time; 3344 bandwidth = (double)transferred / time_spent; 3345 s->threshold_size = bandwidth * s->parameters.downtime_limit; 3346 3347 s->mbps = (((double) transferred * 8.0) / 3348 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 3349 3350 transferred_pages = ram_get_total_transferred_pages() - 3351 s->iteration_initial_pages; 3352 s->pages_per_second = (double) transferred_pages / 3353 (((double) time_spent / 1000.0)); 3354 3355 /* 3356 * If we haven't sent anything, we don't want to 3357 * recalculate. 10000 is a small enough number for our purposes 3358 */ 3359 if (ram_counters.dirty_pages_rate && transferred > 10000) { 3360 s->expected_downtime = ram_counters.remaining / bandwidth; 3361 } 3362 3363 qemu_file_reset_rate_limit(s->to_dst_file); 3364 3365 update_iteration_initial_status(s); 3366 3367 trace_migrate_transferred(transferred, time_spent, 3368 bandwidth, s->threshold_size); 3369 } 3370 3371 /* Migration thread iteration status */ 3372 typedef enum { 3373 MIG_ITERATE_RESUME, /* Resume current iteration */ 3374 MIG_ITERATE_SKIP, /* Skip current iteration */ 3375 MIG_ITERATE_BREAK, /* Break the loop */ 3376 } MigIterateState; 3377 3378 /* 3379 * Run one iteration of the migration and return the resulting 3380 * MigIterateState: resume, skip this iteration, or break the loop. 3381 */ 3382 static MigIterateState migration_iteration_run(MigrationState *s) 3383 { 3384 uint64_t pending_size, pend_pre, pend_compat, pend_post; 3385 bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; 3386 3387 qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, 3388 &pend_compat, &pend_post); 3389 pending_size = pend_pre + pend_compat + pend_post; 3390 3391 trace_migrate_pending(pending_size, s->threshold_size, 3392 pend_pre, pend_compat, pend_post); 3393 3394 if (pending_size && pending_size >= s->threshold_size) { 3395 /* Still a significant amount to transfer */ 3396 if (!in_postcopy && pend_pre <= s->threshold_size && 3397 qatomic_read(&s->start_postcopy)) { 3398 if (postcopy_start(s)) { 3399 error_report("%s: postcopy failed to start", __func__); 3400 } 3401 return MIG_ITERATE_SKIP; 3402 } 3403 /* Just another iteration step */ 3404 qemu_savevm_state_iterate(s->to_dst_file, in_postcopy); 3405 } else { 3406 trace_migration_thread_low_pending(pending_size); 3407 migration_completion(s); 3408 return MIG_ITERATE_BREAK; 3409 } 3410 3411 return MIG_ITERATE_RESUME; 3412 } 3413 3414 static void migration_iteration_finish(MigrationState *s) 3415 { 3416 /* If we enabled cpu throttling for auto-converge, turn it off.
*/ 3417 cpu_throttle_stop(); 3418 3419 qemu_mutex_lock_iothread(); 3420 switch (s->state) { 3421 case MIGRATION_STATUS_COMPLETED: 3422 migration_calculate_complete(s); 3423 runstate_set(RUN_STATE_POSTMIGRATE); 3424 break; 3425 3426 case MIGRATION_STATUS_ACTIVE: 3427 /* 3428 * We should really assert here, but since it's during 3429 * migration, let's try to reduce the usage of assertions. 3430 */ 3431 if (!migrate_colo_enabled()) { 3432 error_report("%s: critical error: calling COLO code without " 3433 "COLO enabled", __func__); 3434 } 3435 migrate_start_colo_process(s); 3436 /* 3437 * FIXME: we run the VM in COLO no matter what its old running 3438 * state was; after exiting COLO, we keep running. 3439 */ 3440 s->vm_was_running = true; 3441 /* Fallthrough */ 3442 case MIGRATION_STATUS_FAILED: 3443 case MIGRATION_STATUS_CANCELLED: 3444 case MIGRATION_STATUS_CANCELLING: 3445 if (s->vm_was_running) { 3446 vm_start(); 3447 } else { 3448 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 3449 runstate_set(RUN_STATE_POSTMIGRATE); 3450 } 3451 } 3452 break; 3453 3454 default: 3455 /* Should not reach here, but if so, forgive the VM. */ 3456 error_report("%s: Unknown ending state %d", __func__, s->state); 3457 break; 3458 } 3459 migrate_fd_cleanup_schedule(s); 3460 qemu_mutex_unlock_iothread(); 3461 } 3462 3463 void migration_make_urgent_request(void) 3464 { 3465 qemu_sem_post(&migrate_get_current()->rate_limit_sem); 3466 } 3467 3468 void migration_consume_urgent_request(void) 3469 { 3470 qemu_sem_wait(&migrate_get_current()->rate_limit_sem); 3471 } 3472 3473 /* Returns true if the rate limiting was broken by an urgent request */ 3474 bool migration_rate_limit(void) 3475 { 3476 int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3477 MigrationState *s = migrate_get_current(); 3478 3479 bool urgent = false; 3480 migration_update_counters(s, now); 3481 if (qemu_file_rate_limit(s->to_dst_file)) { 3482 3483 if (qemu_file_get_error(s->to_dst_file)) { 3484 return false; 3485 } 3486 /* 3487 * Wait for a delay to do rate limiting OR 3488 * something urgent to post the semaphore. 3489 */ 3490 int ms = s->iteration_start_time + BUFFER_DELAY - now; 3491 trace_migration_rate_limit_pre(ms); 3492 if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { 3493 /* 3494 * We were woken by one or more urgent things but 3495 * the timedwait will have consumed one of them. 3496 * The service routine for the urgent wake will decrement 3497 * the semaphore itself for each item it consumes, 3498 * so post back the one we just ate. 3499 */ 3500 qemu_sem_post(&s->rate_limit_sem); 3501 urgent = true; 3502 } 3503 trace_migration_rate_limit_post(urgent); 3504 } 3505 return urgent; 3506 } 3507 3508 /* 3509 * Master migration thread on the source VM. 3510 * It drives the migration and pumps the data down the outgoing channel. 3511 */ 3512 static void *migration_thread(void *opaque) 3513 { 3514 MigrationState *s = opaque; 3515 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 3516 MigThrError thr_error; 3517 bool urgent = false; 3518 3519 rcu_register_thread(); 3520 3521 object_ref(OBJECT(s)); 3522 update_iteration_initial_status(s); 3523 3524 qemu_savevm_state_header(s->to_dst_file); 3525 3526 /* 3527 * If we opened the return path, we need to make sure dst has it 3528 * opened as well.
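 * (The destination only creates its half once it sees the command
 * that qemu_savevm_send_open_return_path() emits below.)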
*/ 3530 if (s->rp_state.from_dst_file) { 3531 /* Now tell the dest that it should open its end so it can reply */ 3532 qemu_savevm_send_open_return_path(s->to_dst_file); 3533 3534 /* And do a ping that will make stuff easier to debug */ 3535 qemu_savevm_send_ping(s->to_dst_file, 1); 3536 } 3537 3538 if (migrate_postcopy()) { 3539 /* 3540 * Tell the destination that we *might* want to do postcopy later; 3541 * if the other end can't do postcopy it should fail now, nice and 3542 * early. 3543 */ 3544 qemu_savevm_send_postcopy_advise(s->to_dst_file); 3545 } 3546 3547 if (migrate_colo_enabled()) { 3548 /* Notify the migration destination that COLO is enabled */ 3549 qemu_savevm_send_colo_enable(s->to_dst_file); 3550 } 3551 3552 qemu_savevm_state_setup(s->to_dst_file); 3553 3554 if (qemu_savevm_state_guest_unplug_pending()) { 3555 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3556 MIGRATION_STATUS_WAIT_UNPLUG); 3557 3558 while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && 3559 qemu_savevm_state_guest_unplug_pending()) { 3560 qemu_sem_timedwait(&s->wait_unplug_sem, 250); 3561 } 3562 3563 migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, 3564 MIGRATION_STATUS_ACTIVE); 3565 } 3566 3567 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 3568 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3569 MIGRATION_STATUS_ACTIVE); 3570 3571 trace_migration_thread_setup_complete(); 3572 3573 while (migration_is_active(s)) { 3574 if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { 3575 MigIterateState iter_state = migration_iteration_run(s); 3576 if (iter_state == MIG_ITERATE_SKIP) { 3577 continue; 3578 } else if (iter_state == MIG_ITERATE_BREAK) { 3579 break; 3580 } 3581 } 3582 3583 /* 3584 * Try to detect any kind of failures, and see whether we 3585 * should stop the migration now. 3586 */ 3587 thr_error = migration_detect_error(s); 3588 if (thr_error == MIG_THR_ERR_FATAL) { 3589 /* Stop migration */ 3590 break; 3591 } else if (thr_error == MIG_THR_ERR_RECOVERED) { 3592 /* 3593 * Just recovered from, e.g., a network failure; reset all 3594 * the local variables. This is important to avoid 3595 * breaking the transferred_bytes and bandwidth calculation 3596 */ 3597 update_iteration_initial_status(s); 3598 } 3599 3600 urgent = migration_rate_limit(); 3601 } 3602 3603 trace_migration_thread_after_loop(); 3604 migration_iteration_finish(s); 3605 object_unref(OBJECT(s)); 3606 rcu_unregister_thread(); 3607 return NULL; 3608 } 3609 3610 void migrate_fd_connect(MigrationState *s, Error *error_in) 3611 { 3612 Error *local_err = NULL; 3613 int64_t rate_limit; 3614 bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; 3615 3616 s->expected_downtime = s->parameters.downtime_limit; 3617 if (resume) { 3618 assert(s->cleanup_bh); 3619 } else { 3620 assert(!s->cleanup_bh); 3621 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); 3622 } 3623 if (error_in) { 3624 migrate_fd_error(s, error_in); 3625 migrate_fd_cleanup(s); 3626 return; 3627 } 3628 3629 if (resume) { 3630 /* This is a resumed migration */ 3631 rate_limit = s->parameters.max_postcopy_bandwidth / 3632 XFER_LIMIT_RATIO; 3633 } else { 3634 /* This is a fresh new migration */ 3635 rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; 3636 3637 /* Notify before starting migration thread */ 3638 notifier_list_notify(&migration_state_notifiers, s); 3639 } 3640 3641 qemu_file_set_rate_limit(s->to_dst_file, rate_limit); 3642 qemu_file_set_blocking(s->to_dst_file, true); 3643 3644 /* 3645 * Open the return path.
For postcopy, it is used exclusively. For 3646 * precopy, QEMU uses the return path only if the user enabled the 3647 * "return-path" capability. 3648 */ 3649 if (migrate_postcopy_ram() || migrate_use_return_path()) { 3650 if (open_return_path_on_source(s, !resume)) { 3651 error_report("Unable to open return-path for postcopy"); 3652 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); 3653 migrate_fd_cleanup(s); 3654 return; 3655 } 3656 } 3657 3658 if (resume) { 3659 /* Wake up the main migration thread to do the recovery */ 3660 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 3661 MIGRATION_STATUS_POSTCOPY_RECOVER); 3662 qemu_sem_post(&s->postcopy_pause_sem); 3663 return; 3664 } 3665 3666 if (multifd_save_setup(&local_err) != 0) { 3667 error_report_err(local_err); 3668 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 3669 MIGRATION_STATUS_FAILED); 3670 migrate_fd_cleanup(s); 3671 return; 3672 } 3673 qemu_thread_create(&s->thread, "live_migration", migration_thread, s, 3674 QEMU_THREAD_JOINABLE); 3675 s->migration_thread_running = true; 3676 } 3677 3678 void migration_global_dump(Monitor *mon) 3679 { 3680 MigrationState *ms = migrate_get_current(); 3681 3682 monitor_printf(mon, "globals:\n"); 3683 monitor_printf(mon, "store-global-state: %s\n", 3684 ms->store_global_state ? "on" : "off"); 3685 monitor_printf(mon, "only-migratable: %s\n", 3686 only_migratable ? "on" : "off"); 3687 monitor_printf(mon, "send-configuration: %s\n", 3688 ms->send_configuration ? "on" : "off"); 3689 monitor_printf(mon, "send-section-footer: %s\n", 3690 ms->send_section_footer ? "on" : "off"); 3691 monitor_printf(mon, "decompress-error-check: %s\n", 3692 ms->decompress_error_check ? "on" : "off"); 3693 monitor_printf(mon, "clear-bitmap-shift: %u\n", 3694 ms->clear_bitmap_shift); 3695 } 3696 3697 #define DEFINE_PROP_MIG_CAP(name, x) \ 3698 DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) 3699 3700 static Property migration_properties[] = { 3701 DEFINE_PROP_BOOL("store-global-state", MigrationState, 3702 store_global_state, true), 3703 DEFINE_PROP_BOOL("send-configuration", MigrationState, 3704 send_configuration, true), 3705 DEFINE_PROP_BOOL("send-section-footer", MigrationState, 3706 send_section_footer, true), 3707 DEFINE_PROP_BOOL("decompress-error-check", MigrationState, 3708 decompress_error_check, true), 3709 DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, 3710 clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), 3711 3712 /* Migration parameters */ 3713 DEFINE_PROP_UINT8("x-compress-level", MigrationState, 3714 parameters.compress_level, 3715 DEFAULT_MIGRATE_COMPRESS_LEVEL), 3716 DEFINE_PROP_UINT8("x-compress-threads", MigrationState, 3717 parameters.compress_threads, 3718 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), 3719 DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, 3720 parameters.compress_wait_thread, true), 3721 DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, 3722 parameters.decompress_threads, 3723 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), 3724 DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, 3725 parameters.throttle_trigger_threshold, 3726 DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), 3727 DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, 3728 parameters.cpu_throttle_initial, 3729 DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), 3730 DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, 3731 parameters.cpu_throttle_increment, 3732 DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), 3733
DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, 3734 parameters.cpu_throttle_tailslow, false), 3735 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, 3736 parameters.max_bandwidth, MAX_THROTTLE), 3737 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, 3738 parameters.downtime_limit, 3739 DEFAULT_MIGRATE_SET_DOWNTIME), 3740 DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, 3741 parameters.x_checkpoint_delay, 3742 DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), 3743 DEFINE_PROP_UINT8("multifd-channels", MigrationState, 3744 parameters.multifd_channels, 3745 DEFAULT_MIGRATE_MULTIFD_CHANNELS), 3746 DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, 3747 parameters.multifd_compression, 3748 DEFAULT_MIGRATE_MULTIFD_COMPRESSION), 3749 DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, 3750 parameters.multifd_zlib_level, 3751 DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), 3752 DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, 3753 parameters.multifd_zstd_level, 3754 DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), 3755 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, 3756 parameters.xbzrle_cache_size, 3757 DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), 3758 DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, 3759 parameters.max_postcopy_bandwidth, 3760 DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), 3761 DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, 3762 parameters.max_cpu_throttle, 3763 DEFAULT_MIGRATE_MAX_CPU_THROTTLE), 3764 DEFINE_PROP_SIZE("announce-initial", MigrationState, 3765 parameters.announce_initial, 3766 DEFAULT_MIGRATE_ANNOUNCE_INITIAL), 3767 DEFINE_PROP_SIZE("announce-max", MigrationState, 3768 parameters.announce_max, 3769 DEFAULT_MIGRATE_ANNOUNCE_MAX), 3770 DEFINE_PROP_SIZE("announce-rounds", MigrationState, 3771 parameters.announce_rounds, 3772 DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), 3773 DEFINE_PROP_SIZE("announce-step", MigrationState, 3774 parameters.announce_step, 3775 DEFAULT_MIGRATE_ANNOUNCE_STEP), 3776 3777 /* Migration capabilities */ 3778 DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), 3779 DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), 3780 DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), 3781 DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), 3782 DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), 3783 DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), 3784 DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), 3785 DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), 3786 DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), 3787 DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), 3788 DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), 3789 DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), 3790 3791 DEFINE_PROP_END_OF_LIST(), 3792 }; 3793 3794 static void migration_class_init(ObjectClass *klass, void *data) 3795 { 3796 DeviceClass *dc = DEVICE_CLASS(klass); 3797 3798 dc->user_creatable = false; 3799 device_class_set_props(dc, migration_properties); 3800 } 3801 3802 static void migration_instance_finalize(Object *obj) 3803 { 3804 MigrationState *ms = MIGRATION_OBJ(obj); 3805 MigrationParameters *params = &ms->parameters; 3806 3807 qemu_mutex_destroy(&ms->error_mutex); 3808 qemu_mutex_destroy(&ms->qemu_file_lock); 3809 g_free(params->tls_hostname); 3810 g_free(params->tls_creds); 3811 qemu_sem_destroy(&ms->wait_unplug_sem); 3812 
qemu_sem_destroy(&ms->rate_limit_sem); 3813 qemu_sem_destroy(&ms->pause_sem); 3814 qemu_sem_destroy(&ms->postcopy_pause_sem); 3815 qemu_sem_destroy(&ms->postcopy_pause_rp_sem); 3816 qemu_sem_destroy(&ms->rp_state.rp_sem); 3817 error_free(ms->error); 3818 } 3819 3820 static void migration_instance_init(Object *obj) 3821 { 3822 MigrationState *ms = MIGRATION_OBJ(obj); 3823 MigrationParameters *params = &ms->parameters; 3824 3825 ms->state = MIGRATION_STATUS_NONE; 3826 ms->mbps = -1; 3827 ms->pages_per_second = -1; 3828 qemu_sem_init(&ms->pause_sem, 0); 3829 qemu_mutex_init(&ms->error_mutex); 3830 3831 params->tls_hostname = g_strdup(""); 3832 params->tls_creds = g_strdup(""); 3833 3834 /* Set has_* up only for parameter checks */ 3835 params->has_compress_level = true; 3836 params->has_compress_threads = true; 3837 params->has_decompress_threads = true; 3838 params->has_throttle_trigger_threshold = true; 3839 params->has_cpu_throttle_initial = true; 3840 params->has_cpu_throttle_increment = true; 3841 params->has_cpu_throttle_tailslow = true; 3842 params->has_max_bandwidth = true; 3843 params->has_downtime_limit = true; 3844 params->has_x_checkpoint_delay = true; 3845 params->has_block_incremental = true; 3846 params->has_multifd_channels = true; 3847 params->has_multifd_compression = true; 3848 params->has_multifd_zlib_level = true; 3849 params->has_multifd_zstd_level = true; 3850 params->has_xbzrle_cache_size = true; 3851 params->has_max_postcopy_bandwidth = true; 3852 params->has_max_cpu_throttle = true; 3853 params->has_announce_initial = true; 3854 params->has_announce_max = true; 3855 params->has_announce_rounds = true; 3856 params->has_announce_step = true; 3857 3858 qemu_sem_init(&ms->postcopy_pause_sem, 0); 3859 qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); 3860 qemu_sem_init(&ms->rp_state.rp_sem, 0); 3861 qemu_sem_init(&ms->rate_limit_sem, 0); 3862 qemu_sem_init(&ms->wait_unplug_sem, 0); 3863 qemu_mutex_init(&ms->qemu_file_lock); 3864 } 3865 3866 /* 3867 * Return true if check pass, false otherwise. Error will be put 3868 * inside errp if provided. 3869 */ 3870 static bool migration_object_check(MigrationState *ms, Error **errp) 3871 { 3872 MigrationCapabilityStatusList *head = NULL; 3873 /* Assuming all off */ 3874 bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; 3875 int i; 3876 3877 if (!migrate_params_check(&ms->parameters, errp)) { 3878 return false; 3879 } 3880 3881 for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { 3882 if (ms->enabled_capabilities[i]) { 3883 head = migrate_cap_add(head, i, true); 3884 } 3885 } 3886 3887 ret = migrate_caps_check(cap_list, head, errp); 3888 3889 /* It works with head == NULL */ 3890 qapi_free_MigrationCapabilityStatusList(head); 3891 3892 return ret; 3893 } 3894 3895 static const TypeInfo migration_type = { 3896 .name = TYPE_MIGRATION, 3897 /* 3898 * NOTE: TYPE_MIGRATION is not really a device, as the object is 3899 * not created using qdev_new(), it is not attached to the qdev 3900 * device tree, and it is never realized. 3901 * 3902 * TODO: Make this TYPE_OBJECT once QOM provides something like 3903 * TYPE_DEVICE's "-global" properties. 
static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);
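/*
 * Rough sketch of what the type_init() above expands to, assumed from
 * QEMU's generic module machinery (qemu/module.h) rather than anything
 * in this file:
 *
 *     static void __attribute__((constructor))
 *     do_qemu_init_register_migration_types(void)
 *     {
 *         register_module_init(register_migration_types, MODULE_INIT_QOM);
 *     }
 *
 * i.e. register_migration_types() runs during MODULE_INIT_QOM processing
 * at startup, so TYPE_MIGRATION is registered with QOM before any
 * migration can be attempted.
 */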