/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/ctype.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "file.h"
#include "socket.h"
#include "system/runstate.h"
#include "system/system.h"
#include "system/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/cpr.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "migration-stats.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qobject/qnull.h"
#include "qemu/rcu.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "system/cpus.h"
#include "yank_functions.h"
#include "system/qtest.h"
#include "options.h"
#include "system/dirtylimit.h"
#include "qemu/sockets.h"
#include "system/kvm.h"

#define NOTIFIER_ELEM_INIT(array, elem)    \
    [elem] = NOTIFIER_WITH_RETURN_LIST_INITIALIZER((array)[elem])

#define INMIGRATE_DEFAULT_EXIT_ON_ERROR true

static GSList *migration_state_notifiers[MIG_MODE__MAX];

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */
    MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */

    MIG_RP_MSG_MAX
};
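/*
 * Illustrative note: every message above is framed on the wire by
 * migrate_send_rp_message() below as
 *
 *     be16 message_type | be16 payload_len | payload bytes
 *
 * so a new message type only needs to define its payload layout.
 */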
/* Migration channel types */
enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY };

/*
 * When we add fault tolerance, we could have several migrations at once.
 * For now we don't need to add dynamic creation of migration.
 */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers[MIG_MODE__MAX];

static bool migration_object_check(MigrationState *ms, Error **errp);
static bool migration_switchover_start(MigrationState *s, Error **errp);
static bool close_return_path_on_source(MigrationState *s);
static void migration_completion_end(MigrationState *s);
static void migrate_hup_delete(MigrationState *s);

static void migration_downtime_start(MigrationState *s)
{
    trace_vmstate_downtime_checkpoint("src-downtime-start");
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
}

/*
 * This is unfortunate: incoming migration actually needs the outgoing
 * migration state (MigrationState) to be there too, e.g. to query
 * capabilities, parameters, using locks, setup errors, etc.
 *
 * NOTE: when calling this, make sure current_migration exists and has
 * not been freed yet!  Otherwise trying to access the refcount is
 * already a use-after-free itself..
 *
 * TODO: Move the shared part of incoming / outgoing out into a separate
 * object.  Then this is not needed.
 */
static void migrate_incoming_ref_outgoing_state(void)
{
    object_ref(migrate_get_current());
}

static void migrate_incoming_unref_outgoing_state(void)
{
    object_unref(migrate_get_current());
}

static void migration_downtime_end(MigrationState *s)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /*
     * If downtime is already set, it means that postcopy already set it,
     * and that should be the real downtime already.
     */
    if (!s->downtime) {
        s->downtime = now - s->downtime_start;
        trace_vmstate_downtime_checkpoint("src-downtime-end");
    }
}

static void precopy_notify_complete(void)
{
    Error *local_err = NULL;

    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
        error_report_err(local_err);
    }

    trace_migration_precopy_complete();
}

static bool migration_needs_multiple_sockets(void)
{
    return migrate_multifd() || migrate_postcopy_preempt();
}

static RunState migration_get_target_runstate(void)
{
    /*
     * When the global state is not migrated, it means we don't know the
     * runstate of the src QEMU.  We don't have much choice but to assume
     * the VM is running.  NOTE: this is a pretty rare case; so far only
     * Xen uses it.
     */
    if (!global_state_received()) {
        return RUN_STATE_RUNNING;
    }

    return global_state_get_runstate();
}
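/*
 * Rough summary of what the transport helpers below report (illustrative;
 * the functions themselves are authoritative):
 *
 *   transport                  multi-channel     seekable   extra fds
 *   socket (inet/unix/vsock)   yes               no         no
 *   socket (fd)                no                no         no
 *   file                       mapped-ram only   yes        yes
 *   exec / rdma                no                no         no
 */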
static bool transport_supports_multi_channels(MigrationAddress *addr)
{
    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
        SocketAddress *saddr = &addr->u.socket;

        return (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
                saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
                saddr->type == SOCKET_ADDRESS_TYPE_VSOCK);
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        return migrate_mapped_ram();
    } else {
        return false;
    }
}

static bool migration_needs_seekable_channel(void)
{
    return migrate_mapped_ram();
}

static bool migration_needs_extra_fds(void)
{
    /*
     * When doing direct-io, multifd requires two different,
     * non-duplicated file descriptors so we can use one of them for
     * unaligned IO.
     */
    return migrate_multifd() && migrate_direct_io();
}

static bool transport_supports_seeking(MigrationAddress *addr)
{
    if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        return true;
    }

    return false;
}

static bool transport_supports_extra_fds(MigrationAddress *addr)
{
    /* file: works because QEMU can open it multiple times */
    return addr->transport == MIGRATION_ADDRESS_TYPE_FILE;
}

static bool
migration_channels_and_transport_compatible(MigrationAddress *addr,
                                            Error **errp)
{
    if (migration_needs_seekable_channel() &&
        !transport_supports_seeking(addr)) {
        error_setg(errp, "Migration requires seekable transport (e.g. file)");
        return false;
    }

    if (migration_needs_multiple_sockets() &&
        !transport_supports_multi_channels(addr)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    if (migration_needs_extra_fds() &&
        !transport_supports_extra_fds(addr)) {
        error_setg(errp,
                   "Migration requires a transport that allows for extra fds (e.g. file)");
        return false;
    }

    if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
        addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        error_setg(errp, "Migration requires a streamable transport (e.g. unix)");
        return false;
    }

    return true;
}

static bool
migration_capabilities_and_transport_compatible(MigrationAddress *addr,
                                                Error **errp)
{
    if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
        return migrate_rdma_caps_check(migrate_get_current()->capabilities,
                                       errp);
    }

    return true;
}

static bool migration_transport_compatible(MigrationAddress *addr, Error **errp)
{
    return migration_channels_and_transport_compatible(addr, errp) &&
           migration_capabilities_and_transport_compatible(addr, errp);
}

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

static int migration_stop_vm(MigrationState *s, RunState state)
{
    int ret;

    migration_downtime_start(s);

    s->vm_old_state = runstate_get();
    global_state_store();

    ret = vm_stop_force_state(state);

    trace_vmstate_downtime_checkpoint("src-vm-stopped");
    trace_migration_completion_vm_stop(ret);

    return ret;
}
void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);

    qemu_mutex_init(&current_incoming->page_request_mutex);
    qemu_cond_init(&current_incoming->page_request_cond);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    current_incoming->exit_on_error = INMIGRATE_DEFAULT_EXIT_ON_ERROR;

    migration_object_check(current_migration, &error_fatal);

    ram_mig_init();
    dirty_bitmap_mig_init();
    cpr_exec_init();

    /* Initialize cpu throttle timers */
    cpu_throttle_init();
}

typedef struct {
    QEMUBH *bh;
    QEMUBHFunc *cb;
    void *opaque;
} MigrationBH;

static void migration_bh_dispatch_bh(void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationBH *migbh = opaque;

    /* cleanup this BH */
    qemu_bh_delete(migbh->bh);
    migbh->bh = NULL;

    /* dispatch the other one */
    migbh->cb(migbh->opaque);
    object_unref(OBJECT(s));

    g_free(migbh);
}

void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationBH *migbh = g_new0(MigrationBH, 1);
    QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);

    /* Store these to dispatch when the BH runs */
    migbh->bh = bh;
    migbh->cb = cb;
    migbh->opaque = opaque;

    /*
     * Ref the state for the bh, because it may be called when
     * there are already no other refs.
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(bh);
}
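/*
 * Usage sketch (illustrative): defer work to the main loop while keeping
 * the MigrationState alive until the callback has run, e.g.
 *
 *     migration_bh_schedule(migration_cleanup_bh, s);
 *
 * The object_ref()/object_unref() pair above guarantees the state cannot
 * be finalized between scheduling and dispatch.
 */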
void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be waiting
     * on a semaphore.  So wake up the COLO thread before migration
     * shutdown.
     */
    colo_shutdown();

    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure.
     */
    migration_cancel();
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps.  It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps.  Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object is created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps = postcopy_state_get();

    multifd_recv_cleanup();

    if (ps != POSTCOPY_INCOMING_NONE) {
        postcopy_incoming_cleanup(mis);
    }

    /*
     * RAM state cleanup needs to happen after multifd cleanup, because
     * multifd threads can use some of its states (receivedmap).
     * The VFIO load_cleanup() implementation is BQL-sensitive.  It requires
     * that BQL NOT be taken when recycling load threads, so that it won't
     * block the load threads from making progress on address space
     * modification operations.
     *
     * To make it work, we could try to not take BQL for all load_cleanup(),
     * or conditionally unlock BQL only if bql_locked() in VFIO.
     *
     * Since most existing call sites take BQL for load_cleanup(), make
     * it simple by always taking BQL as the rule, so that VFIO can unlock
     * BQL and retake it unconditionally.
     */
    assert(bql_locked());
    qemu_loadvm_state_cleanup(mis);

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    cpr_set_incoming_mode(MIG_MODE_NONE);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(MigrationStatus new_state)
{
    if (migrate_events()) {
        qapi_event_send_migration(new_state);
    }
}
/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    return qemu_fflush(mis->to_src_file);
}

/*
 * Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RB
 *   len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname len (1) + rbname (up to 255) */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We maintain the last ramblock that we requested a page from.  Note
     * that we don't need locking because this function will only be called
     * within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
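/*
 * Resulting payload layout (illustrative):
 *
 *   MIG_RP_MSG_REQ_PAGES_ID:  start (be64) | len (be32) |
 *                             rbname_len (1 byte) | rbname
 *   MIG_RP_MSG_REQ_PAGES:     start (be64) | len (be32)
 *
 * The shorter form is used when requesting from the same ramblock as the
 * previous request; the source is expected to remember the last rbname.
 */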
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr,
                              uint32_t tid)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received) {
            if (!g_tree_lookup(mis->page_requested, aligned)) {
                /*
                 * The page has not been received, and it's not yet in the
                 * page request list.  Queue it.  Set the value of element
                 * to 1, so that things like g_tree_lookup() will return
                 * TRUE (1) when found.
                 */
                g_tree_insert(mis->page_requested, aligned, (gpointer)1);
                qatomic_inc(&mis->page_requested_count);
                trace_postcopy_page_req_add(aligned, mis->page_requested_count);
            }
            mark_postcopy_blocktime_begin(haddr, tid, rb);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need
     * the lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;

bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(Error **errp)
{
#ifndef CONFIG_REPLICATION
    error_setg(errp, "ENABLE_COLO command came in the migration stream, but "
               "the replication module is not built in");
    return -ENOTSUP;
#endif

    if (!migrate_colo()) {
        error_setg(errp, "ENABLE_COLO command came in the migration stream"
                   ", but the x-colo capability is not set");
        return -EINVAL;
    }

    if (ram_block_discard_disable(true)) {
        error_setg(errp, "COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

bool migrate_is_uri(const char *uri)
{
    while (*uri && *uri != ':') {
        if (!qemu_isalpha(*uri++)) {
            return false;
        }
    }
    return *uri == ':';
}

bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
                       Error **errp)
{
    g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1);
    g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1);
    InetSocketAddress *isock = &addr->u.rdma;
    strList **tail = &addr->u.exec.args;

    if (strstart(uri, "exec:", NULL)) {
        addr->transport = MIGRATION_ADDRESS_TYPE_EXEC;
#ifdef WIN32
        QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path()));
        QAPI_LIST_APPEND(tail, g_strdup("/c"));
#else
        QAPI_LIST_APPEND(tail, g_strdup("/bin/sh"));
        QAPI_LIST_APPEND(tail, g_strdup("-c"));
#endif
        QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:")));
    } else if (strstart(uri, "rdma:", NULL)) {
        if (inet_parse(isock, uri + strlen("rdma:"), errp)) {
            qapi_free_InetSocketAddress(isock);
            return false;
        }
        addr->transport = MIGRATION_ADDRESS_TYPE_RDMA;
    } else if (strstart(uri, "tcp:", NULL) ||
               strstart(uri, "unix:", NULL) ||
               strstart(uri, "vsock:", NULL) ||
               strstart(uri, "fd:", NULL)) {
        addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET;
        SocketAddress *saddr = socket_parse(uri, errp);
        if (!saddr) {
            return false;
        }
        addr->u.socket.type = saddr->type;
        addr->u.socket.u = saddr->u;
        /* Don't free the objects inside; their ownership moved to "addr" */
        g_free(saddr);
    } else if (strstart(uri, "file:", NULL)) {
        addr->transport = MIGRATION_ADDRESS_TYPE_FILE;
        addr->u.file.filename = g_strdup(uri + strlen("file:"));
        if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset,
                              errp)) {
            return false;
        }
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
        return false;
    }

    val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN;
    val->addr = g_steal_pointer(&addr);
    *channel = g_steal_pointer(&val);
    return true;
}
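/*
 * Examples of URI forms accepted above (illustrative):
 *
 *   tcp:host:port        unix:/path/to.sock    vsock:cid:port
 *   fd:fdname            exec:command line     rdma:host:port
 *   file:/path/to/image  (optionally with an offset suffix parsed by
 *                         file_parse_offset())
 */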
static bool
migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
{
    MigrationStatus current = mis->state;

    if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /*
         * Incoming postcopy migration will stay in PAUSED state even if
         * reconnection happened.
         */
        return true;
    }

    if (current != MIGRATION_STATUS_NONE) {
        error_setg(errp, "Illegal migration incoming state: %s",
                   MigrationStatus_str(current));
        return false;
    }

    migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP);
    return true;
}

static void qemu_start_incoming_migration(const char *uri, bool has_channels,
                                          MigrationChannelList *channels,
                                          Error **errp)
{
    g_autoptr(MigrationChannel) channel = NULL;
    MigrationAddress *addr = NULL;
    MigrationIncomingState *mis = migration_incoming_get_current();

    /* Preliminary checks for the uri and channels arguments */
    if (!uri == !channels) {
        error_setg(errp, "need either 'uri' or 'channels' argument");
        return;
    }

    if (channels) {
        /* Verify that the migration channel list has only one entry */
        if (channels->next) {
            error_setg(errp, "Channel list must have only one entry, "
                             "for type 'main'");
            return;
        }
        addr = channels->value->addr;
    }

    if (uri) {
        /* caller uses the old URI syntax */
        if (!migrate_uri_parse(uri, &channel, errp)) {
            return;
        }
        addr = channel->addr;
    }

    /* transport mechanism not suitable for migration? */
    if (!migration_transport_compatible(addr, errp)) {
        return;
    }

    if (!migration_incoming_state_setup(mis, errp)) {
        return;
    }

    if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
        SocketAddress *saddr = &addr->u.socket;
        if (saddr->type == SOCKET_ADDRESS_TYPE_INET ||
            saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
            saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) {
            socket_start_incoming_migration(saddr, errp);
        } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) {
            fd_start_incoming_migration(saddr->u.fd.str, errp);
        }
#ifdef CONFIG_RDMA
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
        rdma_start_incoming_migration(&addr->u.rdma, errp);
#endif
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
        exec_start_incoming_migration(addr->u.exec.args, errp);
    } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
        file_start_incoming_migration(&addr->u.file, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }

    /* Close the cpr socket to tell the source that we are listening */
    cpr_state_close();
}
static void process_incoming_migration_bh(void *opaque)
{
    MigrationIncomingState *mis = opaque;

    trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");

    multifd_recv_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (runstate_is_live(migration_get_target_runstate())) {
        if (autostart) {
            /*
             * Block activation is always delayed until the VM starts,
             * either here (which means we need to start the dest VM right
             * now..), or until qmp_cont() later.
             *
             * We used to have the cap 'late-block-activate' but now we do
             * this unconditionally, as it has no harm but only benefit.
             * E.g., the exact time of disk activation is not part of the
             * migration ABI.
             *
             * Make sure all file formats throw away their mutable
             * metadata.  On error, don't restart the VM yet.
             */
            if (migration_block_activate(NULL)) {
                vm_start();
            }
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationState *s = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);

    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_ACTIVE);

    mis->loadvm_co = qemu_coroutine_self();
    ret = qemu_loadvm_state(mis->from_src_file, &local_err);
    mis->loadvm_co = NULL;

    trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");

    trace_process_incoming_migration_co_end(ret);
    if (mis->have_listen_thread) {
        /*
         * Postcopy was started, cleanup should happen at the end of the
         * postcopy listen thread.
         */
        trace_process_incoming_migration_co_postcopy_end_main();
        goto out;
    }

    if (ret < 0) {
        error_prepend(&local_err, "load of migration failed: %s: ",
                      strerror(-ret));
        goto fail;
    }

    if (migration_incoming_colo_enabled()) {
        /* yield until COLO exit */
        colo_incoming_co();
    }

    migration_bh_schedule(process_incoming_migration_bh, mis);
    goto out;

fail:
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, local_err);
    error_free(local_err);

    migration_incoming_state_destroy();

    if (mis->exit_on_error) {
        WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
            error_report_err(s->error);
            s->error = NULL;
        }

        exit(EXIT_FAILURE);
    }
out:
    /* Pairs with the refcount taken in qmp_migrate_incoming() */
    migrate_incoming_unref_outgoing_state();
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 */
static void migration_incoming_setup(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    assert(!mis->from_src_file);
    mis->from_src_file = f;
    qemu_file_set_blocking(f, false, &error_abort);
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}
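/*
 * Sketch of the normal (precopy) incoming flow, combining the functions
 * above with migration_fd_process_incoming() below:
 *
 *   migration_fd_process_incoming()
 *     -> migration_incoming_setup()            register the main channel
 *     -> migration_incoming_process()          enter the load coroutine
 *        -> process_incoming_migration_co()    qemu_loadvm_state()
 *           -> process_incoming_migration_bh() announce + vm_start()
 */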
/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has a standalone thread to do the vm load */
        qemu_file_set_blocking(mis->from_src_file, true, &error_abort);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * other threads will still be waiting), so that we can receive
         * commands from the source now, and answer them if needed.  The
         * other threads will only be woken up later, once we are sure
         * the source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f)
{
    migration_incoming_setup(f);
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

static bool migration_has_main_and_multifd_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        /* main channel not established */
        return false;
    }

    if (migrate_multifd() && !multifd_recv_all_channels_created()) {
        return false;
    }

    /* main and all multifd channels are established */
    return true;
}
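/*
 * Channel identification below is best-effort, based on peeking the
 * first four bytes: QEMU_VM_FILE_MAGIC marks the main channel and
 * MULTIFD_MAGIC a multifd channel, while the postcopy preempt channel
 * sends no magic at all.
 */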
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    uint8_t channel;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (!migration_has_main_and_multifd_channels()) {
        if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
            /*
             * With multiple channels, it is possible that we receive channels
             * out of order on the destination side, causing incorrect mapping
             * of source channels on the destination side.  Check the channel
             * MAGIC to decide the type of the channel.  Please note this is
             * best effort; the postcopy preempt channel does not send any
             * magic number, so avoid it for postcopy live migration.  Also,
             * tls live migration already does a tls handshake while
             * initializing the main channel, so with tls this issue is not
             * possible.
             */
            ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                              sizeof(channel_magic), errp);
            if (ret != 0) {
                return;
            }

            channel_magic = be32_to_cpu(channel_magic);
            if (channel_magic == QEMU_VM_FILE_MAGIC) {
                channel = CH_MAIN;
            } else if (channel_magic == MULTIFD_MAGIC) {
                assert(migrate_multifd());
                channel = CH_MULTIFD;
            } else if (!mis->from_src_file &&
                       mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
                /* reconnect main channel for postcopy recovery */
                channel = CH_MAIN;
            } else {
                error_setg(errp, "unknown channel magic: %u", channel_magic);
                return;
            }
        } else if (mis->from_src_file && migrate_multifd()) {
            /*
             * Non-peekable channels like tls/file are processed as
             * multifd channels when multifd is enabled.
             */
            channel = CH_MULTIFD;
        } else if (!mis->from_src_file) {
            channel = CH_MAIN;
        } else {
            error_setg(errp, "non-peekable channel used without multifd");
            return;
        }
    } else {
        assert(migrate_postcopy_preempt());
        channel = CH_POSTCOPY;
    }

    if (multifd_recv_setup(errp) != 0) {
        return;
    }

    if (channel == CH_MAIN) {
        f = qemu_file_new_input(ioc);
        migration_incoming_setup(f);
    } else if (channel == CH_MULTIFD) {
        /* Multiple connections */
        multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } else if (channel == CH_POSTCOPY) {
        assert(!mis->postcopy_qemufile_dst);
        f = qemu_file_new_input(ioc);
        postcopy_preempt_new_channel(mis, f);
        return;
    }

    if (migration_has_main_and_multifd_channels()) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    if (!migration_has_main_and_multifd_channels()) {
        return false;
    }

    MigrationIncomingState *mis = migration_incoming_get_current();

    if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) {
        return false;
    }

    return true;
}

int migrate_send_rp_switchover_ack(MigrationIncomingState *mis)
{
    return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL);
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-0 value indicates
 * an error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}
void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part.  It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (the fault thread is still paused),
     * so it's OK even without taking the mutex.  However, the best way
     * is to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

bool migration_is_running(void)
{
    MigrationState *s = current_migration;

    if (!s) {
        return false;
    }

    switch (s->state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_COLO:
        return true;
    default:
        return false;
    }
}

static bool migration_is_active(void)
{
    MigrationState *s = current_migration;

    return (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_DEVICE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

static bool migrate_show_downtime(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
}

static void populate_time_info(MigrationInfo *info, MigrationState *s)
{
    info->has_status = true;
    info->has_setup_time = true;
    info->setup_time = s->setup_time;

    if (s->state == MIGRATION_STATUS_COMPLETED) {
        info->has_total_time = true;
        info->total_time = s->total_time;
    } else {
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
                           s->start_time;
    }

    if (migrate_show_downtime(s)) {
        info->has_downtime = true;
        info->downtime = s->downtime;
    } else {
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
    }
}
static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = migration_transferred_bytes();
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&mig_stats.zero_pages);
    info->ram->normal = stat64_get(&mig_stats.normal_pages);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count =
        stat64_get(&mig_stats.dirty_sync_count);
    info->ram->dirty_sync_missed_zero_copy =
        stat64_get(&mig_stats.dirty_sync_missed_zero_copy);
    info->ram->postcopy_requests =
        stat64_get(&mig_stats.postcopy_requests);
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = stat64_get(&mig_stats.multifd_bytes);
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = stat64_get(&mig_stats.precopy_bytes);
    info->ram->downtime_bytes = stat64_get(&mig_stats.downtime_bytes);
    info->ram->postcopy_bytes = stat64_get(&mig_stats.postcopy_bytes);

    if (migrate_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate =
            stat64_get(&mig_stats.dirty_pages_rate);
    }

    if (migrate_dirty_limit() && dirtylimit_in_service()) {
        info->has_dirty_limit_throttle_time_per_round = true;
        info->dirty_limit_throttle_time_per_round =
            dirtylimit_throttle_time_per_round();

        info->has_dirty_limit_ring_full_time = true;
        info->dirty_limit_ring_full_time = dirtylimit_ring_full_time();
    }
}
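/*
 * Note on units in the RAMStats filled in above: "duplicate" counts
 * zero pages and "normal" counts fully-copied pages, both in target
 * pages; normal_bytes is derived as normal * target page size.
 */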
static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers[migrate_mode()];

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked:
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        migration_populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        migration_populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;

    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        info->error_desc = g_strdup(error_get_pretty(s->error));
    }
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    default:
        return;
    }
    info->status = mis->state;

    if (!info->error_desc) {
        MigrationState *s = migrate_get_current();
        QEMU_LOCK_GUARD(&s->error_mutex);

        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
    }
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                         " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                         " started");
        return;
    }
    /*
     * We don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}
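/*
 * Example QMP usage (illustrative), once a migration with the
 * postcopy-ram capability is active:
 *
 *   { "execute": "migrate-start-postcopy" }
 */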
/* shared migration helpers */

void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
                       MigrationStatus new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}
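/*
 * Note: the cmpxchg above makes state transitions race-free: the
 * transition happens (and a MIGRATION event is emitted) only if the
 * state is still @old_state at the time of the call.
 */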
static void migration_cleanup_json_writer(MigrationState *s)
{
    g_clear_pointer(&s->vmdesc, json_writer_free);
}

static void migration_cleanup(MigrationState *s)
{
    MigrationEventType type;
    QEMUFile *tmp = NULL;

    trace_migration_cleanup();

    migration_cleanup_json_writer(s);

    g_free(s->hostname);
    s->hostname = NULL;

    qemu_savevm_state_cleanup();
    cpr_state_close();
    migrate_hup_delete(s);

    close_return_path_on_source(s);

    if (s->migration_thread_running) {
        bql_unlock();
        qemu_thread_join(&s->thread);
        s->migration_thread_running = false;
        bql_lock();
    }

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        /*
         * Close the file handle without the lock to make sure the critical
         * section won't block for long.
         */
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
    }

    if (tmp) {
        /*
         * We only need to shutdown multifd if tmp!=NULL, because if
         * tmp==NULL, it means the main channel isn't established, while
         * multifd is only setup after that (in migration_thread()).
         */
        multifd_send_shutdown();
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    assert(!migration_is_active());

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used by info migrate.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    type = migration_has_failed(s) ? MIG_EVENT_PRECOPY_FAILED :
                                     MIG_EVENT_PRECOPY_DONE;
    migration_call_notifiers(s, type, NULL);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migration_cleanup_bh(void *opaque)
{
    migration_cleanup(opaque);
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);

    trace_migrate_error(error_get_pretty(error));

    if (!s->error) {
        s->error = error_copy(error);
    }
}

bool migrate_has_error(MigrationState *s)
{
    /* The lock is not helpful here, but still follow the rule */
    QEMU_LOCK_GUARD(&s->error_mutex);
    return qatomic_read(&s->error);
}

static void migrate_error_free(MigrationState *s)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        error_free(s->error);
        s->error = NULL;
    }
}

static void migration_connect_set_error(MigrationState *s, const Error *error)
{
    MigrationStatus current = s->state;
    MigrationStatus next;

    assert(s->to_dst_file == NULL);

    switch (current) {
    case MIGRATION_STATUS_SETUP:
        next = MIGRATION_STATUS_FAILED;
        break;
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
        /* Never fail a postcopy migration; switch back to PAUSED instead */
        next = MIGRATION_STATUS_POSTCOPY_PAUSED;
        break;
    default:
        /*
         * This really shouldn't happen.  Just be careful to not crash a VM
         * just for this.  Instead, dump something.
         */
        error_report("%s: Illegal migration status (%s) detected",
                     __func__, MigrationStatus_str(current));
        return;
    }

    migrate_set_state(&s->state, current, next);
    migrate_set_error(s, error);
}
void migration_cancel(void)
{
    MigrationState *s = migrate_get_current();
    int old_state;
    bool setup = (s->state == MIGRATION_STATUS_SETUP);

    trace_migration_cancel();

    if (migrate_dirty_limit()) {
        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
    }

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shut down the rp socket, causing the rp thread to shut down */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    do {
        old_state = s->state;
        if (!migration_is_running()) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_event_set(&s->pause_event);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING) {
        WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
            if (s->to_dst_file) {
                qemu_file_shutdown(s->to_dst_file);
            }
        }
    }

    /*
     * If qmp_migrate_finish has not been called, then there is no path that
     * will complete the cancellation.  Do it now.
     */
    if (setup && !s->to_dst_file) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
        cpr_state_close();
        migrate_hup_delete(s);
    }
}

static void add_notifiers(NotifierWithReturn *notify, unsigned modes)
{
    for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
        if (modes & BIT(mode)) {
            migration_state_notifiers[mode] =
                g_slist_prepend(migration_state_notifiers[mode], notify);
        }
    }
}

void migration_add_notifier_modes(NotifierWithReturn *notify,
                                  MigrationNotifyFunc func, unsigned modes)
{
    notify->notify = (NotifierWithReturnFunc)func;
    add_notifiers(notify, modes);
}

void migration_add_notifier_mode(NotifierWithReturn *notify,
                                 MigrationNotifyFunc func, MigMode mode)
{
    migration_add_notifier_modes(notify, func, BIT(mode));
}

void migration_add_notifier(NotifierWithReturn *notify,
                            MigrationNotifyFunc func)
{
    migration_add_notifier_mode(notify, func, MIG_MODE_NORMAL);
}

void migration_remove_notifier(NotifierWithReturn *notify)
{
    if (notify->notify) {
        for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
            migration_state_notifiers[mode] =
                g_slist_remove(migration_state_notifiers[mode], notify);
        }
        notify->notify = NULL;
    }
}

int migration_call_notifiers(MigrationState *s, MigrationEventType type,
                             Error **errp)
{
    MigMode mode = s->parameters.mode;
    MigrationEvent e;
    NotifierWithReturn *notifier;
    GSList *elem, *next;
    int ret;

    e.type = type;

    for (elem = migration_state_notifiers[mode]; elem; elem = next) {
        next = elem->next;
        notifier = (NotifierWithReturn *)elem->data;
        ret = notifier->notify(notifier, &e, errp);
        if (ret) {
            assert(type == MIG_EVENT_PRECOPY_SETUP);
            return ret;
        }
    }

    return 0;
}
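/*
 * Minimal usage sketch for the notifier API above (hypothetical client):
 *
 *   static int my_notify(NotifierWithReturn *notifier,
 *                        MigrationEvent *e, Error **errp)
 *   {
 *       if (e->type == MIG_EVENT_PRECOPY_SETUP) {
 *           // prepare; return non-zero and set errp to abort setup
 *       }
 *       return 0;
 *   }
 *
 *   static NotifierWithReturn my_notifier;
 *   migration_add_notifier(&my_notifier, my_notify);
 */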
bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLING ||
            s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_POSTCOPY_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        return true;
    default:
        return false;
    }
}

bool migration_postcopy_is_alive(MigrationStatus state)
{
    switch (state) {
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        return true;
    default:
        return false;
    }
}

bool migration_in_incoming_postcopy(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
}

bool migration_incoming_postcopy_advised(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

bool migration_in_bg_snapshot(void)
{
    return migrate_background_snapshot() && migration_is_running();
}

bool migration_thread_is_self(void)
{
    MigrationState *s = current_migration;

    return qemu_thread_is_self(&s->thread);
}

bool migrate_mode_is_cpr(MigrationState *s)
{
    MigMode mode = s->parameters.mode;

    return mode == MIG_MODE_CPR_REBOOT ||
           mode == MIG_MODE_CPR_TRANSFER ||
           mode == MIG_MODE_CPR_EXEC;
}

int migrate_init(MigrationState *s, Error **errp)
{
    int ret;

    ret = qemu_savevm_state_prepare(errp);
    if (ret) {
        return ret;
    }

    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->mbps = 0.0;
    s->pages_per_second = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;

    if (should_send_vmdesc()) {
        s->vmdesc = json_writer_new(false);
    }

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->total_time = 0;
    s->vm_old_state = -1;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
    s->switchover_acked = false;
    s->rdma_migration = false;
    /*
     * Set mig_stats memory to zero for a new migration.
     */
    memset(&mig_stats, 0, sizeof(mig_stats));
    migration_reset_vfio_bytes_transferred();

    s->postcopy_package_loaded = false;
    qemu_event_reset(&s->postcopy_package_loaded_event);

    return 0;
}

static bool is_busy(Error **reasonp, Error **errp)
{
    ERRP_GUARD();

    /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
    if (runstate_check(RUN_STATE_SAVE_VM) || migration_is_running()) {
        error_propagate_prepend(errp, *reasonp,
                                "disallowing migration blocker "
                                "(migration/snapshot in progress) for: ");
        *reasonp = NULL;
        return true;
    }
    return false;
}

static bool is_only_migratable(Error **reasonp, unsigned modes, Error **errp)
{
    ERRP_GUARD();

    if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) {
        error_propagate_prepend(errp, *reasonp,
                                "disallowing migration blocker "
                                "(--only-migratable) for: ");
        *reasonp = NULL;
        return true;
    }
    return false;
}

static int add_blockers(Error **reasonp, unsigned modes, Error **errp)
{
    for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
        if (modes & BIT(mode)) {
            migration_blockers[mode] = g_slist_prepend(migration_blockers[mode],
                                                       *reasonp);
        }
    }
    return 0;
}

int migrate_add_blocker(Error **reasonp, Error **errp)
{
    return migrate_add_blocker_modes(reasonp, -1u, errp);
}

int migrate_add_blocker_normal(Error **reasonp, Error **errp)
{
    return migrate_add_blocker_modes(reasonp, BIT(MIG_MODE_NORMAL), errp);
}

int migrate_add_blocker_modes(Error **reasonp, unsigned modes, Error **errp)
{
    if (is_only_migratable(reasonp, modes, errp)) {
        return -EACCES;
    } else if (is_busy(reasonp, errp)) {
        return -EBUSY;
    }
    return add_blockers(reasonp, modes, errp);
}
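/*
 * Typical blocker usage (hypothetical caller):
 *
 *   Error *reason = NULL;
 *   error_setg(&reason, "Device xyz does not support migration");
 *   if (migrate_add_blocker(&reason, errp) < 0) {
 *       // reason was consumed into errp; fail realize here
 *       return;
 *   }
 *   ...
 *   migrate_del_blocker(&reason);
 */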
                        errp);
}

void migrate_del_blocker(Error **reasonp)
{
    if (*reasonp) {
        for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) {
            migration_blockers[mode] = g_slist_remove(migration_blockers[mode],
                                                      *reasonp);
        }
        error_free(*reasonp);
        *reasonp = NULL;
    }
}

void qmp_migrate_incoming(const char *uri, bool has_channels,
                          MigrationChannelList *channels,
                          bool has_exit_on_error, bool exit_on_error,
                          Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }
    if (!runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "'-incoming' was not specified on the command line");
        return;
    }

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        return;
    }

    mis->exit_on_error =
        has_exit_on_error ? exit_on_error : INMIGRATE_DEFAULT_EXIT_ON_ERROR;

    qemu_start_incoming_migration(uri, has_channels, channels, &local_err);

    if (local_err) {
        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        error_propagate(errp, local_err);
        return;
    }

    /*
     * Make sure MigrationState stays available until the incoming
     * migration completes.
     *
     * NOTE: QEMU _might_ leak this refcount in some failure paths, but
     * that's OK. This is the minimum change we need to at least make
     * sure the success case is clean on the refcount. We could try
     * harder to make it accurate for any kind of failure, but that
     * might be overkill without bringing us much benefit.
     */
    migrate_incoming_ref_outgoing_state();
    once = false;
}

void qmp_migrate_recover(const char *uri, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    /*
     * Don't even bother to use ERRP_GUARD() as errp _must_ always be set
     * by callers (no one should ignore a recover failure); a caller that
     * doesn't set it has a programming error.
     */
    assert(errp);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
        error_setg(errp, "Migrate recover can only be run "
                   "when postcopy is paused.");
        return;
    }

    /* If there's an existing transport, release it */
    migration_incoming_transport_cleanup(mis);

    /*
     * Note that this call will never start a real migration; it will
     * only re-setup the migration stream and poke the existing migration
     * to continue using that newly established channel.
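     *
     * Illustrative QMP usage after a network failure (the address is a
     * made-up example, not something this code assumes):
     *
     *   {"execute": "migrate-recover",
     *    "arguments": {"uri": "tcp:192.0.2.1:4444"}}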
2000 */ 2001 qemu_start_incoming_migration(uri, false, NULL, errp); 2002 } 2003 2004 void qmp_migrate_pause(Error **errp) 2005 { 2006 MigrationState *ms = migrate_get_current(); 2007 MigrationIncomingState *mis = migration_incoming_get_current(); 2008 int ret = 0; 2009 2010 if (migration_postcopy_is_alive(ms->state)) { 2011 /* Source side, during postcopy */ 2012 Error *error = NULL; 2013 2014 /* Tell the core migration that we're pausing */ 2015 error_setg(&error, "Postcopy migration is paused by the user"); 2016 migrate_set_error(ms, error); 2017 error_free(error); 2018 2019 qemu_mutex_lock(&ms->qemu_file_lock); 2020 if (ms->to_dst_file) { 2021 ret = qemu_file_shutdown(ms->to_dst_file); 2022 } 2023 qemu_mutex_unlock(&ms->qemu_file_lock); 2024 if (ret) { 2025 error_setg(errp, "Failed to pause source migration"); 2026 } 2027 2028 /* 2029 * Kick the migration thread out of any waiting windows (on behalf 2030 * of the rp thread). 2031 */ 2032 migration_rp_kick(ms); 2033 2034 return; 2035 } 2036 2037 if (migration_postcopy_is_alive(mis->state)) { 2038 ret = qemu_file_shutdown(mis->from_src_file); 2039 if (ret) { 2040 error_setg(errp, "Failed to pause destination migration"); 2041 } 2042 return; 2043 } 2044 2045 error_setg(errp, "migrate-pause is currently only supported " 2046 "during postcopy-active or postcopy-recover state"); 2047 } 2048 2049 bool migration_is_blocked(Error **errp) 2050 { 2051 GSList *blockers = migration_blockers[migrate_mode()]; 2052 2053 if (qemu_savevm_state_blocked(errp)) { 2054 return true; 2055 } 2056 2057 if (blockers) { 2058 error_propagate(errp, error_copy(blockers->data)); 2059 return true; 2060 } 2061 2062 return false; 2063 } 2064 2065 /* Returns true if continue to migrate, or false if error detected */ 2066 static bool migrate_prepare(MigrationState *s, bool resume, Error **errp) 2067 { 2068 if (resume) { 2069 if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { 2070 error_setg(errp, "Cannot resume if there is no " 2071 "paused migration"); 2072 return false; 2073 } 2074 2075 /* 2076 * Postcopy recovery won't work well with release-ram 2077 * capability since release-ram will drop the page buffer as 2078 * long as the page is put into the send buffer. So if there 2079 * is a network failure happened, any page buffers that have 2080 * not yet reached the destination VM but have already been 2081 * sent from the source VM will be lost forever. Let's refuse 2082 * the client from resuming such a postcopy migration. 2083 * Luckily release-ram was designed to only be used when src 2084 * and destination VMs are on the same host, so it should be 2085 * fine. 
2086 */ 2087 if (migrate_release_ram()) { 2088 error_setg(errp, "Postcopy recovery cannot work " 2089 "when release-ram capability is set"); 2090 return false; 2091 } 2092 2093 migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, 2094 MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP); 2095 2096 /* This is a resume, skip init status */ 2097 return true; 2098 } 2099 2100 if (migration_is_running()) { 2101 error_setg(errp, "There's a migration process in progress"); 2102 return false; 2103 } 2104 2105 if (runstate_check(RUN_STATE_INMIGRATE)) { 2106 error_setg(errp, "Guest is waiting for an incoming migration"); 2107 return false; 2108 } 2109 2110 if (runstate_check(RUN_STATE_POSTMIGRATE)) { 2111 error_setg(errp, "Can't migrate the vm that was paused due to " 2112 "previous migration"); 2113 return false; 2114 } 2115 2116 if (kvm_hwpoisoned_mem()) { 2117 error_setg(errp, "Can't migrate this vm with hardware poisoned memory, " 2118 "please reboot the vm and try again"); 2119 return false; 2120 } 2121 2122 if (migrate_mode() == MIG_MODE_CPR_EXEC && 2123 !s->parameters.has_cpr_exec_command) { 2124 error_setg(errp, "cpr-exec mode requires setting cpr-exec-command"); 2125 return false; 2126 } 2127 2128 if (migration_is_blocked(errp)) { 2129 return false; 2130 } 2131 2132 if (migrate_mapped_ram()) { 2133 if (migrate_tls()) { 2134 error_setg(errp, "Cannot use TLS with mapped-ram"); 2135 return false; 2136 } 2137 2138 if (migrate_multifd_compression()) { 2139 error_setg(errp, "Cannot use compression with mapped-ram"); 2140 return false; 2141 } 2142 } 2143 2144 if (migrate_mode_is_cpr(s)) { 2145 const char *conflict = NULL; 2146 2147 if (migrate_postcopy()) { 2148 conflict = "postcopy"; 2149 } else if (migrate_background_snapshot()) { 2150 conflict = "background snapshot"; 2151 } else if (migrate_colo()) { 2152 conflict = "COLO"; 2153 } 2154 2155 if (conflict) { 2156 error_setg(errp, "Cannot use %s with CPR", conflict); 2157 return false; 2158 } 2159 2160 if (s->parameters.mode == MIG_MODE_CPR_EXEC && 2161 !s->parameters.cpr_exec_command) { 2162 error_setg(errp, "Parameter 'cpr-exec-command' required for cpr-exec"); 2163 return false; 2164 } 2165 } 2166 2167 if (migrate_init(s, errp)) { 2168 return false; 2169 } 2170 2171 return true; 2172 } 2173 2174 static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, 2175 Error **errp); 2176 2177 static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb, 2178 void *opaque) 2179 { 2180 s->hup_source = qio_channel_create_watch(ioc, G_IO_HUP); 2181 g_source_set_callback(s->hup_source, cb, opaque, NULL); 2182 g_source_attach(s->hup_source, NULL); 2183 } 2184 2185 static void migrate_hup_delete(MigrationState *s) 2186 { 2187 if (s->hup_source) { 2188 g_source_destroy(s->hup_source); 2189 g_source_unref(s->hup_source); 2190 s->hup_source = NULL; 2191 } 2192 } 2193 2194 static gboolean qmp_migrate_finish_cb(QIOChannel *channel, 2195 GIOCondition cond, 2196 void *opaque) 2197 { 2198 MigrationAddress *addr = opaque; 2199 2200 qmp_migrate_finish(addr, false, NULL); 2201 2202 cpr_state_close(); 2203 migrate_hup_delete(migrate_get_current()); 2204 qapi_free_MigrationAddress(addr); 2205 return G_SOURCE_REMOVE; 2206 } 2207 2208 void qmp_migrate(const char *uri, bool has_channels, 2209 MigrationChannelList *channels, bool has_detach, bool detach, 2210 bool has_resume, bool resume, Error **errp) 2211 { 2212 bool resume_requested; 2213 Error *local_err = NULL; 2214 MigrationState *s = migrate_get_current(); 2215 
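    /*
     * 'channel' owns a MigrationChannel parsed from the legacy 'uri'
     * syntax below; g_autoptr() releases it automatically on return.
     */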
g_autoptr(MigrationChannel) channel = NULL; 2216 MigrationAddress *addr = NULL; 2217 MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL }; 2218 MigrationChannel *cpr_channel = NULL; 2219 2220 /* 2221 * Having preliminary checks for uri and channel 2222 */ 2223 if (!uri == !channels) { 2224 error_setg(errp, "need either 'uri' or 'channels' argument"); 2225 return; 2226 } 2227 2228 if (channels) { 2229 for ( ; channels; channels = channels->next) { 2230 MigrationChannelType type = channels->value->channel_type; 2231 2232 if (channelv[type]) { 2233 error_setg(errp, "Channel list has more than one %s entry", 2234 MigrationChannelType_str(type)); 2235 return; 2236 } 2237 channelv[type] = channels->value; 2238 } 2239 cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR]; 2240 addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr; 2241 if (!addr) { 2242 error_setg(errp, "Channel list has no main entry"); 2243 return; 2244 } 2245 } 2246 2247 if (uri) { 2248 /* caller uses the old URI syntax */ 2249 if (!migrate_uri_parse(uri, &channel, errp)) { 2250 return; 2251 } 2252 addr = channel->addr; 2253 } 2254 2255 /* transport mechanism not suitable for migration? */ 2256 if (!migration_transport_compatible(addr, errp)) { 2257 return; 2258 } 2259 2260 if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) { 2261 error_setg(errp, "missing 'cpr' migration channel"); 2262 return; 2263 } 2264 2265 resume_requested = has_resume && resume; 2266 if (!migrate_prepare(s, resume_requested, errp)) { 2267 /* Error detected, put into errp */ 2268 return; 2269 } 2270 2271 if (!cpr_state_save(cpr_channel, &local_err)) { 2272 goto out; 2273 } 2274 2275 /* 2276 * For cpr-transfer, the target may not be listening yet on the migration 2277 * channel, because first it must finish cpr_load_state. The target tells 2278 * us it is listening by closing the cpr-state socket. Wait for that HUP 2279 * event before connecting in qmp_migrate_finish. 2280 * 2281 * The HUP could occur because the target fails while reading CPR state, 2282 * in which case the target will not listen for the incoming migration 2283 * connection, so qmp_migrate_finish will fail to connect, and then recover. 
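     *
     * As a rough sketch (illustrative, not a wire-format description):
     *
     *   src: cpr_state_save()  --cpr channel-->  dst: cpr_load_state()
     *   dst: close cpr socket (HUP)         -->  src: qmp_migrate_finish_cb()
     *   src: connect migration channel      -->  dst: listening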
2284 */ 2285 if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) { 2286 migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb, 2287 QAPI_CLONE(MigrationAddress, addr)); 2288 2289 } else { 2290 qmp_migrate_finish(addr, resume_requested, errp); 2291 } 2292 2293 out: 2294 if (local_err) { 2295 migration_connect_set_error(s, local_err); 2296 error_propagate(errp, local_err); 2297 } 2298 } 2299 2300 static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, 2301 Error **errp) 2302 { 2303 MigrationState *s = migrate_get_current(); 2304 Error *local_err = NULL; 2305 2306 if (!resume_requested) { 2307 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { 2308 return; 2309 } 2310 } 2311 2312 if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { 2313 SocketAddress *saddr = &addr->u.socket; 2314 if (saddr->type == SOCKET_ADDRESS_TYPE_INET || 2315 saddr->type == SOCKET_ADDRESS_TYPE_UNIX || 2316 saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { 2317 socket_start_outgoing_migration(s, saddr, &local_err); 2318 } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) { 2319 fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err); 2320 } 2321 #ifdef CONFIG_RDMA 2322 } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) { 2323 rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err); 2324 #endif 2325 } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) { 2326 exec_start_outgoing_migration(s, addr->u.exec.args, &local_err); 2327 } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { 2328 file_start_outgoing_migration(s, &addr->u.file, &local_err); 2329 } else { 2330 error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", 2331 "a valid migration protocol"); 2332 migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, 2333 MIGRATION_STATUS_FAILED); 2334 } 2335 2336 if (local_err) { 2337 if (!resume_requested) { 2338 yank_unregister_instance(MIGRATION_YANK_INSTANCE); 2339 } 2340 migration_connect_set_error(s, local_err); 2341 error_propagate(errp, local_err); 2342 return; 2343 } 2344 } 2345 2346 void qmp_migrate_cancel(Error **errp) 2347 { 2348 /* 2349 * After postcopy migration has started, the source machine is not 2350 * recoverable in case of a migration error. This also means the 2351 * cancel command cannot be used as cancel should allow the 2352 * machine to continue operation. 
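     *
     * E.g. a QMP "migrate_cancel" issued while in postcopy-active fails
     * with the error below rather than killing a guest whose RAM already
     * partially lives on the destination.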
2353 */ 2354 if (migration_in_postcopy()) { 2355 error_setg(errp, "Postcopy migration in progress, cannot cancel."); 2356 return; 2357 } 2358 2359 migration_cancel(); 2360 } 2361 2362 void qmp_migrate_continue(MigrationStatus state, Error **errp) 2363 { 2364 MigrationState *s = migrate_get_current(); 2365 if (s->state != state) { 2366 error_setg(errp, "Migration not in expected state: %s", 2367 MigrationStatus_str(s->state)); 2368 return; 2369 } 2370 qemu_event_set(&s->pause_event); 2371 } 2372 2373 int migration_rp_wait(MigrationState *s) 2374 { 2375 /* If migration has failure already, ignore the wait */ 2376 if (migrate_has_error(s)) { 2377 return -1; 2378 } 2379 2380 qemu_sem_wait(&s->rp_state.rp_sem); 2381 2382 /* After wait, double check that there's no failure */ 2383 if (migrate_has_error(s)) { 2384 return -1; 2385 } 2386 2387 return 0; 2388 } 2389 2390 void migration_rp_kick(MigrationState *s) 2391 { 2392 qemu_sem_post(&s->rp_state.rp_sem); 2393 } 2394 2395 static struct rp_cmd_args { 2396 ssize_t len; /* -1 = variable */ 2397 const char *name; 2398 } rp_cmd_args[] = { 2399 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" }, 2400 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" }, 2401 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" }, 2402 [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" }, 2403 [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, 2404 [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, 2405 [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, 2406 [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, 2407 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, 2408 }; 2409 2410 /* 2411 * Process a request for pages received on the return path, 2412 * We're allowed to send more than requested (e.g. to round to our page size) 2413 * and we don't need to send pages that have already been sent. 2414 */ 2415 static void 2416 migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, 2417 ram_addr_t start, size_t len, Error **errp) 2418 { 2419 long our_host_ps = qemu_real_host_page_size(); 2420 2421 trace_migrate_handle_rp_req_pages(rbname, start, len); 2422 2423 /* 2424 * Since we currently insist on matching page sizes, just sanity check 2425 * we're being asked for whole host pages. 2426 */ 2427 if (!QEMU_IS_ALIGNED(start, our_host_ps) || 2428 !QEMU_IS_ALIGNED(len, our_host_ps)) { 2429 error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:" 2430 RAM_ADDR_FMT " len: %zd", start, len); 2431 return; 2432 } 2433 2434 ram_save_queue_pages(rbname, start, len, errp); 2435 } 2436 2437 static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name, 2438 Error **errp) 2439 { 2440 RAMBlock *block = qemu_ram_block_by_name(block_name); 2441 2442 if (!block) { 2443 error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'", 2444 block_name); 2445 return false; 2446 } 2447 2448 /* Fetch the received bitmap and refresh the dirty bitmap */ 2449 return ram_dirty_bitmap_reload(s, block, errp); 2450 } 2451 2452 static bool migrate_handle_rp_resume_ack(MigrationState *s, 2453 uint32_t value, Error **errp) 2454 { 2455 trace_source_return_path_thread_resume_ack(value); 2456 2457 if (value != MIGRATION_RESUME_ACK_VALUE) { 2458 error_setg(errp, "illegal resume_ack value %"PRIu32, value); 2459 return false; 2460 } 2461 2462 /* Now both sides are active. 
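     * The destination has acked the resume handshake; flip the source
     * back to postcopy-active below and unblock the migration thread.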
     */
    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    /* Notify the send thread that it's time to continue sending pages */
    migration_rp_kick(s);

    return true;
}

/*
 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if it
 * exists) in a safe way.
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file = NULL;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within the lock section
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same to the postcopy fast path socket too if there is one.
     * No locking needed because this qemufile should only be managed by
     * the return path thread.
     */
    if (ms->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
        qemu_file_shutdown(ms->postcopy_qemufile_src);
        qemu_fclose(ms->postcopy_qemufile_src);
        ms->postcopy_qemufile_src = NULL;
    }

    qemu_fclose(file);
}

/*
 * Handles messages sent on the return path towards the source VM
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    Error *err = NULL;
    int res;

    trace_source_return_path_thread_entry();
    rcu_register_thread();

    while (migration_is_running()) {
        trace_source_return_path_thread_loop_top();

        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            qemu_file_get_error_obj(rp, &err);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_setg(&err, "Received invalid message 0x%04x length 0x%04x",
                       header_type, header_len);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_setg(&err, "Received '%s' message (0x%04x) with "
                       "incorrect length %d expecting %zu",
                       rp_cmd_args[header_type].name, header_type, header_len,
                       (size_t)rp_cmd_args[header_type].len);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_setg(&err, "Failed reading data for message 0x%04x"
                       " read %d expected %d",
                       header_type, res, header_len);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_setg(&err, "Sibling indicated error %d", sibling_error);
            }
            /*
             * We'll let the main thread deal with closing the RP;
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
2572 */ 2573 goto out; 2574 2575 case MIG_RP_MSG_PONG: 2576 tmp32 = ldl_be_p(buf); 2577 trace_source_return_path_thread_pong(tmp32); 2578 qemu_sem_post(&ms->rp_state.rp_pong_acks); 2579 if (tmp32 == QEMU_VM_PING_PACKAGED_LOADED) { 2580 trace_source_return_path_thread_postcopy_package_loaded(); 2581 ms->postcopy_package_loaded = true; 2582 qemu_event_set(&ms->postcopy_package_loaded_event); 2583 } 2584 break; 2585 2586 case MIG_RP_MSG_REQ_PAGES: 2587 start = ldq_be_p(buf); 2588 len = ldl_be_p(buf + 8); 2589 migrate_handle_rp_req_pages(ms, NULL, start, len, &err); 2590 if (err) { 2591 goto out; 2592 } 2593 break; 2594 2595 case MIG_RP_MSG_REQ_PAGES_ID: 2596 expected_len = 12 + 1; /* header + termination */ 2597 2598 if (header_len >= expected_len) { 2599 start = ldq_be_p(buf); 2600 len = ldl_be_p(buf + 8); 2601 /* Now we expect an idstr */ 2602 tmp32 = buf[12]; /* Length of the following idstr */ 2603 buf[13 + tmp32] = '\0'; 2604 expected_len += tmp32; 2605 } 2606 if (header_len != expected_len) { 2607 error_setg(&err, "Req_Page_id with length %d expecting %zd", 2608 header_len, expected_len); 2609 goto out; 2610 } 2611 migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len, 2612 &err); 2613 if (err) { 2614 goto out; 2615 } 2616 break; 2617 2618 case MIG_RP_MSG_RECV_BITMAP: 2619 if (header_len < 1) { 2620 error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name"); 2621 goto out; 2622 } 2623 /* Format: len (1B) + idstr (<255B). This ends the idstr. */ 2624 buf[buf[0] + 1] = '\0'; 2625 if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) { 2626 goto out; 2627 } 2628 break; 2629 2630 case MIG_RP_MSG_RESUME_ACK: 2631 tmp32 = ldl_be_p(buf); 2632 if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) { 2633 goto out; 2634 } 2635 break; 2636 2637 case MIG_RP_MSG_SWITCHOVER_ACK: 2638 ms->switchover_acked = true; 2639 trace_source_return_path_thread_switchover_acked(); 2640 break; 2641 2642 default: 2643 break; 2644 } 2645 } 2646 2647 out: 2648 if (err) { 2649 migrate_set_error(ms, err); 2650 error_free(err); 2651 trace_source_return_path_thread_bad_end(); 2652 } 2653 2654 if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 2655 /* 2656 * this will be extremely unlikely: that we got yet another network 2657 * issue during recovering of the 1st network failure.. during this 2658 * period the main migration thread can be waiting on rp_sem for 2659 * this thread to sync with the other side. 2660 * 2661 * When this happens, explicitly kick the migration thread out of 2662 * RECOVER stage and back to PAUSED, so the admin can try 2663 * everything again. 
2664 */ 2665 migration_rp_kick(ms); 2666 } 2667 2668 trace_source_return_path_thread_end(); 2669 rcu_unregister_thread(); 2670 2671 return NULL; 2672 } 2673 2674 static void open_return_path_on_source(MigrationState *ms) 2675 { 2676 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); 2677 2678 trace_open_return_path_on_source(); 2679 2680 qemu_thread_create(&ms->rp_state.rp_thread, MIGRATION_THREAD_SRC_RETURN, 2681 source_return_path_thread, ms, QEMU_THREAD_JOINABLE); 2682 ms->rp_state.rp_thread_created = true; 2683 2684 trace_open_return_path_on_source_continue(); 2685 } 2686 2687 /* Return true if error detected, or false otherwise */ 2688 static bool close_return_path_on_source(MigrationState *ms) 2689 { 2690 if (!ms->rp_state.rp_thread_created) { 2691 return false; 2692 } 2693 2694 trace_migration_return_path_end_before(); 2695 2696 /* 2697 * If this is a normal exit then the destination will send a SHUT 2698 * and the rp_thread will exit, however if there's an error we 2699 * need to cause it to exit. shutdown(2), if we have it, will 2700 * cause it to unblock if it's stuck waiting for the destination. 2701 */ 2702 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { 2703 if (migrate_has_error(ms) && ms->rp_state.from_dst_file) { 2704 qemu_file_shutdown(ms->rp_state.from_dst_file); 2705 } 2706 } 2707 2708 qemu_thread_join(&ms->rp_state.rp_thread); 2709 ms->rp_state.rp_thread_created = false; 2710 migration_release_dst_files(ms); 2711 trace_migration_return_path_end_after(); 2712 2713 /* Return path will persist the error in MigrationState when quit */ 2714 return migrate_has_error(ms); 2715 } 2716 2717 static inline void 2718 migration_wait_main_channel(MigrationState *ms) 2719 { 2720 /* Wait until one PONG message received */ 2721 qemu_sem_wait(&ms->rp_state.rp_pong_acks); 2722 } 2723 2724 /* 2725 * Switch from normal iteration to postcopy 2726 * Returns non-0 on error 2727 */ 2728 static int postcopy_start(MigrationState *ms, Error **errp) 2729 { 2730 int ret; 2731 QIOChannelBuffer *bioc; 2732 QEMUFile *fb; 2733 2734 /* 2735 * Now we're 100% sure to switch to postcopy, so JSON writer won't be 2736 * useful anymore. Free the resources early if it is there. Clearing 2737 * the vmdesc also means any follow up vmstate_save()s will start to 2738 * skip all JSON operations, which can shrink postcopy downtime. 2739 */ 2740 migration_cleanup_json_writer(ms); 2741 2742 if (migrate_postcopy_preempt()) { 2743 migration_wait_main_channel(ms); 2744 if (postcopy_preempt_establish_channel(ms)) { 2745 if (ms->state != MIGRATION_STATUS_CANCELLING) { 2746 migrate_set_state(&ms->state, ms->state, 2747 MIGRATION_STATUS_FAILED); 2748 } 2749 error_setg(errp, "%s: Failed to establish preempt channel", 2750 __func__); 2751 return -1; 2752 } 2753 } 2754 2755 if (!qemu_savevm_state_postcopy_prepare(ms->to_dst_file, errp)) { 2756 return -1; 2757 } 2758 2759 trace_postcopy_start(); 2760 bql_lock(); 2761 trace_postcopy_start_set_run(); 2762 2763 ret = migration_stop_vm(ms, RUN_STATE_FINISH_MIGRATE); 2764 if (ret < 0) { 2765 error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__); 2766 goto fail; 2767 } 2768 2769 if (!migration_switchover_start(ms, errp)) { 2770 goto fail; 2771 } 2772 2773 /* 2774 * Cause any non-postcopiable, but iterative devices to 2775 * send out their final data. 
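     * (Postcopiable devices, RAM being the canonical example, keep
     * iterating after the switchover instead.)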
     */
    ret = qemu_savevm_state_complete_precopy_iterable(ms->to_dst_file, true);
    if (ret) {
        error_setg(errp, "Postcopy save non-postcopiable iterables failed");
        goto fail;
    }

    /*
     * In the "finish migrate" run state, and with the BQL held, everything
     * should be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw away any pages it's already
     * received that are dirty
     */
    if (migrate_postcopy_ram()) {
        ram_postcopy_send_discard_bitmap(ms);
    }

    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it. Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each device's load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    ret = qemu_savevm_state_complete_precopy_non_iterable(fb, true);
    if (ret) {
        error_setg(errp, "Postcopy save non-iterable device states failed");
        goto fail_closefb;
    }

    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }
    if (ms->rp_state.rp_thread_created) {
        /*
         * This ping will tell us that all non-postcopiable device state
         * has been successfully loaded and the destination is about to
         * start. When the response is received, it will trigger the
         * transition from POSTCOPY_DEVICE to POSTCOPY_ACTIVE state.
         */
        qemu_savevm_send_ping(fb, QEMU_VM_PING_PACKAGED_LOADED);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /*
     * Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again that we've not got any errors.
2846 */ 2847 ret = qemu_file_get_error(ms->to_dst_file); 2848 if (ret) { 2849 error_setg(errp, "postcopy_start: Migration stream errored (pre package)"); 2850 goto fail_closefb; 2851 } 2852 2853 /* Now send that blob */ 2854 if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) { 2855 error_setg(errp, "%s: Failed to send packaged data", __func__); 2856 goto fail_closefb; 2857 } 2858 qemu_fclose(fb); 2859 2860 /* Send a notify to give a chance for anything that needs to happen 2861 * at the transition to postcopy and after the device state; in particular 2862 * spice needs to trigger a transition now 2863 */ 2864 migration_call_notifiers(ms, MIG_EVENT_PRECOPY_DONE, NULL); 2865 2866 migration_downtime_end(ms); 2867 2868 if (migrate_postcopy_ram()) { 2869 /* 2870 * Although this ping is just for debug, it could potentially be 2871 * used for getting a better measurement of downtime at the source. 2872 */ 2873 qemu_savevm_send_ping(ms->to_dst_file, 4); 2874 } 2875 2876 if (migrate_release_ram()) { 2877 ram_postcopy_migrated_memory_release(ms); 2878 } 2879 2880 ret = qemu_file_get_error(ms->to_dst_file); 2881 if (ret) { 2882 error_setg_errno(errp, -ret, "postcopy_start: Migration stream error"); 2883 goto fail; 2884 } 2885 trace_postcopy_preempt_enabled(migrate_postcopy_preempt()); 2886 2887 /* 2888 * Now postcopy officially started, switch to postcopy bandwidth that 2889 * user specified. 2890 */ 2891 migration_rate_set(migrate_max_postcopy_bandwidth()); 2892 2893 /* 2894 * Now, switchover looks all fine, switching to POSTCOPY_DEVICE, or 2895 * directly to POSTCOPY_ACTIVE if there is no return path. 2896 */ 2897 migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE, 2898 ms->rp_state.rp_thread_created ? 2899 MIGRATION_STATUS_POSTCOPY_DEVICE : 2900 MIGRATION_STATUS_POSTCOPY_ACTIVE); 2901 2902 bql_unlock(); 2903 2904 return ret; 2905 2906 fail_closefb: 2907 qemu_fclose(fb); 2908 fail: 2909 if (ms->state != MIGRATION_STATUS_CANCELLING) { 2910 migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); 2911 } 2912 migration_block_activate(NULL); 2913 migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL); 2914 bql_unlock(); 2915 return -1; 2916 } 2917 2918 /** 2919 * @migration_switchover_prepare: Start VM switchover procedure 2920 * 2921 * @s: The migration state object pointer 2922 * 2923 * Prepares for the switchover, depending on "pause-before-switchover" 2924 * capability. 2925 * 2926 * If cap set, state machine goes like: 2927 * [postcopy-]active -> pre-switchover -> device 2928 * 2929 * If cap not set: 2930 * [postcopy-]active -> device 2931 * 2932 * Returns: true on success, false on interruptions. 2933 */ 2934 static bool migration_switchover_prepare(MigrationState *s) 2935 { 2936 /* Concurrent cancellation? Quit */ 2937 if (s->state == MIGRATION_STATUS_CANCELLING) { 2938 return false; 2939 } 2940 2941 /* 2942 * No matter precopy or postcopy, since we still hold BQL it must not 2943 * change concurrently to CANCELLING, so it must be either ACTIVE or 2944 * POSTCOPY_ACTIVE. 2945 */ 2946 assert(migration_is_active()); 2947 2948 /* If the pre stage not requested, directly switch to DEVICE */ 2949 if (!migrate_pause_before_switchover()) { 2950 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE); 2951 return true; 2952 } 2953 2954 /* 2955 * Since leaving this state is not atomic with setting the event 2956 * it's possible that someone could have issued multiple migrate_continue 2957 * and the event is incorrectly set at this point so reset it. 
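     *
     * With pause-before-switchover enabled, the expected QMP flow is
     * (illustrative):
     *   <- MIGRATION event {"status": "pre-switchover"}
     *   -> {"execute": "migrate-continue",
     *       "arguments": {"state": "pre-switchover"}}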
     */
    qemu_event_reset(&s->pause_event);

    /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
    bql_unlock();

    qemu_event_wait(&s->pause_event);

    bql_lock();
    /*
     * After the BQL is released and retaken, the state can be CANCELLING
     * if that happened during the wait. Only change the state if it's
     * still pre-switchover.
     */
    migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                      MIGRATION_STATUS_DEVICE);

    return s->state == MIGRATION_STATUS_DEVICE;
}

static bool migration_switchover_start(MigrationState *s, Error **errp)
{
    ERRP_GUARD();

    if (!migration_switchover_prepare(s)) {
        error_setg(errp, "Switchover is interrupted");
        return false;
    }

    /* Inactivate disks except in COLO */
    if (!migrate_colo()) {
        /*
         * Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_activate_all() on the other end won't fail.
         */
        if (!migration_block_inactivate()) {
            error_setg(errp, "Block inactivate failed during switchover");
            return false;
        }
    }

    migration_rate_set(RATE_LIMIT_DISABLED);

    precopy_notify_complete();

    qemu_savevm_maybe_send_switchover_start(s->to_dst_file);

    return true;
}

static int migration_completion_precopy(MigrationState *s)
{
    int ret;

    bql_lock();

    if (!migrate_mode_is_cpr(s)) {
        ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
        if (ret < 0) {
            goto out_unlock;
        }
    }

    if (!migration_switchover_start(s, NULL)) {
        ret = -EFAULT;
        goto out_unlock;
    }

    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false);
out_unlock:
    bql_unlock();
    return ret;
}

static void migration_completion_postcopy(MigrationState *s)
{
    trace_migration_completion_postcopy_end();

    bql_lock();
    qemu_savevm_state_complete_postcopy(s->to_dst_file);
    bql_unlock();

    /*
     * Shutdown the postcopy fast path thread. This is only needed when the
     * dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need this.
     */
    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
        postcopy_preempt_shutdown_file(s);
    }

    trace_migration_completion_postcopy_end_after_complete();
}

/**
 * migration_completion: Used by migration_thread when there's not much left.
 * The caller 'breaks' the loop when this returns.
3055 * 3056 * @s: Current migration state 3057 */ 3058 static void migration_completion(MigrationState *s) 3059 { 3060 int ret = 0; 3061 Error *local_err = NULL; 3062 3063 if (s->state == MIGRATION_STATUS_ACTIVE) { 3064 ret = migration_completion_precopy(s); 3065 } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3066 migration_completion_postcopy(s); 3067 } else { 3068 ret = -1; 3069 } 3070 3071 if (ret < 0) { 3072 goto fail; 3073 } 3074 3075 if (close_return_path_on_source(s)) { 3076 goto fail; 3077 } 3078 3079 if (qemu_file_get_error(s->to_dst_file)) { 3080 trace_migration_completion_file_err(); 3081 goto fail; 3082 } 3083 3084 if (migrate_colo() && s->state == MIGRATION_STATUS_DEVICE) { 3085 /* COLO does not support postcopy */ 3086 migrate_set_state(&s->state, MIGRATION_STATUS_DEVICE, 3087 MIGRATION_STATUS_COLO); 3088 } else { 3089 migration_completion_end(s); 3090 } 3091 3092 return; 3093 3094 fail: 3095 if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) { 3096 migrate_set_error(s, local_err); 3097 error_free(local_err); 3098 } else if (ret) { 3099 error_setg_errno(&local_err, -ret, "Error in migration completion"); 3100 migrate_set_error(s, local_err); 3101 error_free(local_err); 3102 } 3103 3104 if (s->state != MIGRATION_STATUS_CANCELLING) { 3105 migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); 3106 } 3107 } 3108 3109 /** 3110 * bg_migration_completion: Used by bg_migration_thread when after all the 3111 * RAM has been saved. The caller 'breaks' the loop when this returns. 3112 * 3113 * @s: Current migration state 3114 */ 3115 static void bg_migration_completion(MigrationState *s) 3116 { 3117 int current_active_state = s->state; 3118 3119 if (s->state == MIGRATION_STATUS_ACTIVE) { 3120 /* 3121 * By this moment we have RAM content saved into the migration stream. 3122 * The next step is to flush the non-RAM content (device state) 3123 * right after the ram content. The device state has been stored into 3124 * the temporary buffer before RAM saving started. 3125 */ 3126 qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); 3127 qemu_fflush(s->to_dst_file); 3128 } else if (s->state == MIGRATION_STATUS_CANCELLING) { 3129 return; 3130 } 3131 3132 if (qemu_file_get_error(s->to_dst_file)) { 3133 trace_migration_completion_file_err(); 3134 goto fail; 3135 } 3136 3137 migration_completion_end(s); 3138 return; 3139 3140 fail: 3141 migrate_set_state(&s->state, current_active_state, 3142 MIGRATION_STATUS_FAILED); 3143 } 3144 3145 typedef enum MigThrError { 3146 /* No error detected */ 3147 MIG_THR_ERR_NONE = 0, 3148 /* Detected error, but resumed successfully */ 3149 MIG_THR_ERR_RECOVERED = 1, 3150 /* Detected fatal error, need to exit */ 3151 MIG_THR_ERR_FATAL = 2, 3152 } MigThrError; 3153 3154 static int postcopy_resume_handshake(MigrationState *s) 3155 { 3156 qemu_savevm_send_postcopy_resume(s->to_dst_file); 3157 3158 while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { 3159 if (migration_rp_wait(s)) { 3160 return -1; 3161 } 3162 } 3163 3164 if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { 3165 return 0; 3166 } 3167 3168 return -1; 3169 } 3170 3171 /* Return zero if success, or <0 for error */ 3172 static int postcopy_do_resume(MigrationState *s) 3173 { 3174 int ret; 3175 3176 /* 3177 * Call all the resume_prepare() hooks, so that modules can be 3178 * ready for the migration resume. 
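     * (resume_prepare is an optional SaveVMHandlers callback; handlers
     * that don't implement it are simply skipped.)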
     */
    ret = qemu_savevm_state_resume_prepare(s);
    if (ret) {
        error_report("%s: resume_prepare() failure detected: %d",
                     __func__, ret);
        return ret;
    }

    /*
     * If preempt is enabled, re-establish the preempt channel. Note that
     * we do it after resume prepare to make sure the main channel will be
     * created before the preempt channel. E.g. with a weak network, the
     * dest QEMU may otherwise get confused about the order in which the
     * preempt and main channels are set up. This guarantees the correct
     * order.
     */
    ret = postcopy_preempt_establish_channel(s);
    if (ret) {
        error_report("%s: postcopy_preempt_establish_channel(): %d",
                     __func__, ret);
        return ret;
    }

    /*
     * Last handshake with the destination on resume (the destination will
     * switch to postcopy-active afterwards)
     */
    ret = postcopy_resume_handshake(s);
    if (ret) {
        error_report("%s: handshake failed: %d", __func__, ret);
        return ret;
    }

    return 0;
}

/*
 * We don't return until we are in a safe state to continue the current
 * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or
 * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
 */
static MigThrError postcopy_pause(MigrationState *s)
{
    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

    while (true) {
        QEMUFile *file;

        /*
         * We're already pausing, so ignore any errors on the return
         * path and just wait for the thread to finish. It will be
         * re-created when we resume.
         */
        close_return_path_on_source(s);

        /*
         * The current channel is possibly broken. Release it. Note that
         * this is guaranteed even without the lock because to_dst_file
         * should only be modified by the migration thread. That also
         * guarantees that the unregister of yank is safe too without the
         * lock. It should be safe even to be within the qemu_file_lock,
         * but we didn't do that to avoid taking more mutexes (yank_lock)
         * within qemu_file_lock. TL;DR: we make the qemu_file_lock
         * critical section as small as possible.
         */
        assert(s->to_dst_file);
        migration_ioc_unregister_yank_from_file(s->to_dst_file);
        qemu_mutex_lock(&s->qemu_file_lock);
        file = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);

        qemu_file_shutdown(file);
        qemu_fclose(file);

        migrate_set_state(&s->state, s->state,
                          MIGRATION_STATUS_POSTCOPY_PAUSED);

        error_report("Detected IO failure for postcopy. "
                     "Migration paused.");

        /*
         * We wait until things are fixed up. Then someone will set the
         * status back for us.
         */
        do {
            qemu_sem_wait(&s->postcopy_pause_sem);
        } while (postcopy_is_paused(s->state));

        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
            /* Woken up by a recover procedure. Give it a shot */

            /* Do the resume logic */
            if (postcopy_do_resume(s) == 0) {
                /* Let's continue! */
                trace_postcopy_pause_continued();
                return MIG_THR_ERR_RECOVERED;
            } else {
                /*
                 * Something wrong happened during the recovery, let's
                 * pause again. Pause is always better than throwing
                 * data away.
                 */
                continue;
            }
        } else {
            /* This is not right... Time to quit.
             */
            return MIG_THR_ERR_FATAL;
        }
    }
}

void migration_file_set_error(int ret, Error *err)
{
    MigrationState *s = current_migration;

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->to_dst_file) {
            qemu_file_set_error_obj(s->to_dst_file, ret, err);
        } else if (err) {
            error_report_err(err);
        }
    }
}

static MigThrError migration_detect_error(MigrationState *s)
{
    int ret;
    int state = s->state;
    Error *local_error = NULL;

    if (state == MIGRATION_STATUS_CANCELLING ||
        state == MIGRATION_STATUS_CANCELLED) {
        /* End the migration, but don't set the state to failed */
        return MIG_THR_ERR_FATAL;
    }

    /*
     * Try to detect any file errors. Note that postcopy_qemufile_src will
     * be NULL when postcopy preempt is not enabled.
     */
    ret = qemu_file_get_error_obj_any(s->to_dst_file,
                                      s->postcopy_qemufile_src,
                                      &local_error);
    if (!ret) {
        /* Everything is fine */
        assert(!local_error);
        return MIG_THR_ERR_NONE;
    }

    if (local_error) {
        migrate_set_error(s, local_error);
        error_free(local_error);
    }

    if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
        /*
         * For postcopy, we allow the network to be down for a
         * while. After that, it can be continued by a
         * recovery phase.
         */
        return postcopy_pause(s);
    } else {
        /*
         * For precopy (or postcopy with an error outside IO, or before
         * the dest starts), we fail immediately.
         */
        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
        trace_migration_thread_file_err();

        /* Time to stop the migration, now. */
        return MIG_THR_ERR_FATAL;
    }
}

static void migration_completion_end(MigrationState *s)
{
    uint64_t bytes = migration_transferred_bytes();
    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t transfer_time;

    /*
     * Take the BQL here so that query-migrate on the QMP thread sees:
     * - atomic update of s->total_time and s->mbps;
     * - correct ordering of s->mbps update vs. s->state;
     */
    bql_lock();
    migration_downtime_end(s);
    s->total_time = end_time - s->start_time;
    transfer_time = s->total_time - s->setup_time;
    if (transfer_time) {
        s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
    }

    migrate_set_state(&s->state, s->state,
                      MIGRATION_STATUS_COMPLETED);
    bql_unlock();
}

static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields at the same time to avoid mismatched
     * info leading to a wrong speed calculation.
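     * They are read again together in migration_update_counters(), which
     * derives bandwidth and page rates from their deltas.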
3381 */ 3382 s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 3383 s->iteration_initial_bytes = migration_transferred_bytes(); 3384 s->iteration_initial_pages = ram_get_total_transferred_pages(); 3385 } 3386 3387 static void migration_update_counters(MigrationState *s, 3388 int64_t current_time) 3389 { 3390 uint64_t transferred, transferred_pages, time_spent; 3391 uint64_t current_bytes; /* bytes transferred since the beginning */ 3392 uint64_t switchover_bw; 3393 /* Expected bandwidth when switching over to destination QEMU */ 3394 double expected_bw_per_ms; 3395 double bandwidth; 3396 3397 if (current_time < s->iteration_start_time + BUFFER_DELAY) { 3398 return; 3399 } 3400 3401 switchover_bw = migrate_avail_switchover_bandwidth(); 3402 current_bytes = migration_transferred_bytes(); 3403 transferred = current_bytes - s->iteration_initial_bytes; 3404 time_spent = current_time - s->iteration_start_time; 3405 bandwidth = (double)transferred / time_spent; 3406 3407 if (switchover_bw) { 3408 /* 3409 * If the user specified a switchover bandwidth, let's trust the 3410 * user so that can be more accurate than what we estimated. 3411 */ 3412 expected_bw_per_ms = switchover_bw / 1000; 3413 } else { 3414 /* If the user doesn't specify bandwidth, we use the estimated */ 3415 expected_bw_per_ms = bandwidth; 3416 } 3417 3418 s->threshold_size = expected_bw_per_ms * migrate_downtime_limit(); 3419 3420 s->mbps = (((double) transferred * 8.0) / 3421 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; 3422 3423 transferred_pages = ram_get_total_transferred_pages() - 3424 s->iteration_initial_pages; 3425 s->pages_per_second = (double) transferred_pages / 3426 (((double) time_spent / 1000.0)); 3427 3428 /* 3429 * if we haven't sent anything, we don't want to 3430 * recalculate. 10000 is a small enough number for our purposes 3431 */ 3432 if (stat64_get(&mig_stats.dirty_pages_rate) && 3433 transferred > 10000) { 3434 s->expected_downtime = 3435 stat64_get(&mig_stats.dirty_bytes_last_sync) / expected_bw_per_ms; 3436 } 3437 3438 migration_rate_reset(); 3439 3440 update_iteration_initial_status(s); 3441 3442 trace_migrate_transferred(transferred, time_spent, 3443 /* Both in unit bytes/ms */ 3444 bandwidth, switchover_bw / 1000, 3445 s->threshold_size); 3446 } 3447 3448 static bool migration_can_switchover(MigrationState *s) 3449 { 3450 if (!migrate_switchover_ack()) { 3451 return true; 3452 } 3453 3454 /* No reason to wait for switchover ACK if VM is stopped */ 3455 if (!runstate_is_running()) { 3456 return true; 3457 } 3458 3459 return s->switchover_acked; 3460 } 3461 3462 /* Migration thread iteration status */ 3463 typedef enum { 3464 MIG_ITERATE_RESUME, /* Resume current iteration */ 3465 MIG_ITERATE_SKIP, /* Skip current iteration */ 3466 MIG_ITERATE_BREAK, /* Break the loop */ 3467 } MigIterateState; 3468 3469 /* 3470 * Return true if continue to the next iteration directly, false 3471 * otherwise. 
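 * (More precisely: returns a MigIterateState; MIG_ITERATE_BREAK ends the
 * migration thread's main loop.)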
 */
static MigIterateState migration_iteration_run(MigrationState *s)
{
    uint64_t must_precopy, can_postcopy, pending_size;
    Error *local_err = NULL;
    bool in_postcopy = (s->state == MIGRATION_STATUS_POSTCOPY_DEVICE ||
                        s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
    bool can_switchover = migration_can_switchover(s);
    bool complete_ready;

    /* Fast path - get the estimated amount of pending data */
    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
    pending_size = must_precopy + can_postcopy;
    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);

    if (in_postcopy) {
        /*
         * Iterate in postcopy until all pending data is flushed. Note that
         * postcopy completion doesn't rely on can_switchover, because when
         * POSTCOPY_ACTIVE it means switchover already happened.
         */
        complete_ready = !pending_size;
        if (s->state == MIGRATION_STATUS_POSTCOPY_DEVICE &&
            (s->postcopy_package_loaded || complete_ready)) {
            /*
             * If the package has been loaded, the event is set and we will
             * immediately transition to POSTCOPY_ACTIVE. If we are ready
             * for completion, we need to wait for the destination to load
             * the postcopy package before actually completing.
             */
            qemu_event_wait(&s->postcopy_package_loaded_event);
            migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_DEVICE,
                              MIGRATION_STATUS_POSTCOPY_ACTIVE);
        }
    } else {
        /*
         * Exact pending reporting is only needed for precopy. Taking RAM
         * as an example, there'll be no extra dirty information after
         * postcopy has started, so ESTIMATE should always match EXACT
         * during the postcopy phase.
         */
        if (pending_size < s->threshold_size) {
            qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
            pending_size = must_precopy + can_postcopy;
            trace_migrate_pending_exact(pending_size, must_precopy,
                                        can_postcopy);
        }

        /* Should we switch to postcopy now? */
        if (must_precopy <= s->threshold_size &&
            can_switchover && qatomic_read(&s->start_postcopy)) {
            if (postcopy_start(s, &local_err)) {
                migrate_set_error(s, local_err);
                error_report_err(local_err);
            }
            return MIG_ITERATE_SKIP;
        }

        /*
         * For precopy, migration can complete only if:
         *
         * (1) Switchover is acknowledged by the destination
         * (2) Pending size is no more than the threshold specified
         *     (which was calculated from the expected downtime)
         */
        complete_ready = can_switchover && (pending_size <= s->threshold_size);
    }

    if (complete_ready) {
        trace_migration_thread_low_pending(pending_size);
        migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    /* Just another iteration step */
    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
    return MIG_ITERATE_RESUME;
}

static void migration_iteration_finish(MigrationState *s)
{
    Error *local_err = NULL;

    bql_lock();

    /*
     * If we enabled cpu throttling for auto-converge, turn it off.
     * Stopping CPU throttle should be serialized by BQL to avoid
     * racing for the throttle_dirty_sync_timer.
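     * (The timer is armed in migration_thread() via
     * cpu_throttle_dirty_sync_timer(true) when auto-converge is enabled.)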
3561 */ 3562 if (migrate_auto_converge()) { 3563 cpu_throttle_stop(); 3564 } 3565 3566 switch (s->state) { 3567 case MIGRATION_STATUS_COMPLETED: 3568 runstate_set(RUN_STATE_POSTMIGRATE); 3569 break; 3570 case MIGRATION_STATUS_COLO: 3571 assert(migrate_colo()); 3572 migrate_start_colo_process(s); 3573 s->vm_old_state = RUN_STATE_RUNNING; 3574 /* Fallthrough */ 3575 case MIGRATION_STATUS_FAILED: 3576 case MIGRATION_STATUS_CANCELLED: 3577 case MIGRATION_STATUS_CANCELLING: 3578 if (!migration_block_activate(&local_err)) { 3579 /* 3580 * Re-activate the block drives if they're inactivated. 3581 * 3582 * If it fails (e.g. in case of a split brain, where dest QEMU 3583 * might have taken some of the drive locks and running!), do 3584 * not start VM, instead wait for mgmt to decide the next step. 3585 * 3586 * If dest already started, it means dest QEMU should contain 3587 * all the data it needs and it properly owns all the drive 3588 * locks. Then even if src QEMU got a FAILED in migration, it 3589 * normally should mean we should treat the migration as 3590 * COMPLETED. 3591 * 3592 * NOTE: it's not safe anymore to start VM on src now even if 3593 * dest would release the drive locks. It's because as long as 3594 * dest started running then only dest QEMU's RAM is consistent 3595 * with the shared storage. 3596 */ 3597 error_free(local_err); 3598 break; 3599 } 3600 if (runstate_is_live(s->vm_old_state)) { 3601 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 3602 vm_start(); 3603 } 3604 } else { 3605 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 3606 runstate_set(s->vm_old_state); 3607 } 3608 } 3609 break; 3610 3611 default: 3612 /* Should not reach here, but if so, forgive the VM. */ 3613 error_report("%s: Unknown ending state %d", __func__, s->state); 3614 break; 3615 } 3616 3617 migration_bh_schedule(migration_cleanup_bh, s); 3618 bql_unlock(); 3619 } 3620 3621 static void bg_migration_iteration_finish(MigrationState *s) 3622 { 3623 /* 3624 * Stop tracking RAM writes - un-protect memory, un-register UFFD 3625 * memory ranges, flush kernel wait queues and wake up threads 3626 * waiting for write fault to be resolved. 3627 */ 3628 ram_write_tracking_stop(); 3629 3630 bql_lock(); 3631 switch (s->state) { 3632 case MIGRATION_STATUS_COMPLETED: 3633 case MIGRATION_STATUS_ACTIVE: 3634 case MIGRATION_STATUS_FAILED: 3635 case MIGRATION_STATUS_CANCELLED: 3636 case MIGRATION_STATUS_CANCELLING: 3637 break; 3638 3639 default: 3640 /* Should not reach here, but if so, forgive the VM. */ 3641 error_report("%s: Unknown ending state %d", __func__, s->state); 3642 break; 3643 } 3644 3645 migration_bh_schedule(migration_cleanup_bh, s); 3646 bql_unlock(); 3647 } 3648 3649 /* 3650 * Return true if continue to the next iteration directly, false 3651 * otherwise. 
 */
static MigIterateState bg_migration_iteration_run(MigrationState *s)
{
    int res;

    res = qemu_savevm_state_iterate(s->to_dst_file, false);
    if (res > 0) {
        bg_migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}

void migration_make_urgent_request(void)
{
    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
}

void migration_consume_urgent_request(void)
{
    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
}

/* Returns true if the rate limiting was broken by an urgent request */
bool migration_rate_limit(void)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    MigrationState *s = migrate_get_current();

    bool urgent = false;
    migration_update_counters(s, now);
    if (migration_rate_exceeded(s->to_dst_file)) {

        if (qemu_file_get_error(s->to_dst_file)) {
            return false;
        }
        /*
         * Wait for a delay to do rate limiting OR
         * something urgent to post the semaphore.
         */
        int ms = s->iteration_start_time + BUFFER_DELAY - now;
        trace_migration_rate_limit_pre(ms);
        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
            /*
             * We were woken by one or more urgent things but
             * the timedwait will have consumed one of them.
             * The service routine for the urgent wake will decrement
             * the semaphore itself for each item it consumes,
             * so add back the one we just consumed.
             */
            qemu_sem_post(&s->rate_limit_sem);
            urgent = true;
        }
        trace_migration_rate_limit_post(urgent);
    }
    return urgent;
}

/*
 * If failover devices are present, wait until they are completely
 * unplugged
 */
static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                    int new_state)
{
    if (qemu_savevm_state_guest_unplug_pending()) {
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);

        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
               qemu_savevm_state_guest_unplug_pending()) {
            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
        }
        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
            int timeout = 120; /* 30 seconds */
            /*
             * The migration has been canceled, but as we have started an
             * unplug we must wait for it to finish to be able to plug the
             * card back in
             */
            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
            }
            if (qemu_savevm_state_guest_unplug_pending() &&
                !qtest_enabled()) {
                warn_report("migration: partially unplugged device on "
                            "failure");
            }
        }

        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
    } else {
        migrate_set_state(&s->state, old_state, new_state);
    }
}

/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
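 *
 * High-level flow, as a sketch of the function below (not a contract):
 *   setup (savevm header, return path, postcopy advise)
 *   -> loop: iterate savevm state / rate-limit / detect errors
 *   -> completion (or postcopy switchover), then cleanup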
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    MigrationThread *thread = NULL;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    MigThrError thr_error;
    bool urgent = false;
    Error *local_err = NULL;
    int ret;

    thread = migration_threads_add(MIGRATION_THREAD_SRC_MAIN,
                                   qemu_get_thread_id());

    rcu_register_thread();

    update_iteration_initial_status(s);

    if (!multifd_send_setup()) {
        goto out;
    }

    bql_lock();
    qemu_savevm_state_header(s->to_dst_file);
    bql_unlock();

    /*
     * If we opened the return path, we need to make sure the dst has it
     * opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo()) {
        /* Notify the migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    if (migrate_auto_converge()) {
        /* Start RAMBlock dirty bitmap sync timer */
        cpu_throttle_dirty_sync_timer(true);
    }

    bql_lock();
    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
    bql_unlock();

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    /*
     * Handle SETUP failures after waiting for virtio-net-failover
     * devices to unplug. This is to preserve migration state transitions.
     */
    if (ret) {
        migrate_set_error(s, local_err);
        error_free(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        goto out;
    }

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active()) {
        if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from, e.g., a network failure; reset all
             * the local variables.
    while (migration_is_active()) {
        if (urgent || !migration_rate_exceeded(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from, e.g., a network failure; reset all
             * the local variables. This is important to avoid breaking
             * the transferred_bytes and bandwidth calculations.
             */
            update_iteration_initial_status(s);
        }

        urgent = migration_rate_limit();
    }

out:
    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    migration_threads_remove(thread);
    return NULL;
}

static void bg_migration_vm_start_bh(void *opaque)
{
    MigrationState *s = opaque;

    vm_resume(s->vm_old_state);
    migration_downtime_end(s);
}

/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of the live migration mechanism,
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel. Compared to the existing dirty page
 * logging migration, much less stream traffic is produced, resulting in
 * smaller snapshot images, simply because no page duplicates can get
 * into the stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, whereas
 * the dirty page logging mechanism effectively saves the state of the VM
 * as it is at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;
    Error *local_err = NULL;
    int ret;

    rcu_register_thread();

    migration_rate_set(RATE_LIMIT_DISABLED);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save the vmstate for the moment when the migration was
     * initiated, but we also want to save RAM content while the VM is
     * running. The RAM content should appear first in the vmstate. So, we
     * first stash the non-RAM part of the vmstate in a temporary buffer,
     * then write the RAM part of the vmstate to the migration stream with
     * vCPUs running and, finally, write the stashed non-RAM part of the
     * vmstate from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
    object_unref(OBJECT(s->bioc));
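
    /*
     * Schematic of the resulting stream layout (illustrative only):
     *
     *   to_dst_file:  [header][RAM pages, written live][non-RAM state]
     *                                                   ^ copied from fb
     *   fb (s->bioc): non-RAM device state, stashed while the VM is paused
     */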
    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    bql_lock();
    qemu_savevm_state_header(s->to_dst_file);
    ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
    bql_unlock();

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    /*
     * Handle SETUP failures after waiting for virtio-net-failover
     * devices to unplug. This is to preserve migration state transitions.
     */
    if (ret) {
        migrate_set_error(s, local_err);
        error_free(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        goto fail_setup;
    }

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    bql_lock();

    if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
        goto fail;
    }

    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, an explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize the UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;

    /*
     * Start the VM from the BH handler to avoid a write-fault lock here.
     * UFFD-WP protection for the whole of RAM is already enabled, so
     * calling the VM state change notifiers from vm_start() would initiate
     * writes to virtio VQ memory, which is in the write-protected region.
     */
    migration_bh_schedule(bg_migration_vm_start_bh, s);
    bql_unlock();

    while (migration_is_active()) {
        MigIterateState iter_state = bg_migration_iteration_run(s);

        if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        bql_unlock();
    }

fail_setup:
    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}
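
/*
 * Called on the source side once the outgoing channel has been set up (or
 * once setting it up has failed, in which case @error_in is non-NULL).
 * Kicks off the migration or background snapshot thread, or resumes a
 * paused postcopy migration.
 */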
void migration_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    uint64_t rate_limit;
    bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
    int ret;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile, if the migration completes successfully, no error will be
     * dumped when migration_cleanup() is called.
     */
    migrate_error_free(s);

    s->expected_downtime = migrate_downtime_limit();
    if (error_in) {
        migration_connect_set_error(s, error_in);
        if (resume) {
            /*
             * Don't do cleanup for resume if the channel is invalid, but
             * only dump the error. We wait for another channel connect
             * from the user. The error_report still gives the HMP user a
             * hint on what failed. It's normally done in
             * migration_cleanup(), but call it here explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migration_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = migrate_max_postcopy_bandwidth();
    } else {
        /* This is a fresh new migration */
        rate_limit = migrate_max_bandwidth();

        /* Notify before starting the migration thread */
        if (migration_call_notifiers(s, MIG_EVENT_PRECOPY_SETUP, &local_err)) {
            goto fail;
        }
    }

    migration_rate_set(rate_limit);
    if (!qemu_file_set_blocking(s->to_dst_file, true, &local_err)) {
        goto fail;
    }

    /*
     * Open the return path. For postcopy, it is used exclusively. For
     * precopy, QEMU uses the return path only if the user specified the
     * "return-path" capability.
     */
    if (migrate_postcopy_ram() || migrate_return_path()) {
        open_return_path_on_source(s);
    }

    /*
     * This needs to be done before resuming a postcopy. Note: for newer
     * QEMUs we will delay the channel creation until postcopy_start(), to
     * avoid disorder of channel creations.
     */
    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
        postcopy_preempt_setup(s);
    }

    if (resume) {
        /* Wake up the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }
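
    /*
     * From this point on we are starting a fresh migration (the resume
     * case returned above): optionally stop the VM for CPR, pin the
     * migration object, then hand control to the source thread.
     */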
    if (migrate_mode_is_cpr(s)) {
        ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
        if (ret < 0) {
            error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
            goto fail;
        }
    }

    /*
     * Take a refcount to make sure the migration object won't get freed by
     * the main thread already in migration_shutdown().
     *
     * The refcount will be released at the end of the thread function.
     */
    object_ref(OBJECT(s));

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, MIGRATION_THREAD_SNAPSHOT,
                           bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, MIGRATION_THREAD_SRC_MAIN,
                           migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
    return;

fail:
    migrate_set_error(s, local_err);
    if (s->state != MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
    }
    error_report_err(local_err);
    migration_cleanup(s);
}

static void migration_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    device_class_set_props_n(dc, migration_properties,
                             migration_properties_count);
}

static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_event_destroy(&ms->pause_event);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
    error_free(ms->error);
    qemu_event_destroy(&ms->postcopy_package_loaded_event);
}

static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_event_init(&ms->pause_event, false);
    qemu_mutex_init(&ms->error_mutex);

    migrate_params_init(&ms->parameters);

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
    qemu_event_init(&ms->postcopy_package_loaded_event, 0);
}

/*
 * Return true if the checks pass, false otherwise. On failure, an error
 * is set in errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    /* Assuming all off */
    bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 };

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    return migrate_caps_check(old_caps, ms->capabilities, errp);
}
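
/*
 * Illustrative caller of migration_object_check() (hypothetical,
 * comment-only sketch):
 *
 *     Error *err = NULL;
 *     if (!migration_object_check(migrate_get_current(), &err)) {
 *         error_report_err(err);
 *     }
 */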

static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);