1 /* 2 * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) 3 * (a.k.a. Fault Tolerance or Continuous Replication) 4 * 5 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 6 * Copyright (c) 2016 FUJITSU LIMITED 7 * Copyright (c) 2016 Intel Corporation 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "sysemu/sysemu.h" 15 #include "qapi/error.h" 16 #include "qapi/qapi-commands-migration.h" 17 #include "qemu-file-channel.h" 18 #include "migration.h" 19 #include "qemu-file.h" 20 #include "savevm.h" 21 #include "migration/colo.h" 22 #include "block.h" 23 #include "io/channel-buffer.h" 24 #include "trace.h" 25 #include "qemu/error-report.h" 26 #include "qemu/main-loop.h" 27 #include "qemu/rcu.h" 28 #include "migration/failover.h" 29 #ifdef CONFIG_REPLICATION 30 #include "replication.h" 31 #endif 32 #include "net/colo-compare.h" 33 #include "net/colo.h" 34 #include "block/block.h" 35 #include "qapi/qapi-events-migration.h" 36 #include "qapi/qmp/qerror.h" 37 #include "sysemu/cpus.h" 38 #include "net/filter.h" 39 40 static bool vmstate_loading; 41 static Notifier packets_compare_notifier; 42 43 /* User need to know colo mode after COLO failover */ 44 static COLOMode last_colo_mode; 45 46 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) 47 48 bool migration_in_colo_state(void) 49 { 50 MigrationState *s = migrate_get_current(); 51 52 return (s->state == MIGRATION_STATUS_COLO); 53 } 54 55 bool migration_incoming_in_colo_state(void) 56 { 57 MigrationIncomingState *mis = migration_incoming_get_current(); 58 59 return mis && (mis->state == MIGRATION_STATUS_COLO); 60 } 61 62 static bool colo_runstate_is_stopped(void) 63 { 64 return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); 65 } 66 67 static void secondary_vm_do_failover(void) 68 { 69 /* COLO needs enable block-replication */ 70 #ifdef CONFIG_REPLICATION 71 int old_state; 72 MigrationIncomingState *mis = migration_incoming_get_current(); 73 Error *local_err = NULL; 74 75 /* Can not do failover during the process of VM's loading VMstate, Or 76 * it will break the secondary VM. 77 */ 78 if (vmstate_loading) { 79 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, 80 FAILOVER_STATUS_RELAUNCH); 81 if (old_state != FAILOVER_STATUS_ACTIVE) { 82 error_report("Unknown error while do failover for secondary VM," 83 "old_state: %s", FailoverStatus_str(old_state)); 84 } 85 return; 86 } 87 88 migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, 89 MIGRATION_STATUS_COMPLETED); 90 91 replication_stop_all(true, &local_err); 92 if (local_err) { 93 error_report_err(local_err); 94 } 95 96 /* Notify all filters of all NIC to do checkpoint */ 97 colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err); 98 if (local_err) { 99 error_report_err(local_err); 100 } 101 102 if (!autostart) { 103 error_report("\"-S\" qemu option will be ignored in secondary side"); 104 /* recover runstate to normal migration finish state */ 105 autostart = true; 106 } 107 /* 108 * Make sure COLO incoming thread not block in recv or send, 109 * If mis->from_src_file and mis->to_src_file use the same fd, 110 * The second shutdown() will return -1, we ignore this value, 111 * It is harmless. 112 */ 113 if (mis->from_src_file) { 114 qemu_file_shutdown(mis->from_src_file); 115 } 116 if (mis->to_src_file) { 117 qemu_file_shutdown(mis->to_src_file); 118 } 119 120 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, 121 FAILOVER_STATUS_COMPLETED); 122 if (old_state != FAILOVER_STATUS_ACTIVE) { 123 error_report("Incorrect state (%s) while doing failover for " 124 "secondary VM", FailoverStatus_str(old_state)); 125 return; 126 } 127 /* Notify COLO incoming thread that failover work is finished */ 128 qemu_sem_post(&mis->colo_incoming_sem); 129 130 /* For Secondary VM, jump to incoming co */ 131 if (mis->migration_incoming_co) { 132 qemu_coroutine_enter(mis->migration_incoming_co); 133 } 134 #else 135 abort(); 136 #endif 137 } 138 139 static void primary_vm_do_failover(void) 140 { 141 #ifdef CONFIG_REPLICATION 142 MigrationState *s = migrate_get_current(); 143 int old_state; 144 Error *local_err = NULL; 145 146 migrate_set_state(&s->state, MIGRATION_STATUS_COLO, 147 MIGRATION_STATUS_COMPLETED); 148 /* 149 * kick COLO thread which might wait at 150 * qemu_sem_wait(&s->colo_checkpoint_sem). 151 */ 152 colo_checkpoint_notify(migrate_get_current()); 153 154 /* 155 * Wake up COLO thread which may blocked in recv() or send(), 156 * The s->rp_state.from_dst_file and s->to_dst_file may use the 157 * same fd, but we still shutdown the fd for twice, it is harmless. 158 */ 159 if (s->to_dst_file) { 160 qemu_file_shutdown(s->to_dst_file); 161 } 162 if (s->rp_state.from_dst_file) { 163 qemu_file_shutdown(s->rp_state.from_dst_file); 164 } 165 166 old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, 167 FAILOVER_STATUS_COMPLETED); 168 if (old_state != FAILOVER_STATUS_ACTIVE) { 169 error_report("Incorrect state (%s) while doing failover for Primary VM", 170 FailoverStatus_str(old_state)); 171 return; 172 } 173 174 replication_stop_all(true, &local_err); 175 if (local_err) { 176 error_report_err(local_err); 177 local_err = NULL; 178 } 179 180 /* Notify COLO thread that failover work is finished */ 181 qemu_sem_post(&s->colo_exit_sem); 182 #else 183 abort(); 184 #endif 185 } 186 187 COLOMode get_colo_mode(void) 188 { 189 if (migration_in_colo_state()) { 190 return COLO_MODE_PRIMARY; 191 } else if (migration_incoming_in_colo_state()) { 192 return COLO_MODE_SECONDARY; 193 } else { 194 return COLO_MODE_NONE; 195 } 196 } 197 198 void colo_do_failover(void) 199 { 200 /* Make sure VM stopped while failover happened. */ 201 if (!colo_runstate_is_stopped()) { 202 vm_stop_force_state(RUN_STATE_COLO); 203 } 204 205 switch (get_colo_mode()) { 206 case COLO_MODE_PRIMARY: 207 primary_vm_do_failover(); 208 break; 209 case COLO_MODE_SECONDARY: 210 secondary_vm_do_failover(); 211 break; 212 default: 213 error_report("colo_do_failover failed because the colo mode" 214 " could not be obtained"); 215 } 216 } 217 218 #ifdef CONFIG_REPLICATION 219 void qmp_xen_set_replication(bool enable, bool primary, 220 bool has_failover, bool failover, 221 Error **errp) 222 { 223 ReplicationMode mode = primary ? 224 REPLICATION_MODE_PRIMARY : 225 REPLICATION_MODE_SECONDARY; 226 227 if (has_failover && enable) { 228 error_setg(errp, "Parameter 'failover' is only for" 229 " stopping replication"); 230 return; 231 } 232 233 if (enable) { 234 replication_start_all(mode, errp); 235 } else { 236 if (!has_failover) { 237 failover = NULL; 238 } 239 replication_stop_all(failover, failover ? NULL : errp); 240 } 241 } 242 243 ReplicationStatus *qmp_query_xen_replication_status(Error **errp) 244 { 245 Error *err = NULL; 246 ReplicationStatus *s = g_new0(ReplicationStatus, 1); 247 248 replication_get_error_all(&err); 249 if (err) { 250 s->error = true; 251 s->has_desc = true; 252 s->desc = g_strdup(error_get_pretty(err)); 253 } else { 254 s->error = false; 255 } 256 257 error_free(err); 258 return s; 259 } 260 261 void qmp_xen_colo_do_checkpoint(Error **errp) 262 { 263 replication_do_checkpoint_all(errp); 264 /* Notify all filters of all NIC to do checkpoint */ 265 colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp); 266 } 267 #endif 268 269 COLOStatus *qmp_query_colo_status(Error **errp) 270 { 271 COLOStatus *s = g_new0(COLOStatus, 1); 272 273 s->mode = get_colo_mode(); 274 s->last_mode = last_colo_mode; 275 276 switch (failover_get_state()) { 277 case FAILOVER_STATUS_NONE: 278 s->reason = COLO_EXIT_REASON_NONE; 279 break; 280 case FAILOVER_STATUS_COMPLETED: 281 s->reason = COLO_EXIT_REASON_REQUEST; 282 break; 283 default: 284 if (migration_in_colo_state()) { 285 s->reason = COLO_EXIT_REASON_PROCESSING; 286 } else { 287 s->reason = COLO_EXIT_REASON_ERROR; 288 } 289 } 290 291 return s; 292 } 293 294 static void colo_send_message(QEMUFile *f, COLOMessage msg, 295 Error **errp) 296 { 297 int ret; 298 299 if (msg >= COLO_MESSAGE__MAX) { 300 error_setg(errp, "%s: Invalid message", __func__); 301 return; 302 } 303 qemu_put_be32(f, msg); 304 qemu_fflush(f); 305 306 ret = qemu_file_get_error(f); 307 if (ret < 0) { 308 error_setg_errno(errp, -ret, "Can't send COLO message"); 309 } 310 trace_colo_send_message(COLOMessage_str(msg)); 311 } 312 313 static void colo_send_message_value(QEMUFile *f, COLOMessage msg, 314 uint64_t value, Error **errp) 315 { 316 Error *local_err = NULL; 317 int ret; 318 319 colo_send_message(f, msg, &local_err); 320 if (local_err) { 321 error_propagate(errp, local_err); 322 return; 323 } 324 qemu_put_be64(f, value); 325 qemu_fflush(f); 326 327 ret = qemu_file_get_error(f); 328 if (ret < 0) { 329 error_setg_errno(errp, -ret, "Failed to send value for message:%s", 330 COLOMessage_str(msg)); 331 } 332 } 333 334 static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) 335 { 336 COLOMessage msg; 337 int ret; 338 339 msg = qemu_get_be32(f); 340 ret = qemu_file_get_error(f); 341 if (ret < 0) { 342 error_setg_errno(errp, -ret, "Can't receive COLO message"); 343 return msg; 344 } 345 if (msg >= COLO_MESSAGE__MAX) { 346 error_setg(errp, "%s: Invalid message", __func__); 347 return msg; 348 } 349 trace_colo_receive_message(COLOMessage_str(msg)); 350 return msg; 351 } 352 353 static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, 354 Error **errp) 355 { 356 COLOMessage msg; 357 Error *local_err = NULL; 358 359 msg = colo_receive_message(f, &local_err); 360 if (local_err) { 361 error_propagate(errp, local_err); 362 return; 363 } 364 if (msg != expect_msg) { 365 error_setg(errp, "Unexpected COLO message %d, expected %d", 366 msg, expect_msg); 367 } 368 } 369 370 static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, 371 Error **errp) 372 { 373 Error *local_err = NULL; 374 uint64_t value; 375 int ret; 376 377 colo_receive_check_message(f, expect_msg, &local_err); 378 if (local_err) { 379 error_propagate(errp, local_err); 380 return 0; 381 } 382 383 value = qemu_get_be64(f); 384 ret = qemu_file_get_error(f); 385 if (ret < 0) { 386 error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", 387 COLOMessage_str(expect_msg)); 388 } 389 return value; 390 } 391 392 static int colo_do_checkpoint_transaction(MigrationState *s, 393 QIOChannelBuffer *bioc, 394 QEMUFile *fb) 395 { 396 Error *local_err = NULL; 397 int ret = -1; 398 399 colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, 400 &local_err); 401 if (local_err) { 402 goto out; 403 } 404 405 colo_receive_check_message(s->rp_state.from_dst_file, 406 COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); 407 if (local_err) { 408 goto out; 409 } 410 /* Reset channel-buffer directly */ 411 qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); 412 bioc->usage = 0; 413 414 qemu_mutex_lock_iothread(); 415 if (failover_get_state() != FAILOVER_STATUS_NONE) { 416 qemu_mutex_unlock_iothread(); 417 goto out; 418 } 419 vm_stop_force_state(RUN_STATE_COLO); 420 qemu_mutex_unlock_iothread(); 421 trace_colo_vm_state_change("run", "stop"); 422 /* 423 * Failover request bh could be called after vm_stop_force_state(), 424 * So we need check failover_request_is_active() again. 425 */ 426 if (failover_get_state() != FAILOVER_STATUS_NONE) { 427 goto out; 428 } 429 430 colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); 431 if (local_err) { 432 goto out; 433 } 434 435 /* Disable block migration */ 436 migrate_set_block_enabled(false, &local_err); 437 qemu_mutex_lock_iothread(); 438 439 #ifdef CONFIG_REPLICATION 440 replication_do_checkpoint_all(&local_err); 441 if (local_err) { 442 qemu_mutex_unlock_iothread(); 443 goto out; 444 } 445 #else 446 abort(); 447 #endif 448 449 colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); 450 if (local_err) { 451 qemu_mutex_unlock_iothread(); 452 goto out; 453 } 454 /* Note: device state is saved into buffer */ 455 ret = qemu_save_device_state(fb); 456 457 qemu_mutex_unlock_iothread(); 458 if (ret < 0) { 459 goto out; 460 } 461 /* 462 * Only save VM's live state, which not including device state. 463 * TODO: We may need a timeout mechanism to prevent COLO process 464 * to be blocked here. 465 */ 466 qemu_savevm_live_state(s->to_dst_file); 467 468 qemu_fflush(fb); 469 470 /* 471 * We need the size of the VMstate data in Secondary side, 472 * With which we can decide how much data should be read. 473 */ 474 colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE, 475 bioc->usage, &local_err); 476 if (local_err) { 477 goto out; 478 } 479 480 qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage); 481 qemu_fflush(s->to_dst_file); 482 ret = qemu_file_get_error(s->to_dst_file); 483 if (ret < 0) { 484 goto out; 485 } 486 487 colo_receive_check_message(s->rp_state.from_dst_file, 488 COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); 489 if (local_err) { 490 goto out; 491 } 492 493 colo_receive_check_message(s->rp_state.from_dst_file, 494 COLO_MESSAGE_VMSTATE_LOADED, &local_err); 495 if (local_err) { 496 goto out; 497 } 498 499 ret = 0; 500 501 qemu_mutex_lock_iothread(); 502 vm_start(); 503 qemu_mutex_unlock_iothread(); 504 trace_colo_vm_state_change("stop", "run"); 505 506 out: 507 if (local_err) { 508 error_report_err(local_err); 509 } 510 return ret; 511 } 512 513 static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) 514 { 515 colo_checkpoint_notify(data); 516 } 517 518 static void colo_process_checkpoint(MigrationState *s) 519 { 520 QIOChannelBuffer *bioc; 521 QEMUFile *fb = NULL; 522 int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); 523 Error *local_err = NULL; 524 int ret; 525 526 last_colo_mode = get_colo_mode(); 527 if (last_colo_mode != COLO_MODE_PRIMARY) { 528 error_report("COLO mode must be COLO_MODE_PRIMARY"); 529 return; 530 } 531 532 failover_init_state(); 533 534 s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); 535 if (!s->rp_state.from_dst_file) { 536 error_report("Open QEMUFile from_dst_file failed"); 537 goto out; 538 } 539 540 packets_compare_notifier.notify = colo_compare_notify_checkpoint; 541 colo_compare_register_notifier(&packets_compare_notifier); 542 543 /* 544 * Wait for Secondary finish loading VM states and enter COLO 545 * restore. 546 */ 547 colo_receive_check_message(s->rp_state.from_dst_file, 548 COLO_MESSAGE_CHECKPOINT_READY, &local_err); 549 if (local_err) { 550 goto out; 551 } 552 bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); 553 fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); 554 object_unref(OBJECT(bioc)); 555 556 qemu_mutex_lock_iothread(); 557 #ifdef CONFIG_REPLICATION 558 replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); 559 if (local_err) { 560 qemu_mutex_unlock_iothread(); 561 goto out; 562 } 563 #else 564 abort(); 565 #endif 566 567 vm_start(); 568 qemu_mutex_unlock_iothread(); 569 trace_colo_vm_state_change("stop", "run"); 570 571 timer_mod(s->colo_delay_timer, 572 current_time + s->parameters.x_checkpoint_delay); 573 574 while (s->state == MIGRATION_STATUS_COLO) { 575 if (failover_get_state() != FAILOVER_STATUS_NONE) { 576 error_report("failover request"); 577 goto out; 578 } 579 580 qemu_sem_wait(&s->colo_checkpoint_sem); 581 582 if (s->state != MIGRATION_STATUS_COLO) { 583 goto out; 584 } 585 ret = colo_do_checkpoint_transaction(s, bioc, fb); 586 if (ret < 0) { 587 goto out; 588 } 589 } 590 591 out: 592 /* Throw the unreported error message after exited from loop */ 593 if (local_err) { 594 error_report_err(local_err); 595 } 596 597 if (fb) { 598 qemu_fclose(fb); 599 } 600 601 /* 602 * There are only two reasons we can get here, some error happened 603 * or the user triggered failover. 604 */ 605 switch (failover_get_state()) { 606 case FAILOVER_STATUS_COMPLETED: 607 qapi_event_send_colo_exit(COLO_MODE_PRIMARY, 608 COLO_EXIT_REASON_REQUEST); 609 break; 610 default: 611 qapi_event_send_colo_exit(COLO_MODE_PRIMARY, 612 COLO_EXIT_REASON_ERROR); 613 } 614 615 /* Hope this not to be too long to wait here */ 616 qemu_sem_wait(&s->colo_exit_sem); 617 qemu_sem_destroy(&s->colo_exit_sem); 618 619 /* 620 * It is safe to unregister notifier after failover finished. 621 * Besides, colo_delay_timer and colo_checkpoint_sem can't be 622 * released befor unregister notifier, or there will be use-after-free 623 * error. 624 */ 625 colo_compare_unregister_notifier(&packets_compare_notifier); 626 timer_del(s->colo_delay_timer); 627 timer_free(s->colo_delay_timer); 628 qemu_sem_destroy(&s->colo_checkpoint_sem); 629 630 /* 631 * Must be called after failover BH is completed, 632 * Or the failover BH may shutdown the wrong fd that 633 * re-used by other threads after we release here. 634 */ 635 if (s->rp_state.from_dst_file) { 636 qemu_fclose(s->rp_state.from_dst_file); 637 } 638 } 639 640 void colo_checkpoint_notify(void *opaque) 641 { 642 MigrationState *s = opaque; 643 int64_t next_notify_time; 644 645 qemu_sem_post(&s->colo_checkpoint_sem); 646 s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); 647 next_notify_time = s->colo_checkpoint_time + 648 s->parameters.x_checkpoint_delay; 649 timer_mod(s->colo_delay_timer, next_notify_time); 650 } 651 652 void migrate_start_colo_process(MigrationState *s) 653 { 654 qemu_mutex_unlock_iothread(); 655 qemu_sem_init(&s->colo_checkpoint_sem, 0); 656 s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, 657 colo_checkpoint_notify, s); 658 659 qemu_sem_init(&s->colo_exit_sem, 0); 660 migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, 661 MIGRATION_STATUS_COLO); 662 colo_process_checkpoint(s); 663 qemu_mutex_lock_iothread(); 664 } 665 666 static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, 667 Error **errp) 668 { 669 COLOMessage msg; 670 Error *local_err = NULL; 671 672 msg = colo_receive_message(f, &local_err); 673 if (local_err) { 674 error_propagate(errp, local_err); 675 return; 676 } 677 678 switch (msg) { 679 case COLO_MESSAGE_CHECKPOINT_REQUEST: 680 *checkpoint_request = 1; 681 break; 682 default: 683 *checkpoint_request = 0; 684 error_setg(errp, "Got unknown COLO message: %d", msg); 685 break; 686 } 687 } 688 689 void *colo_process_incoming_thread(void *opaque) 690 { 691 MigrationIncomingState *mis = opaque; 692 QEMUFile *fb = NULL; 693 QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ 694 uint64_t total_size; 695 uint64_t value; 696 Error *local_err = NULL; 697 int ret; 698 699 rcu_register_thread(); 700 qemu_sem_init(&mis->colo_incoming_sem, 0); 701 702 migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, 703 MIGRATION_STATUS_COLO); 704 705 last_colo_mode = get_colo_mode(); 706 if (last_colo_mode != COLO_MODE_SECONDARY) { 707 error_report("COLO mode must be COLO_MODE_SECONDARY"); 708 return NULL; 709 } 710 711 failover_init_state(); 712 713 mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); 714 if (!mis->to_src_file) { 715 error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); 716 goto out; 717 } 718 /* 719 * Note: the communication between Primary side and Secondary side 720 * should be sequential, we set the fd to unblocked in migration incoming 721 * coroutine, and here we are in the COLO incoming thread, so it is ok to 722 * set the fd back to blocked. 723 */ 724 qemu_file_set_blocking(mis->from_src_file, true); 725 726 bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); 727 fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); 728 object_unref(OBJECT(bioc)); 729 730 qemu_mutex_lock_iothread(); 731 #ifdef CONFIG_REPLICATION 732 replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); 733 if (local_err) { 734 qemu_mutex_unlock_iothread(); 735 goto out; 736 } 737 #else 738 abort(); 739 #endif 740 vm_start(); 741 trace_colo_vm_state_change("stop", "run"); 742 qemu_mutex_unlock_iothread(); 743 744 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, 745 &local_err); 746 if (local_err) { 747 goto out; 748 } 749 750 while (mis->state == MIGRATION_STATUS_COLO) { 751 int request = 0; 752 753 colo_wait_handle_message(mis->from_src_file, &request, &local_err); 754 if (local_err) { 755 goto out; 756 } 757 assert(request); 758 if (failover_get_state() != FAILOVER_STATUS_NONE) { 759 error_report("failover request"); 760 goto out; 761 } 762 763 qemu_mutex_lock_iothread(); 764 vm_stop_force_state(RUN_STATE_COLO); 765 trace_colo_vm_state_change("run", "stop"); 766 qemu_mutex_unlock_iothread(); 767 768 /* FIXME: This is unnecessary for periodic checkpoint mode */ 769 colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, 770 &local_err); 771 if (local_err) { 772 goto out; 773 } 774 775 colo_receive_check_message(mis->from_src_file, 776 COLO_MESSAGE_VMSTATE_SEND, &local_err); 777 if (local_err) { 778 goto out; 779 } 780 781 qemu_mutex_lock_iothread(); 782 cpu_synchronize_all_pre_loadvm(); 783 ret = qemu_loadvm_state_main(mis->from_src_file, mis); 784 qemu_mutex_unlock_iothread(); 785 786 if (ret < 0) { 787 error_report("Load VM's live state (ram) error"); 788 goto out; 789 } 790 791 value = colo_receive_message_value(mis->from_src_file, 792 COLO_MESSAGE_VMSTATE_SIZE, &local_err); 793 if (local_err) { 794 goto out; 795 } 796 797 /* 798 * Read VM device state data into channel buffer, 799 * It's better to re-use the memory allocated. 800 * Here we need to handle the channel buffer directly. 801 */ 802 if (value > bioc->capacity) { 803 bioc->capacity = value; 804 bioc->data = g_realloc(bioc->data, bioc->capacity); 805 } 806 total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); 807 if (total_size != value) { 808 error_report("Got %" PRIu64 " VMState data, less than expected" 809 " %" PRIu64, total_size, value); 810 goto out; 811 } 812 bioc->usage = total_size; 813 qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); 814 815 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, 816 &local_err); 817 if (local_err) { 818 goto out; 819 } 820 821 qemu_mutex_lock_iothread(); 822 vmstate_loading = true; 823 ret = qemu_load_device_state(fb); 824 if (ret < 0) { 825 error_report("COLO: load device state failed"); 826 qemu_mutex_unlock_iothread(); 827 goto out; 828 } 829 830 #ifdef CONFIG_REPLICATION 831 replication_get_error_all(&local_err); 832 if (local_err) { 833 qemu_mutex_unlock_iothread(); 834 goto out; 835 } 836 837 /* discard colo disk buffer */ 838 replication_do_checkpoint_all(&local_err); 839 if (local_err) { 840 qemu_mutex_unlock_iothread(); 841 goto out; 842 } 843 #else 844 abort(); 845 #endif 846 /* Notify all filters of all NIC to do checkpoint */ 847 colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); 848 849 if (local_err) { 850 qemu_mutex_unlock_iothread(); 851 goto out; 852 } 853 854 vmstate_loading = false; 855 vm_start(); 856 trace_colo_vm_state_change("stop", "run"); 857 qemu_mutex_unlock_iothread(); 858 859 if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { 860 failover_set_state(FAILOVER_STATUS_RELAUNCH, 861 FAILOVER_STATUS_NONE); 862 failover_request_active(NULL); 863 goto out; 864 } 865 866 colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, 867 &local_err); 868 if (local_err) { 869 goto out; 870 } 871 } 872 873 out: 874 vmstate_loading = false; 875 /* Throw the unreported error message after exited from loop */ 876 if (local_err) { 877 error_report_err(local_err); 878 } 879 880 /* 881 * There are only two reasons we can get here, some error happened 882 * or the user triggered failover. 883 */ 884 switch (failover_get_state()) { 885 case FAILOVER_STATUS_COMPLETED: 886 qapi_event_send_colo_exit(COLO_MODE_SECONDARY, 887 COLO_EXIT_REASON_REQUEST); 888 break; 889 default: 890 qapi_event_send_colo_exit(COLO_MODE_SECONDARY, 891 COLO_EXIT_REASON_ERROR); 892 } 893 894 if (fb) { 895 qemu_fclose(fb); 896 } 897 898 /* Hope this not to be too long to loop here */ 899 qemu_sem_wait(&mis->colo_incoming_sem); 900 qemu_sem_destroy(&mis->colo_incoming_sem); 901 /* Must be called after failover BH is completed */ 902 if (mis->to_src_file) { 903 qemu_fclose(mis->to_src_file); 904 mis->to_src_file = NULL; 905 } 906 907 rcu_unregister_thread(); 908 return NULL; 909 } 910