1 /* 2 * QEMU live migration 3 * 4 * Copyright IBM, Corp. 2008 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu-common.h" 17 #include "qemu/error-report.h" 18 #include "qemu/main-loop.h" 19 #include "migration/migration.h" 20 #include "migration/qemu-file.h" 21 #include "sysemu/sysemu.h" 22 #include "block/block.h" 23 #include "qapi/qmp/qerror.h" 24 #include "qemu/sockets.h" 25 #include "qemu/rcu.h" 26 #include "migration/block.h" 27 #include "qemu/thread.h" 28 #include "qmp-commands.h" 29 #include "trace.h" 30 #include "qapi/util.h" 31 #include "qapi-event.h" 32 #include "qom/cpu.h" 33 34 #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ 35 36 /* Amount of time to allocate to each "chunk" of bandwidth-throttled 37 * data. */ 38 #define BUFFER_DELAY 100 39 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) 40 41 /* Default compression thread count */ 42 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 43 /* Default decompression thread count, usually decompression is at 44 * least 4 times as fast as compression.*/ 45 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 46 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ 47 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 48 /* Define default autoconverge cpu throttle migration parameters */ 49 #define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20 50 #define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10 51 52 /* Migration XBZRLE default cache size */ 53 #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) 54 55 static NotifierList migration_state_notifiers = 56 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); 57 58 static bool deferred_incoming; 59 60 /* When we add fault tolerance, we could have several 61 migrations at once. For now we don't need to add 62 dynamic creation of migration */ 63 64 /* For outgoing */ 65 MigrationState *migrate_get_current(void) 66 { 67 static MigrationState current_migration = { 68 .state = MIGRATION_STATUS_NONE, 69 .bandwidth_limit = MAX_THROTTLE, 70 .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE, 71 .mbps = -1, 72 .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = 73 DEFAULT_MIGRATE_COMPRESS_LEVEL, 74 .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = 75 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, 76 .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = 77 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, 78 .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = 79 DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL, 80 .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = 81 DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT, 82 }; 83 84 return ¤t_migration; 85 } 86 87 /* For incoming */ 88 static MigrationIncomingState *mis_current; 89 90 MigrationIncomingState *migration_incoming_get_current(void) 91 { 92 return mis_current; 93 } 94 95 MigrationIncomingState *migration_incoming_state_new(QEMUFile* f) 96 { 97 mis_current = g_new0(MigrationIncomingState, 1); 98 mis_current->file = f; 99 QLIST_INIT(&mis_current->loadvm_handlers); 100 101 return mis_current; 102 } 103 104 void migration_incoming_state_destroy(void) 105 { 106 loadvm_free_handlers(mis_current); 107 g_free(mis_current); 108 mis_current = NULL; 109 } 110 111 112 typedef struct { 113 bool optional; 114 uint32_t size; 115 uint8_t runstate[100]; 116 RunState state; 117 bool received; 118 } GlobalState; 119 120 static GlobalState global_state; 121 122 int global_state_store(void) 123 { 124 if (!runstate_store((char *)global_state.runstate, 125 sizeof(global_state.runstate))) { 126 error_report("runstate name too big: %s", global_state.runstate); 127 trace_migrate_state_too_big(); 128 return -EINVAL; 129 } 130 return 0; 131 } 132 133 void global_state_store_running(void) 134 { 135 const char *state = RunState_lookup[RUN_STATE_RUNNING]; 136 strncpy((char *)global_state.runstate, 137 state, sizeof(global_state.runstate)); 138 } 139 140 static bool global_state_received(void) 141 { 142 return global_state.received; 143 } 144 145 static RunState global_state_get_runstate(void) 146 { 147 return global_state.state; 148 } 149 150 void global_state_set_optional(void) 151 { 152 global_state.optional = true; 153 } 154 155 static bool global_state_needed(void *opaque) 156 { 157 GlobalState *s = opaque; 158 char *runstate = (char *)s->runstate; 159 160 /* If it is not optional, it is mandatory */ 161 162 if (s->optional == false) { 163 return true; 164 } 165 166 /* If state is running or paused, it is not needed */ 167 168 if (strcmp(runstate, "running") == 0 || 169 strcmp(runstate, "paused") == 0) { 170 return false; 171 } 172 173 /* for any other state it is needed */ 174 return true; 175 } 176 177 static int global_state_post_load(void *opaque, int version_id) 178 { 179 GlobalState *s = opaque; 180 Error *local_err = NULL; 181 int r; 182 char *runstate = (char *)s->runstate; 183 184 s->received = true; 185 trace_migrate_global_state_post_load(runstate); 186 187 r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX, 188 -1, &local_err); 189 190 if (r == -1) { 191 if (local_err) { 192 error_report_err(local_err); 193 } 194 return -EINVAL; 195 } 196 s->state = r; 197 198 return 0; 199 } 200 201 static void global_state_pre_save(void *opaque) 202 { 203 GlobalState *s = opaque; 204 205 trace_migrate_global_state_pre_save((char *)s->runstate); 206 s->size = strlen((char *)s->runstate) + 1; 207 } 208 209 static const VMStateDescription vmstate_globalstate = { 210 .name = "globalstate", 211 .version_id = 1, 212 .minimum_version_id = 1, 213 .post_load = global_state_post_load, 214 .pre_save = global_state_pre_save, 215 .needed = global_state_needed, 216 .fields = (VMStateField[]) { 217 VMSTATE_UINT32(size, GlobalState), 218 VMSTATE_BUFFER(runstate, GlobalState), 219 VMSTATE_END_OF_LIST() 220 }, 221 }; 222 223 void register_global_state(void) 224 { 225 /* We would use it independently that we receive it */ 226 strcpy((char *)&global_state.runstate, ""); 227 global_state.received = false; 228 vmstate_register(NULL, 0, &vmstate_globalstate, &global_state); 229 } 230 231 static void migrate_generate_event(int new_state) 232 { 233 if (migrate_use_events()) { 234 qapi_event_send_migration(new_state, &error_abort); 235 } 236 } 237 238 /* 239 * Called on -incoming with a defer: uri. 240 * The migration can be started later after any parameters have been 241 * changed. 242 */ 243 static void deferred_incoming_migration(Error **errp) 244 { 245 if (deferred_incoming) { 246 error_setg(errp, "Incoming migration already deferred"); 247 } 248 deferred_incoming = true; 249 } 250 251 void qemu_start_incoming_migration(const char *uri, Error **errp) 252 { 253 const char *p; 254 255 qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort); 256 if (!strcmp(uri, "defer")) { 257 deferred_incoming_migration(errp); 258 } else if (strstart(uri, "tcp:", &p)) { 259 tcp_start_incoming_migration(p, errp); 260 #ifdef CONFIG_RDMA 261 } else if (strstart(uri, "rdma:", &p)) { 262 rdma_start_incoming_migration(p, errp); 263 #endif 264 #if !defined(WIN32) 265 } else if (strstart(uri, "exec:", &p)) { 266 exec_start_incoming_migration(p, errp); 267 } else if (strstart(uri, "unix:", &p)) { 268 unix_start_incoming_migration(p, errp); 269 } else if (strstart(uri, "fd:", &p)) { 270 fd_start_incoming_migration(p, errp); 271 #endif 272 } else { 273 error_setg(errp, "unknown migration protocol: %s", uri); 274 } 275 } 276 277 static void process_incoming_migration_co(void *opaque) 278 { 279 QEMUFile *f = opaque; 280 Error *local_err = NULL; 281 int ret; 282 283 migration_incoming_state_new(f); 284 migrate_generate_event(MIGRATION_STATUS_ACTIVE); 285 ret = qemu_loadvm_state(f); 286 287 qemu_fclose(f); 288 free_xbzrle_decoded_buf(); 289 migration_incoming_state_destroy(); 290 291 if (ret < 0) { 292 migrate_generate_event(MIGRATION_STATUS_FAILED); 293 error_report("load of migration failed: %s", strerror(-ret)); 294 migrate_decompress_threads_join(); 295 exit(EXIT_FAILURE); 296 } 297 298 /* Make sure all file formats flush their mutable metadata */ 299 bdrv_invalidate_cache_all(&local_err); 300 if (local_err) { 301 migrate_generate_event(MIGRATION_STATUS_FAILED); 302 error_report_err(local_err); 303 migrate_decompress_threads_join(); 304 exit(EXIT_FAILURE); 305 } 306 307 /* 308 * This must happen after all error conditions are dealt with and 309 * we're sure the VM is going to be running on this host. 310 */ 311 qemu_announce_self(); 312 313 /* If global state section was not received or we are in running 314 state, we need to obey autostart. Any other state is set with 315 runstate_set. */ 316 317 if (!global_state_received() || 318 global_state_get_runstate() == RUN_STATE_RUNNING) { 319 if (autostart) { 320 vm_start(); 321 } else { 322 runstate_set(RUN_STATE_PAUSED); 323 } 324 } else { 325 runstate_set(global_state_get_runstate()); 326 } 327 migrate_decompress_threads_join(); 328 /* 329 * This must happen after any state changes since as soon as an external 330 * observer sees this event they might start to prod at the VM assuming 331 * it's ready to use. 332 */ 333 migrate_generate_event(MIGRATION_STATUS_COMPLETED); 334 } 335 336 void process_incoming_migration(QEMUFile *f) 337 { 338 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co); 339 int fd = qemu_get_fd(f); 340 341 assert(fd != -1); 342 migrate_decompress_threads_create(); 343 qemu_set_nonblock(fd); 344 qemu_coroutine_enter(co, f); 345 } 346 347 /* amount of nanoseconds we are willing to wait for migration to be down. 348 * the choice of nanoseconds is because it is the maximum resolution that 349 * get_clock() can achieve. It is an internal measure. All user-visible 350 * units must be in seconds */ 351 static uint64_t max_downtime = 300000000; 352 353 uint64_t migrate_max_downtime(void) 354 { 355 return max_downtime; 356 } 357 358 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) 359 { 360 MigrationCapabilityStatusList *head = NULL; 361 MigrationCapabilityStatusList *caps; 362 MigrationState *s = migrate_get_current(); 363 int i; 364 365 caps = NULL; /* silence compiler warning */ 366 for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) { 367 if (head == NULL) { 368 head = g_malloc0(sizeof(*caps)); 369 caps = head; 370 } else { 371 caps->next = g_malloc0(sizeof(*caps)); 372 caps = caps->next; 373 } 374 caps->value = 375 g_malloc(sizeof(*caps->value)); 376 caps->value->capability = i; 377 caps->value->state = s->enabled_capabilities[i]; 378 } 379 380 return head; 381 } 382 383 MigrationParameters *qmp_query_migrate_parameters(Error **errp) 384 { 385 MigrationParameters *params; 386 MigrationState *s = migrate_get_current(); 387 388 params = g_malloc0(sizeof(*params)); 389 params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL]; 390 params->compress_threads = 391 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; 392 params->decompress_threads = 393 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; 394 params->x_cpu_throttle_initial = 395 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; 396 params->x_cpu_throttle_increment = 397 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; 398 399 return params; 400 } 401 402 static void get_xbzrle_cache_stats(MigrationInfo *info) 403 { 404 if (migrate_use_xbzrle()) { 405 info->has_xbzrle_cache = true; 406 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); 407 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); 408 info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred(); 409 info->xbzrle_cache->pages = xbzrle_mig_pages_transferred(); 410 info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss(); 411 info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate(); 412 info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow(); 413 } 414 } 415 416 MigrationInfo *qmp_query_migrate(Error **errp) 417 { 418 MigrationInfo *info = g_malloc0(sizeof(*info)); 419 MigrationState *s = migrate_get_current(); 420 421 switch (s->state) { 422 case MIGRATION_STATUS_NONE: 423 /* no migration has happened ever */ 424 break; 425 case MIGRATION_STATUS_SETUP: 426 info->has_status = true; 427 info->has_total_time = false; 428 break; 429 case MIGRATION_STATUS_ACTIVE: 430 case MIGRATION_STATUS_CANCELLING: 431 info->has_status = true; 432 info->has_total_time = true; 433 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) 434 - s->total_time; 435 info->has_expected_downtime = true; 436 info->expected_downtime = s->expected_downtime; 437 info->has_setup_time = true; 438 info->setup_time = s->setup_time; 439 440 info->has_ram = true; 441 info->ram = g_malloc0(sizeof(*info->ram)); 442 info->ram->transferred = ram_bytes_transferred(); 443 info->ram->remaining = ram_bytes_remaining(); 444 info->ram->total = ram_bytes_total(); 445 info->ram->duplicate = dup_mig_pages_transferred(); 446 info->ram->skipped = skipped_mig_pages_transferred(); 447 info->ram->normal = norm_mig_pages_transferred(); 448 info->ram->normal_bytes = norm_mig_bytes_transferred(); 449 info->ram->dirty_pages_rate = s->dirty_pages_rate; 450 info->ram->mbps = s->mbps; 451 info->ram->dirty_sync_count = s->dirty_sync_count; 452 453 if (blk_mig_active()) { 454 info->has_disk = true; 455 info->disk = g_malloc0(sizeof(*info->disk)); 456 info->disk->transferred = blk_mig_bytes_transferred(); 457 info->disk->remaining = blk_mig_bytes_remaining(); 458 info->disk->total = blk_mig_bytes_total(); 459 } 460 461 if (cpu_throttle_active()) { 462 info->has_x_cpu_throttle_percentage = true; 463 info->x_cpu_throttle_percentage = cpu_throttle_get_percentage(); 464 } 465 466 get_xbzrle_cache_stats(info); 467 break; 468 case MIGRATION_STATUS_COMPLETED: 469 get_xbzrle_cache_stats(info); 470 471 info->has_status = true; 472 info->has_total_time = true; 473 info->total_time = s->total_time; 474 info->has_downtime = true; 475 info->downtime = s->downtime; 476 info->has_setup_time = true; 477 info->setup_time = s->setup_time; 478 479 info->has_ram = true; 480 info->ram = g_malloc0(sizeof(*info->ram)); 481 info->ram->transferred = ram_bytes_transferred(); 482 info->ram->remaining = 0; 483 info->ram->total = ram_bytes_total(); 484 info->ram->duplicate = dup_mig_pages_transferred(); 485 info->ram->skipped = skipped_mig_pages_transferred(); 486 info->ram->normal = norm_mig_pages_transferred(); 487 info->ram->normal_bytes = norm_mig_bytes_transferred(); 488 info->ram->mbps = s->mbps; 489 info->ram->dirty_sync_count = s->dirty_sync_count; 490 break; 491 case MIGRATION_STATUS_FAILED: 492 info->has_status = true; 493 break; 494 case MIGRATION_STATUS_CANCELLED: 495 info->has_status = true; 496 break; 497 } 498 info->status = s->state; 499 500 return info; 501 } 502 503 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, 504 Error **errp) 505 { 506 MigrationState *s = migrate_get_current(); 507 MigrationCapabilityStatusList *cap; 508 509 if (s->state == MIGRATION_STATUS_ACTIVE || 510 s->state == MIGRATION_STATUS_SETUP) { 511 error_setg(errp, QERR_MIGRATION_ACTIVE); 512 return; 513 } 514 515 for (cap = params; cap; cap = cap->next) { 516 s->enabled_capabilities[cap->value->capability] = cap->value->state; 517 } 518 } 519 520 void qmp_migrate_set_parameters(bool has_compress_level, 521 int64_t compress_level, 522 bool has_compress_threads, 523 int64_t compress_threads, 524 bool has_decompress_threads, 525 int64_t decompress_threads, 526 bool has_x_cpu_throttle_initial, 527 int64_t x_cpu_throttle_initial, 528 bool has_x_cpu_throttle_increment, 529 int64_t x_cpu_throttle_increment, Error **errp) 530 { 531 MigrationState *s = migrate_get_current(); 532 533 if (has_compress_level && (compress_level < 0 || compress_level > 9)) { 534 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", 535 "is invalid, it should be in the range of 0 to 9"); 536 return; 537 } 538 if (has_compress_threads && 539 (compress_threads < 1 || compress_threads > 255)) { 540 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 541 "compress_threads", 542 "is invalid, it should be in the range of 1 to 255"); 543 return; 544 } 545 if (has_decompress_threads && 546 (decompress_threads < 1 || decompress_threads > 255)) { 547 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 548 "decompress_threads", 549 "is invalid, it should be in the range of 1 to 255"); 550 return; 551 } 552 if (has_x_cpu_throttle_initial && 553 (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) { 554 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 555 "x_cpu_throttle_initial", 556 "an integer in the range of 1 to 99"); 557 } 558 if (has_x_cpu_throttle_increment && 559 (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) { 560 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, 561 "x_cpu_throttle_increment", 562 "an integer in the range of 1 to 99"); 563 } 564 565 if (has_compress_level) { 566 s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level; 567 } 568 if (has_compress_threads) { 569 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads; 570 } 571 if (has_decompress_threads) { 572 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = 573 decompress_threads; 574 } 575 if (has_x_cpu_throttle_initial) { 576 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = 577 x_cpu_throttle_initial; 578 } 579 580 if (has_x_cpu_throttle_increment) { 581 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = 582 x_cpu_throttle_increment; 583 } 584 } 585 586 /* shared migration helpers */ 587 588 static void migrate_set_state(MigrationState *s, int old_state, int new_state) 589 { 590 if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) { 591 trace_migrate_set_state(new_state); 592 migrate_generate_event(new_state); 593 } 594 } 595 596 static void migrate_fd_cleanup(void *opaque) 597 { 598 MigrationState *s = opaque; 599 600 qemu_bh_delete(s->cleanup_bh); 601 s->cleanup_bh = NULL; 602 603 if (s->file) { 604 trace_migrate_fd_cleanup(); 605 qemu_mutex_unlock_iothread(); 606 qemu_thread_join(&s->thread); 607 qemu_mutex_lock_iothread(); 608 609 migrate_compress_threads_join(); 610 qemu_fclose(s->file); 611 s->file = NULL; 612 } 613 614 assert(s->state != MIGRATION_STATUS_ACTIVE); 615 616 if (s->state != MIGRATION_STATUS_COMPLETED) { 617 qemu_savevm_state_cancel(); 618 if (s->state == MIGRATION_STATUS_CANCELLING) { 619 migrate_set_state(s, MIGRATION_STATUS_CANCELLING, 620 MIGRATION_STATUS_CANCELLED); 621 } 622 } 623 624 notifier_list_notify(&migration_state_notifiers, s); 625 } 626 627 void migrate_fd_error(MigrationState *s) 628 { 629 trace_migrate_fd_error(); 630 assert(s->file == NULL); 631 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); 632 notifier_list_notify(&migration_state_notifiers, s); 633 } 634 635 static void migrate_fd_cancel(MigrationState *s) 636 { 637 int old_state ; 638 QEMUFile *f = migrate_get_current()->file; 639 trace_migrate_fd_cancel(); 640 641 do { 642 old_state = s->state; 643 if (old_state != MIGRATION_STATUS_SETUP && 644 old_state != MIGRATION_STATUS_ACTIVE) { 645 break; 646 } 647 migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING); 648 } while (s->state != MIGRATION_STATUS_CANCELLING); 649 650 /* 651 * If we're unlucky the migration code might be stuck somewhere in a 652 * send/write while the network has failed and is waiting to timeout; 653 * if we've got shutdown(2) available then we can force it to quit. 654 * The outgoing qemu file gets closed in migrate_fd_cleanup that is 655 * called in a bh, so there is no race against this cancel. 656 */ 657 if (s->state == MIGRATION_STATUS_CANCELLING && f) { 658 qemu_file_shutdown(f); 659 } 660 } 661 662 void add_migration_state_change_notifier(Notifier *notify) 663 { 664 notifier_list_add(&migration_state_notifiers, notify); 665 } 666 667 void remove_migration_state_change_notifier(Notifier *notify) 668 { 669 notifier_remove(notify); 670 } 671 672 bool migration_in_setup(MigrationState *s) 673 { 674 return s->state == MIGRATION_STATUS_SETUP; 675 } 676 677 bool migration_has_finished(MigrationState *s) 678 { 679 return s->state == MIGRATION_STATUS_COMPLETED; 680 } 681 682 bool migration_has_failed(MigrationState *s) 683 { 684 return (s->state == MIGRATION_STATUS_CANCELLED || 685 s->state == MIGRATION_STATUS_FAILED); 686 } 687 688 static MigrationState *migrate_init(const MigrationParams *params) 689 { 690 MigrationState *s = migrate_get_current(); 691 int64_t bandwidth_limit = s->bandwidth_limit; 692 bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; 693 int64_t xbzrle_cache_size = s->xbzrle_cache_size; 694 int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL]; 695 int compress_thread_count = 696 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; 697 int decompress_thread_count = 698 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; 699 int x_cpu_throttle_initial = 700 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; 701 int x_cpu_throttle_increment = 702 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; 703 704 memcpy(enabled_capabilities, s->enabled_capabilities, 705 sizeof(enabled_capabilities)); 706 707 memset(s, 0, sizeof(*s)); 708 s->params = *params; 709 memcpy(s->enabled_capabilities, enabled_capabilities, 710 sizeof(enabled_capabilities)); 711 s->xbzrle_cache_size = xbzrle_cache_size; 712 713 s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level; 714 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = 715 compress_thread_count; 716 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = 717 decompress_thread_count; 718 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = 719 x_cpu_throttle_initial; 720 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = 721 x_cpu_throttle_increment; 722 s->bandwidth_limit = bandwidth_limit; 723 migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); 724 725 s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 726 return s; 727 } 728 729 static GSList *migration_blockers; 730 731 void migrate_add_blocker(Error *reason) 732 { 733 migration_blockers = g_slist_prepend(migration_blockers, reason); 734 } 735 736 void migrate_del_blocker(Error *reason) 737 { 738 migration_blockers = g_slist_remove(migration_blockers, reason); 739 } 740 741 void qmp_migrate_incoming(const char *uri, Error **errp) 742 { 743 Error *local_err = NULL; 744 static bool once = true; 745 746 if (!deferred_incoming) { 747 error_setg(errp, "For use with '-incoming defer'"); 748 return; 749 } 750 if (!once) { 751 error_setg(errp, "The incoming migration has already been started"); 752 } 753 754 qemu_start_incoming_migration(uri, &local_err); 755 756 if (local_err) { 757 error_propagate(errp, local_err); 758 return; 759 } 760 761 once = false; 762 } 763 764 void qmp_migrate(const char *uri, bool has_blk, bool blk, 765 bool has_inc, bool inc, bool has_detach, bool detach, 766 Error **errp) 767 { 768 Error *local_err = NULL; 769 MigrationState *s = migrate_get_current(); 770 MigrationParams params; 771 const char *p; 772 773 params.blk = has_blk && blk; 774 params.shared = has_inc && inc; 775 776 if (s->state == MIGRATION_STATUS_ACTIVE || 777 s->state == MIGRATION_STATUS_SETUP || 778 s->state == MIGRATION_STATUS_CANCELLING) { 779 error_setg(errp, QERR_MIGRATION_ACTIVE); 780 return; 781 } 782 if (runstate_check(RUN_STATE_INMIGRATE)) { 783 error_setg(errp, "Guest is waiting for an incoming migration"); 784 return; 785 } 786 787 if (qemu_savevm_state_blocked(errp)) { 788 return; 789 } 790 791 if (migration_blockers) { 792 *errp = error_copy(migration_blockers->data); 793 return; 794 } 795 796 /* We are starting a new migration, so we want to start in a clean 797 state. This change is only needed if previous migration 798 failed/was cancelled. We don't use migrate_set_state() because 799 we are setting the initial state, not changing it. */ 800 s->state = MIGRATION_STATUS_NONE; 801 802 s = migrate_init(¶ms); 803 804 if (strstart(uri, "tcp:", &p)) { 805 tcp_start_outgoing_migration(s, p, &local_err); 806 #ifdef CONFIG_RDMA 807 } else if (strstart(uri, "rdma:", &p)) { 808 rdma_start_outgoing_migration(s, p, &local_err); 809 #endif 810 #if !defined(WIN32) 811 } else if (strstart(uri, "exec:", &p)) { 812 exec_start_outgoing_migration(s, p, &local_err); 813 } else if (strstart(uri, "unix:", &p)) { 814 unix_start_outgoing_migration(s, p, &local_err); 815 } else if (strstart(uri, "fd:", &p)) { 816 fd_start_outgoing_migration(s, p, &local_err); 817 #endif 818 } else { 819 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", 820 "a valid migration protocol"); 821 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); 822 return; 823 } 824 825 if (local_err) { 826 migrate_fd_error(s); 827 error_propagate(errp, local_err); 828 return; 829 } 830 } 831 832 void qmp_migrate_cancel(Error **errp) 833 { 834 migrate_fd_cancel(migrate_get_current()); 835 } 836 837 void qmp_migrate_set_cache_size(int64_t value, Error **errp) 838 { 839 MigrationState *s = migrate_get_current(); 840 int64_t new_size; 841 842 /* Check for truncation */ 843 if (value != (size_t)value) { 844 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 845 "exceeding address space"); 846 return; 847 } 848 849 /* Cache should not be larger than guest ram size */ 850 if (value > ram_bytes_total()) { 851 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 852 "exceeds guest ram size "); 853 return; 854 } 855 856 new_size = xbzrle_cache_resize(value); 857 if (new_size < 0) { 858 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 859 "is smaller than page size"); 860 return; 861 } 862 863 s->xbzrle_cache_size = new_size; 864 } 865 866 int64_t qmp_query_migrate_cache_size(Error **errp) 867 { 868 return migrate_xbzrle_cache_size(); 869 } 870 871 void qmp_migrate_set_speed(int64_t value, Error **errp) 872 { 873 MigrationState *s; 874 875 if (value < 0) { 876 value = 0; 877 } 878 if (value > SIZE_MAX) { 879 value = SIZE_MAX; 880 } 881 882 s = migrate_get_current(); 883 s->bandwidth_limit = value; 884 if (s->file) { 885 qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO); 886 } 887 } 888 889 void qmp_migrate_set_downtime(double value, Error **errp) 890 { 891 value *= 1e9; 892 value = MAX(0, MIN(UINT64_MAX, value)); 893 max_downtime = (uint64_t)value; 894 } 895 896 bool migrate_auto_converge(void) 897 { 898 MigrationState *s; 899 900 s = migrate_get_current(); 901 902 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; 903 } 904 905 bool migrate_zero_blocks(void) 906 { 907 MigrationState *s; 908 909 s = migrate_get_current(); 910 911 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; 912 } 913 914 bool migrate_use_compression(void) 915 { 916 MigrationState *s; 917 918 s = migrate_get_current(); 919 920 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; 921 } 922 923 int migrate_compress_level(void) 924 { 925 MigrationState *s; 926 927 s = migrate_get_current(); 928 929 return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL]; 930 } 931 932 int migrate_compress_threads(void) 933 { 934 MigrationState *s; 935 936 s = migrate_get_current(); 937 938 return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; 939 } 940 941 int migrate_decompress_threads(void) 942 { 943 MigrationState *s; 944 945 s = migrate_get_current(); 946 947 return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; 948 } 949 950 bool migrate_use_events(void) 951 { 952 MigrationState *s; 953 954 s = migrate_get_current(); 955 956 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; 957 } 958 959 int migrate_use_xbzrle(void) 960 { 961 MigrationState *s; 962 963 s = migrate_get_current(); 964 965 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; 966 } 967 968 int64_t migrate_xbzrle_cache_size(void) 969 { 970 MigrationState *s; 971 972 s = migrate_get_current(); 973 974 return s->xbzrle_cache_size; 975 } 976 977 /** 978 * migration_completion: Used by migration_thread when there's not much left. 979 * The caller 'breaks' the loop when this returns. 980 * 981 * @s: Current migration state 982 * @*old_vm_running: Pointer to old_vm_running flag 983 * @*start_time: Pointer to time to update 984 */ 985 static void migration_completion(MigrationState *s, bool *old_vm_running, 986 int64_t *start_time) 987 { 988 int ret; 989 990 qemu_mutex_lock_iothread(); 991 *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 992 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); 993 *old_vm_running = runstate_is_running(); 994 995 ret = global_state_store(); 996 if (!ret) { 997 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); 998 if (ret >= 0) { 999 qemu_file_set_rate_limit(s->file, INT64_MAX); 1000 qemu_savevm_state_complete(s->file); 1001 } 1002 } 1003 qemu_mutex_unlock_iothread(); 1004 1005 if (ret < 0) { 1006 goto fail; 1007 } 1008 1009 if (qemu_file_get_error(s->file)) { 1010 trace_migration_completion_file_err(); 1011 goto fail; 1012 } 1013 1014 migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED); 1015 return; 1016 1017 fail: 1018 migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); 1019 } 1020 1021 /* migration thread support */ 1022 1023 static void *migration_thread(void *opaque) 1024 { 1025 MigrationState *s = opaque; 1026 int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1027 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); 1028 int64_t initial_bytes = 0; 1029 int64_t max_size = 0; 1030 int64_t start_time = initial_time; 1031 bool old_vm_running = false; 1032 1033 rcu_register_thread(); 1034 1035 qemu_savevm_state_header(s->file); 1036 qemu_savevm_state_begin(s->file, &s->params); 1037 1038 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; 1039 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); 1040 1041 while (s->state == MIGRATION_STATUS_ACTIVE) { 1042 int64_t current_time; 1043 uint64_t pending_size; 1044 1045 if (!qemu_file_rate_limit(s->file)) { 1046 pending_size = qemu_savevm_state_pending(s->file, max_size); 1047 trace_migrate_pending(pending_size, max_size); 1048 if (pending_size && pending_size >= max_size) { 1049 qemu_savevm_state_iterate(s->file); 1050 } else { 1051 trace_migration_thread_low_pending(pending_size); 1052 migration_completion(s, &old_vm_running, &start_time); 1053 break; 1054 } 1055 } 1056 1057 if (qemu_file_get_error(s->file)) { 1058 migrate_set_state(s, MIGRATION_STATUS_ACTIVE, 1059 MIGRATION_STATUS_FAILED); 1060 break; 1061 } 1062 current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1063 if (current_time >= initial_time + BUFFER_DELAY) { 1064 uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes; 1065 uint64_t time_spent = current_time - initial_time; 1066 double bandwidth = transferred_bytes / time_spent; 1067 max_size = bandwidth * migrate_max_downtime() / 1000000; 1068 1069 s->mbps = time_spent ? (((double) transferred_bytes * 8.0) / 1070 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1; 1071 1072 trace_migrate_transferred(transferred_bytes, time_spent, 1073 bandwidth, max_size); 1074 /* if we haven't sent anything, we don't want to recalculate 1075 10000 is a small enough number for our purposes */ 1076 if (s->dirty_bytes_rate && transferred_bytes > 10000) { 1077 s->expected_downtime = s->dirty_bytes_rate / bandwidth; 1078 } 1079 1080 qemu_file_reset_rate_limit(s->file); 1081 initial_time = current_time; 1082 initial_bytes = qemu_ftell(s->file); 1083 } 1084 if (qemu_file_rate_limit(s->file)) { 1085 /* usleep expects microseconds */ 1086 g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); 1087 } 1088 } 1089 1090 /* If we enabled cpu throttling for auto-converge, turn it off. */ 1091 cpu_throttle_stop(); 1092 1093 qemu_mutex_lock_iothread(); 1094 if (s->state == MIGRATION_STATUS_COMPLETED) { 1095 int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1096 uint64_t transferred_bytes = qemu_ftell(s->file); 1097 s->total_time = end_time - s->total_time; 1098 s->downtime = end_time - start_time; 1099 if (s->total_time) { 1100 s->mbps = (((double) transferred_bytes * 8.0) / 1101 ((double) s->total_time)) / 1000; 1102 } 1103 runstate_set(RUN_STATE_POSTMIGRATE); 1104 } else { 1105 if (old_vm_running) { 1106 vm_start(); 1107 } 1108 } 1109 qemu_bh_schedule(s->cleanup_bh); 1110 qemu_mutex_unlock_iothread(); 1111 1112 rcu_unregister_thread(); 1113 return NULL; 1114 } 1115 1116 void migrate_fd_connect(MigrationState *s) 1117 { 1118 /* This is a best 1st approximation. ns to ms */ 1119 s->expected_downtime = max_downtime/1000000; 1120 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); 1121 1122 qemu_file_set_rate_limit(s->file, 1123 s->bandwidth_limit / XFER_LIMIT_RATIO); 1124 1125 /* Notify before starting migration thread */ 1126 notifier_list_notify(&migration_state_notifiers, s); 1127 1128 migrate_compress_threads_create(); 1129 qemu_thread_create(&s->thread, "migration", migration_thread, s, 1130 QEMU_THREAD_JOINABLE); 1131 } 1132