/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "socket.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
#include "savevm.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value, and renamed it to avoid
 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock.
*/ 93 PageCache *cache; 94 QemuMutex lock; 95 /* it will store a page full of zeros */ 96 uint8_t *zero_target_page; 97 /* buffer used for XBZRLE decoding */ 98 uint8_t *decoded_buf; 99 } XBZRLE; 100 101 static void XBZRLE_cache_lock(void) 102 { 103 if (migrate_use_xbzrle()) 104 qemu_mutex_lock(&XBZRLE.lock); 105 } 106 107 static void XBZRLE_cache_unlock(void) 108 { 109 if (migrate_use_xbzrle()) 110 qemu_mutex_unlock(&XBZRLE.lock); 111 } 112 113 /** 114 * xbzrle_cache_resize: resize the xbzrle cache 115 * 116 * This function is called from qmp_migrate_set_cache_size in main 117 * thread, possibly while a migration is in progress. A running 118 * migration may be using the cache and might finish during this call, 119 * hence changes to the cache are protected by XBZRLE.lock(). 120 * 121 * Returns 0 for success or -1 for error 122 * 123 * @new_size: new cache size 124 * @errp: set *errp if the check failed, with reason 125 */ 126 int xbzrle_cache_resize(int64_t new_size, Error **errp) 127 { 128 PageCache *new_cache; 129 int64_t ret = 0; 130 131 /* Check for truncation */ 132 if (new_size != (size_t)new_size) { 133 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 134 "exceeding address space"); 135 return -1; 136 } 137 138 if (new_size == migrate_xbzrle_cache_size()) { 139 /* nothing to do */ 140 return 0; 141 } 142 143 XBZRLE_cache_lock(); 144 145 if (XBZRLE.cache != NULL) { 146 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp); 147 if (!new_cache) { 148 ret = -1; 149 goto out; 150 } 151 152 cache_fini(XBZRLE.cache); 153 XBZRLE.cache = new_cache; 154 } 155 out: 156 XBZRLE_cache_unlock(); 157 return ret; 158 } 159 160 /* Should be holding either ram_list.mutex, or the RCU lock. */ 161 #define RAMBLOCK_FOREACH_MIGRATABLE(block) \ 162 INTERNAL_RAMBLOCK_FOREACH(block) \ 163 if (!qemu_ram_is_migratable(block)) {} else 164 165 #undef RAMBLOCK_FOREACH 166 167 static void ramblock_recv_map_init(void) 168 { 169 RAMBlock *rb; 170 171 RAMBLOCK_FOREACH_MIGRATABLE(rb) { 172 assert(!rb->receivedmap); 173 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); 174 } 175 } 176 177 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr) 178 { 179 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), 180 rb->receivedmap); 181 } 182 183 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset) 184 { 185 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); 186 } 187 188 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) 189 { 190 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); 191 } 192 193 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, 194 size_t nr) 195 { 196 bitmap_set_atomic(rb->receivedmap, 197 ramblock_recv_bitmap_offset(host_addr, rb), 198 nr); 199 } 200 201 #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL) 202 203 /* 204 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes). 205 * 206 * Returns >0 if success with sent bytes, or <0 if error. 
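 *
 * On the wire this is laid out as:
 *
 *   bitmap_size (be64) | bitmap, little endian, bitmap_size bytes |
 *   RAMBLOCK_RECV_BITMAP_ENDING (be64)
 *
 * The trailing marker lets the receiver sanity-check that the stream
 * was not truncated or corrupted in the middle.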
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required so that the bitmap can be parsed even when source and
     * destination VMs are not using the same endianness. (Note: big
     * endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = nbits / 8;

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr offset;
    hwaddr len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have hit a too-high dirty page rate */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t
ram_bytes_remaining(void) 321 { 322 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) : 323 0; 324 } 325 326 MigrationStats ram_counters; 327 328 /* used by the search for pages to send */ 329 struct PageSearchStatus { 330 /* Current block being searched */ 331 RAMBlock *block; 332 /* Current page to search from */ 333 unsigned long page; 334 /* Set once we wrap around */ 335 bool complete_round; 336 }; 337 typedef struct PageSearchStatus PageSearchStatus; 338 339 struct CompressParam { 340 bool done; 341 bool quit; 342 QEMUFile *file; 343 QemuMutex mutex; 344 QemuCond cond; 345 RAMBlock *block; 346 ram_addr_t offset; 347 348 /* internally used fields */ 349 z_stream stream; 350 uint8_t *originbuf; 351 }; 352 typedef struct CompressParam CompressParam; 353 354 struct DecompressParam { 355 bool done; 356 bool quit; 357 QemuMutex mutex; 358 QemuCond cond; 359 void *des; 360 uint8_t *compbuf; 361 int len; 362 z_stream stream; 363 }; 364 typedef struct DecompressParam DecompressParam; 365 366 static CompressParam *comp_param; 367 static QemuThread *compress_threads; 368 /* comp_done_cond is used to wake up the migration thread when 369 * one of the compression threads has finished the compression. 370 * comp_done_lock is used to co-work with comp_done_cond. 371 */ 372 static QemuMutex comp_done_lock; 373 static QemuCond comp_done_cond; 374 /* The empty QEMUFileOps will be used by file in CompressParam */ 375 static const QEMUFileOps empty_ops = { }; 376 377 static QEMUFile *decomp_file; 378 static DecompressParam *decomp_param; 379 static QemuThread *decompress_threads; 380 static QemuMutex decomp_done_lock; 381 static QemuCond decomp_done_cond; 382 383 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, 384 ram_addr_t offset, uint8_t *source_buf); 385 386 static void *do_data_compress(void *opaque) 387 { 388 CompressParam *param = opaque; 389 RAMBlock *block; 390 ram_addr_t offset; 391 392 qemu_mutex_lock(¶m->mutex); 393 while (!param->quit) { 394 if (param->block) { 395 block = param->block; 396 offset = param->offset; 397 param->block = NULL; 398 qemu_mutex_unlock(¶m->mutex); 399 400 do_compress_ram_page(param->file, ¶m->stream, block, offset, 401 param->originbuf); 402 403 qemu_mutex_lock(&comp_done_lock); 404 param->done = true; 405 qemu_cond_signal(&comp_done_cond); 406 qemu_mutex_unlock(&comp_done_lock); 407 408 qemu_mutex_lock(¶m->mutex); 409 } else { 410 qemu_cond_wait(¶m->cond, ¶m->mutex); 411 } 412 } 413 qemu_mutex_unlock(¶m->mutex); 414 415 return NULL; 416 } 417 418 static inline void terminate_compression_threads(void) 419 { 420 int idx, thread_count; 421 422 thread_count = migrate_compress_threads(); 423 424 for (idx = 0; idx < thread_count; idx++) { 425 qemu_mutex_lock(&comp_param[idx].mutex); 426 comp_param[idx].quit = true; 427 qemu_cond_signal(&comp_param[idx].cond); 428 qemu_mutex_unlock(&comp_param[idx].mutex); 429 } 430 } 431 432 static void compress_threads_save_cleanup(void) 433 { 434 int i, thread_count; 435 436 if (!migrate_use_compression()) { 437 return; 438 } 439 terminate_compression_threads(); 440 thread_count = migrate_compress_threads(); 441 for (i = 0; i < thread_count; i++) { 442 /* 443 * we use it as a indicator which shows if the thread is 444 * properly init'd or not 445 */ 446 if (!comp_param[i].file) { 447 break; 448 } 449 qemu_thread_join(compress_threads + i); 450 qemu_mutex_destroy(&comp_param[i].mutex); 451 qemu_cond_destroy(&comp_param[i].cond); 452 deflateEnd(&comp_param[i].stream); 453 
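        /*
         * Free the per-thread staging buffer and close the dummy
         * in-memory QEMUFile that buffered this thread's compressed data.
         */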
g_free(comp_param[i].originbuf); 454 qemu_fclose(comp_param[i].file); 455 comp_param[i].file = NULL; 456 } 457 qemu_mutex_destroy(&comp_done_lock); 458 qemu_cond_destroy(&comp_done_cond); 459 g_free(compress_threads); 460 g_free(comp_param); 461 compress_threads = NULL; 462 comp_param = NULL; 463 } 464 465 static int compress_threads_save_setup(void) 466 { 467 int i, thread_count; 468 469 if (!migrate_use_compression()) { 470 return 0; 471 } 472 thread_count = migrate_compress_threads(); 473 compress_threads = g_new0(QemuThread, thread_count); 474 comp_param = g_new0(CompressParam, thread_count); 475 qemu_cond_init(&comp_done_cond); 476 qemu_mutex_init(&comp_done_lock); 477 for (i = 0; i < thread_count; i++) { 478 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE); 479 if (!comp_param[i].originbuf) { 480 goto exit; 481 } 482 483 if (deflateInit(&comp_param[i].stream, 484 migrate_compress_level()) != Z_OK) { 485 g_free(comp_param[i].originbuf); 486 goto exit; 487 } 488 489 /* comp_param[i].file is just used as a dummy buffer to save data, 490 * set its ops to empty. 491 */ 492 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); 493 comp_param[i].done = true; 494 comp_param[i].quit = false; 495 qemu_mutex_init(&comp_param[i].mutex); 496 qemu_cond_init(&comp_param[i].cond); 497 qemu_thread_create(compress_threads + i, "compress", 498 do_data_compress, comp_param + i, 499 QEMU_THREAD_JOINABLE); 500 } 501 return 0; 502 503 exit: 504 compress_threads_save_cleanup(); 505 return -1; 506 } 507 508 /* Multiple fd's */ 509 510 #define MULTIFD_MAGIC 0x11223344U 511 #define MULTIFD_VERSION 1 512 513 typedef struct { 514 uint32_t magic; 515 uint32_t version; 516 unsigned char uuid[16]; /* QemuUUID */ 517 uint8_t id; 518 } __attribute__((packed)) MultiFDInit_t; 519 520 typedef struct { 521 /* this fields are not changed once the thread is created */ 522 /* channel number */ 523 uint8_t id; 524 /* channel thread name */ 525 char *name; 526 /* channel thread id */ 527 QemuThread thread; 528 /* communication channel */ 529 QIOChannel *c; 530 /* sem where to wait for more work */ 531 QemuSemaphore sem; 532 /* this mutex protects the following parameters */ 533 QemuMutex mutex; 534 /* is this channel thread running */ 535 bool running; 536 /* should this thread finish */ 537 bool quit; 538 } MultiFDSendParams; 539 540 typedef struct { 541 /* this fields are not changed once the thread is created */ 542 /* channel number */ 543 uint8_t id; 544 /* channel thread name */ 545 char *name; 546 /* channel thread id */ 547 QemuThread thread; 548 /* communication channel */ 549 QIOChannel *c; 550 /* sem where to wait for more work */ 551 QemuSemaphore sem; 552 /* this mutex protects the following parameters */ 553 QemuMutex mutex; 554 /* is this channel thread running */ 555 bool running; 556 /* should this thread finish */ 557 bool quit; 558 } MultiFDRecvParams; 559 560 static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) 561 { 562 MultiFDInit_t msg; 563 int ret; 564 565 msg.magic = cpu_to_be32(MULTIFD_MAGIC); 566 msg.version = cpu_to_be32(MULTIFD_VERSION); 567 msg.id = p->id; 568 memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid)); 569 570 ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp); 571 if (ret != 0) { 572 return -1; 573 } 574 return 0; 575 } 576 577 static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) 578 { 579 MultiFDInit_t msg; 580 int ret; 581 582 ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp); 583 if (ret != 0) { 584 return 
-1; 585 } 586 587 be32_to_cpus(&msg.magic); 588 be32_to_cpus(&msg.version); 589 590 if (msg.magic != MULTIFD_MAGIC) { 591 error_setg(errp, "multifd: received packet magic %x " 592 "expected %x", msg.magic, MULTIFD_MAGIC); 593 return -1; 594 } 595 596 if (msg.version != MULTIFD_VERSION) { 597 error_setg(errp, "multifd: received packet version %d " 598 "expected %d", msg.version, MULTIFD_VERSION); 599 return -1; 600 } 601 602 if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) { 603 char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid); 604 char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid); 605 606 error_setg(errp, "multifd: received uuid '%s' and expected " 607 "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id); 608 g_free(uuid); 609 g_free(msg_uuid); 610 return -1; 611 } 612 613 if (msg.id > migrate_multifd_channels()) { 614 error_setg(errp, "multifd: received channel version %d " 615 "expected %d", msg.version, MULTIFD_VERSION); 616 return -1; 617 } 618 619 return msg.id; 620 } 621 622 struct { 623 MultiFDSendParams *params; 624 /* number of created threads */ 625 int count; 626 } *multifd_send_state; 627 628 static void multifd_send_terminate_threads(Error *err) 629 { 630 int i; 631 632 if (err) { 633 MigrationState *s = migrate_get_current(); 634 migrate_set_error(s, err); 635 if (s->state == MIGRATION_STATUS_SETUP || 636 s->state == MIGRATION_STATUS_PRE_SWITCHOVER || 637 s->state == MIGRATION_STATUS_DEVICE || 638 s->state == MIGRATION_STATUS_ACTIVE) { 639 migrate_set_state(&s->state, s->state, 640 MIGRATION_STATUS_FAILED); 641 } 642 } 643 644 for (i = 0; i < migrate_multifd_channels(); i++) { 645 MultiFDSendParams *p = &multifd_send_state->params[i]; 646 647 qemu_mutex_lock(&p->mutex); 648 p->quit = true; 649 qemu_sem_post(&p->sem); 650 qemu_mutex_unlock(&p->mutex); 651 } 652 } 653 654 int multifd_save_cleanup(Error **errp) 655 { 656 int i; 657 int ret = 0; 658 659 if (!migrate_use_multifd()) { 660 return 0; 661 } 662 multifd_send_terminate_threads(NULL); 663 for (i = 0; i < migrate_multifd_channels(); i++) { 664 MultiFDSendParams *p = &multifd_send_state->params[i]; 665 666 if (p->running) { 667 qemu_thread_join(&p->thread); 668 } 669 socket_send_channel_destroy(p->c); 670 p->c = NULL; 671 qemu_mutex_destroy(&p->mutex); 672 qemu_sem_destroy(&p->sem); 673 g_free(p->name); 674 p->name = NULL; 675 } 676 g_free(multifd_send_state->params); 677 multifd_send_state->params = NULL; 678 g_free(multifd_send_state); 679 multifd_send_state = NULL; 680 return ret; 681 } 682 683 static void *multifd_send_thread(void *opaque) 684 { 685 MultiFDSendParams *p = opaque; 686 Error *local_err = NULL; 687 688 if (multifd_send_initial_packet(p, &local_err) < 0) { 689 goto out; 690 } 691 692 while (true) { 693 qemu_mutex_lock(&p->mutex); 694 if (p->quit) { 695 qemu_mutex_unlock(&p->mutex); 696 break; 697 } 698 qemu_mutex_unlock(&p->mutex); 699 qemu_sem_wait(&p->sem); 700 } 701 702 out: 703 if (local_err) { 704 multifd_send_terminate_threads(local_err); 705 } 706 707 qemu_mutex_lock(&p->mutex); 708 p->running = false; 709 qemu_mutex_unlock(&p->mutex); 710 711 return NULL; 712 } 713 714 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) 715 { 716 MultiFDSendParams *p = opaque; 717 QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task)); 718 Error *local_err = NULL; 719 720 if (qio_task_propagate_error(task, &local_err)) { 721 if (multifd_save_cleanup(&local_err) != 0) { 722 migrate_set_error(migrate_get_current(), local_err); 723 } 724 } else { 725 p->c = 
QIO_CHANNEL(sioc); 726 qio_channel_set_delay(p->c, false); 727 p->running = true; 728 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, 729 QEMU_THREAD_JOINABLE); 730 731 atomic_inc(&multifd_send_state->count); 732 } 733 } 734 735 int multifd_save_setup(void) 736 { 737 int thread_count; 738 uint8_t i; 739 740 if (!migrate_use_multifd()) { 741 return 0; 742 } 743 thread_count = migrate_multifd_channels(); 744 multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); 745 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); 746 atomic_set(&multifd_send_state->count, 0); 747 for (i = 0; i < thread_count; i++) { 748 MultiFDSendParams *p = &multifd_send_state->params[i]; 749 750 qemu_mutex_init(&p->mutex); 751 qemu_sem_init(&p->sem, 0); 752 p->quit = false; 753 p->id = i; 754 p->name = g_strdup_printf("multifdsend_%d", i); 755 socket_send_channel_create(multifd_new_send_channel_async, p); 756 } 757 return 0; 758 } 759 760 struct { 761 MultiFDRecvParams *params; 762 /* number of created threads */ 763 int count; 764 } *multifd_recv_state; 765 766 static void multifd_recv_terminate_threads(Error *err) 767 { 768 int i; 769 770 if (err) { 771 MigrationState *s = migrate_get_current(); 772 migrate_set_error(s, err); 773 if (s->state == MIGRATION_STATUS_SETUP || 774 s->state == MIGRATION_STATUS_ACTIVE) { 775 migrate_set_state(&s->state, s->state, 776 MIGRATION_STATUS_FAILED); 777 } 778 } 779 780 for (i = 0; i < migrate_multifd_channels(); i++) { 781 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 782 783 qemu_mutex_lock(&p->mutex); 784 p->quit = true; 785 qemu_sem_post(&p->sem); 786 qemu_mutex_unlock(&p->mutex); 787 } 788 } 789 790 int multifd_load_cleanup(Error **errp) 791 { 792 int i; 793 int ret = 0; 794 795 if (!migrate_use_multifd()) { 796 return 0; 797 } 798 multifd_recv_terminate_threads(NULL); 799 for (i = 0; i < migrate_multifd_channels(); i++) { 800 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 801 802 if (p->running) { 803 qemu_thread_join(&p->thread); 804 } 805 object_unref(OBJECT(p->c)); 806 p->c = NULL; 807 qemu_mutex_destroy(&p->mutex); 808 qemu_sem_destroy(&p->sem); 809 g_free(p->name); 810 p->name = NULL; 811 } 812 g_free(multifd_recv_state->params); 813 multifd_recv_state->params = NULL; 814 g_free(multifd_recv_state); 815 multifd_recv_state = NULL; 816 817 return ret; 818 } 819 820 static void *multifd_recv_thread(void *opaque) 821 { 822 MultiFDRecvParams *p = opaque; 823 824 while (true) { 825 qemu_mutex_lock(&p->mutex); 826 if (p->quit) { 827 qemu_mutex_unlock(&p->mutex); 828 break; 829 } 830 qemu_mutex_unlock(&p->mutex); 831 qemu_sem_wait(&p->sem); 832 } 833 834 qemu_mutex_lock(&p->mutex); 835 p->running = false; 836 qemu_mutex_unlock(&p->mutex); 837 838 return NULL; 839 } 840 841 int multifd_load_setup(void) 842 { 843 int thread_count; 844 uint8_t i; 845 846 if (!migrate_use_multifd()) { 847 return 0; 848 } 849 thread_count = migrate_multifd_channels(); 850 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); 851 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); 852 atomic_set(&multifd_recv_state->count, 0); 853 for (i = 0; i < thread_count; i++) { 854 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 855 856 qemu_mutex_init(&p->mutex); 857 qemu_sem_init(&p->sem, 0); 858 p->quit = false; 859 p->id = i; 860 p->name = g_strdup_printf("multifdrecv_%d", i); 861 } 862 return 0; 863 } 864 865 bool multifd_recv_all_channels_created(void) 866 { 867 int thread_count = migrate_multifd_channels(); 868 869 if 
(!migrate_use_multifd()) {
        return true;
    }

    return thread_count == atomic_read(&multifd_recv_state->count);
}

void multifd_recv_new_channel(QIOChannel *ioc)
{
    MultiFDRecvParams *p;
    Error *local_err = NULL;
    int id;

    id = multifd_recv_initial_packet(ioc, &local_err);
    if (id < 0) {
        multifd_recv_terminate_threads(local_err);
        return;
    }

    p = &multifd_recv_state->params[id];
    if (p->c != NULL) {
        error_setg(&local_err, "multifd: received id '%d' already set up",
                   id);
        multifd_recv_terminate_threads(local_err);
        return;
    }
    p->c = ioc;
    object_ref(OBJECT(ioc));

    p->running = true;
    qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                       QEMU_THREAD_JOINABLE);
    atomic_inc(&multifd_recv_state->count);
    if (multifd_recv_state->count == migrate_multifd_channels()) {
        migration_incoming_process();
    }
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}

/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
975 */ 976 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) 977 { 978 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { 979 return; 980 } 981 982 /* We don't care if this fails to allocate a new cache page 983 * as long as it updated an old one */ 984 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, 985 ram_counters.dirty_sync_count); 986 } 987 988 #define ENCODING_FLAG_XBZRLE 0x1 989 990 /** 991 * save_xbzrle_page: compress and send current page 992 * 993 * Returns: 1 means that we wrote the page 994 * 0 means that page is identical to the one already sent 995 * -1 means that xbzrle would be longer than normal 996 * 997 * @rs: current RAM state 998 * @current_data: pointer to the address of the page contents 999 * @current_addr: addr of the page 1000 * @block: block that contains the page we want to send 1001 * @offset: offset inside the block for the page 1002 * @last_stage: if we are at the completion stage 1003 */ 1004 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, 1005 ram_addr_t current_addr, RAMBlock *block, 1006 ram_addr_t offset, bool last_stage) 1007 { 1008 int encoded_len = 0, bytes_xbzrle; 1009 uint8_t *prev_cached_page; 1010 1011 if (!cache_is_cached(XBZRLE.cache, current_addr, 1012 ram_counters.dirty_sync_count)) { 1013 xbzrle_counters.cache_miss++; 1014 if (!last_stage) { 1015 if (cache_insert(XBZRLE.cache, current_addr, *current_data, 1016 ram_counters.dirty_sync_count) == -1) { 1017 return -1; 1018 } else { 1019 /* update *current_data when the page has been 1020 inserted into cache */ 1021 *current_data = get_cached_data(XBZRLE.cache, current_addr); 1022 } 1023 } 1024 return -1; 1025 } 1026 1027 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 1028 1029 /* save current buffer into memory */ 1030 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); 1031 1032 /* XBZRLE encoding (if there is no overflow) */ 1033 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 1034 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 1035 TARGET_PAGE_SIZE); 1036 if (encoded_len == 0) { 1037 trace_save_xbzrle_page_skipping(); 1038 return 0; 1039 } else if (encoded_len == -1) { 1040 trace_save_xbzrle_page_overflow(); 1041 xbzrle_counters.overflow++; 1042 /* update data in the cache */ 1043 if (!last_stage) { 1044 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); 1045 *current_data = prev_cached_page; 1046 } 1047 return -1; 1048 } 1049 1050 /* we need to update the data in the cache, in order to get the same data */ 1051 if (!last_stage) { 1052 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 1053 } 1054 1055 /* Send XBZRLE based compressed page */ 1056 bytes_xbzrle = save_page_header(rs, rs->f, block, 1057 offset | RAM_SAVE_FLAG_XBZRLE); 1058 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); 1059 qemu_put_be16(rs->f, encoded_len); 1060 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); 1061 bytes_xbzrle += encoded_len + 1 + 2; 1062 xbzrle_counters.pages++; 1063 xbzrle_counters.bytes += bytes_xbzrle; 1064 ram_counters.transferred += bytes_xbzrle; 1065 1066 return 1; 1067 } 1068 1069 /** 1070 * migration_bitmap_find_dirty: find the next dirty page from start 1071 * 1072 * Called with rcu_read_lock() to protect migration_bitmap 1073 * 1074 * Returns the byte offset within memory region of the start of a dirty page 1075 * 1076 * @rs: current RAM state 1077 * @rb: RAMBlock where to search for dirty pages 1078 * @start: page where we start the search 1079 */ 1080 static inline 1081 
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, 1082 unsigned long start) 1083 { 1084 unsigned long size = rb->used_length >> TARGET_PAGE_BITS; 1085 unsigned long *bitmap = rb->bmap; 1086 unsigned long next; 1087 1088 if (!qemu_ram_is_migratable(rb)) { 1089 return size; 1090 } 1091 1092 if (rs->ram_bulk_stage && start > 0) { 1093 next = start + 1; 1094 } else { 1095 next = find_next_bit(bitmap, size, start); 1096 } 1097 1098 return next; 1099 } 1100 1101 static inline bool migration_bitmap_clear_dirty(RAMState *rs, 1102 RAMBlock *rb, 1103 unsigned long page) 1104 { 1105 bool ret; 1106 1107 ret = test_and_clear_bit(page, rb->bmap); 1108 1109 if (ret) { 1110 rs->migration_dirty_pages--; 1111 } 1112 return ret; 1113 } 1114 1115 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, 1116 ram_addr_t start, ram_addr_t length) 1117 { 1118 rs->migration_dirty_pages += 1119 cpu_physical_memory_sync_dirty_bitmap(rb, start, length, 1120 &rs->num_dirty_pages_period); 1121 } 1122 1123 /** 1124 * ram_pagesize_summary: calculate all the pagesizes of a VM 1125 * 1126 * Returns a summary bitmap of the page sizes of all RAMBlocks 1127 * 1128 * For VMs with just normal pages this is equivalent to the host page 1129 * size. If it's got some huge pages then it's the OR of all the 1130 * different page sizes. 1131 */ 1132 uint64_t ram_pagesize_summary(void) 1133 { 1134 RAMBlock *block; 1135 uint64_t summary = 0; 1136 1137 RAMBLOCK_FOREACH_MIGRATABLE(block) { 1138 summary |= block->page_size; 1139 } 1140 1141 return summary; 1142 } 1143 1144 static void migration_update_rates(RAMState *rs, int64_t end_time) 1145 { 1146 uint64_t iter_count = rs->iterations - rs->iterations_prev; 1147 1148 /* calculate period counters */ 1149 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 1150 / (end_time - rs->time_last_bitmap_sync); 1151 1152 if (!iter_count) { 1153 return; 1154 } 1155 1156 if (migrate_use_xbzrle()) { 1157 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - 1158 rs->xbzrle_cache_miss_prev) / iter_count; 1159 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; 1160 } 1161 } 1162 1163 static void migration_bitmap_sync(RAMState *rs) 1164 { 1165 RAMBlock *block; 1166 int64_t end_time; 1167 uint64_t bytes_xfer_now; 1168 1169 ram_counters.dirty_sync_count++; 1170 1171 if (!rs->time_last_bitmap_sync) { 1172 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1173 } 1174 1175 trace_migration_bitmap_sync_start(); 1176 memory_global_dirty_log_sync(); 1177 1178 qemu_mutex_lock(&rs->bitmap_mutex); 1179 rcu_read_lock(); 1180 RAMBLOCK_FOREACH_MIGRATABLE(block) { 1181 migration_bitmap_sync_range(rs, block, 0, block->used_length); 1182 } 1183 ram_counters.remaining = ram_bytes_remaining(); 1184 rcu_read_unlock(); 1185 qemu_mutex_unlock(&rs->bitmap_mutex); 1186 1187 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); 1188 1189 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 1190 1191 /* more than 1 second = 1000 millisecons */ 1192 if (end_time > rs->time_last_bitmap_sync + 1000) { 1193 bytes_xfer_now = ram_counters.transferred; 1194 1195 /* During block migration the auto-converge logic incorrectly detects 1196 * that ram migration makes no progress. Avoid this by disabling the 1197 * throttling logic during the bulk phase of block migration. */ 1198 if (migrate_auto_converge() && !blk_mig_bulk_active()) { 1199 /* The following detection logic can be refined later. 
For now: 1200 Check to see if the dirtied bytes is 50% more than the approx. 1201 amount of bytes that just got transferred since the last time we 1202 were in this routine. If that happens twice, start or increase 1203 throttling */ 1204 1205 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE > 1206 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && 1207 (++rs->dirty_rate_high_cnt >= 2)) { 1208 trace_migration_throttle(); 1209 rs->dirty_rate_high_cnt = 0; 1210 mig_throttle_guest_down(); 1211 } 1212 } 1213 1214 migration_update_rates(rs, end_time); 1215 1216 rs->iterations_prev = rs->iterations; 1217 1218 /* reset period counters */ 1219 rs->time_last_bitmap_sync = end_time; 1220 rs->num_dirty_pages_period = 0; 1221 rs->bytes_xfer_prev = bytes_xfer_now; 1222 } 1223 if (migrate_use_events()) { 1224 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL); 1225 } 1226 } 1227 1228 /** 1229 * save_zero_page: send the zero page to the stream 1230 * 1231 * Returns the number of pages written. 1232 * 1233 * @rs: current RAM state 1234 * @block: block that contains the page we want to send 1235 * @offset: offset inside the block for the page 1236 */ 1237 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) 1238 { 1239 uint8_t *p = block->host + offset; 1240 int pages = -1; 1241 1242 if (is_zero_range(p, TARGET_PAGE_SIZE)) { 1243 ram_counters.duplicate++; 1244 ram_counters.transferred += 1245 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO); 1246 qemu_put_byte(rs->f, 0); 1247 ram_counters.transferred += 1; 1248 pages = 1; 1249 } 1250 1251 return pages; 1252 } 1253 1254 static void ram_release_pages(const char *rbname, uint64_t offset, int pages) 1255 { 1256 if (!migrate_release_ram() || !migration_in_postcopy()) { 1257 return; 1258 } 1259 1260 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); 1261 } 1262 1263 /* 1264 * @pages: the number of pages written by the control path, 1265 * < 0 - error 1266 * > 0 - number of pages written 1267 * 1268 * Return true if the pages has been saved, otherwise false is returned. 1269 */ 1270 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 1271 int *pages) 1272 { 1273 uint64_t bytes_xmit = 0; 1274 int ret; 1275 1276 *pages = -1; 1277 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, 1278 &bytes_xmit); 1279 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { 1280 return false; 1281 } 1282 1283 if (bytes_xmit) { 1284 ram_counters.transferred += bytes_xmit; 1285 *pages = 1; 1286 } 1287 1288 if (ret == RAM_SAVE_CONTROL_DELAYED) { 1289 return true; 1290 } 1291 1292 if (bytes_xmit > 0) { 1293 ram_counters.normal++; 1294 } else if (bytes_xmit == 0) { 1295 ram_counters.duplicate++; 1296 } 1297 1298 return true; 1299 } 1300 1301 /* 1302 * directly send the page to the stream 1303 * 1304 * Returns the number of pages written. 
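 *          (always 1 on this path: the page is either written
 *          immediately or queued with qemu_put_buffer_async())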
1305 * 1306 * @rs: current RAM state 1307 * @block: block that contains the page we want to send 1308 * @offset: offset inside the block for the page 1309 * @buf: the page to be sent 1310 * @async: send to page asyncly 1311 */ 1312 static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 1313 uint8_t *buf, bool async) 1314 { 1315 ram_counters.transferred += save_page_header(rs, rs->f, block, 1316 offset | RAM_SAVE_FLAG_PAGE); 1317 if (async) { 1318 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, 1319 migrate_release_ram() & 1320 migration_in_postcopy()); 1321 } else { 1322 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); 1323 } 1324 ram_counters.transferred += TARGET_PAGE_SIZE; 1325 ram_counters.normal++; 1326 return 1; 1327 } 1328 1329 /** 1330 * ram_save_page: send the given page to the stream 1331 * 1332 * Returns the number of pages written. 1333 * < 0 - error 1334 * >=0 - Number of pages written - this might legally be 0 1335 * if xbzrle noticed the page was the same. 1336 * 1337 * @rs: current RAM state 1338 * @block: block that contains the page we want to send 1339 * @offset: offset inside the block for the page 1340 * @last_stage: if we are at the completion stage 1341 */ 1342 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) 1343 { 1344 int pages = -1; 1345 uint8_t *p; 1346 bool send_async = true; 1347 RAMBlock *block = pss->block; 1348 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 1349 ram_addr_t current_addr = block->offset + offset; 1350 1351 p = block->host + offset; 1352 trace_ram_save_page(block->idstr, (uint64_t)offset, p); 1353 1354 XBZRLE_cache_lock(); 1355 if (!rs->ram_bulk_stage && !migration_in_postcopy() && 1356 migrate_use_xbzrle()) { 1357 pages = save_xbzrle_page(rs, &p, current_addr, block, 1358 offset, last_stage); 1359 if (!last_stage) { 1360 /* Can't send this cached data async, since the cache page 1361 * might get updated before it gets to the wire 1362 */ 1363 send_async = false; 1364 } 1365 } 1366 1367 /* XBZRLE overflow or normal page */ 1368 if (pages == -1) { 1369 pages = save_normal_page(rs, block, offset, p, send_async); 1370 } 1371 1372 XBZRLE_cache_unlock(); 1373 1374 return pages; 1375 } 1376 1377 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, 1378 ram_addr_t offset, uint8_t *source_buf) 1379 { 1380 RAMState *rs = ram_state; 1381 int bytes_sent, blen; 1382 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); 1383 1384 bytes_sent = save_page_header(rs, f, block, offset | 1385 RAM_SAVE_FLAG_COMPRESS_PAGE); 1386 1387 /* 1388 * copy it to a internal buffer to avoid it being modified by VM 1389 * so that we can catch up the error during compression and 1390 * decompression 1391 */ 1392 memcpy(source_buf, p, TARGET_PAGE_SIZE); 1393 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE); 1394 if (blen < 0) { 1395 bytes_sent = 0; 1396 qemu_file_set_error(migrate_get_current()->to_dst_file, blen); 1397 error_report("compressed data failed!"); 1398 } else { 1399 bytes_sent += blen; 1400 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); 1401 } 1402 1403 return bytes_sent; 1404 } 1405 1406 static void flush_compressed_data(RAMState *rs) 1407 { 1408 int idx, len, thread_count; 1409 1410 if (!migrate_use_compression()) { 1411 return; 1412 } 1413 thread_count = migrate_compress_threads(); 1414 1415 qemu_mutex_lock(&comp_done_lock); 1416 for (idx = 0; idx < thread_count; idx++) { 1417 while (!comp_param[idx].done) { 1418 
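            /*
             * done is set (under comp_done_lock) by do_data_compress()
             * once the page has been compressed into this thread's
             * QEMUFile buffer; wait for that signal here.
             */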
qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1419 } 1420 } 1421 qemu_mutex_unlock(&comp_done_lock); 1422 1423 for (idx = 0; idx < thread_count; idx++) { 1424 qemu_mutex_lock(&comp_param[idx].mutex); 1425 if (!comp_param[idx].quit) { 1426 len = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1427 ram_counters.transferred += len; 1428 } 1429 qemu_mutex_unlock(&comp_param[idx].mutex); 1430 } 1431 } 1432 1433 static inline void set_compress_params(CompressParam *param, RAMBlock *block, 1434 ram_addr_t offset) 1435 { 1436 param->block = block; 1437 param->offset = offset; 1438 } 1439 1440 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, 1441 ram_addr_t offset) 1442 { 1443 int idx, thread_count, bytes_xmit = -1, pages = -1; 1444 1445 thread_count = migrate_compress_threads(); 1446 qemu_mutex_lock(&comp_done_lock); 1447 while (true) { 1448 for (idx = 0; idx < thread_count; idx++) { 1449 if (comp_param[idx].done) { 1450 comp_param[idx].done = false; 1451 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1452 qemu_mutex_lock(&comp_param[idx].mutex); 1453 set_compress_params(&comp_param[idx], block, offset); 1454 qemu_cond_signal(&comp_param[idx].cond); 1455 qemu_mutex_unlock(&comp_param[idx].mutex); 1456 pages = 1; 1457 ram_counters.normal++; 1458 ram_counters.transferred += bytes_xmit; 1459 break; 1460 } 1461 } 1462 if (pages > 0) { 1463 break; 1464 } else { 1465 qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1466 } 1467 } 1468 qemu_mutex_unlock(&comp_done_lock); 1469 1470 return pages; 1471 } 1472 1473 /** 1474 * find_dirty_block: find the next dirty page and update any state 1475 * associated with the search process. 1476 * 1477 * Returns if a page is found 1478 * 1479 * @rs: current RAM state 1480 * @pss: data about the state of the current dirty page scan 1481 * @again: set to false if the search has scanned the whole of RAM 1482 */ 1483 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) 1484 { 1485 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); 1486 if (pss->complete_round && pss->block == rs->last_seen_block && 1487 pss->page >= rs->last_page) { 1488 /* 1489 * We've been once around the RAM and haven't found anything. 1490 * Give up. 1491 */ 1492 *again = false; 1493 return false; 1494 } 1495 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { 1496 /* Didn't find anything in this RAM Block */ 1497 pss->page = 0; 1498 pss->block = QLIST_NEXT_RCU(pss->block, next); 1499 if (!pss->block) { 1500 /* Hit the end of the list */ 1501 pss->block = QLIST_FIRST_RCU(&ram_list.blocks); 1502 /* Flag that we've looped */ 1503 pss->complete_round = true; 1504 rs->ram_bulk_stage = false; 1505 if (migrate_use_xbzrle()) { 1506 /* If xbzrle is on, stop using the data compression at this 1507 * point. In theory, xbzrle can do better than compression. 1508 */ 1509 flush_compressed_data(rs); 1510 } 1511 } 1512 /* Didn't find anything this time, but try again on the new block */ 1513 *again = true; 1514 return false; 1515 } else { 1516 /* Can go around again, but... 
*/ 1517 *again = true; 1518 /* We've found something so probably don't need to */ 1519 return true; 1520 } 1521 } 1522 1523 /** 1524 * unqueue_page: gets a page of the queue 1525 * 1526 * Helper for 'get_queued_page' - gets a page off the queue 1527 * 1528 * Returns the block of the page (or NULL if none available) 1529 * 1530 * @rs: current RAM state 1531 * @offset: used to return the offset within the RAMBlock 1532 */ 1533 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) 1534 { 1535 RAMBlock *block = NULL; 1536 1537 qemu_mutex_lock(&rs->src_page_req_mutex); 1538 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) { 1539 struct RAMSrcPageRequest *entry = 1540 QSIMPLEQ_FIRST(&rs->src_page_requests); 1541 block = entry->rb; 1542 *offset = entry->offset; 1543 1544 if (entry->len > TARGET_PAGE_SIZE) { 1545 entry->len -= TARGET_PAGE_SIZE; 1546 entry->offset += TARGET_PAGE_SIZE; 1547 } else { 1548 memory_region_unref(block->mr); 1549 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1550 g_free(entry); 1551 migration_consume_urgent_request(); 1552 } 1553 } 1554 qemu_mutex_unlock(&rs->src_page_req_mutex); 1555 1556 return block; 1557 } 1558 1559 /** 1560 * get_queued_page: unqueue a page from the postocpy requests 1561 * 1562 * Skips pages that are already sent (!dirty) 1563 * 1564 * Returns if a queued page is found 1565 * 1566 * @rs: current RAM state 1567 * @pss: data about the state of the current dirty page scan 1568 */ 1569 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) 1570 { 1571 RAMBlock *block; 1572 ram_addr_t offset; 1573 bool dirty; 1574 1575 do { 1576 block = unqueue_page(rs, &offset); 1577 /* 1578 * We're sending this page, and since it's postcopy nothing else 1579 * will dirty it, and we must make sure it doesn't get sent again 1580 * even if this queue request was received after the background 1581 * search already sent it. 1582 */ 1583 if (block) { 1584 unsigned long page; 1585 1586 page = offset >> TARGET_PAGE_BITS; 1587 dirty = test_bit(page, block->bmap); 1588 if (!dirty) { 1589 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, 1590 page, test_bit(page, block->unsentmap)); 1591 } else { 1592 trace_get_queued_page(block->idstr, (uint64_t)offset, page); 1593 } 1594 } 1595 1596 } while (block && !dirty); 1597 1598 if (block) { 1599 /* 1600 * As soon as we start servicing pages out of order, then we have 1601 * to kill the bulk stage, since the bulk stage assumes 1602 * in (migration_bitmap_find_and_reset_dirty) that every page is 1603 * dirty, that's no longer true. 1604 */ 1605 rs->ram_bulk_stage = false; 1606 1607 /* 1608 * We want the background search to continue from the queued page 1609 * since the guest is likely to want other pages near to the page 1610 * it just requested. 1611 */ 1612 pss->block = block; 1613 pss->page = offset >> TARGET_PAGE_BITS; 1614 } 1615 1616 return !!block; 1617 } 1618 1619 /** 1620 * migration_page_queue_free: drop any remaining pages in the ram 1621 * request queue 1622 * 1623 * It should be empty at the end anyway, but in error cases there may 1624 * be some left. in case that there is any page left, we drop it. 1625 * 1626 */ 1627 static void migration_page_queue_free(RAMState *rs) 1628 { 1629 struct RAMSrcPageRequest *mspr, *next_mspr; 1630 /* This queue generally should be empty - but in the case of a failed 1631 * migration might have some droppings in. 
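     * This runs from ram_state_cleanup() once migration has finished
     * or failed, so anything still queued is simply unreferenced and
     * freed below.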
1632 */ 1633 rcu_read_lock(); 1634 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { 1635 memory_region_unref(mspr->rb->mr); 1636 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1637 g_free(mspr); 1638 } 1639 rcu_read_unlock(); 1640 } 1641 1642 /** 1643 * ram_save_queue_pages: queue the page for transmission 1644 * 1645 * A request from postcopy destination for example. 1646 * 1647 * Returns zero on success or negative on error 1648 * 1649 * @rbname: Name of the RAMBLock of the request. NULL means the 1650 * same that last one. 1651 * @start: starting address from the start of the RAMBlock 1652 * @len: length (in bytes) to send 1653 */ 1654 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) 1655 { 1656 RAMBlock *ramblock; 1657 RAMState *rs = ram_state; 1658 1659 ram_counters.postcopy_requests++; 1660 rcu_read_lock(); 1661 if (!rbname) { 1662 /* Reuse last RAMBlock */ 1663 ramblock = rs->last_req_rb; 1664 1665 if (!ramblock) { 1666 /* 1667 * Shouldn't happen, we can't reuse the last RAMBlock if 1668 * it's the 1st request. 1669 */ 1670 error_report("ram_save_queue_pages no previous block"); 1671 goto err; 1672 } 1673 } else { 1674 ramblock = qemu_ram_block_by_name(rbname); 1675 1676 if (!ramblock) { 1677 /* We shouldn't be asked for a non-existent RAMBlock */ 1678 error_report("ram_save_queue_pages no block '%s'", rbname); 1679 goto err; 1680 } 1681 rs->last_req_rb = ramblock; 1682 } 1683 trace_ram_save_queue_pages(ramblock->idstr, start, len); 1684 if (start+len > ramblock->used_length) { 1685 error_report("%s request overrun start=" RAM_ADDR_FMT " len=" 1686 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, 1687 __func__, start, len, ramblock->used_length); 1688 goto err; 1689 } 1690 1691 struct RAMSrcPageRequest *new_entry = 1692 g_malloc0(sizeof(struct RAMSrcPageRequest)); 1693 new_entry->rb = ramblock; 1694 new_entry->offset = start; 1695 new_entry->len = len; 1696 1697 memory_region_ref(ramblock->mr); 1698 qemu_mutex_lock(&rs->src_page_req_mutex); 1699 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); 1700 migration_make_urgent_request(); 1701 qemu_mutex_unlock(&rs->src_page_req_mutex); 1702 rcu_read_unlock(); 1703 1704 return 0; 1705 1706 err: 1707 rcu_read_unlock(); 1708 return -1; 1709 } 1710 1711 static bool save_page_use_compression(RAMState *rs) 1712 { 1713 if (!migrate_use_compression()) { 1714 return false; 1715 } 1716 1717 /* 1718 * If xbzrle is on, stop using the data compression after first 1719 * round of migration even if compression is enabled. In theory, 1720 * xbzrle can do better than compression. 
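     *
     * In other words: compress only during the bulk stage, or for the
     * whole migration when xbzrle is disabled.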
1721 */ 1722 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { 1723 return true; 1724 } 1725 1726 return false; 1727 } 1728 1729 /** 1730 * ram_save_target_page: save one target page 1731 * 1732 * Returns the number of pages written 1733 * 1734 * @rs: current RAM state 1735 * @pss: data about the page we want to send 1736 * @last_stage: if we are at the completion stage 1737 */ 1738 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, 1739 bool last_stage) 1740 { 1741 RAMBlock *block = pss->block; 1742 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 1743 int res; 1744 1745 if (control_save_page(rs, block, offset, &res)) { 1746 return res; 1747 } 1748 1749 /* 1750 * When starting the process of a new block, the first page of 1751 * the block should be sent out before other pages in the same 1752 * block, and all the pages in last block should have been sent 1753 * out, keeping this order is important, because the 'cont' flag 1754 * is used to avoid resending the block name. 1755 */ 1756 if (block != rs->last_sent_block && save_page_use_compression(rs)) { 1757 flush_compressed_data(rs); 1758 } 1759 1760 res = save_zero_page(rs, block, offset); 1761 if (res > 0) { 1762 /* Must let xbzrle know, otherwise a previous (now 0'd) cached 1763 * page would be stale 1764 */ 1765 if (!save_page_use_compression(rs)) { 1766 XBZRLE_cache_lock(); 1767 xbzrle_cache_zero_page(rs, block->offset + offset); 1768 XBZRLE_cache_unlock(); 1769 } 1770 ram_release_pages(block->idstr, offset, res); 1771 return res; 1772 } 1773 1774 /* 1775 * Make sure the first page is sent out before other pages. 1776 * 1777 * we post it as normal page as compression will take much 1778 * CPU resource. 1779 */ 1780 if (block == rs->last_sent_block && save_page_use_compression(rs)) { 1781 return compress_page_with_multi_thread(rs, block, offset); 1782 } 1783 1784 return ram_save_page(rs, pss, last_stage); 1785 } 1786 1787 /** 1788 * ram_save_host_page: save a whole host page 1789 * 1790 * Starting at *offset send pages up to the end of the current host 1791 * page. It's valid for the initial offset to point into the middle of 1792 * a host page in which case the remainder of the hostpage is sent. 1793 * Only dirty target pages are sent. Note that the host page size may 1794 * be a huge page for this block. 1795 * The saving stops at the boundary of the used_length of the block 1796 * if the RAMBlock isn't a multiple of the host page size. 
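 *
 * For example, with 2MB host huge pages and 4KB target pages a single
 * call can cover up to 512 target pages of the same host page.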
1797 * 1798 * Returns the number of pages written or negative on error 1799 * 1800 * @rs: current RAM state 1801 * @ms: current migration state 1802 * @pss: data about the page we want to send 1803 * @last_stage: if we are at the completion stage 1804 */ 1805 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, 1806 bool last_stage) 1807 { 1808 int tmppages, pages = 0; 1809 size_t pagesize_bits = 1810 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; 1811 1812 if (!qemu_ram_is_migratable(pss->block)) { 1813 error_report("block %s should not be migrated !", pss->block->idstr); 1814 return 0; 1815 } 1816 1817 do { 1818 /* Check the pages is dirty and if it is send it */ 1819 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { 1820 pss->page++; 1821 continue; 1822 } 1823 1824 tmppages = ram_save_target_page(rs, pss, last_stage); 1825 if (tmppages < 0) { 1826 return tmppages; 1827 } 1828 1829 pages += tmppages; 1830 if (pss->block->unsentmap) { 1831 clear_bit(pss->page, pss->block->unsentmap); 1832 } 1833 1834 pss->page++; 1835 } while ((pss->page & (pagesize_bits - 1)) && 1836 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); 1837 1838 /* The offset we leave with is the last one we looked at */ 1839 pss->page--; 1840 return pages; 1841 } 1842 1843 /** 1844 * ram_find_and_save_block: finds a dirty page and sends it to f 1845 * 1846 * Called within an RCU critical section. 1847 * 1848 * Returns the number of pages written where zero means no dirty pages 1849 * 1850 * @rs: current RAM state 1851 * @last_stage: if we are at the completion stage 1852 * 1853 * On systems where host-page-size > target-page-size it will send all the 1854 * pages in a host page that are dirty. 1855 */ 1856 1857 static int ram_find_and_save_block(RAMState *rs, bool last_stage) 1858 { 1859 PageSearchStatus pss; 1860 int pages = 0; 1861 bool again, found; 1862 1863 /* No dirty page as there is zero RAM */ 1864 if (!ram_bytes_total()) { 1865 return pages; 1866 } 1867 1868 pss.block = rs->last_seen_block; 1869 pss.page = rs->last_page; 1870 pss.complete_round = false; 1871 1872 if (!pss.block) { 1873 pss.block = QLIST_FIRST_RCU(&ram_list.blocks); 1874 } 1875 1876 do { 1877 again = true; 1878 found = get_queued_page(rs, &pss); 1879 1880 if (!found) { 1881 /* priority queue empty, so just search for something dirty */ 1882 found = find_dirty_block(rs, &pss, &again); 1883 } 1884 1885 if (found) { 1886 pages = ram_save_host_page(rs, &pss, last_stage); 1887 } 1888 } while (!pages && again); 1889 1890 rs->last_seen_block = pss.block; 1891 rs->last_page = pss.page; 1892 1893 return pages; 1894 } 1895 1896 void acct_update_position(QEMUFile *f, size_t size, bool zero) 1897 { 1898 uint64_t pages = size / TARGET_PAGE_SIZE; 1899 1900 if (zero) { 1901 ram_counters.duplicate += pages; 1902 } else { 1903 ram_counters.normal += pages; 1904 ram_counters.transferred += size; 1905 qemu_update_position(f, size); 1906 } 1907 } 1908 1909 uint64_t ram_bytes_total(void) 1910 { 1911 RAMBlock *block; 1912 uint64_t total = 0; 1913 1914 rcu_read_lock(); 1915 RAMBLOCK_FOREACH_MIGRATABLE(block) { 1916 total += block->used_length; 1917 } 1918 rcu_read_unlock(); 1919 return total; 1920 } 1921 1922 static void xbzrle_load_setup(void) 1923 { 1924 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 1925 } 1926 1927 static void xbzrle_load_cleanup(void) 1928 { 1929 g_free(XBZRLE.decoded_buf); 1930 XBZRLE.decoded_buf = NULL; 1931 } 1932 1933 static void ram_state_cleanup(RAMState **rsp) 1934 { 1935 if (*rsp) { 1936 
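        /* drop any leftover page requests before tearing down the locks */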
migration_page_queue_free(*rsp); 1937 qemu_mutex_destroy(&(*rsp)->bitmap_mutex); 1938 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex); 1939 g_free(*rsp); 1940 *rsp = NULL; 1941 } 1942 } 1943 1944 static void xbzrle_cleanup(void) 1945 { 1946 XBZRLE_cache_lock(); 1947 if (XBZRLE.cache) { 1948 cache_fini(XBZRLE.cache); 1949 g_free(XBZRLE.encoded_buf); 1950 g_free(XBZRLE.current_buf); 1951 g_free(XBZRLE.zero_target_page); 1952 XBZRLE.cache = NULL; 1953 XBZRLE.encoded_buf = NULL; 1954 XBZRLE.current_buf = NULL; 1955 XBZRLE.zero_target_page = NULL; 1956 } 1957 XBZRLE_cache_unlock(); 1958 } 1959 1960 static void ram_save_cleanup(void *opaque) 1961 { 1962 RAMState **rsp = opaque; 1963 RAMBlock *block; 1964 1965 /* caller have hold iothread lock or is in a bh, so there is 1966 * no writing race against this migration_bitmap 1967 */ 1968 memory_global_dirty_log_stop(); 1969 1970 RAMBLOCK_FOREACH_MIGRATABLE(block) { 1971 g_free(block->bmap); 1972 block->bmap = NULL; 1973 g_free(block->unsentmap); 1974 block->unsentmap = NULL; 1975 } 1976 1977 xbzrle_cleanup(); 1978 compress_threads_save_cleanup(); 1979 ram_state_cleanup(rsp); 1980 } 1981 1982 static void ram_state_reset(RAMState *rs) 1983 { 1984 rs->last_seen_block = NULL; 1985 rs->last_sent_block = NULL; 1986 rs->last_page = 0; 1987 rs->last_version = ram_list.version; 1988 rs->ram_bulk_stage = true; 1989 } 1990 1991 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 1992 1993 /* 1994 * 'expected' is the value you expect the bitmap mostly to be full 1995 * of; it won't bother printing lines that are all this value. 1996 * If 'todump' is null the migration bitmap is dumped. 1997 */ 1998 void ram_debug_dump_bitmap(unsigned long *todump, bool expected, 1999 unsigned long pages) 2000 { 2001 int64_t cur; 2002 int64_t linelen = 128; 2003 char linebuf[129]; 2004 2005 for (cur = 0; cur < pages; cur += linelen) { 2006 int64_t curb; 2007 bool found = false; 2008 /* 2009 * Last line; catch the case where the line length 2010 * is longer than remaining ram 2011 */ 2012 if (cur + linelen > pages) { 2013 linelen = pages - cur; 2014 } 2015 for (curb = 0; curb < linelen; curb++) { 2016 bool thisbit = test_bit(cur + curb, todump); 2017 linebuf[curb] = thisbit ? '1' : '.'; 2018 found = found || (thisbit != expected); 2019 } 2020 if (found) { 2021 linebuf[curb] = '\0'; 2022 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); 2023 } 2024 } 2025 } 2026 2027 /* **** functions for postcopy ***** */ 2028 2029 void ram_postcopy_migrated_memory_release(MigrationState *ms) 2030 { 2031 struct RAMBlock *block; 2032 2033 RAMBLOCK_FOREACH_MIGRATABLE(block) { 2034 unsigned long *bitmap = block->bmap; 2035 unsigned long range = block->used_length >> TARGET_PAGE_BITS; 2036 unsigned long run_start = find_next_zero_bit(bitmap, range, 0); 2037 2038 while (run_start < range) { 2039 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); 2040 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, 2041 (run_end - run_start) << TARGET_PAGE_BITS); 2042 run_start = find_next_zero_bit(bitmap, range, run_end + 1); 2043 } 2044 } 2045 } 2046 2047 /** 2048 * postcopy_send_discard_bm_ram: discard a RAMBlock 2049 * 2050 * Returns zero on success 2051 * 2052 * Callback from postcopy_each_ram_send_discard for each RAMBlock 2053 * Note: At this point the 'unsentmap' is the processed bitmap combined 2054 * with the dirtymap; so a '1' means it's either dirty or unsent. 
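 *
 * For example, if only pages 2..5 of a block are set in the unsentmap,
 * a single discard covering 4 pages starting at page 2 is sent.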
2055  *
2056  * @ms: current migration state
2057  * @pds: state for postcopy
2058  * @block: RAMBlock to discard; every run of set bits in its unsentmap
2059  *         is sent to the destination as one discard range
2060  */
2061 static int postcopy_send_discard_bm_ram(MigrationState *ms,
2062                                         PostcopyDiscardState *pds,
2063                                         RAMBlock *block)
2064 {
2065     unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2066     unsigned long current;
2067     unsigned long *unsentmap = block->unsentmap;
2068 
2069     for (current = 0; current < end; ) {
2070         unsigned long one = find_next_bit(unsentmap, end, current);
2071 
2072         if (one <= end) {
2073             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2074             unsigned long discard_length;
2075 
2076             if (zero >= end) {
2077                 discard_length = end - one;
2078             } else {
2079                 discard_length = zero - one;
2080             }
2081             if (discard_length) {
2082                 postcopy_discard_send_range(ms, pds, one, discard_length);
2083             }
2084             current = one + discard_length;
2085         } else {
2086             current = one;
2087         }
2088     }
2089 
2090     return 0;
2091 }
2092 
2093 /**
2094  * postcopy_each_ram_send_discard: discard all RAMBlocks
2095  *
2096  * Returns 0 for success or negative for error
2097  *
2098  * Utility for the outgoing postcopy code.
2099  *   Calls postcopy_send_discard_bm_ram for each RAMBlock,
2100  *   passing it bitmap indexes and name.
2101  * (qemu_ram_foreach_block ends up passing unscaled lengths, which would
2102  *  mean the postcopy code would have to deal with target pages)
2103  *
2104  * @ms: current migration state
2105  */
2106 static int postcopy_each_ram_send_discard(MigrationState *ms)
2107 {
2108     struct RAMBlock *block;
2109     int ret;
2110 
2111     RAMBLOCK_FOREACH_MIGRATABLE(block) {
2112         PostcopyDiscardState *pds =
2113             postcopy_discard_send_init(ms, block->idstr);
2114 
2115         /*
2116          * Postcopy sends chunks of bitmap over the wire, but it
2117          * just needs indexes at this point, which avoids it having
2118          * target page specific code.
2119          */
2120         ret = postcopy_send_discard_bm_ram(ms, pds, block);
2121         postcopy_discard_send_finish(ms, pds);
2122         if (ret) {
2123             return ret;
2124         }
2125     }
2126 
2127     return 0;
2128 }
2129 
2130 /**
2131  * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
2132  *
2133  * Helper for postcopy_chunk_hostpages; it's called twice to
2134  * canonicalize the two bitmaps, which are similar but one is
2135  * inverted.
2136  *
2137  * Postcopy requires that all target pages in a hostpage are dirty or
2138  * clean, not a mix.  This function canonicalizes the bitmaps.
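 *
 * Illustrative example (sizes assumed here): with 2MB host pages and 4KB
 * target pages, host_ratio is 512; if a run of dirty (or unsent) target
 * pages starts or ends in the middle of a host page, that whole host page
 * is discarded on the destination and all 512 of its target pages are
 * re-marked dirty/unsent on the source.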
2139 * 2140 * @ms: current migration state 2141 * @unsent_pass: if true we need to canonicalize partially unsent host pages 2142 * otherwise we need to canonicalize partially dirty host pages 2143 * @block: block that contains the page we want to canonicalize 2144 * @pds: state for postcopy 2145 */ 2146 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, 2147 RAMBlock *block, 2148 PostcopyDiscardState *pds) 2149 { 2150 RAMState *rs = ram_state; 2151 unsigned long *bitmap = block->bmap; 2152 unsigned long *unsentmap = block->unsentmap; 2153 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; 2154 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 2155 unsigned long run_start; 2156 2157 if (block->page_size == TARGET_PAGE_SIZE) { 2158 /* Easy case - TPS==HPS for a non-huge page RAMBlock */ 2159 return; 2160 } 2161 2162 if (unsent_pass) { 2163 /* Find a sent page */ 2164 run_start = find_next_zero_bit(unsentmap, pages, 0); 2165 } else { 2166 /* Find a dirty page */ 2167 run_start = find_next_bit(bitmap, pages, 0); 2168 } 2169 2170 while (run_start < pages) { 2171 bool do_fixup = false; 2172 unsigned long fixup_start_addr; 2173 unsigned long host_offset; 2174 2175 /* 2176 * If the start of this run of pages is in the middle of a host 2177 * page, then we need to fixup this host page. 2178 */ 2179 host_offset = run_start % host_ratio; 2180 if (host_offset) { 2181 do_fixup = true; 2182 run_start -= host_offset; 2183 fixup_start_addr = run_start; 2184 /* For the next pass */ 2185 run_start = run_start + host_ratio; 2186 } else { 2187 /* Find the end of this run */ 2188 unsigned long run_end; 2189 if (unsent_pass) { 2190 run_end = find_next_bit(unsentmap, pages, run_start + 1); 2191 } else { 2192 run_end = find_next_zero_bit(bitmap, pages, run_start + 1); 2193 } 2194 /* 2195 * If the end isn't at the start of a host page, then the 2196 * run doesn't finish at the end of a host page 2197 * and we need to discard. 2198 */ 2199 host_offset = run_end % host_ratio; 2200 if (host_offset) { 2201 do_fixup = true; 2202 fixup_start_addr = run_end - host_offset; 2203 /* 2204 * This host page has gone, the next loop iteration starts 2205 * from after the fixup 2206 */ 2207 run_start = fixup_start_addr + host_ratio; 2208 } else { 2209 /* 2210 * No discards on this iteration, next loop starts from 2211 * next sent/dirty page 2212 */ 2213 run_start = run_end + 1; 2214 } 2215 } 2216 2217 if (do_fixup) { 2218 unsigned long page; 2219 2220 /* Tell the destination to discard this page */ 2221 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { 2222 /* For the unsent_pass we: 2223 * discard partially sent pages 2224 * For the !unsent_pass (dirty) we: 2225 * discard partially dirty pages that were sent 2226 * (any partially sent pages were already discarded 2227 * by the previous unsent_pass) 2228 */ 2229 postcopy_discard_send_range(ms, pds, fixup_start_addr, 2230 host_ratio); 2231 } 2232 2233 /* Clean up the bitmap */ 2234 for (page = fixup_start_addr; 2235 page < fixup_start_addr + host_ratio; page++) { 2236 /* All pages in this host page are now not sent */ 2237 set_bit(page, unsentmap); 2238 2239 /* 2240 * Remark them as dirty, updating the count for any pages 2241 * that weren't previously dirty. 
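                 * (test_and_set_bit() returns the previous value of the bit,
                 * so the update below only increments the dirty page count
                 * for pages that were not already marked dirty.)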
2242                  */
2243                 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2244             }
2245         }
2246 
2247         if (unsent_pass) {
2248             /* Find the next sent page for the next iteration */
2249             run_start = find_next_zero_bit(unsentmap, pages, run_start);
2250         } else {
2251             /* Find the next dirty page for the next iteration */
2252             run_start = find_next_bit(bitmap, pages, run_start);
2253         }
2254     }
2255 }
2256 
2257 /**
2258  * postcopy_chunk_hostpages: discard any partially sent host page
2259  *
2260  * Utility for the outgoing postcopy code.
2261  *
2262  * Discard any partially sent host-page size chunks, and mark any partially
2263  * dirty host-page size chunks as fully dirty.  In this case the host page
2264  * is the host page for the particular RAMBlock, i.e. it might be a huge page.
2265  *
2266  * Returns zero on success
2267  *
2268  * @ms: current migration state
2269  * @block: block we want to work with
2270  */
2271 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2272 {
2273     PostcopyDiscardState *pds =
2274         postcopy_discard_send_init(ms, block->idstr);
2275 
2276     /* First pass: Discard all partially sent host pages */
2277     postcopy_chunk_hostpages_pass(ms, true, block, pds);
2278     /*
2279      * Second pass: Ensure that all partially dirty host pages are made
2280      * fully dirty.
2281      */
2282     postcopy_chunk_hostpages_pass(ms, false, block, pds);
2283 
2284     postcopy_discard_send_finish(ms, pds);
2285     return 0;
2286 }
2287 
2288 /**
2289  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2290  *
2291  * Returns zero on success
2292  *
2293  * Transmit the set of pages to be discarded after precopy to the target.
2294  * These are pages that:
2295  *     a) Have been previously transmitted but are now dirty again
2296  *     b) Pages that have never been transmitted; this ensures that
2297  *        any pages on the destination that have been mapped by background
2298  *        tasks get discarded (transparent huge pages is the specific concern)
2299  * Hopefully this is pretty sparse.
2300  *
2301  * @ms: current migration state
2302  */
2303 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2304 {
2305     RAMState *rs = ram_state;
2306     RAMBlock *block;
2307     int ret;
2308 
2309     rcu_read_lock();
2310 
2311     /* This should be our last sync, the src is now paused */
2312     migration_bitmap_sync(rs);
2313 
2314     /* Easiest way to make sure we don't resume in the middle of a host page */
2315     rs->last_seen_block = NULL;
2316     rs->last_sent_block = NULL;
2317     rs->last_page = 0;
2318 
2319     RAMBLOCK_FOREACH_MIGRATABLE(block) {
2320         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2321         unsigned long *bitmap = block->bmap;
2322         unsigned long *unsentmap = block->unsentmap;
2323 
2324         if (!unsentmap) {
2325             /* We don't have a safe way to resize the unsentmap, so
2326              * if the bitmap was resized it will be NULL at this
2327              * point.
2328 */ 2329 error_report("migration ram resized during precopy phase"); 2330 rcu_read_unlock(); 2331 return -EINVAL; 2332 } 2333 /* Deal with TPS != HPS and huge pages */ 2334 ret = postcopy_chunk_hostpages(ms, block); 2335 if (ret) { 2336 rcu_read_unlock(); 2337 return ret; 2338 } 2339 2340 /* 2341 * Update the unsentmap to be unsentmap = unsentmap | dirty 2342 */ 2343 bitmap_or(unsentmap, unsentmap, bitmap, pages); 2344 #ifdef DEBUG_POSTCOPY 2345 ram_debug_dump_bitmap(unsentmap, true, pages); 2346 #endif 2347 } 2348 trace_ram_postcopy_send_discard_bitmap(); 2349 2350 ret = postcopy_each_ram_send_discard(ms); 2351 rcu_read_unlock(); 2352 2353 return ret; 2354 } 2355 2356 /** 2357 * ram_discard_range: discard dirtied pages at the beginning of postcopy 2358 * 2359 * Returns zero on success 2360 * 2361 * @rbname: name of the RAMBlock of the request. NULL means the 2362 * same that last one. 2363 * @start: RAMBlock starting page 2364 * @length: RAMBlock size 2365 */ 2366 int ram_discard_range(const char *rbname, uint64_t start, size_t length) 2367 { 2368 int ret = -1; 2369 2370 trace_ram_discard_range(rbname, start, length); 2371 2372 rcu_read_lock(); 2373 RAMBlock *rb = qemu_ram_block_by_name(rbname); 2374 2375 if (!rb) { 2376 error_report("ram_discard_range: Failed to find block '%s'", rbname); 2377 goto err; 2378 } 2379 2380 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(), 2381 length >> qemu_target_page_bits()); 2382 ret = ram_block_discard_range(rb, start, length); 2383 2384 err: 2385 rcu_read_unlock(); 2386 2387 return ret; 2388 } 2389 2390 /* 2391 * For every allocation, we will try not to crash the VM if the 2392 * allocation failed. 2393 */ 2394 static int xbzrle_init(void) 2395 { 2396 Error *local_err = NULL; 2397 2398 if (!migrate_use_xbzrle()) { 2399 return 0; 2400 } 2401 2402 XBZRLE_cache_lock(); 2403 2404 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); 2405 if (!XBZRLE.zero_target_page) { 2406 error_report("%s: Error allocating zero page", __func__); 2407 goto err_out; 2408 } 2409 2410 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), 2411 TARGET_PAGE_SIZE, &local_err); 2412 if (!XBZRLE.cache) { 2413 error_report_err(local_err); 2414 goto free_zero_page; 2415 } 2416 2417 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); 2418 if (!XBZRLE.encoded_buf) { 2419 error_report("%s: Error allocating encoded_buf", __func__); 2420 goto free_cache; 2421 } 2422 2423 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); 2424 if (!XBZRLE.current_buf) { 2425 error_report("%s: Error allocating current_buf", __func__); 2426 goto free_encoded_buf; 2427 } 2428 2429 /* We are all good */ 2430 XBZRLE_cache_unlock(); 2431 return 0; 2432 2433 free_encoded_buf: 2434 g_free(XBZRLE.encoded_buf); 2435 XBZRLE.encoded_buf = NULL; 2436 free_cache: 2437 cache_fini(XBZRLE.cache); 2438 XBZRLE.cache = NULL; 2439 free_zero_page: 2440 g_free(XBZRLE.zero_target_page); 2441 XBZRLE.zero_target_page = NULL; 2442 err_out: 2443 XBZRLE_cache_unlock(); 2444 return -ENOMEM; 2445 } 2446 2447 static int ram_state_init(RAMState **rsp) 2448 { 2449 *rsp = g_try_new0(RAMState, 1); 2450 2451 if (!*rsp) { 2452 error_report("%s: Init ramstate fail", __func__); 2453 return -1; 2454 } 2455 2456 qemu_mutex_init(&(*rsp)->bitmap_mutex); 2457 qemu_mutex_init(&(*rsp)->src_page_req_mutex); 2458 QSIMPLEQ_INIT(&(*rsp)->src_page_requests); 2459 2460 /* 2461 * Count the total number of pages used by ram blocks not including any 2462 * gaps due to alignment or unplugs. 
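     * This is also the initial value of migration_dirty_pages below: until
     * the first bitmap sync every page is treated as dirty (the per-block
     * bitmaps are initialised fully set in ram_list_init_bitmaps()).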
2463 */ 2464 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; 2465 2466 ram_state_reset(*rsp); 2467 2468 return 0; 2469 } 2470 2471 static void ram_list_init_bitmaps(void) 2472 { 2473 RAMBlock *block; 2474 unsigned long pages; 2475 2476 /* Skip setting bitmap if there is no RAM */ 2477 if (ram_bytes_total()) { 2478 RAMBLOCK_FOREACH_MIGRATABLE(block) { 2479 pages = block->max_length >> TARGET_PAGE_BITS; 2480 block->bmap = bitmap_new(pages); 2481 bitmap_set(block->bmap, 0, pages); 2482 if (migrate_postcopy_ram()) { 2483 block->unsentmap = bitmap_new(pages); 2484 bitmap_set(block->unsentmap, 0, pages); 2485 } 2486 } 2487 } 2488 } 2489 2490 static void ram_init_bitmaps(RAMState *rs) 2491 { 2492 /* For memory_global_dirty_log_start below. */ 2493 qemu_mutex_lock_iothread(); 2494 qemu_mutex_lock_ramlist(); 2495 rcu_read_lock(); 2496 2497 ram_list_init_bitmaps(); 2498 memory_global_dirty_log_start(); 2499 migration_bitmap_sync(rs); 2500 2501 rcu_read_unlock(); 2502 qemu_mutex_unlock_ramlist(); 2503 qemu_mutex_unlock_iothread(); 2504 } 2505 2506 static int ram_init_all(RAMState **rsp) 2507 { 2508 if (ram_state_init(rsp)) { 2509 return -1; 2510 } 2511 2512 if (xbzrle_init()) { 2513 ram_state_cleanup(rsp); 2514 return -1; 2515 } 2516 2517 ram_init_bitmaps(*rsp); 2518 2519 return 0; 2520 } 2521 2522 static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out) 2523 { 2524 RAMBlock *block; 2525 uint64_t pages = 0; 2526 2527 /* 2528 * Postcopy is not using xbzrle/compression, so no need for that. 2529 * Also, since source are already halted, we don't need to care 2530 * about dirty page logging as well. 2531 */ 2532 2533 RAMBLOCK_FOREACH_MIGRATABLE(block) { 2534 pages += bitmap_count_one(block->bmap, 2535 block->used_length >> TARGET_PAGE_BITS); 2536 } 2537 2538 /* This may not be aligned with current bitmaps. Recalculate. */ 2539 rs->migration_dirty_pages = pages; 2540 2541 rs->last_seen_block = NULL; 2542 rs->last_sent_block = NULL; 2543 rs->last_page = 0; 2544 rs->last_version = ram_list.version; 2545 /* 2546 * Disable the bulk stage, otherwise we'll resend the whole RAM no 2547 * matter what we have sent. 2548 */ 2549 rs->ram_bulk_stage = false; 2550 2551 /* Update RAMState cache of output QEMUFile */ 2552 rs->f = out; 2553 2554 trace_ram_state_resume_prepare(pages); 2555 } 2556 2557 /* 2558 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has 2559 * long-running RCU critical section. When rcu-reclaims in the code 2560 * start to become numerous it will be necessary to reduce the 2561 * granularity of these critical sections. 2562 */ 2563 2564 /** 2565 * ram_save_setup: Setup RAM for migration 2566 * 2567 * Returns zero to indicate success and negative for error 2568 * 2569 * @f: QEMUFile where to send the data 2570 * @opaque: RAMState pointer 2571 */ 2572 static int ram_save_setup(QEMUFile *f, void *opaque) 2573 { 2574 RAMState **rsp = opaque; 2575 RAMBlock *block; 2576 2577 if (compress_threads_save_setup()) { 2578 return -1; 2579 } 2580 2581 /* migration has already setup the bitmap, reuse it. 
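     * (When this runs in COLO state, the RAMState and bitmaps created by the
     * initial migration are assumed to still be in place, so only the
     * QEMUFile pointer is refreshed below.)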
*/ 2582 if (!migration_in_colo_state()) { 2583 if (ram_init_all(rsp) != 0) { 2584 compress_threads_save_cleanup(); 2585 return -1; 2586 } 2587 } 2588 (*rsp)->f = f; 2589 2590 rcu_read_lock(); 2591 2592 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 2593 2594 RAMBLOCK_FOREACH_MIGRATABLE(block) { 2595 qemu_put_byte(f, strlen(block->idstr)); 2596 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 2597 qemu_put_be64(f, block->used_length); 2598 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { 2599 qemu_put_be64(f, block->page_size); 2600 } 2601 } 2602 2603 rcu_read_unlock(); 2604 2605 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 2606 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 2607 2608 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2609 2610 return 0; 2611 } 2612 2613 /** 2614 * ram_save_iterate: iterative stage for migration 2615 * 2616 * Returns zero to indicate success and negative for error 2617 * 2618 * @f: QEMUFile where to send the data 2619 * @opaque: RAMState pointer 2620 */ 2621 static int ram_save_iterate(QEMUFile *f, void *opaque) 2622 { 2623 RAMState **temp = opaque; 2624 RAMState *rs = *temp; 2625 int ret; 2626 int i; 2627 int64_t t0; 2628 int done = 0; 2629 2630 if (blk_mig_bulk_active()) { 2631 /* Avoid transferring ram during bulk phase of block migration as 2632 * the bulk phase will usually take a long time and transferring 2633 * ram updates during that time is pointless. */ 2634 goto out; 2635 } 2636 2637 rcu_read_lock(); 2638 if (ram_list.version != rs->last_version) { 2639 ram_state_reset(rs); 2640 } 2641 2642 /* Read version before ram_list.blocks */ 2643 smp_rmb(); 2644 2645 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 2646 2647 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2648 i = 0; 2649 while ((ret = qemu_file_rate_limit(f)) == 0 || 2650 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { 2651 int pages; 2652 2653 if (qemu_file_get_error(f)) { 2654 break; 2655 } 2656 2657 pages = ram_find_and_save_block(rs, false); 2658 /* no more pages to sent */ 2659 if (pages == 0) { 2660 done = 1; 2661 break; 2662 } 2663 rs->iterations++; 2664 2665 /* we want to check in the 1st loop, just in case it was the 1st time 2666 and we had to sync the dirty bitmap. 2667 qemu_get_clock_ns() is a bit expensive, so we only check each some 2668 iterations 2669 */ 2670 if ((i & 63) == 0) { 2671 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 2672 if (t1 > MAX_WAIT) { 2673 trace_ram_save_iterate_big_wait(t1, i); 2674 break; 2675 } 2676 } 2677 i++; 2678 } 2679 flush_compressed_data(rs); 2680 rcu_read_unlock(); 2681 2682 /* 2683 * Must occur before EOS (or any QEMUFile operation) 2684 * because of RDMA protocol. 
2685 */ 2686 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 2687 2688 out: 2689 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2690 ram_counters.transferred += 8; 2691 2692 ret = qemu_file_get_error(f); 2693 if (ret < 0) { 2694 return ret; 2695 } 2696 2697 return done; 2698 } 2699 2700 /** 2701 * ram_save_complete: function called to send the remaining amount of ram 2702 * 2703 * Returns zero to indicate success 2704 * 2705 * Called with iothread lock 2706 * 2707 * @f: QEMUFile where to send the data 2708 * @opaque: RAMState pointer 2709 */ 2710 static int ram_save_complete(QEMUFile *f, void *opaque) 2711 { 2712 RAMState **temp = opaque; 2713 RAMState *rs = *temp; 2714 2715 rcu_read_lock(); 2716 2717 if (!migration_in_postcopy()) { 2718 migration_bitmap_sync(rs); 2719 } 2720 2721 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 2722 2723 /* try transferring iterative blocks of memory */ 2724 2725 /* flush all remaining blocks regardless of rate limiting */ 2726 while (true) { 2727 int pages; 2728 2729 pages = ram_find_and_save_block(rs, !migration_in_colo_state()); 2730 /* no more blocks to sent */ 2731 if (pages == 0) { 2732 break; 2733 } 2734 } 2735 2736 flush_compressed_data(rs); 2737 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 2738 2739 rcu_read_unlock(); 2740 2741 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2742 2743 return 0; 2744 } 2745 2746 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, 2747 uint64_t *res_precopy_only, 2748 uint64_t *res_compatible, 2749 uint64_t *res_postcopy_only) 2750 { 2751 RAMState **temp = opaque; 2752 RAMState *rs = *temp; 2753 uint64_t remaining_size; 2754 2755 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2756 2757 if (!migration_in_postcopy() && 2758 remaining_size < max_size) { 2759 qemu_mutex_lock_iothread(); 2760 rcu_read_lock(); 2761 migration_bitmap_sync(rs); 2762 rcu_read_unlock(); 2763 qemu_mutex_unlock_iothread(); 2764 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2765 } 2766 2767 if (migrate_postcopy_ram()) { 2768 /* We can do postcopy, and all the data is postcopiable */ 2769 *res_compatible += remaining_size; 2770 } else { 2771 *res_precopy_only += remaining_size; 2772 } 2773 } 2774 2775 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 2776 { 2777 unsigned int xh_len; 2778 int xh_flags; 2779 uint8_t *loaded_data; 2780 2781 /* extract RLE header */ 2782 xh_flags = qemu_get_byte(f); 2783 xh_len = qemu_get_be16(f); 2784 2785 if (xh_flags != ENCODING_FLAG_XBZRLE) { 2786 error_report("Failed to load XBZRLE page - wrong compression!"); 2787 return -1; 2788 } 2789 2790 if (xh_len > TARGET_PAGE_SIZE) { 2791 error_report("Failed to load XBZRLE page - len overflow!"); 2792 return -1; 2793 } 2794 loaded_data = XBZRLE.decoded_buf; 2795 /* load data and decode */ 2796 /* it can change loaded_data to point to an internal buffer */ 2797 qemu_get_buffer_in_place(f, &loaded_data, xh_len); 2798 2799 /* decode RLE */ 2800 if (xbzrle_decode_buffer(loaded_data, xh_len, host, 2801 TARGET_PAGE_SIZE) == -1) { 2802 error_report("Failed to load XBZRLE page - decode error!"); 2803 return -1; 2804 } 2805 2806 return 0; 2807 } 2808 2809 /** 2810 * ram_block_from_stream: read a RAMBlock id from the migration stream 2811 * 2812 * Must be called from within a rcu critical section. 2813 * 2814 * Returns a pointer from within the RCU-protected ram_list. 
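 *
 * Informal sketch of the record layout this parses (the caller has already
 * read the leading be64(offset | flags) for the page):
 *
 *   first page of a block:  u8(idstr length), idstr bytes, then the payload
 *   subsequent pages:       RAM_SAVE_FLAG_CONTINUE set in flags, no idstr
 *
 * RAM_SAVE_FLAG_CONTINUE means "same block as the previous page", which is
 * why the last block seen is cached in a static variable below.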
2815 * 2816 * @f: QEMUFile where to read the data from 2817 * @flags: Page flags (mostly to see if it's a continuation of previous block) 2818 */ 2819 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) 2820 { 2821 static RAMBlock *block = NULL; 2822 char id[256]; 2823 uint8_t len; 2824 2825 if (flags & RAM_SAVE_FLAG_CONTINUE) { 2826 if (!block) { 2827 error_report("Ack, bad migration stream!"); 2828 return NULL; 2829 } 2830 return block; 2831 } 2832 2833 len = qemu_get_byte(f); 2834 qemu_get_buffer(f, (uint8_t *)id, len); 2835 id[len] = 0; 2836 2837 block = qemu_ram_block_by_name(id); 2838 if (!block) { 2839 error_report("Can't find block %s", id); 2840 return NULL; 2841 } 2842 2843 if (!qemu_ram_is_migratable(block)) { 2844 error_report("block %s should not be migrated !", id); 2845 return NULL; 2846 } 2847 2848 return block; 2849 } 2850 2851 static inline void *host_from_ram_block_offset(RAMBlock *block, 2852 ram_addr_t offset) 2853 { 2854 if (!offset_in_ramblock(block, offset)) { 2855 return NULL; 2856 } 2857 2858 return block->host + offset; 2859 } 2860 2861 /** 2862 * ram_handle_compressed: handle the zero page case 2863 * 2864 * If a page (or a whole RDMA chunk) has been 2865 * determined to be zero, then zap it. 2866 * 2867 * @host: host address for the zero page 2868 * @ch: what the page is filled from. We only support zero 2869 * @size: size of the zero page 2870 */ 2871 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 2872 { 2873 if (ch != 0 || !is_zero_range(host, size)) { 2874 memset(host, ch, size); 2875 } 2876 } 2877 2878 /* return the size after decompression, or negative value on error */ 2879 static int 2880 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, 2881 const uint8_t *source, size_t source_len) 2882 { 2883 int err; 2884 2885 err = inflateReset(stream); 2886 if (err != Z_OK) { 2887 return -1; 2888 } 2889 2890 stream->avail_in = source_len; 2891 stream->next_in = (uint8_t *)source; 2892 stream->avail_out = dest_len; 2893 stream->next_out = dest; 2894 2895 err = inflate(stream, Z_NO_FLUSH); 2896 if (err != Z_STREAM_END) { 2897 return -1; 2898 } 2899 2900 return stream->total_out; 2901 } 2902 2903 static void *do_data_decompress(void *opaque) 2904 { 2905 DecompressParam *param = opaque; 2906 unsigned long pagesize; 2907 uint8_t *des; 2908 int len, ret; 2909 2910 qemu_mutex_lock(¶m->mutex); 2911 while (!param->quit) { 2912 if (param->des) { 2913 des = param->des; 2914 len = param->len; 2915 param->des = 0; 2916 qemu_mutex_unlock(¶m->mutex); 2917 2918 pagesize = TARGET_PAGE_SIZE; 2919 2920 ret = qemu_uncompress_data(¶m->stream, des, pagesize, 2921 param->compbuf, len); 2922 if (ret < 0 && migrate_get_current()->decompress_error_check) { 2923 error_report("decompress data failed"); 2924 qemu_file_set_error(decomp_file, ret); 2925 } 2926 2927 qemu_mutex_lock(&decomp_done_lock); 2928 param->done = true; 2929 qemu_cond_signal(&decomp_done_cond); 2930 qemu_mutex_unlock(&decomp_done_lock); 2931 2932 qemu_mutex_lock(¶m->mutex); 2933 } else { 2934 qemu_cond_wait(¶m->cond, ¶m->mutex); 2935 } 2936 } 2937 qemu_mutex_unlock(¶m->mutex); 2938 2939 return NULL; 2940 } 2941 2942 static int wait_for_decompress_done(void) 2943 { 2944 int idx, thread_count; 2945 2946 if (!migrate_use_compression()) { 2947 return 0; 2948 } 2949 2950 thread_count = migrate_decompress_threads(); 2951 qemu_mutex_lock(&decomp_done_lock); 2952 for (idx = 0; idx < thread_count; idx++) { 2953 while (!decomp_param[idx].done) { 2954 
qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2955 } 2956 } 2957 qemu_mutex_unlock(&decomp_done_lock); 2958 return qemu_file_get_error(decomp_file); 2959 } 2960 2961 static void compress_threads_load_cleanup(void) 2962 { 2963 int i, thread_count; 2964 2965 if (!migrate_use_compression()) { 2966 return; 2967 } 2968 thread_count = migrate_decompress_threads(); 2969 for (i = 0; i < thread_count; i++) { 2970 /* 2971 * we use it as a indicator which shows if the thread is 2972 * properly init'd or not 2973 */ 2974 if (!decomp_param[i].compbuf) { 2975 break; 2976 } 2977 2978 qemu_mutex_lock(&decomp_param[i].mutex); 2979 decomp_param[i].quit = true; 2980 qemu_cond_signal(&decomp_param[i].cond); 2981 qemu_mutex_unlock(&decomp_param[i].mutex); 2982 } 2983 for (i = 0; i < thread_count; i++) { 2984 if (!decomp_param[i].compbuf) { 2985 break; 2986 } 2987 2988 qemu_thread_join(decompress_threads + i); 2989 qemu_mutex_destroy(&decomp_param[i].mutex); 2990 qemu_cond_destroy(&decomp_param[i].cond); 2991 inflateEnd(&decomp_param[i].stream); 2992 g_free(decomp_param[i].compbuf); 2993 decomp_param[i].compbuf = NULL; 2994 } 2995 g_free(decompress_threads); 2996 g_free(decomp_param); 2997 decompress_threads = NULL; 2998 decomp_param = NULL; 2999 decomp_file = NULL; 3000 } 3001 3002 static int compress_threads_load_setup(QEMUFile *f) 3003 { 3004 int i, thread_count; 3005 3006 if (!migrate_use_compression()) { 3007 return 0; 3008 } 3009 3010 thread_count = migrate_decompress_threads(); 3011 decompress_threads = g_new0(QemuThread, thread_count); 3012 decomp_param = g_new0(DecompressParam, thread_count); 3013 qemu_mutex_init(&decomp_done_lock); 3014 qemu_cond_init(&decomp_done_cond); 3015 decomp_file = f; 3016 for (i = 0; i < thread_count; i++) { 3017 if (inflateInit(&decomp_param[i].stream) != Z_OK) { 3018 goto exit; 3019 } 3020 3021 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); 3022 qemu_mutex_init(&decomp_param[i].mutex); 3023 qemu_cond_init(&decomp_param[i].cond); 3024 decomp_param[i].done = true; 3025 decomp_param[i].quit = false; 3026 qemu_thread_create(decompress_threads + i, "decompress", 3027 do_data_decompress, decomp_param + i, 3028 QEMU_THREAD_JOINABLE); 3029 } 3030 return 0; 3031 exit: 3032 compress_threads_load_cleanup(); 3033 return -1; 3034 } 3035 3036 static void decompress_data_with_multi_threads(QEMUFile *f, 3037 void *host, int len) 3038 { 3039 int idx, thread_count; 3040 3041 thread_count = migrate_decompress_threads(); 3042 qemu_mutex_lock(&decomp_done_lock); 3043 while (true) { 3044 for (idx = 0; idx < thread_count; idx++) { 3045 if (decomp_param[idx].done) { 3046 decomp_param[idx].done = false; 3047 qemu_mutex_lock(&decomp_param[idx].mutex); 3048 qemu_get_buffer(f, decomp_param[idx].compbuf, len); 3049 decomp_param[idx].des = host; 3050 decomp_param[idx].len = len; 3051 qemu_cond_signal(&decomp_param[idx].cond); 3052 qemu_mutex_unlock(&decomp_param[idx].mutex); 3053 break; 3054 } 3055 } 3056 if (idx < thread_count) { 3057 break; 3058 } else { 3059 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 3060 } 3061 } 3062 qemu_mutex_unlock(&decomp_done_lock); 3063 } 3064 3065 /** 3066 * ram_load_setup: Setup RAM for migration incoming side 3067 * 3068 * Returns zero to indicate success and negative for error 3069 * 3070 * @f: QEMUFile where to receive the data 3071 * @opaque: RAMState pointer 3072 */ 3073 static int ram_load_setup(QEMUFile *f, void *opaque) 3074 { 3075 if (compress_threads_load_setup(f)) { 3076 return -1; 3077 } 3078 3079 xbzrle_load_setup(); 
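    /*
     * Allocate the per-RAMBlock 'receivedmap' used to track which pages
     * have already been placed; postcopy recovery relies on it (see
     * ram_dirty_bitmap_reload() below).
     */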
3080     ramblock_recv_map_init();
3081     return 0;
3082 }
3083 
3084 static int ram_load_cleanup(void *opaque)
3085 {
3086     RAMBlock *rb;
3087     xbzrle_load_cleanup();
3088     compress_threads_load_cleanup();
3089 
3090     RAMBLOCK_FOREACH_MIGRATABLE(rb) {
3091         g_free(rb->receivedmap);
3092         rb->receivedmap = NULL;
3093     }
3094     return 0;
3095 }
3096 
3097 /**
3098  * ram_postcopy_incoming_init: allocate postcopy data structures
3099  *
3100  * Returns 0 for success and negative if there was an error
3101  *
3102  * @mis: current migration incoming state
3103  *
3104  * Allocate data structures etc needed by incoming migration with
3105  * postcopy-ram. postcopy-ram's similarly named
3106  * postcopy_ram_incoming_init does the work.
3107  */
3108 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3109 {
3110     unsigned long ram_pages = last_ram_page();
3111 
3112     return postcopy_ram_incoming_init(mis, ram_pages);
3113 }
3114 
3115 /**
3116  * ram_load_postcopy: load a page in postcopy case
3117  *
3118  * Returns 0 for success or -errno in case of error
3119  *
3120  * Called in postcopy mode by ram_load().
3121  * rcu_read_lock is taken prior to this being called.
3122  *
3123  * @f: QEMUFile to receive the data from
3124  */
3125 static int ram_load_postcopy(QEMUFile *f)
3126 {
3127     int flags = 0, ret = 0;
3128     bool place_needed = false;
3129     bool matching_page_sizes = false;
3130     MigrationIncomingState *mis = migration_incoming_get_current();
3131     /* Temporary page that is later 'placed' */
3132     void *postcopy_host_page = postcopy_get_tmp_page(mis);
3133     void *last_host = NULL;
3134     bool all_zero = false;
3135 
3136     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3137         ram_addr_t addr;
3138         void *host = NULL;
3139         void *page_buffer = NULL;
3140         void *place_source = NULL;
3141         RAMBlock *block = NULL;
3142         uint8_t ch;
3143 
3144         addr = qemu_get_be64(f);
3145 
3146         /*
3147          * If there is a QEMUFile error, stop here; "addr" may then
3148          * be invalid.
3149          */
3150         ret = qemu_file_get_error(f);
3151         if (ret) {
3152             break;
3153         }
3154 
3155         flags = addr & ~TARGET_PAGE_MASK;
3156         addr &= TARGET_PAGE_MASK;
3157 
3158         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3159         place_needed = false;
3160         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
3161             block = ram_block_from_stream(f, flags);
3162 
3163             host = host_from_ram_block_offset(block, addr);
3164             if (!host) {
3165                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3166                 ret = -EINVAL;
3167                 break;
3168             }
3169             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
3170             /*
3171              * Postcopy requires that we place whole host pages atomically;
3172              * these may be huge pages for RAMBlocks that are backed by
3173              * hugetlbfs.
3174              * To make it atomic, the data is read into a temporary page
3175              * that's moved into place later.
3176              * The migration protocol uses, possibly smaller, target pages;
3177              * however, the source ensures it always sends all the components
3178              * of a host page in order.
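             * For example (sizes assumed for illustration): with 4KB target
             * pages and a RAMBlock backed by 2MB hugetlbfs pages, 512
             * consecutive target pages are accumulated in postcopy_host_page
             * and only the last one triggers the atomic placement below.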
3179 */ 3180 page_buffer = postcopy_host_page + 3181 ((uintptr_t)host & (block->page_size - 1)); 3182 /* If all TP are zero then we can optimise the place */ 3183 if (!((uintptr_t)host & (block->page_size - 1))) { 3184 all_zero = true; 3185 } else { 3186 /* not the 1st TP within the HP */ 3187 if (host != (last_host + TARGET_PAGE_SIZE)) { 3188 error_report("Non-sequential target page %p/%p", 3189 host, last_host); 3190 ret = -EINVAL; 3191 break; 3192 } 3193 } 3194 3195 3196 /* 3197 * If it's the last part of a host page then we place the host 3198 * page 3199 */ 3200 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & 3201 (block->page_size - 1)) == 0; 3202 place_source = postcopy_host_page; 3203 } 3204 last_host = host; 3205 3206 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 3207 case RAM_SAVE_FLAG_ZERO: 3208 ch = qemu_get_byte(f); 3209 memset(page_buffer, ch, TARGET_PAGE_SIZE); 3210 if (ch) { 3211 all_zero = false; 3212 } 3213 break; 3214 3215 case RAM_SAVE_FLAG_PAGE: 3216 all_zero = false; 3217 if (!place_needed || !matching_page_sizes) { 3218 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); 3219 } else { 3220 /* Avoids the qemu_file copy during postcopy, which is 3221 * going to do a copy later; can only do it when we 3222 * do this read in one go (matching page sizes) 3223 */ 3224 qemu_get_buffer_in_place(f, (uint8_t **)&place_source, 3225 TARGET_PAGE_SIZE); 3226 } 3227 break; 3228 case RAM_SAVE_FLAG_EOS: 3229 /* normal exit */ 3230 break; 3231 default: 3232 error_report("Unknown combination of migration flags: %#x" 3233 " (postcopy mode)", flags); 3234 ret = -EINVAL; 3235 break; 3236 } 3237 3238 /* Detect for any possible file errors */ 3239 if (!ret && qemu_file_get_error(f)) { 3240 ret = qemu_file_get_error(f); 3241 } 3242 3243 if (!ret && place_needed) { 3244 /* This gets called at the last target page in the host page */ 3245 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; 3246 3247 if (all_zero) { 3248 ret = postcopy_place_page_zero(mis, place_dest, 3249 block); 3250 } else { 3251 ret = postcopy_place_page(mis, place_dest, 3252 place_source, block); 3253 } 3254 } 3255 } 3256 3257 return ret; 3258 } 3259 3260 static bool postcopy_is_advised(void) 3261 { 3262 PostcopyState ps = postcopy_state_get(); 3263 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; 3264 } 3265 3266 static bool postcopy_is_running(void) 3267 { 3268 PostcopyState ps = postcopy_state_get(); 3269 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END; 3270 } 3271 3272 static int ram_load(QEMUFile *f, void *opaque, int version_id) 3273 { 3274 int flags = 0, ret = 0, invalid_flags = 0; 3275 static uint64_t seq_iter; 3276 int len = 0; 3277 /* 3278 * If system is running in postcopy mode, page inserts to host memory must 3279 * be atomic 3280 */ 3281 bool postcopy_running = postcopy_is_running(); 3282 /* ADVISE is earlier, it shows the source has the postcopy capability on */ 3283 bool postcopy_advised = postcopy_is_advised(); 3284 3285 seq_iter++; 3286 3287 if (version_id != 4) { 3288 ret = -EINVAL; 3289 } 3290 3291 if (!migrate_use_compression()) { 3292 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; 3293 } 3294 /* This RCU critical section can be very long running. 3295 * When RCU reclaims in the code start to become numerous, 3296 * it will be necessary to reduce the granularity of this 3297 * critical section. 
3298 */ 3299 rcu_read_lock(); 3300 3301 if (postcopy_running) { 3302 ret = ram_load_postcopy(f); 3303 } 3304 3305 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { 3306 ram_addr_t addr, total_ram_bytes; 3307 void *host = NULL; 3308 uint8_t ch; 3309 3310 addr = qemu_get_be64(f); 3311 flags = addr & ~TARGET_PAGE_MASK; 3312 addr &= TARGET_PAGE_MASK; 3313 3314 if (flags & invalid_flags) { 3315 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { 3316 error_report("Received an unexpected compressed page"); 3317 } 3318 3319 ret = -EINVAL; 3320 break; 3321 } 3322 3323 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 3324 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { 3325 RAMBlock *block = ram_block_from_stream(f, flags); 3326 3327 host = host_from_ram_block_offset(block, addr); 3328 if (!host) { 3329 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 3330 ret = -EINVAL; 3331 break; 3332 } 3333 ramblock_recv_bitmap_set(block, host); 3334 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); 3335 } 3336 3337 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 3338 case RAM_SAVE_FLAG_MEM_SIZE: 3339 /* Synchronize RAM block list */ 3340 total_ram_bytes = addr; 3341 while (!ret && total_ram_bytes) { 3342 RAMBlock *block; 3343 char id[256]; 3344 ram_addr_t length; 3345 3346 len = qemu_get_byte(f); 3347 qemu_get_buffer(f, (uint8_t *)id, len); 3348 id[len] = 0; 3349 length = qemu_get_be64(f); 3350 3351 block = qemu_ram_block_by_name(id); 3352 if (block && !qemu_ram_is_migratable(block)) { 3353 error_report("block %s should not be migrated !", id); 3354 ret = -EINVAL; 3355 } else if (block) { 3356 if (length != block->used_length) { 3357 Error *local_err = NULL; 3358 3359 ret = qemu_ram_resize(block, length, 3360 &local_err); 3361 if (local_err) { 3362 error_report_err(local_err); 3363 } 3364 } 3365 /* For postcopy we need to check hugepage sizes match */ 3366 if (postcopy_advised && 3367 block->page_size != qemu_host_page_size) { 3368 uint64_t remote_page_size = qemu_get_be64(f); 3369 if (remote_page_size != block->page_size) { 3370 error_report("Mismatched RAM page size %s " 3371 "(local) %zd != %" PRId64, 3372 id, block->page_size, 3373 remote_page_size); 3374 ret = -EINVAL; 3375 } 3376 } 3377 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, 3378 block->idstr); 3379 } else { 3380 error_report("Unknown ramblock \"%s\", cannot " 3381 "accept migration", id); 3382 ret = -EINVAL; 3383 } 3384 3385 total_ram_bytes -= length; 3386 } 3387 break; 3388 3389 case RAM_SAVE_FLAG_ZERO: 3390 ch = qemu_get_byte(f); 3391 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 3392 break; 3393 3394 case RAM_SAVE_FLAG_PAGE: 3395 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 3396 break; 3397 3398 case RAM_SAVE_FLAG_COMPRESS_PAGE: 3399 len = qemu_get_be32(f); 3400 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 3401 error_report("Invalid compressed data length: %d", len); 3402 ret = -EINVAL; 3403 break; 3404 } 3405 decompress_data_with_multi_threads(f, host, len); 3406 break; 3407 3408 case RAM_SAVE_FLAG_XBZRLE: 3409 if (load_xbzrle(f, addr, host) < 0) { 3410 error_report("Failed to decompress XBZRLE page at " 3411 RAM_ADDR_FMT, addr); 3412 ret = -EINVAL; 3413 break; 3414 } 3415 break; 3416 case RAM_SAVE_FLAG_EOS: 3417 /* normal exit */ 3418 break; 3419 default: 3420 if (flags & RAM_SAVE_FLAG_HOOK) { 3421 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); 3422 } else { 3423 error_report("Unknown combination of migration flags: %#x", 3424 flags); 3425 ret = -EINVAL; 3426 } 3427 } 
3428         if (!ret) {
3429             ret = qemu_file_get_error(f);
3430         }
3431     }
3432 
3433     ret |= wait_for_decompress_done();
3434     rcu_read_unlock();
3435     trace_ram_load_complete(ret, seq_iter);
3436     return ret;
3437 }
3438 
3439 static bool ram_has_postcopy(void *opaque)
3440 {
3441     return migrate_postcopy_ram();
3442 }
3443 
3444 /* Sync all the dirty bitmaps with the destination VM. */
3445 static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3446 {
3447     RAMBlock *block;
3448     QEMUFile *file = s->to_dst_file;
3449     int ramblock_count = 0;
3450 
3451     trace_ram_dirty_bitmap_sync_start();
3452 
3453     RAMBLOCK_FOREACH_MIGRATABLE(block) {
3454         qemu_savevm_send_recv_bitmap(file, block->idstr);
3455         trace_ram_dirty_bitmap_request(block->idstr);
3456         ramblock_count++;
3457     }
3458 
3459     trace_ram_dirty_bitmap_sync_wait();
3460 
3461     /* Wait until all the ramblocks' dirty bitmaps are synced */
3462     while (ramblock_count--) {
3463         qemu_sem_wait(&s->rp_state.rp_sem);
3464     }
3465 
3466     trace_ram_dirty_bitmap_sync_complete();
3467 
3468     return 0;
3469 }
3470 
3471 static void ram_dirty_bitmap_reload_notify(MigrationState *s)
3472 {
3473     qemu_sem_post(&s->rp_state.rp_sem);
3474 }
3475 
3476 /*
3477  * Read the received bitmap and invert it to form the initial dirty bitmap.
3478  * This is only used when the postcopy migration is paused but wants
3479  * to resume from a middle point.
3480  */
3481 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
3482 {
3483     int ret = -EINVAL;
3484     QEMUFile *file = s->rp_state.from_dst_file;
3485     unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
3486     uint64_t local_size = nbits / 8;
3487     uint64_t size, end_mark;
3488 
3489     trace_ram_dirty_bitmap_reload_begin(block->idstr);
3490 
3491     if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
3492         error_report("%s: incorrect state %s", __func__,
3493                      MigrationStatus_str(s->state));
3494         return -EINVAL;
3495     }
3496 
3497     /*
3498      * Note: see comments in ramblock_recv_bitmap_send() on why we
3499      * need the endianness conversion and the padding.
3500      */
3501     local_size = ROUND_UP(local_size, 8);
3502 
3503     /* Add padding */
3504     le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
3505 
3506     size = qemu_get_be64(file);
3507 
3508     /* The size of the bitmap should match our ramblock */
3509     if (size != local_size) {
3510         error_report("%s: ramblock '%s' bitmap size mismatch "
3511                      "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
3512                      block->idstr, size, local_size);
3513         ret = -EINVAL;
3514         goto out;
3515     }
3516 
3517     size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
3518     end_mark = qemu_get_be64(file);
3519 
3520     ret = qemu_file_get_error(file);
3521     if (ret || size != local_size) {
3522         error_report("%s: read bitmap failed for ramblock '%s': %d"
3523                      " (size 0x%"PRIx64", got: 0x%"PRIx64")",
3524                      __func__, block->idstr, ret, local_size, size);
3525         ret = -EIO;
3526         goto out;
3527     }
3528 
3529     if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
3530         error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
3531                      __func__, block->idstr, end_mark);
3532         ret = -EINVAL;
3533         goto out;
3534     }
3535 
3536     /*
3537      * Endianness conversion. We are in postcopy (though paused).
3538      * The dirty bitmap won't change. We can directly modify it.
3539      */
3540     bitmap_from_le(block->bmap, le_bitmap, nbits);
3541 
3542     /*
3543      * What we received is the "received bitmap". Invert it to form the
3544      * initial dirty bitmap for this ramblock.
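     * A page the destination reports as received does not need to be resent,
     * so after the bitmap_complement() below a set bit means the page still
     * has to be sent.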
3545      */
3546     bitmap_complement(block->bmap, block->bmap, nbits);
3547 
3548     trace_ram_dirty_bitmap_reload_complete(block->idstr);
3549 
3550     /*
3551      * We succeeded in syncing the bitmap for the current ramblock. If this
3552      * is the last one to sync, we need to notify the main send thread.
3553      */
3554     ram_dirty_bitmap_reload_notify(s);
3555 
3556     ret = 0;
3557 out:
3558     g_free(le_bitmap);
3559     return ret;
3560 }
3561 
3562 static int ram_resume_prepare(MigrationState *s, void *opaque)
3563 {
3564     RAMState *rs = *(RAMState **)opaque;
3565     int ret;
3566 
3567     ret = ram_dirty_bitmap_sync_all(s, rs);
3568     if (ret) {
3569         return ret;
3570     }
3571 
3572     ram_state_resume_prepare(rs, s->to_dst_file);
3573 
3574     return 0;
3575 }
3576 
3577 static SaveVMHandlers savevm_ram_handlers = {
3578     .save_setup = ram_save_setup,
3579     .save_live_iterate = ram_save_iterate,
3580     .save_live_complete_postcopy = ram_save_complete,
3581     .save_live_complete_precopy = ram_save_complete,
3582     .has_postcopy = ram_has_postcopy,
3583     .save_live_pending = ram_save_pending,
3584     .load_state = ram_load,
3585     .save_cleanup = ram_save_cleanup,
3586     .load_setup = ram_load_setup,
3587     .load_cleanup = ram_load_cleanup,
3588     .resume_prepare = ram_resume_prepare,
3589 };
3590 
3591 void ram_mig_init(void)
3592 {
3593     qemu_mutex_init(&XBZRLE.lock);
3594     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
3595 }
3596 
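/*
 * Informal summary of the stream framing produced by the save side above and
 * consumed by ram_load()/ram_load_postcopy().  The code is authoritative;
 * this is only a reading aid:
 *
 *   ram_save_setup():    be64(ram_bytes_total | RAM_SAVE_FLAG_MEM_SIZE)
 *                        per block: u8(len), idstr, be64(used_length)
 *                            [be64(page_size) if postcopy is enabled and the
 *                             block's page size differs from the host's]
 *                        be64(RAM_SAVE_FLAG_EOS)
 *   ram_save_iterate():  page records, then be64(RAM_SAVE_FLAG_EOS)
 *   ram_save_complete(): remaining page records, then be64(RAM_SAVE_FLAG_EOS)
 *
 * Each page record starts with be64(offset | flags), followed by a
 * flag-dependent payload (a fill byte for ZERO, a raw page for PAGE,
 * compressed or XBZRLE data otherwise).
 */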