1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2011-2015 Red Hat Inc 6 * 7 * Authors: 8 * Juan Quintela <quintela@redhat.com> 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 */ 28 #include "qemu/osdep.h" 29 #include "cpu.h" 30 #include <zlib.h> 31 #include "qapi-event.h" 32 #include "qemu/cutils.h" 33 #include "qemu/bitops.h" 34 #include "qemu/bitmap.h" 35 #include "qemu/main-loop.h" 36 #include "xbzrle.h" 37 #include "ram.h" 38 #include "migration.h" 39 #include "migration/register.h" 40 #include "migration/misc.h" 41 #include "qemu-file.h" 42 #include "postcopy-ram.h" 43 #include "migration/page_cache.h" 44 #include "qemu/error-report.h" 45 #include "qapi/qmp/qerror.h" 46 #include "trace.h" 47 #include "exec/ram_addr.h" 48 #include "exec/target_page.h" 49 #include "qemu/rcu_queue.h" 50 #include "migration/colo.h" 51 #include "migration/block.h" 52 53 /***********************************************************/ 54 /* ram save/restore */ 55 56 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it 57 * worked for pages that where filled with the same char. We switched 58 * it to only search for the zero value. And to avoid confusion with 59 * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it. 60 */ 61 62 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ 63 #define RAM_SAVE_FLAG_ZERO 0x02 64 #define RAM_SAVE_FLAG_MEM_SIZE 0x04 65 #define RAM_SAVE_FLAG_PAGE 0x08 66 #define RAM_SAVE_FLAG_EOS 0x10 67 #define RAM_SAVE_FLAG_CONTINUE 0x20 68 #define RAM_SAVE_FLAG_XBZRLE 0x40 69 /* 0x80 is reserved in migration.h start with 0x100 next */ 70 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 71 72 static inline bool is_zero_range(uint8_t *p, uint64_t size) 73 { 74 return buffer_is_zero(p, size); 75 } 76 77 XBZRLECacheStats xbzrle_counters; 78 79 /* struct contains XBZRLE cache and a static page 80 used by the compression */ 81 static struct { 82 /* buffer used for XBZRLE encoding */ 83 uint8_t *encoded_buf; 84 /* buffer for storing page content */ 85 uint8_t *current_buf; 86 /* Cache for XBZRLE, Protected by lock. 
*/ 87 PageCache *cache; 88 QemuMutex lock; 89 /* it will store a page full of zeros */ 90 uint8_t *zero_target_page; 91 /* buffer used for XBZRLE decoding */ 92 uint8_t *decoded_buf; 93 } XBZRLE; 94 95 static void XBZRLE_cache_lock(void) 96 { 97 if (migrate_use_xbzrle()) 98 qemu_mutex_lock(&XBZRLE.lock); 99 } 100 101 static void XBZRLE_cache_unlock(void) 102 { 103 if (migrate_use_xbzrle()) 104 qemu_mutex_unlock(&XBZRLE.lock); 105 } 106 107 /** 108 * xbzrle_cache_resize: resize the xbzrle cache 109 * 110 * This function is called from qmp_migrate_set_cache_size in main 111 * thread, possibly while a migration is in progress. A running 112 * migration may be using the cache and might finish during this call, 113 * hence changes to the cache are protected by XBZRLE.lock(). 114 * 115 * Returns the new_size or negative in case of error. 116 * 117 * @new_size: new cache size 118 * @errp: set *errp if the check failed, with reason 119 */ 120 int64_t xbzrle_cache_resize(int64_t new_size, Error **errp) 121 { 122 PageCache *new_cache; 123 int64_t ret; 124 125 /* Check for truncation */ 126 if (new_size != (size_t)new_size) { 127 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 128 "exceeding address space"); 129 return -1; 130 } 131 132 /* Cache should not be larger than guest ram size */ 133 if (new_size > ram_bytes_total()) { 134 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 135 "exceeds guest ram size"); 136 return -1; 137 } 138 139 XBZRLE_cache_lock(); 140 141 if (XBZRLE.cache != NULL) { 142 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { 143 goto out_new_size; 144 } 145 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp); 146 if (!new_cache) { 147 ret = -1; 148 goto out; 149 } 150 151 cache_fini(XBZRLE.cache); 152 XBZRLE.cache = new_cache; 153 } 154 155 out_new_size: 156 ret = pow2floor(new_size); 157 out: 158 XBZRLE_cache_unlock(); 159 return ret; 160 } 161 162 static void ramblock_recv_map_init(void) 163 { 164 RAMBlock *rb; 165 166 RAMBLOCK_FOREACH(rb) { 167 assert(!rb->receivedmap); 168 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); 169 } 170 } 171 172 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr) 173 { 174 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), 175 rb->receivedmap); 176 } 177 178 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) 179 { 180 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); 181 } 182 183 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, 184 size_t nr) 185 { 186 bitmap_set_atomic(rb->receivedmap, 187 ramblock_recv_bitmap_offset(host_addr, rb), 188 nr); 189 } 190 191 /* 192 * An outstanding page request, on the source, having been received 193 * and queued 194 */ 195 struct RAMSrcPageRequest { 196 RAMBlock *rb; 197 hwaddr offset; 198 hwaddr len; 199 200 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; 201 }; 202 203 /* State of RAM for migration */ 204 struct RAMState { 205 /* QEMUFile used for this migration */ 206 QEMUFile *f; 207 /* Last block that we have visited searching for dirty pages */ 208 RAMBlock *last_seen_block; 209 /* Last block from where we have sent data */ 210 RAMBlock *last_sent_block; 211 /* Last dirty target page we have sent */ 212 ram_addr_t last_page; 213 /* last ram version we have seen */ 214 uint32_t last_version; 215 /* We are in the first round */ 216 bool ram_bulk_stage; 217 /* How many times we have dirty too many pages */ 218 int dirty_rate_high_cnt; 219 /* these variables are used for bitmap 
sync */ 220 /* last time we did a full bitmap_sync */ 221 int64_t time_last_bitmap_sync; 222 /* bytes transferred at start_time */ 223 uint64_t bytes_xfer_prev; 224 /* number of dirty pages since start_time */ 225 uint64_t num_dirty_pages_period; 226 /* xbzrle misses since the beginning of the period */ 227 uint64_t xbzrle_cache_miss_prev; 228 /* number of iterations at the beginning of period */ 229 uint64_t iterations_prev; 230 /* Iterations since start */ 231 uint64_t iterations; 232 /* number of dirty bits in the bitmap */ 233 uint64_t migration_dirty_pages; 234 /* protects modification of the bitmap */ 235 QemuMutex bitmap_mutex; 236 /* The RAMBlock used in the last src_page_requests */ 237 RAMBlock *last_req_rb; 238 /* Queue of outstanding page requests from the destination */ 239 QemuMutex src_page_req_mutex; 240 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests; 241 }; 242 typedef struct RAMState RAMState; 243 244 static RAMState *ram_state; 245 246 uint64_t ram_bytes_remaining(void) 247 { 248 return ram_state->migration_dirty_pages * TARGET_PAGE_SIZE; 249 } 250 251 MigrationStats ram_counters; 252 253 /* used by the search for pages to send */ 254 struct PageSearchStatus { 255 /* Current block being searched */ 256 RAMBlock *block; 257 /* Current page to search from */ 258 unsigned long page; 259 /* Set once we wrap around */ 260 bool complete_round; 261 }; 262 typedef struct PageSearchStatus PageSearchStatus; 263 264 struct CompressParam { 265 bool done; 266 bool quit; 267 QEMUFile *file; 268 QemuMutex mutex; 269 QemuCond cond; 270 RAMBlock *block; 271 ram_addr_t offset; 272 }; 273 typedef struct CompressParam CompressParam; 274 275 struct DecompressParam { 276 bool done; 277 bool quit; 278 QemuMutex mutex; 279 QemuCond cond; 280 void *des; 281 uint8_t *compbuf; 282 int len; 283 }; 284 typedef struct DecompressParam DecompressParam; 285 286 static CompressParam *comp_param; 287 static QemuThread *compress_threads; 288 /* comp_done_cond is used to wake up the migration thread when 289 * one of the compression threads has finished the compression. 290 * comp_done_lock is used to co-work with comp_done_cond. 
291 */ 292 static QemuMutex comp_done_lock; 293 static QemuCond comp_done_cond; 294 /* The empty QEMUFileOps will be used by file in CompressParam */ 295 static const QEMUFileOps empty_ops = { }; 296 297 static DecompressParam *decomp_param; 298 static QemuThread *decompress_threads; 299 static QemuMutex decomp_done_lock; 300 static QemuCond decomp_done_cond; 301 302 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, 303 ram_addr_t offset); 304 305 static void *do_data_compress(void *opaque) 306 { 307 CompressParam *param = opaque; 308 RAMBlock *block; 309 ram_addr_t offset; 310 311 qemu_mutex_lock(¶m->mutex); 312 while (!param->quit) { 313 if (param->block) { 314 block = param->block; 315 offset = param->offset; 316 param->block = NULL; 317 qemu_mutex_unlock(¶m->mutex); 318 319 do_compress_ram_page(param->file, block, offset); 320 321 qemu_mutex_lock(&comp_done_lock); 322 param->done = true; 323 qemu_cond_signal(&comp_done_cond); 324 qemu_mutex_unlock(&comp_done_lock); 325 326 qemu_mutex_lock(¶m->mutex); 327 } else { 328 qemu_cond_wait(¶m->cond, ¶m->mutex); 329 } 330 } 331 qemu_mutex_unlock(¶m->mutex); 332 333 return NULL; 334 } 335 336 static inline void terminate_compression_threads(void) 337 { 338 int idx, thread_count; 339 340 thread_count = migrate_compress_threads(); 341 342 for (idx = 0; idx < thread_count; idx++) { 343 qemu_mutex_lock(&comp_param[idx].mutex); 344 comp_param[idx].quit = true; 345 qemu_cond_signal(&comp_param[idx].cond); 346 qemu_mutex_unlock(&comp_param[idx].mutex); 347 } 348 } 349 350 static void compress_threads_save_cleanup(void) 351 { 352 int i, thread_count; 353 354 if (!migrate_use_compression()) { 355 return; 356 } 357 terminate_compression_threads(); 358 thread_count = migrate_compress_threads(); 359 for (i = 0; i < thread_count; i++) { 360 qemu_thread_join(compress_threads + i); 361 qemu_fclose(comp_param[i].file); 362 qemu_mutex_destroy(&comp_param[i].mutex); 363 qemu_cond_destroy(&comp_param[i].cond); 364 } 365 qemu_mutex_destroy(&comp_done_lock); 366 qemu_cond_destroy(&comp_done_cond); 367 g_free(compress_threads); 368 g_free(comp_param); 369 compress_threads = NULL; 370 comp_param = NULL; 371 } 372 373 static void compress_threads_save_setup(void) 374 { 375 int i, thread_count; 376 377 if (!migrate_use_compression()) { 378 return; 379 } 380 thread_count = migrate_compress_threads(); 381 compress_threads = g_new0(QemuThread, thread_count); 382 comp_param = g_new0(CompressParam, thread_count); 383 qemu_cond_init(&comp_done_cond); 384 qemu_mutex_init(&comp_done_lock); 385 for (i = 0; i < thread_count; i++) { 386 /* comp_param[i].file is just used as a dummy buffer to save data, 387 * set its ops to empty. 
388 */ 389 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); 390 comp_param[i].done = true; 391 comp_param[i].quit = false; 392 qemu_mutex_init(&comp_param[i].mutex); 393 qemu_cond_init(&comp_param[i].cond); 394 qemu_thread_create(compress_threads + i, "compress", 395 do_data_compress, comp_param + i, 396 QEMU_THREAD_JOINABLE); 397 } 398 } 399 400 /* Multiple fd's */ 401 402 struct MultiFDSendParams { 403 uint8_t id; 404 char *name; 405 QemuThread thread; 406 QemuSemaphore sem; 407 QemuMutex mutex; 408 bool quit; 409 }; 410 typedef struct MultiFDSendParams MultiFDSendParams; 411 412 struct { 413 MultiFDSendParams *params; 414 /* number of created threads */ 415 int count; 416 } *multifd_send_state; 417 418 static void terminate_multifd_send_threads(Error *errp) 419 { 420 int i; 421 422 for (i = 0; i < multifd_send_state->count; i++) { 423 MultiFDSendParams *p = &multifd_send_state->params[i]; 424 425 qemu_mutex_lock(&p->mutex); 426 p->quit = true; 427 qemu_sem_post(&p->sem); 428 qemu_mutex_unlock(&p->mutex); 429 } 430 } 431 432 int multifd_save_cleanup(Error **errp) 433 { 434 int i; 435 int ret = 0; 436 437 if (!migrate_use_multifd()) { 438 return 0; 439 } 440 terminate_multifd_send_threads(NULL); 441 for (i = 0; i < multifd_send_state->count; i++) { 442 MultiFDSendParams *p = &multifd_send_state->params[i]; 443 444 qemu_thread_join(&p->thread); 445 qemu_mutex_destroy(&p->mutex); 446 qemu_sem_destroy(&p->sem); 447 g_free(p->name); 448 p->name = NULL; 449 } 450 g_free(multifd_send_state->params); 451 multifd_send_state->params = NULL; 452 g_free(multifd_send_state); 453 multifd_send_state = NULL; 454 return ret; 455 } 456 457 static void *multifd_send_thread(void *opaque) 458 { 459 MultiFDSendParams *p = opaque; 460 461 while (true) { 462 qemu_mutex_lock(&p->mutex); 463 if (p->quit) { 464 qemu_mutex_unlock(&p->mutex); 465 break; 466 } 467 qemu_mutex_unlock(&p->mutex); 468 qemu_sem_wait(&p->sem); 469 } 470 471 return NULL; 472 } 473 474 int multifd_save_setup(void) 475 { 476 int thread_count; 477 uint8_t i; 478 479 if (!migrate_use_multifd()) { 480 return 0; 481 } 482 thread_count = migrate_multifd_channels(); 483 multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); 484 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); 485 multifd_send_state->count = 0; 486 for (i = 0; i < thread_count; i++) { 487 MultiFDSendParams *p = &multifd_send_state->params[i]; 488 489 qemu_mutex_init(&p->mutex); 490 qemu_sem_init(&p->sem, 0); 491 p->quit = false; 492 p->id = i; 493 p->name = g_strdup_printf("multifdsend_%d", i); 494 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, 495 QEMU_THREAD_JOINABLE); 496 497 multifd_send_state->count++; 498 } 499 return 0; 500 } 501 502 struct MultiFDRecvParams { 503 uint8_t id; 504 char *name; 505 QemuThread thread; 506 QemuSemaphore sem; 507 QemuMutex mutex; 508 bool quit; 509 }; 510 typedef struct MultiFDRecvParams MultiFDRecvParams; 511 512 struct { 513 MultiFDRecvParams *params; 514 /* number of created threads */ 515 int count; 516 } *multifd_recv_state; 517 518 static void terminate_multifd_recv_threads(Error *errp) 519 { 520 int i; 521 522 for (i = 0; i < multifd_recv_state->count; i++) { 523 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 524 525 qemu_mutex_lock(&p->mutex); 526 p->quit = true; 527 qemu_sem_post(&p->sem); 528 qemu_mutex_unlock(&p->mutex); 529 } 530 } 531 532 int multifd_load_cleanup(Error **errp) 533 { 534 int i; 535 int ret = 0; 536 537 if (!migrate_use_multifd()) { 538 return 0; 539 } 540 
terminate_multifd_recv_threads(NULL); 541 for (i = 0; i < multifd_recv_state->count; i++) { 542 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 543 544 qemu_thread_join(&p->thread); 545 qemu_mutex_destroy(&p->mutex); 546 qemu_sem_destroy(&p->sem); 547 g_free(p->name); 548 p->name = NULL; 549 } 550 g_free(multifd_recv_state->params); 551 multifd_recv_state->params = NULL; 552 g_free(multifd_recv_state); 553 multifd_recv_state = NULL; 554 555 return ret; 556 } 557 558 static void *multifd_recv_thread(void *opaque) 559 { 560 MultiFDRecvParams *p = opaque; 561 562 while (true) { 563 qemu_mutex_lock(&p->mutex); 564 if (p->quit) { 565 qemu_mutex_unlock(&p->mutex); 566 break; 567 } 568 qemu_mutex_unlock(&p->mutex); 569 qemu_sem_wait(&p->sem); 570 } 571 572 return NULL; 573 } 574 575 int multifd_load_setup(void) 576 { 577 int thread_count; 578 uint8_t i; 579 580 if (!migrate_use_multifd()) { 581 return 0; 582 } 583 thread_count = migrate_multifd_channels(); 584 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); 585 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); 586 multifd_recv_state->count = 0; 587 for (i = 0; i < thread_count; i++) { 588 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 589 590 qemu_mutex_init(&p->mutex); 591 qemu_sem_init(&p->sem, 0); 592 p->quit = false; 593 p->id = i; 594 p->name = g_strdup_printf("multifdrecv_%d", i); 595 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, 596 QEMU_THREAD_JOINABLE); 597 multifd_recv_state->count++; 598 } 599 return 0; 600 } 601 602 /** 603 * save_page_header: write page header to wire 604 * 605 * If this is the 1st block, it also writes the block identification 606 * 607 * Returns the number of bytes written 608 * 609 * @f: QEMUFile where to send the data 610 * @block: block that contains the page we want to send 611 * @offset: offset inside the block for the page 612 * in the lower bits, it contains flags 613 */ 614 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, 615 ram_addr_t offset) 616 { 617 size_t size, len; 618 619 if (block == rs->last_sent_block) { 620 offset |= RAM_SAVE_FLAG_CONTINUE; 621 } 622 qemu_put_be64(f, offset); 623 size = 8; 624 625 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { 626 len = strlen(block->idstr); 627 qemu_put_byte(f, len); 628 qemu_put_buffer(f, (uint8_t *)block->idstr, len); 629 size += 1 + len; 630 rs->last_sent_block = block; 631 } 632 return size; 633 } 634 635 /** 636 * mig_throttle_guest_down: throotle down the guest 637 * 638 * Reduce amount of guest cpu execution to hopefully slow down memory 639 * writes. If guest dirty memory rate is reduced below the rate at 640 * which we can transfer pages to the destination then we should be 641 * able to complete migration. Some workloads dirty memory way too 642 * fast and will not effectively converge, even with auto-converge. 643 */ 644 static void mig_throttle_guest_down(void) 645 { 646 MigrationState *s = migrate_get_current(); 647 uint64_t pct_initial = s->parameters.cpu_throttle_initial; 648 uint64_t pct_icrement = s->parameters.cpu_throttle_increment; 649 650 /* We have not started throttling yet. Let's start it. 
*/ 651 if (!cpu_throttle_active()) { 652 cpu_throttle_set(pct_initial); 653 } else { 654 /* Throttling already on, just increase the rate */ 655 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement); 656 } 657 } 658 659 /** 660 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache 661 * 662 * @rs: current RAM state 663 * @current_addr: address for the zero page 664 * 665 * Update the xbzrle cache to reflect a page that's been sent as all 0. 666 * The important thing is that a stale (not-yet-0'd) page be replaced 667 * by the new data. 668 * As a bonus, if the page wasn't in the cache it gets added so that 669 * when a small write is made into the 0'd page it gets XBZRLE sent. 670 */ 671 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) 672 { 673 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { 674 return; 675 } 676 677 /* We don't care if this fails to allocate a new cache page 678 * as long as it updated an old one */ 679 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, 680 ram_counters.dirty_sync_count); 681 } 682 683 #define ENCODING_FLAG_XBZRLE 0x1 684 685 /** 686 * save_xbzrle_page: compress and send current page 687 * 688 * Returns: 1 means that we wrote the page 689 * 0 means that page is identical to the one already sent 690 * -1 means that xbzrle would be longer than normal 691 * 692 * @rs: current RAM state 693 * @current_data: pointer to the address of the page contents 694 * @current_addr: addr of the page 695 * @block: block that contains the page we want to send 696 * @offset: offset inside the block for the page 697 * @last_stage: if we are at the completion stage 698 */ 699 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, 700 ram_addr_t current_addr, RAMBlock *block, 701 ram_addr_t offset, bool last_stage) 702 { 703 int encoded_len = 0, bytes_xbzrle; 704 uint8_t *prev_cached_page; 705 706 if (!cache_is_cached(XBZRLE.cache, current_addr, 707 ram_counters.dirty_sync_count)) { 708 xbzrle_counters.cache_miss++; 709 if (!last_stage) { 710 if (cache_insert(XBZRLE.cache, current_addr, *current_data, 711 ram_counters.dirty_sync_count) == -1) { 712 return -1; 713 } else { 714 /* update *current_data when the page has been 715 inserted into cache */ 716 *current_data = get_cached_data(XBZRLE.cache, current_addr); 717 } 718 } 719 return -1; 720 } 721 722 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 723 724 /* save current buffer into memory */ 725 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); 726 727 /* XBZRLE encoding (if there is no overflow) */ 728 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 729 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 730 TARGET_PAGE_SIZE); 731 if (encoded_len == 0) { 732 trace_save_xbzrle_page_skipping(); 733 return 0; 734 } else if (encoded_len == -1) { 735 trace_save_xbzrle_page_overflow(); 736 xbzrle_counters.overflow++; 737 /* update data in the cache */ 738 if (!last_stage) { 739 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); 740 *current_data = prev_cached_page; 741 } 742 return -1; 743 } 744 745 /* we need to update the data in the cache, in order to get the same data */ 746 if (!last_stage) { 747 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 748 } 749 750 /* Send XBZRLE based compressed page */ 751 bytes_xbzrle = save_page_header(rs, rs->f, block, 752 offset | RAM_SAVE_FLAG_XBZRLE); 753 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); 754 qemu_put_be16(rs->f, encoded_len); 755 
qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); 756 bytes_xbzrle += encoded_len + 1 + 2; 757 xbzrle_counters.pages++; 758 xbzrle_counters.bytes += bytes_xbzrle; 759 ram_counters.transferred += bytes_xbzrle; 760 761 return 1; 762 } 763 764 /** 765 * migration_bitmap_find_dirty: find the next dirty page from start 766 * 767 * Called with rcu_read_lock() to protect migration_bitmap 768 * 769 * Returns the byte offset within memory region of the start of a dirty page 770 * 771 * @rs: current RAM state 772 * @rb: RAMBlock where to search for dirty pages 773 * @start: page where we start the search 774 */ 775 static inline 776 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, 777 unsigned long start) 778 { 779 unsigned long size = rb->used_length >> TARGET_PAGE_BITS; 780 unsigned long *bitmap = rb->bmap; 781 unsigned long next; 782 783 if (rs->ram_bulk_stage && start > 0) { 784 next = start + 1; 785 } else { 786 next = find_next_bit(bitmap, size, start); 787 } 788 789 return next; 790 } 791 792 static inline bool migration_bitmap_clear_dirty(RAMState *rs, 793 RAMBlock *rb, 794 unsigned long page) 795 { 796 bool ret; 797 798 ret = test_and_clear_bit(page, rb->bmap); 799 800 if (ret) { 801 rs->migration_dirty_pages--; 802 } 803 return ret; 804 } 805 806 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, 807 ram_addr_t start, ram_addr_t length) 808 { 809 rs->migration_dirty_pages += 810 cpu_physical_memory_sync_dirty_bitmap(rb, start, length, 811 &rs->num_dirty_pages_period); 812 } 813 814 /** 815 * ram_pagesize_summary: calculate all the pagesizes of a VM 816 * 817 * Returns a summary bitmap of the page sizes of all RAMBlocks 818 * 819 * For VMs with just normal pages this is equivalent to the host page 820 * size. If it's got some huge pages then it's the OR of all the 821 * different page sizes. 822 */ 823 uint64_t ram_pagesize_summary(void) 824 { 825 RAMBlock *block; 826 uint64_t summary = 0; 827 828 RAMBLOCK_FOREACH(block) { 829 summary |= block->page_size; 830 } 831 832 return summary; 833 } 834 835 static void migration_bitmap_sync(RAMState *rs) 836 { 837 RAMBlock *block; 838 int64_t end_time; 839 uint64_t bytes_xfer_now; 840 841 ram_counters.dirty_sync_count++; 842 843 if (!rs->time_last_bitmap_sync) { 844 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 845 } 846 847 trace_migration_bitmap_sync_start(); 848 memory_global_dirty_log_sync(); 849 850 qemu_mutex_lock(&rs->bitmap_mutex); 851 rcu_read_lock(); 852 RAMBLOCK_FOREACH(block) { 853 migration_bitmap_sync_range(rs, block, 0, block->used_length); 854 } 855 rcu_read_unlock(); 856 qemu_mutex_unlock(&rs->bitmap_mutex); 857 858 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); 859 860 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 861 862 /* more than 1 second = 1000 millisecons */ 863 if (end_time > rs->time_last_bitmap_sync + 1000) { 864 /* calculate period counters */ 865 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 866 / (end_time - rs->time_last_bitmap_sync); 867 bytes_xfer_now = ram_counters.transferred; 868 869 /* During block migration the auto-converge logic incorrectly detects 870 * that ram migration makes no progress. Avoid this by disabling the 871 * throttling logic during the bulk phase of block migration. */ 872 if (migrate_auto_converge() && !blk_mig_bulk_active()) { 873 /* The following detection logic can be refined later. For now: 874 Check to see if the dirtied bytes is 50% more than the approx. 
875 amount of bytes that just got transferred since the last time we 876 were in this routine. If that happens twice, start or increase 877 throttling */ 878 879 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE > 880 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && 881 (++rs->dirty_rate_high_cnt >= 2)) { 882 trace_migration_throttle(); 883 rs->dirty_rate_high_cnt = 0; 884 mig_throttle_guest_down(); 885 } 886 } 887 888 if (migrate_use_xbzrle()) { 889 if (rs->iterations_prev != rs->iterations) { 890 xbzrle_counters.cache_miss_rate = 891 (double)(xbzrle_counters.cache_miss - 892 rs->xbzrle_cache_miss_prev) / 893 (rs->iterations - rs->iterations_prev); 894 } 895 rs->iterations_prev = rs->iterations; 896 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; 897 } 898 899 /* reset period counters */ 900 rs->time_last_bitmap_sync = end_time; 901 rs->num_dirty_pages_period = 0; 902 rs->bytes_xfer_prev = bytes_xfer_now; 903 } 904 if (migrate_use_events()) { 905 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL); 906 } 907 } 908 909 /** 910 * save_zero_page: send the zero page to the stream 911 * 912 * Returns the number of pages written. 913 * 914 * @rs: current RAM state 915 * @block: block that contains the page we want to send 916 * @offset: offset inside the block for the page 917 * @p: pointer to the page 918 */ 919 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 920 uint8_t *p) 921 { 922 int pages = -1; 923 924 if (is_zero_range(p, TARGET_PAGE_SIZE)) { 925 ram_counters.duplicate++; 926 ram_counters.transferred += 927 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO); 928 qemu_put_byte(rs->f, 0); 929 ram_counters.transferred += 1; 930 pages = 1; 931 } 932 933 return pages; 934 } 935 936 static void ram_release_pages(const char *rbname, uint64_t offset, int pages) 937 { 938 if (!migrate_release_ram() || !migration_in_postcopy()) { 939 return; 940 } 941 942 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); 943 } 944 945 /** 946 * ram_save_page: send the given page to the stream 947 * 948 * Returns the number of pages written. 949 * < 0 - error 950 * >=0 - Number of pages written - this might legally be 0 951 * if xbzrle noticed the page was the same. 
952 * 953 * @rs: current RAM state 954 * @block: block that contains the page we want to send 955 * @offset: offset inside the block for the page 956 * @last_stage: if we are at the completion stage 957 */ 958 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) 959 { 960 int pages = -1; 961 uint64_t bytes_xmit; 962 ram_addr_t current_addr; 963 uint8_t *p; 964 int ret; 965 bool send_async = true; 966 RAMBlock *block = pss->block; 967 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 968 969 p = block->host + offset; 970 trace_ram_save_page(block->idstr, (uint64_t)offset, p); 971 972 /* In doubt sent page as normal */ 973 bytes_xmit = 0; 974 ret = ram_control_save_page(rs->f, block->offset, 975 offset, TARGET_PAGE_SIZE, &bytes_xmit); 976 if (bytes_xmit) { 977 ram_counters.transferred += bytes_xmit; 978 pages = 1; 979 } 980 981 XBZRLE_cache_lock(); 982 983 current_addr = block->offset + offset; 984 985 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 986 if (ret != RAM_SAVE_CONTROL_DELAYED) { 987 if (bytes_xmit > 0) { 988 ram_counters.normal++; 989 } else if (bytes_xmit == 0) { 990 ram_counters.duplicate++; 991 } 992 } 993 } else { 994 pages = save_zero_page(rs, block, offset, p); 995 if (pages > 0) { 996 /* Must let xbzrle know, otherwise a previous (now 0'd) cached 997 * page would be stale 998 */ 999 xbzrle_cache_zero_page(rs, current_addr); 1000 ram_release_pages(block->idstr, offset, pages); 1001 } else if (!rs->ram_bulk_stage && 1002 !migration_in_postcopy() && migrate_use_xbzrle()) { 1003 pages = save_xbzrle_page(rs, &p, current_addr, block, 1004 offset, last_stage); 1005 if (!last_stage) { 1006 /* Can't send this cached data async, since the cache page 1007 * might get updated before it gets to the wire 1008 */ 1009 send_async = false; 1010 } 1011 } 1012 } 1013 1014 /* XBZRLE overflow or normal page */ 1015 if (pages == -1) { 1016 ram_counters.transferred += 1017 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE); 1018 if (send_async) { 1019 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE, 1020 migrate_release_ram() & 1021 migration_in_postcopy()); 1022 } else { 1023 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE); 1024 } 1025 ram_counters.transferred += TARGET_PAGE_SIZE; 1026 pages = 1; 1027 ram_counters.normal++; 1028 } 1029 1030 XBZRLE_cache_unlock(); 1031 1032 return pages; 1033 } 1034 1035 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, 1036 ram_addr_t offset) 1037 { 1038 RAMState *rs = ram_state; 1039 int bytes_sent, blen; 1040 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); 1041 1042 bytes_sent = save_page_header(rs, f, block, offset | 1043 RAM_SAVE_FLAG_COMPRESS_PAGE); 1044 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, 1045 migrate_compress_level()); 1046 if (blen < 0) { 1047 bytes_sent = 0; 1048 qemu_file_set_error(migrate_get_current()->to_dst_file, blen); 1049 error_report("compressed data failed!"); 1050 } else { 1051 bytes_sent += blen; 1052 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); 1053 } 1054 1055 return bytes_sent; 1056 } 1057 1058 static void flush_compressed_data(RAMState *rs) 1059 { 1060 int idx, len, thread_count; 1061 1062 if (!migrate_use_compression()) { 1063 return; 1064 } 1065 thread_count = migrate_compress_threads(); 1066 1067 qemu_mutex_lock(&comp_done_lock); 1068 for (idx = 0; idx < thread_count; idx++) { 1069 while (!comp_param[idx].done) { 1070 qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1071 } 1072 } 1073 qemu_mutex_unlock(&comp_done_lock); 1074 1075 for (idx = 0; 
idx < thread_count; idx++) { 1076 qemu_mutex_lock(&comp_param[idx].mutex); 1077 if (!comp_param[idx].quit) { 1078 len = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1079 ram_counters.transferred += len; 1080 } 1081 qemu_mutex_unlock(&comp_param[idx].mutex); 1082 } 1083 } 1084 1085 static inline void set_compress_params(CompressParam *param, RAMBlock *block, 1086 ram_addr_t offset) 1087 { 1088 param->block = block; 1089 param->offset = offset; 1090 } 1091 1092 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, 1093 ram_addr_t offset) 1094 { 1095 int idx, thread_count, bytes_xmit = -1, pages = -1; 1096 1097 thread_count = migrate_compress_threads(); 1098 qemu_mutex_lock(&comp_done_lock); 1099 while (true) { 1100 for (idx = 0; idx < thread_count; idx++) { 1101 if (comp_param[idx].done) { 1102 comp_param[idx].done = false; 1103 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1104 qemu_mutex_lock(&comp_param[idx].mutex); 1105 set_compress_params(&comp_param[idx], block, offset); 1106 qemu_cond_signal(&comp_param[idx].cond); 1107 qemu_mutex_unlock(&comp_param[idx].mutex); 1108 pages = 1; 1109 ram_counters.normal++; 1110 ram_counters.transferred += bytes_xmit; 1111 break; 1112 } 1113 } 1114 if (pages > 0) { 1115 break; 1116 } else { 1117 qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1118 } 1119 } 1120 qemu_mutex_unlock(&comp_done_lock); 1121 1122 return pages; 1123 } 1124 1125 /** 1126 * ram_save_compressed_page: compress the given page and send it to the stream 1127 * 1128 * Returns the number of pages written. 1129 * 1130 * @rs: current RAM state 1131 * @block: block that contains the page we want to send 1132 * @offset: offset inside the block for the page 1133 * @last_stage: if we are at the completion stage 1134 */ 1135 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss, 1136 bool last_stage) 1137 { 1138 int pages = -1; 1139 uint64_t bytes_xmit = 0; 1140 uint8_t *p; 1141 int ret, blen; 1142 RAMBlock *block = pss->block; 1143 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 1144 1145 p = block->host + offset; 1146 1147 ret = ram_control_save_page(rs->f, block->offset, 1148 offset, TARGET_PAGE_SIZE, &bytes_xmit); 1149 if (bytes_xmit) { 1150 ram_counters.transferred += bytes_xmit; 1151 pages = 1; 1152 } 1153 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 1154 if (ret != RAM_SAVE_CONTROL_DELAYED) { 1155 if (bytes_xmit > 0) { 1156 ram_counters.normal++; 1157 } else if (bytes_xmit == 0) { 1158 ram_counters.duplicate++; 1159 } 1160 } 1161 } else { 1162 /* When starting the process of a new block, the first page of 1163 * the block should be sent out before other pages in the same 1164 * block, and all the pages in last block should have been sent 1165 * out, keeping this order is important, because the 'cont' flag 1166 * is used to avoid resending the block name. 
1167 */ 1168 if (block != rs->last_sent_block) { 1169 flush_compressed_data(rs); 1170 pages = save_zero_page(rs, block, offset, p); 1171 if (pages == -1) { 1172 /* Make sure the first page is sent out before other pages */ 1173 bytes_xmit = save_page_header(rs, rs->f, block, offset | 1174 RAM_SAVE_FLAG_COMPRESS_PAGE); 1175 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE, 1176 migrate_compress_level()); 1177 if (blen > 0) { 1178 ram_counters.transferred += bytes_xmit + blen; 1179 ram_counters.normal++; 1180 pages = 1; 1181 } else { 1182 qemu_file_set_error(rs->f, blen); 1183 error_report("compressed data failed!"); 1184 } 1185 } 1186 if (pages > 0) { 1187 ram_release_pages(block->idstr, offset, pages); 1188 } 1189 } else { 1190 pages = save_zero_page(rs, block, offset, p); 1191 if (pages == -1) { 1192 pages = compress_page_with_multi_thread(rs, block, offset); 1193 } else { 1194 ram_release_pages(block->idstr, offset, pages); 1195 } 1196 } 1197 } 1198 1199 return pages; 1200 } 1201 1202 /** 1203 * find_dirty_block: find the next dirty page and update any state 1204 * associated with the search process. 1205 * 1206 * Returns if a page is found 1207 * 1208 * @rs: current RAM state 1209 * @pss: data about the state of the current dirty page scan 1210 * @again: set to false if the search has scanned the whole of RAM 1211 */ 1212 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) 1213 { 1214 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); 1215 if (pss->complete_round && pss->block == rs->last_seen_block && 1216 pss->page >= rs->last_page) { 1217 /* 1218 * We've been once around the RAM and haven't found anything. 1219 * Give up. 1220 */ 1221 *again = false; 1222 return false; 1223 } 1224 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { 1225 /* Didn't find anything in this RAM Block */ 1226 pss->page = 0; 1227 pss->block = QLIST_NEXT_RCU(pss->block, next); 1228 if (!pss->block) { 1229 /* Hit the end of the list */ 1230 pss->block = QLIST_FIRST_RCU(&ram_list.blocks); 1231 /* Flag that we've looped */ 1232 pss->complete_round = true; 1233 rs->ram_bulk_stage = false; 1234 if (migrate_use_xbzrle()) { 1235 /* If xbzrle is on, stop using the data compression at this 1236 * point. In theory, xbzrle can do better than compression. 1237 */ 1238 flush_compressed_data(rs); 1239 } 1240 } 1241 /* Didn't find anything this time, but try again on the new block */ 1242 *again = true; 1243 return false; 1244 } else { 1245 /* Can go around again, but... 
*/ 1246 *again = true; 1247 /* We've found something so probably don't need to */ 1248 return true; 1249 } 1250 } 1251 1252 /** 1253 * unqueue_page: gets a page of the queue 1254 * 1255 * Helper for 'get_queued_page' - gets a page off the queue 1256 * 1257 * Returns the block of the page (or NULL if none available) 1258 * 1259 * @rs: current RAM state 1260 * @offset: used to return the offset within the RAMBlock 1261 */ 1262 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) 1263 { 1264 RAMBlock *block = NULL; 1265 1266 qemu_mutex_lock(&rs->src_page_req_mutex); 1267 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) { 1268 struct RAMSrcPageRequest *entry = 1269 QSIMPLEQ_FIRST(&rs->src_page_requests); 1270 block = entry->rb; 1271 *offset = entry->offset; 1272 1273 if (entry->len > TARGET_PAGE_SIZE) { 1274 entry->len -= TARGET_PAGE_SIZE; 1275 entry->offset += TARGET_PAGE_SIZE; 1276 } else { 1277 memory_region_unref(block->mr); 1278 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1279 g_free(entry); 1280 } 1281 } 1282 qemu_mutex_unlock(&rs->src_page_req_mutex); 1283 1284 return block; 1285 } 1286 1287 /** 1288 * get_queued_page: unqueue a page from the postocpy requests 1289 * 1290 * Skips pages that are already sent (!dirty) 1291 * 1292 * Returns if a queued page is found 1293 * 1294 * @rs: current RAM state 1295 * @pss: data about the state of the current dirty page scan 1296 */ 1297 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) 1298 { 1299 RAMBlock *block; 1300 ram_addr_t offset; 1301 bool dirty; 1302 1303 do { 1304 block = unqueue_page(rs, &offset); 1305 /* 1306 * We're sending this page, and since it's postcopy nothing else 1307 * will dirty it, and we must make sure it doesn't get sent again 1308 * even if this queue request was received after the background 1309 * search already sent it. 1310 */ 1311 if (block) { 1312 unsigned long page; 1313 1314 page = offset >> TARGET_PAGE_BITS; 1315 dirty = test_bit(page, block->bmap); 1316 if (!dirty) { 1317 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, 1318 page, test_bit(page, block->unsentmap)); 1319 } else { 1320 trace_get_queued_page(block->idstr, (uint64_t)offset, page); 1321 } 1322 } 1323 1324 } while (block && !dirty); 1325 1326 if (block) { 1327 /* 1328 * As soon as we start servicing pages out of order, then we have 1329 * to kill the bulk stage, since the bulk stage assumes 1330 * in (migration_bitmap_find_and_reset_dirty) that every page is 1331 * dirty, that's no longer true. 1332 */ 1333 rs->ram_bulk_stage = false; 1334 1335 /* 1336 * We want the background search to continue from the queued page 1337 * since the guest is likely to want other pages near to the page 1338 * it just requested. 1339 */ 1340 pss->block = block; 1341 pss->page = offset >> TARGET_PAGE_BITS; 1342 } 1343 1344 return !!block; 1345 } 1346 1347 /** 1348 * migration_page_queue_free: drop any remaining pages in the ram 1349 * request queue 1350 * 1351 * It should be empty at the end anyway, but in error cases there may 1352 * be some left. in case that there is any page left, we drop it. 1353 * 1354 */ 1355 static void migration_page_queue_free(RAMState *rs) 1356 { 1357 struct RAMSrcPageRequest *mspr, *next_mspr; 1358 /* This queue generally should be empty - but in the case of a failed 1359 * migration might have some droppings in. 
1360 */ 1361 rcu_read_lock(); 1362 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { 1363 memory_region_unref(mspr->rb->mr); 1364 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1365 g_free(mspr); 1366 } 1367 rcu_read_unlock(); 1368 } 1369 1370 /** 1371 * ram_save_queue_pages: queue the page for transmission 1372 * 1373 * A request from postcopy destination for example. 1374 * 1375 * Returns zero on success or negative on error 1376 * 1377 * @rbname: Name of the RAMBLock of the request. NULL means the 1378 * same that last one. 1379 * @start: starting address from the start of the RAMBlock 1380 * @len: length (in bytes) to send 1381 */ 1382 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) 1383 { 1384 RAMBlock *ramblock; 1385 RAMState *rs = ram_state; 1386 1387 ram_counters.postcopy_requests++; 1388 rcu_read_lock(); 1389 if (!rbname) { 1390 /* Reuse last RAMBlock */ 1391 ramblock = rs->last_req_rb; 1392 1393 if (!ramblock) { 1394 /* 1395 * Shouldn't happen, we can't reuse the last RAMBlock if 1396 * it's the 1st request. 1397 */ 1398 error_report("ram_save_queue_pages no previous block"); 1399 goto err; 1400 } 1401 } else { 1402 ramblock = qemu_ram_block_by_name(rbname); 1403 1404 if (!ramblock) { 1405 /* We shouldn't be asked for a non-existent RAMBlock */ 1406 error_report("ram_save_queue_pages no block '%s'", rbname); 1407 goto err; 1408 } 1409 rs->last_req_rb = ramblock; 1410 } 1411 trace_ram_save_queue_pages(ramblock->idstr, start, len); 1412 if (start+len > ramblock->used_length) { 1413 error_report("%s request overrun start=" RAM_ADDR_FMT " len=" 1414 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, 1415 __func__, start, len, ramblock->used_length); 1416 goto err; 1417 } 1418 1419 struct RAMSrcPageRequest *new_entry = 1420 g_malloc0(sizeof(struct RAMSrcPageRequest)); 1421 new_entry->rb = ramblock; 1422 new_entry->offset = start; 1423 new_entry->len = len; 1424 1425 memory_region_ref(ramblock->mr); 1426 qemu_mutex_lock(&rs->src_page_req_mutex); 1427 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); 1428 qemu_mutex_unlock(&rs->src_page_req_mutex); 1429 rcu_read_unlock(); 1430 1431 return 0; 1432 1433 err: 1434 rcu_read_unlock(); 1435 return -1; 1436 } 1437 1438 /** 1439 * ram_save_target_page: save one target page 1440 * 1441 * Returns the number of pages written 1442 * 1443 * @rs: current RAM state 1444 * @ms: current migration state 1445 * @pss: data about the page we want to send 1446 * @last_stage: if we are at the completion stage 1447 */ 1448 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, 1449 bool last_stage) 1450 { 1451 int res = 0; 1452 1453 /* Check the pages is dirty and if it is send it */ 1454 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { 1455 /* 1456 * If xbzrle is on, stop using the data compression after first 1457 * round of migration even if compression is enabled. In theory, 1458 * xbzrle can do better than compression. 
1459 */ 1460 if (migrate_use_compression() && 1461 (rs->ram_bulk_stage || !migrate_use_xbzrle())) { 1462 res = ram_save_compressed_page(rs, pss, last_stage); 1463 } else { 1464 res = ram_save_page(rs, pss, last_stage); 1465 } 1466 1467 if (res < 0) { 1468 return res; 1469 } 1470 if (pss->block->unsentmap) { 1471 clear_bit(pss->page, pss->block->unsentmap); 1472 } 1473 } 1474 1475 return res; 1476 } 1477 1478 /** 1479 * ram_save_host_page: save a whole host page 1480 * 1481 * Starting at *offset send pages up to the end of the current host 1482 * page. It's valid for the initial offset to point into the middle of 1483 * a host page in which case the remainder of the hostpage is sent. 1484 * Only dirty target pages are sent. Note that the host page size may 1485 * be a huge page for this block. 1486 * The saving stops at the boundary of the used_length of the block 1487 * if the RAMBlock isn't a multiple of the host page size. 1488 * 1489 * Returns the number of pages written or negative on error 1490 * 1491 * @rs: current RAM state 1492 * @ms: current migration state 1493 * @pss: data about the page we want to send 1494 * @last_stage: if we are at the completion stage 1495 */ 1496 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, 1497 bool last_stage) 1498 { 1499 int tmppages, pages = 0; 1500 size_t pagesize_bits = 1501 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; 1502 1503 do { 1504 tmppages = ram_save_target_page(rs, pss, last_stage); 1505 if (tmppages < 0) { 1506 return tmppages; 1507 } 1508 1509 pages += tmppages; 1510 pss->page++; 1511 } while ((pss->page & (pagesize_bits - 1)) && 1512 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); 1513 1514 /* The offset we leave with is the last one we looked at */ 1515 pss->page--; 1516 return pages; 1517 } 1518 1519 /** 1520 * ram_find_and_save_block: finds a dirty page and sends it to f 1521 * 1522 * Called within an RCU critical section. 1523 * 1524 * Returns the number of pages written where zero means no dirty pages 1525 * 1526 * @rs: current RAM state 1527 * @last_stage: if we are at the completion stage 1528 * 1529 * On systems where host-page-size > target-page-size it will send all the 1530 * pages in a host page that are dirty. 
1531 */ 1532 1533 static int ram_find_and_save_block(RAMState *rs, bool last_stage) 1534 { 1535 PageSearchStatus pss; 1536 int pages = 0; 1537 bool again, found; 1538 1539 /* No dirty page as there is zero RAM */ 1540 if (!ram_bytes_total()) { 1541 return pages; 1542 } 1543 1544 pss.block = rs->last_seen_block; 1545 pss.page = rs->last_page; 1546 pss.complete_round = false; 1547 1548 if (!pss.block) { 1549 pss.block = QLIST_FIRST_RCU(&ram_list.blocks); 1550 } 1551 1552 do { 1553 again = true; 1554 found = get_queued_page(rs, &pss); 1555 1556 if (!found) { 1557 /* priority queue empty, so just search for something dirty */ 1558 found = find_dirty_block(rs, &pss, &again); 1559 } 1560 1561 if (found) { 1562 pages = ram_save_host_page(rs, &pss, last_stage); 1563 } 1564 } while (!pages && again); 1565 1566 rs->last_seen_block = pss.block; 1567 rs->last_page = pss.page; 1568 1569 return pages; 1570 } 1571 1572 void acct_update_position(QEMUFile *f, size_t size, bool zero) 1573 { 1574 uint64_t pages = size / TARGET_PAGE_SIZE; 1575 1576 if (zero) { 1577 ram_counters.duplicate += pages; 1578 } else { 1579 ram_counters.normal += pages; 1580 ram_counters.transferred += size; 1581 qemu_update_position(f, size); 1582 } 1583 } 1584 1585 uint64_t ram_bytes_total(void) 1586 { 1587 RAMBlock *block; 1588 uint64_t total = 0; 1589 1590 rcu_read_lock(); 1591 RAMBLOCK_FOREACH(block) { 1592 total += block->used_length; 1593 } 1594 rcu_read_unlock(); 1595 return total; 1596 } 1597 1598 static void xbzrle_load_setup(void) 1599 { 1600 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 1601 } 1602 1603 static void xbzrle_load_cleanup(void) 1604 { 1605 g_free(XBZRLE.decoded_buf); 1606 XBZRLE.decoded_buf = NULL; 1607 } 1608 1609 static void ram_state_cleanup(RAMState **rsp) 1610 { 1611 migration_page_queue_free(*rsp); 1612 qemu_mutex_destroy(&(*rsp)->bitmap_mutex); 1613 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex); 1614 g_free(*rsp); 1615 *rsp = NULL; 1616 } 1617 1618 static void xbzrle_cleanup(void) 1619 { 1620 XBZRLE_cache_lock(); 1621 if (XBZRLE.cache) { 1622 cache_fini(XBZRLE.cache); 1623 g_free(XBZRLE.encoded_buf); 1624 g_free(XBZRLE.current_buf); 1625 g_free(XBZRLE.zero_target_page); 1626 XBZRLE.cache = NULL; 1627 XBZRLE.encoded_buf = NULL; 1628 XBZRLE.current_buf = NULL; 1629 XBZRLE.zero_target_page = NULL; 1630 } 1631 XBZRLE_cache_unlock(); 1632 } 1633 1634 static void ram_save_cleanup(void *opaque) 1635 { 1636 RAMState **rsp = opaque; 1637 RAMBlock *block; 1638 1639 /* caller have hold iothread lock or is in a bh, so there is 1640 * no writing race against this migration_bitmap 1641 */ 1642 memory_global_dirty_log_stop(); 1643 1644 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 1645 g_free(block->bmap); 1646 block->bmap = NULL; 1647 g_free(block->unsentmap); 1648 block->unsentmap = NULL; 1649 } 1650 1651 xbzrle_cleanup(); 1652 compress_threads_save_cleanup(); 1653 ram_state_cleanup(rsp); 1654 } 1655 1656 static void ram_state_reset(RAMState *rs) 1657 { 1658 rs->last_seen_block = NULL; 1659 rs->last_sent_block = NULL; 1660 rs->last_page = 0; 1661 rs->last_version = ram_list.version; 1662 rs->ram_bulk_stage = true; 1663 } 1664 1665 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 1666 1667 /* 1668 * 'expected' is the value you expect the bitmap mostly to be full 1669 * of; it won't bother printing lines that are all this value. 1670 * If 'todump' is null the migration bitmap is dumped. 
1671 */ 1672 void ram_debug_dump_bitmap(unsigned long *todump, bool expected, 1673 unsigned long pages) 1674 { 1675 int64_t cur; 1676 int64_t linelen = 128; 1677 char linebuf[129]; 1678 1679 for (cur = 0; cur < pages; cur += linelen) { 1680 int64_t curb; 1681 bool found = false; 1682 /* 1683 * Last line; catch the case where the line length 1684 * is longer than remaining ram 1685 */ 1686 if (cur + linelen > pages) { 1687 linelen = pages - cur; 1688 } 1689 for (curb = 0; curb < linelen; curb++) { 1690 bool thisbit = test_bit(cur + curb, todump); 1691 linebuf[curb] = thisbit ? '1' : '.'; 1692 found = found || (thisbit != expected); 1693 } 1694 if (found) { 1695 linebuf[curb] = '\0'; 1696 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); 1697 } 1698 } 1699 } 1700 1701 /* **** functions for postcopy ***** */ 1702 1703 void ram_postcopy_migrated_memory_release(MigrationState *ms) 1704 { 1705 struct RAMBlock *block; 1706 1707 RAMBLOCK_FOREACH(block) { 1708 unsigned long *bitmap = block->bmap; 1709 unsigned long range = block->used_length >> TARGET_PAGE_BITS; 1710 unsigned long run_start = find_next_zero_bit(bitmap, range, 0); 1711 1712 while (run_start < range) { 1713 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); 1714 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, 1715 (run_end - run_start) << TARGET_PAGE_BITS); 1716 run_start = find_next_zero_bit(bitmap, range, run_end + 1); 1717 } 1718 } 1719 } 1720 1721 /** 1722 * postcopy_send_discard_bm_ram: discard a RAMBlock 1723 * 1724 * Returns zero on success 1725 * 1726 * Callback from postcopy_each_ram_send_discard for each RAMBlock 1727 * Note: At this point the 'unsentmap' is the processed bitmap combined 1728 * with the dirtymap; so a '1' means it's either dirty or unsent. 1729 * 1730 * @ms: current migration state 1731 * @pds: state for postcopy 1732 * @start: RAMBlock starting page 1733 * @length: RAMBlock size 1734 */ 1735 static int postcopy_send_discard_bm_ram(MigrationState *ms, 1736 PostcopyDiscardState *pds, 1737 RAMBlock *block) 1738 { 1739 unsigned long end = block->used_length >> TARGET_PAGE_BITS; 1740 unsigned long current; 1741 unsigned long *unsentmap = block->unsentmap; 1742 1743 for (current = 0; current < end; ) { 1744 unsigned long one = find_next_bit(unsentmap, end, current); 1745 1746 if (one <= end) { 1747 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); 1748 unsigned long discard_length; 1749 1750 if (zero >= end) { 1751 discard_length = end - one; 1752 } else { 1753 discard_length = zero - one; 1754 } 1755 if (discard_length) { 1756 postcopy_discard_send_range(ms, pds, one, discard_length); 1757 } 1758 current = one + discard_length; 1759 } else { 1760 current = one; 1761 } 1762 } 1763 1764 return 0; 1765 } 1766 1767 /** 1768 * postcopy_each_ram_send_discard: discard all RAMBlocks 1769 * 1770 * Returns 0 for success or negative for error 1771 * 1772 * Utility for the outgoing postcopy code. 1773 * Calls postcopy_send_discard_bm_ram for each RAMBlock 1774 * passing it bitmap indexes and name. 
1775 * (qemu_ram_foreach_block ends up passing unscaled lengths 1776 * which would mean postcopy code would have to deal with target page) 1777 * 1778 * @ms: current migration state 1779 */ 1780 static int postcopy_each_ram_send_discard(MigrationState *ms) 1781 { 1782 struct RAMBlock *block; 1783 int ret; 1784 1785 RAMBLOCK_FOREACH(block) { 1786 PostcopyDiscardState *pds = 1787 postcopy_discard_send_init(ms, block->idstr); 1788 1789 /* 1790 * Postcopy sends chunks of bitmap over the wire, but it 1791 * just needs indexes at this point, avoids it having 1792 * target page specific code. 1793 */ 1794 ret = postcopy_send_discard_bm_ram(ms, pds, block); 1795 postcopy_discard_send_finish(ms, pds); 1796 if (ret) { 1797 return ret; 1798 } 1799 } 1800 1801 return 0; 1802 } 1803 1804 /** 1805 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages 1806 * 1807 * Helper for postcopy_chunk_hostpages; it's called twice to 1808 * canonicalize the two bitmaps, that are similar, but one is 1809 * inverted. 1810 * 1811 * Postcopy requires that all target pages in a hostpage are dirty or 1812 * clean, not a mix. This function canonicalizes the bitmaps. 1813 * 1814 * @ms: current migration state 1815 * @unsent_pass: if true we need to canonicalize partially unsent host pages 1816 * otherwise we need to canonicalize partially dirty host pages 1817 * @block: block that contains the page we want to canonicalize 1818 * @pds: state for postcopy 1819 */ 1820 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, 1821 RAMBlock *block, 1822 PostcopyDiscardState *pds) 1823 { 1824 RAMState *rs = ram_state; 1825 unsigned long *bitmap = block->bmap; 1826 unsigned long *unsentmap = block->unsentmap; 1827 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; 1828 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 1829 unsigned long run_start; 1830 1831 if (block->page_size == TARGET_PAGE_SIZE) { 1832 /* Easy case - TPS==HPS for a non-huge page RAMBlock */ 1833 return; 1834 } 1835 1836 if (unsent_pass) { 1837 /* Find a sent page */ 1838 run_start = find_next_zero_bit(unsentmap, pages, 0); 1839 } else { 1840 /* Find a dirty page */ 1841 run_start = find_next_bit(bitmap, pages, 0); 1842 } 1843 1844 while (run_start < pages) { 1845 bool do_fixup = false; 1846 unsigned long fixup_start_addr; 1847 unsigned long host_offset; 1848 1849 /* 1850 * If the start of this run of pages is in the middle of a host 1851 * page, then we need to fixup this host page. 1852 */ 1853 host_offset = run_start % host_ratio; 1854 if (host_offset) { 1855 do_fixup = true; 1856 run_start -= host_offset; 1857 fixup_start_addr = run_start; 1858 /* For the next pass */ 1859 run_start = run_start + host_ratio; 1860 } else { 1861 /* Find the end of this run */ 1862 unsigned long run_end; 1863 if (unsent_pass) { 1864 run_end = find_next_bit(unsentmap, pages, run_start + 1); 1865 } else { 1866 run_end = find_next_zero_bit(bitmap, pages, run_start + 1); 1867 } 1868 /* 1869 * If the end isn't at the start of a host page, then the 1870 * run doesn't finish at the end of a host page 1871 * and we need to discard. 
1872 */ 1873 host_offset = run_end % host_ratio; 1874 if (host_offset) { 1875 do_fixup = true; 1876 fixup_start_addr = run_end - host_offset; 1877 /* 1878 * This host page has gone, the next loop iteration starts 1879 * from after the fixup 1880 */ 1881 run_start = fixup_start_addr + host_ratio; 1882 } else { 1883 /* 1884 * No discards on this iteration, next loop starts from 1885 * next sent/dirty page 1886 */ 1887 run_start = run_end + 1; 1888 } 1889 } 1890 1891 if (do_fixup) { 1892 unsigned long page; 1893 1894 /* Tell the destination to discard this page */ 1895 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { 1896 /* For the unsent_pass we: 1897 * discard partially sent pages 1898 * For the !unsent_pass (dirty) we: 1899 * discard partially dirty pages that were sent 1900 * (any partially sent pages were already discarded 1901 * by the previous unsent_pass) 1902 */ 1903 postcopy_discard_send_range(ms, pds, fixup_start_addr, 1904 host_ratio); 1905 } 1906 1907 /* Clean up the bitmap */ 1908 for (page = fixup_start_addr; 1909 page < fixup_start_addr + host_ratio; page++) { 1910 /* All pages in this host page are now not sent */ 1911 set_bit(page, unsentmap); 1912 1913 /* 1914 * Remark them as dirty, updating the count for any pages 1915 * that weren't previously dirty. 1916 */ 1917 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap); 1918 } 1919 } 1920 1921 if (unsent_pass) { 1922 /* Find the next sent page for the next iteration */ 1923 run_start = find_next_zero_bit(unsentmap, pages, run_start); 1924 } else { 1925 /* Find the next dirty page for the next iteration */ 1926 run_start = find_next_bit(bitmap, pages, run_start); 1927 } 1928 } 1929 } 1930 1931 /** 1932 * postcopy_chuck_hostpages: discrad any partially sent host page 1933 * 1934 * Utility for the outgoing postcopy code. 1935 * 1936 * Discard any partially sent host-page size chunks, mark any partially 1937 * dirty host-page size chunks as all dirty. In this case the host-page 1938 * is the host-page for the particular RAMBlock, i.e. it might be a huge page 1939 * 1940 * Returns zero on success 1941 * 1942 * @ms: current migration state 1943 * @block: block we want to work with 1944 */ 1945 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) 1946 { 1947 PostcopyDiscardState *pds = 1948 postcopy_discard_send_init(ms, block->idstr); 1949 1950 /* First pass: Discard all partially sent host pages */ 1951 postcopy_chunk_hostpages_pass(ms, true, block, pds); 1952 /* 1953 * Second pass: Ensure that all partially dirty host pages are made 1954 * fully dirty. 
1955 */ 1956 postcopy_chunk_hostpages_pass(ms, false, block, pds); 1957 1958 postcopy_discard_send_finish(ms, pds); 1959 return 0; 1960 } 1961 1962 /** 1963 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap 1964 * 1965 * Returns zero on success 1966 * 1967 * Transmit the set of pages to be discarded after precopy to the target; 1968 * these are pages that: 1969 * a) Have been previously transmitted but are now dirty again 1970 * b) Pages that have never been transmitted; this ensures that 1971 * any pages on the destination that have been mapped by background 1972 * tasks get discarded (transparent huge pages are the specific concern) 1973 * Hopefully this is pretty sparse. 1974 * 1975 * @ms: current migration state 1976 */ 1977 int ram_postcopy_send_discard_bitmap(MigrationState *ms) 1978 { 1979 RAMState *rs = ram_state; 1980 RAMBlock *block; 1981 int ret; 1982 1983 rcu_read_lock(); 1984 1985 /* This should be our last sync, the src is now paused */ 1986 migration_bitmap_sync(rs); 1987 1988 /* Easiest way to make sure we don't resume in the middle of a host-page */ 1989 rs->last_seen_block = NULL; 1990 rs->last_sent_block = NULL; 1991 rs->last_page = 0; 1992 1993 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 1994 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 1995 unsigned long *bitmap = block->bmap; 1996 unsigned long *unsentmap = block->unsentmap; 1997 1998 if (!unsentmap) { 1999 /* We don't have a safe way to resize the sentmap, so 2000 * if the bitmap was resized it will be NULL at this 2001 * point. 2002 */ 2003 error_report("migration ram resized during precopy phase"); 2004 rcu_read_unlock(); 2005 return -EINVAL; 2006 } 2007 /* Deal with TPS != HPS and huge pages */ 2008 ret = postcopy_chunk_hostpages(ms, block); 2009 if (ret) { 2010 rcu_read_unlock(); 2011 return ret; 2012 } 2013 2014 /* 2015 * Update the unsentmap to be unsentmap = unsentmap | dirty 2016 */ 2017 bitmap_or(unsentmap, unsentmap, bitmap, pages); 2018 #ifdef DEBUG_POSTCOPY 2019 ram_debug_dump_bitmap(unsentmap, true, pages); 2020 #endif 2021 } 2022 trace_ram_postcopy_send_discard_bitmap(); 2023 2024 ret = postcopy_each_ram_send_discard(ms); 2025 rcu_read_unlock(); 2026 2027 return ret; 2028 } 2029 2030 /** 2031 * ram_discard_range: discard dirtied pages at the beginning of postcopy 2032 * 2033 * Returns zero on success 2034 * 2035 * @rbname: name of the RAMBlock of the request. NULL means the 2036 * same as the last one. 2037 * @start: RAMBlock starting page 2038 * @length: RAMBlock size 2039 */ 2040 int ram_discard_range(const char *rbname, uint64_t start, size_t length) 2041 { 2042 int ret = -1; 2043 2044 trace_ram_discard_range(rbname, start, length); 2045 2046 rcu_read_lock(); 2047 RAMBlock *rb = qemu_ram_block_by_name(rbname); 2048 2049 if (!rb) { 2050 error_report("ram_discard_range: Failed to find block '%s'", rbname); 2051 goto err; 2052 } 2053 2054 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(), 2055 length >> qemu_target_page_bits()); 2056 ret = ram_block_discard_range(rb, start, length); 2057 2058 err: 2059 rcu_read_unlock(); 2060 2061 return ret; 2062 } 2063 2064 /* 2065 * For every allocation, we will try not to crash the VM if the 2066 * allocation fails.
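 * (The g_try_* allocators return NULL on failure instead of aborting, so we can report the error and fail migration setup cleanly.)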
2067 */ 2068 static int xbzrle_init(void) 2069 { 2070 Error *local_err = NULL; 2071 2072 if (!migrate_use_xbzrle()) { 2073 return 0; 2074 } 2075 2076 XBZRLE_cache_lock(); 2077 2078 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); 2079 if (!XBZRLE.zero_target_page) { 2080 error_report("%s: Error allocating zero page", __func__); 2081 goto err_out; 2082 } 2083 2084 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), 2085 TARGET_PAGE_SIZE, &local_err); 2086 if (!XBZRLE.cache) { 2087 error_report_err(local_err); 2088 goto free_zero_page; 2089 } 2090 2091 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); 2092 if (!XBZRLE.encoded_buf) { 2093 error_report("%s: Error allocating encoded_buf", __func__); 2094 goto free_cache; 2095 } 2096 2097 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); 2098 if (!XBZRLE.current_buf) { 2099 error_report("%s: Error allocating current_buf", __func__); 2100 goto free_encoded_buf; 2101 } 2102 2103 /* We are all good */ 2104 XBZRLE_cache_unlock(); 2105 return 0; 2106 2107 free_encoded_buf: 2108 g_free(XBZRLE.encoded_buf); 2109 XBZRLE.encoded_buf = NULL; 2110 free_cache: 2111 cache_fini(XBZRLE.cache); 2112 XBZRLE.cache = NULL; 2113 free_zero_page: 2114 g_free(XBZRLE.zero_target_page); 2115 XBZRLE.zero_target_page = NULL; 2116 err_out: 2117 XBZRLE_cache_unlock(); 2118 return -ENOMEM; 2119 } 2120 2121 static int ram_state_init(RAMState **rsp) 2122 { 2123 *rsp = g_try_new0(RAMState, 1); 2124 2125 if (!*rsp) { 2126 error_report("%s: Init ramstate fail", __func__); 2127 return -1; 2128 } 2129 2130 qemu_mutex_init(&(*rsp)->bitmap_mutex); 2131 qemu_mutex_init(&(*rsp)->src_page_req_mutex); 2132 QSIMPLEQ_INIT(&(*rsp)->src_page_requests); 2133 2134 /* 2135 * Count the total number of pages used by ram blocks not including any 2136 * gaps due to alignment or unplugs. 2137 */ 2138 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; 2139 2140 ram_state_reset(*rsp); 2141 2142 return 0; 2143 } 2144 2145 static void ram_list_init_bitmaps(void) 2146 { 2147 RAMBlock *block; 2148 unsigned long pages; 2149 2150 /* Skip setting bitmap if there is no RAM */ 2151 if (ram_bytes_total()) { 2152 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 2153 pages = block->max_length >> TARGET_PAGE_BITS; 2154 block->bmap = bitmap_new(pages); 2155 bitmap_set(block->bmap, 0, pages); 2156 if (migrate_postcopy_ram()) { 2157 block->unsentmap = bitmap_new(pages); 2158 bitmap_set(block->unsentmap, 0, pages); 2159 } 2160 } 2161 } 2162 } 2163 2164 static void ram_init_bitmaps(RAMState *rs) 2165 { 2166 /* For memory_global_dirty_log_start below. */ 2167 qemu_mutex_lock_iothread(); 2168 qemu_mutex_lock_ramlist(); 2169 rcu_read_lock(); 2170 2171 ram_list_init_bitmaps(); 2172 memory_global_dirty_log_start(); 2173 migration_bitmap_sync(rs); 2174 2175 rcu_read_unlock(); 2176 qemu_mutex_unlock_ramlist(); 2177 qemu_mutex_unlock_iothread(); 2178 } 2179 2180 static int ram_init_all(RAMState **rsp) 2181 { 2182 if (ram_state_init(rsp)) { 2183 return -1; 2184 } 2185 2186 if (xbzrle_init()) { 2187 ram_state_cleanup(rsp); 2188 return -1; 2189 } 2190 2191 ram_init_bitmaps(*rsp); 2192 2193 return 0; 2194 } 2195 2196 /* 2197 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has 2198 * long-running RCU critical section. When rcu-reclaims in the code 2199 * start to become numerous it will be necessary to reduce the 2200 * granularity of these critical sections. 
2201 */ 2202 2203 /** 2204 * ram_save_setup: Setup RAM for migration 2205 * 2206 * Returns zero to indicate success and negative for error 2207 * 2208 * @f: QEMUFile where to send the data 2209 * @opaque: RAMState pointer 2210 */ 2211 static int ram_save_setup(QEMUFile *f, void *opaque) 2212 { 2213 RAMState **rsp = opaque; 2214 RAMBlock *block; 2215 2216 /* migration has already set up the bitmap, reuse it. */ 2217 if (!migration_in_colo_state()) { 2218 if (ram_init_all(rsp) != 0) { 2219 return -1; 2220 } 2221 } 2222 (*rsp)->f = f; 2223 2224 rcu_read_lock(); 2225 2226 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 2227 2228 RAMBLOCK_FOREACH(block) { 2229 qemu_put_byte(f, strlen(block->idstr)); 2230 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 2231 qemu_put_be64(f, block->used_length); 2232 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { 2233 qemu_put_be64(f, block->page_size); 2234 } 2235 } 2236 2237 rcu_read_unlock(); 2238 compress_threads_save_setup(); 2239 2240 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 2241 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 2242 2243 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2244 2245 return 0; 2246 } 2247 2248 /** 2249 * ram_save_iterate: iterative stage for migration 2250 * 2251 * Returns zero to indicate success and negative for error 2252 * 2253 * @f: QEMUFile where to send the data 2254 * @opaque: RAMState pointer 2255 */ 2256 static int ram_save_iterate(QEMUFile *f, void *opaque) 2257 { 2258 RAMState **temp = opaque; 2259 RAMState *rs = *temp; 2260 int ret; 2261 int i; 2262 int64_t t0; 2263 int done = 0; 2264 2265 rcu_read_lock(); 2266 if (ram_list.version != rs->last_version) { 2267 ram_state_reset(rs); 2268 } 2269 2270 /* Read version before ram_list.blocks */ 2271 smp_rmb(); 2272 2273 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 2274 2275 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2276 i = 0; 2277 while ((ret = qemu_file_rate_limit(f)) == 0) { 2278 int pages; 2279 2280 pages = ram_find_and_save_block(rs, false); 2281 /* no more pages to send */ 2282 if (pages == 0) { 2283 done = 1; 2284 break; 2285 } 2286 rs->iterations++; 2287 2288 /* we want to check in the 1st loop, just in case it was the 1st time 2289 and we had to sync the dirty bitmap. 2290 qemu_clock_get_ns() is a bit expensive, so we only check once 2291 every few iterations 2292 */ 2293 if ((i & 63) == 0) { 2294 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 2295 if (t1 > MAX_WAIT) { 2296 trace_ram_save_iterate_big_wait(t1, i); 2297 break; 2298 } 2299 } 2300 i++; 2301 } 2302 flush_compressed_data(rs); 2303 rcu_read_unlock(); 2304 2305 /* 2306 * Must occur before EOS (or any QEMUFile operation) 2307 * because of RDMA protocol.
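 * (The after-iterate hook gives an RDMA transport a chance to flush any outstanding transfers before the EOS marker below goes onto the wire.)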
2308 */ 2309 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 2310 2311 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2312 ram_counters.transferred += 8; 2313 2314 ret = qemu_file_get_error(f); 2315 if (ret < 0) { 2316 return ret; 2317 } 2318 2319 return done; 2320 } 2321 2322 /** 2323 * ram_save_complete: function called to send the remaining amount of RAM 2324 * 2325 * Returns zero to indicate success 2326 * 2327 * Called with the iothread lock held 2328 * 2329 * @f: QEMUFile where to send the data 2330 * @opaque: RAMState pointer 2331 */ 2332 static int ram_save_complete(QEMUFile *f, void *opaque) 2333 { 2334 RAMState **temp = opaque; 2335 RAMState *rs = *temp; 2336 2337 rcu_read_lock(); 2338 2339 if (!migration_in_postcopy()) { 2340 migration_bitmap_sync(rs); 2341 } 2342 2343 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 2344 2345 /* try transferring iterative blocks of memory */ 2346 2347 /* flush all remaining blocks regardless of rate limiting */ 2348 while (true) { 2349 int pages; 2350 2351 pages = ram_find_and_save_block(rs, !migration_in_colo_state()); 2352 /* no more blocks to send */ 2353 if (pages == 0) { 2354 break; 2355 } 2356 } 2357 2358 flush_compressed_data(rs); 2359 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 2360 2361 rcu_read_unlock(); 2362 2363 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2364 2365 return 0; 2366 } 2367 2368 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, 2369 uint64_t *non_postcopiable_pending, 2370 uint64_t *postcopiable_pending) 2371 { 2372 RAMState **temp = opaque; 2373 RAMState *rs = *temp; 2374 uint64_t remaining_size; 2375 2376 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2377 2378 if (!migration_in_postcopy() && 2379 remaining_size < max_size) { 2380 qemu_mutex_lock_iothread(); 2381 rcu_read_lock(); 2382 migration_bitmap_sync(rs); 2383 rcu_read_unlock(); 2384 qemu_mutex_unlock_iothread(); 2385 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2386 } 2387 2388 if (migrate_postcopy_ram()) { 2389 /* We can do postcopy, and all the data is postcopiable */ 2390 *postcopiable_pending += remaining_size; 2391 } else { 2392 *non_postcopiable_pending += remaining_size; 2393 } 2394 } 2395 2396 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 2397 { 2398 unsigned int xh_len; 2399 int xh_flags; 2400 uint8_t *loaded_data; 2401 2402 /* extract RLE header */ 2403 xh_flags = qemu_get_byte(f); 2404 xh_len = qemu_get_be16(f); 2405 2406 if (xh_flags != ENCODING_FLAG_XBZRLE) { 2407 error_report("Failed to load XBZRLE page - wrong compression!"); 2408 return -1; 2409 } 2410 2411 if (xh_len > TARGET_PAGE_SIZE) { 2412 error_report("Failed to load XBZRLE page - len overflow!"); 2413 return -1; 2414 } 2415 loaded_data = XBZRLE.decoded_buf; 2416 /* load data and decode */ 2417 /* it can change loaded_data to point to an internal buffer */ 2418 qemu_get_buffer_in_place(f, &loaded_data, xh_len); 2419 2420 /* decode RLE */ 2421 if (xbzrle_decode_buffer(loaded_data, xh_len, host, 2422 TARGET_PAGE_SIZE) == -1) { 2423 error_report("Failed to load XBZRLE page - decode error!"); 2424 return -1; 2425 } 2426 2427 return 0; 2428 } 2429 2430 /** 2431 * ram_block_from_stream: read a RAMBlock id from the migration stream 2432 * 2433 * Must be called from within an RCU critical section. 2434 * 2435 * Returns a pointer from within the RCU-protected ram_list.
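 * The block name is only present in the stream when the block changes; pages from the same block carry RAM_SAVE_FLAG_CONTINUE and reuse the block cached in the static variable below.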
2436 * 2437 * @f: QEMUFile where to read the data from 2438 * @flags: Page flags (mostly to see if it's a continuation of previous block) 2439 */ 2440 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) 2441 { 2442 static RAMBlock *block = NULL; 2443 char id[256]; 2444 uint8_t len; 2445 2446 if (flags & RAM_SAVE_FLAG_CONTINUE) { 2447 if (!block) { 2448 error_report("Ack, bad migration stream!"); 2449 return NULL; 2450 } 2451 return block; 2452 } 2453 2454 len = qemu_get_byte(f); 2455 qemu_get_buffer(f, (uint8_t *)id, len); 2456 id[len] = 0; 2457 2458 block = qemu_ram_block_by_name(id); 2459 if (!block) { 2460 error_report("Can't find block %s", id); 2461 return NULL; 2462 } 2463 2464 return block; 2465 } 2466 2467 static inline void *host_from_ram_block_offset(RAMBlock *block, 2468 ram_addr_t offset) 2469 { 2470 if (!offset_in_ramblock(block, offset)) { 2471 return NULL; 2472 } 2473 2474 return block->host + offset; 2475 } 2476 2477 /** 2478 * ram_handle_compressed: handle the zero page case 2479 * 2480 * If a page (or a whole RDMA chunk) has been 2481 * determined to be zero, then zap it. 2482 * 2483 * @host: host address for the zero page 2484 * @ch: what the page is filled from. We only support zero 2485 * @size: size of the zero page 2486 */ 2487 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 2488 { 2489 if (ch != 0 || !is_zero_range(host, size)) { 2490 memset(host, ch, size); 2491 } 2492 } 2493 2494 static void *do_data_decompress(void *opaque) 2495 { 2496 DecompressParam *param = opaque; 2497 unsigned long pagesize; 2498 uint8_t *des; 2499 int len; 2500 2501 qemu_mutex_lock(&param->mutex); 2502 while (!param->quit) { 2503 if (param->des) { 2504 des = param->des; 2505 len = param->len; 2506 param->des = 0; 2507 qemu_mutex_unlock(&param->mutex); 2508 2509 pagesize = TARGET_PAGE_SIZE; 2510 /* uncompress() can fail in some cases, especially 2511 * when the page was dirtied while it was being compressed; that's 2512 * not a problem because the dirty page will be retransferred 2513 * and uncompress() won't break the data in other pages.
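 * (Note that zlib's uncompress() treats its destination-length argument as in/out: 'pagesize' is updated with the number of bytes actually produced, and the return code is ignored here for the reason above.)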
2514 */ 2515 uncompress((Bytef *)des, &pagesize, 2516 (const Bytef *)param->compbuf, len); 2517 2518 qemu_mutex_lock(&decomp_done_lock); 2519 param->done = true; 2520 qemu_cond_signal(&decomp_done_cond); 2521 qemu_mutex_unlock(&decomp_done_lock); 2522 2523 qemu_mutex_lock(&param->mutex); 2524 } else { 2525 qemu_cond_wait(&param->cond, &param->mutex); 2526 } 2527 } 2528 qemu_mutex_unlock(&param->mutex); 2529 2530 return NULL; 2531 } 2532 2533 static void wait_for_decompress_done(void) 2534 { 2535 int idx, thread_count; 2536 2537 if (!migrate_use_compression()) { 2538 return; 2539 } 2540 2541 thread_count = migrate_decompress_threads(); 2542 qemu_mutex_lock(&decomp_done_lock); 2543 for (idx = 0; idx < thread_count; idx++) { 2544 while (!decomp_param[idx].done) { 2545 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2546 } 2547 } 2548 qemu_mutex_unlock(&decomp_done_lock); 2549 } 2550 2551 static void compress_threads_load_setup(void) 2552 { 2553 int i, thread_count; 2554 2555 if (!migrate_use_compression()) { 2556 return; 2557 } 2558 thread_count = migrate_decompress_threads(); 2559 decompress_threads = g_new0(QemuThread, thread_count); 2560 decomp_param = g_new0(DecompressParam, thread_count); 2561 qemu_mutex_init(&decomp_done_lock); 2562 qemu_cond_init(&decomp_done_cond); 2563 for (i = 0; i < thread_count; i++) { 2564 qemu_mutex_init(&decomp_param[i].mutex); 2565 qemu_cond_init(&decomp_param[i].cond); 2566 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); 2567 decomp_param[i].done = true; 2568 decomp_param[i].quit = false; 2569 qemu_thread_create(decompress_threads + i, "decompress", 2570 do_data_decompress, decomp_param + i, 2571 QEMU_THREAD_JOINABLE); 2572 } 2573 } 2574 2575 static void compress_threads_load_cleanup(void) 2576 { 2577 int i, thread_count; 2578 2579 if (!migrate_use_compression()) { 2580 return; 2581 } 2582 thread_count = migrate_decompress_threads(); 2583 for (i = 0; i < thread_count; i++) { 2584 qemu_mutex_lock(&decomp_param[i].mutex); 2585 decomp_param[i].quit = true; 2586 qemu_cond_signal(&decomp_param[i].cond); 2587 qemu_mutex_unlock(&decomp_param[i].mutex); 2588 } 2589 for (i = 0; i < thread_count; i++) { 2590 qemu_thread_join(decompress_threads + i); 2591 qemu_mutex_destroy(&decomp_param[i].mutex); 2592 qemu_cond_destroy(&decomp_param[i].cond); 2593 g_free(decomp_param[i].compbuf); 2594 } 2595 g_free(decompress_threads); 2596 g_free(decomp_param); 2597 decompress_threads = NULL; 2598 decomp_param = NULL; 2599 } 2600 2601 static void decompress_data_with_multi_threads(QEMUFile *f, 2602 void *host, int len) 2603 { 2604 int idx, thread_count; 2605 2606 thread_count = migrate_decompress_threads(); 2607 qemu_mutex_lock(&decomp_done_lock); 2608 while (true) { 2609 for (idx = 0; idx < thread_count; idx++) { 2610 if (decomp_param[idx].done) { 2611 decomp_param[idx].done = false; 2612 qemu_mutex_lock(&decomp_param[idx].mutex); 2613 qemu_get_buffer(f, decomp_param[idx].compbuf, len); 2614 decomp_param[idx].des = host; 2615 decomp_param[idx].len = len; 2616 qemu_cond_signal(&decomp_param[idx].cond); 2617 qemu_mutex_unlock(&decomp_param[idx].mutex); 2618 break; 2619 } 2620 } 2621 if (idx < thread_count) { 2622 break; 2623 } else { 2624 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2625 } 2626 } 2627 qemu_mutex_unlock(&decomp_done_lock); 2628 } 2629 2630 /** 2631 * ram_load_setup: Setup RAM for migration incoming side 2632 * 2633 * Returns zero to indicate success and negative for error 2634 * 2635 * @f: QEMUFile where to receive the data 2636 * @opaque: RAMState
pointer 2637 */ 2638 static int ram_load_setup(QEMUFile *f, void *opaque) 2639 { 2640 xbzrle_load_setup(); 2641 compress_threads_load_setup(); 2642 ramblock_recv_map_init(); 2643 return 0; 2644 } 2645 2646 static int ram_load_cleanup(void *opaque) 2647 { 2648 RAMBlock *rb; 2649 xbzrle_load_cleanup(); 2650 compress_threads_load_cleanup(); 2651 2652 RAMBLOCK_FOREACH(rb) { 2653 g_free(rb->receivedmap); 2654 rb->receivedmap = NULL; 2655 } 2656 return 0; 2657 } 2658 2659 /** 2660 * ram_postcopy_incoming_init: allocate postcopy data structures 2661 * 2662 * Returns 0 for success and negative on error 2663 * 2664 * @mis: current migration incoming state 2665 * 2666 * Allocate data structures etc needed by incoming migration with 2667 * postcopy-ram. postcopy-ram's similarly named 2668 * postcopy_ram_incoming_init does the work. 2669 */ 2670 int ram_postcopy_incoming_init(MigrationIncomingState *mis) 2671 { 2672 unsigned long ram_pages = last_ram_page(); 2673 2674 return postcopy_ram_incoming_init(mis, ram_pages); 2675 } 2676 2677 /** 2678 * ram_load_postcopy: load a page in postcopy case 2679 * 2680 * Returns 0 for success or -errno in case of error 2681 * 2682 * Called in postcopy mode by ram_load(). 2683 * rcu_read_lock is taken prior to this being called. 2684 * 2685 * @f: QEMUFile where to receive the data from 2686 */ 2687 static int ram_load_postcopy(QEMUFile *f) 2688 { 2689 int flags = 0, ret = 0; 2690 bool place_needed = false; 2691 bool matching_page_sizes = false; 2692 MigrationIncomingState *mis = migration_incoming_get_current(); 2693 /* Temporary page that is later 'placed' */ 2694 void *postcopy_host_page = postcopy_get_tmp_page(mis); 2695 void *last_host = NULL; 2696 bool all_zero = false; 2697 2698 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { 2699 ram_addr_t addr; 2700 void *host = NULL; 2701 void *page_buffer = NULL; 2702 void *place_source = NULL; 2703 RAMBlock *block = NULL; 2704 uint8_t ch; 2705 2706 addr = qemu_get_be64(f); 2707 flags = addr & ~TARGET_PAGE_MASK; 2708 addr &= TARGET_PAGE_MASK; 2709 2710 trace_ram_load_postcopy_loop((uint64_t)addr, flags); 2711 place_needed = false; 2712 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) { 2713 block = ram_block_from_stream(f, flags); 2714 2715 host = host_from_ram_block_offset(block, addr); 2716 if (!host) { 2717 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 2718 ret = -EINVAL; 2719 break; 2720 } 2721 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE; 2722 /* 2723 * Postcopy requires that we place whole host pages atomically; 2724 * these may be huge pages for RAMBlocks that are backed by 2725 * hugetlbfs. 2726 * To make it atomic, the data is read into a temporary page 2727 * that's moved into place later. 2728 * The migration protocol uses, possibly smaller, target pages; 2729 * however, the source ensures it always sends all the components 2730 * of a host page in order.
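 * 'page_buffer' below therefore points at this target page's offset within the temporary host page; the host page is only placed once its final target page has arrived.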
2731 */ 2732 page_buffer = postcopy_host_page + 2733 ((uintptr_t)host & (block->page_size - 1)); 2734 /* If all TP are zero then we can optimise the place */ 2735 if (!((uintptr_t)host & (block->page_size - 1))) { 2736 all_zero = true; 2737 } else { 2738 /* not the 1st TP within the HP */ 2739 if (host != (last_host + TARGET_PAGE_SIZE)) { 2740 error_report("Non-sequential target page %p/%p", 2741 host, last_host); 2742 ret = -EINVAL; 2743 break; 2744 } 2745 } 2746 2747 2748 /* 2749 * If it's the last part of a host page then we place the host 2750 * page 2751 */ 2752 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & 2753 (block->page_size - 1)) == 0; 2754 place_source = postcopy_host_page; 2755 } 2756 last_host = host; 2757 2758 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2759 case RAM_SAVE_FLAG_ZERO: 2760 ch = qemu_get_byte(f); 2761 memset(page_buffer, ch, TARGET_PAGE_SIZE); 2762 if (ch) { 2763 all_zero = false; 2764 } 2765 break; 2766 2767 case RAM_SAVE_FLAG_PAGE: 2768 all_zero = false; 2769 if (!place_needed || !matching_page_sizes) { 2770 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); 2771 } else { 2772 /* Avoids the qemu_file copy during postcopy, which is 2773 * going to do a copy later; can only do it when we 2774 * do this read in one go (matching page sizes) 2775 */ 2776 qemu_get_buffer_in_place(f, (uint8_t **)&place_source, 2777 TARGET_PAGE_SIZE); 2778 } 2779 break; 2780 case RAM_SAVE_FLAG_EOS: 2781 /* normal exit */ 2782 break; 2783 default: 2784 error_report("Unknown combination of migration flags: %#x" 2785 " (postcopy mode)", flags); 2786 ret = -EINVAL; 2787 } 2788 2789 if (place_needed) { 2790 /* This gets called at the last target page in the host page */ 2791 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; 2792 2793 if (all_zero) { 2794 ret = postcopy_place_page_zero(mis, place_dest, 2795 block); 2796 } else { 2797 ret = postcopy_place_page(mis, place_dest, 2798 place_source, block); 2799 } 2800 } 2801 if (!ret) { 2802 ret = qemu_file_get_error(f); 2803 } 2804 } 2805 2806 return ret; 2807 } 2808 2809 static int ram_load(QEMUFile *f, void *opaque, int version_id) 2810 { 2811 int flags = 0, ret = 0, invalid_flags = 0; 2812 static uint64_t seq_iter; 2813 int len = 0; 2814 /* 2815 * If system is running in postcopy mode, page inserts to host memory must 2816 * be atomic 2817 */ 2818 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; 2819 /* ADVISE is earlier, it shows the source has the postcopy capability on */ 2820 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE; 2821 2822 seq_iter++; 2823 2824 if (version_id != 4) { 2825 ret = -EINVAL; 2826 } 2827 2828 if (!migrate_use_compression()) { 2829 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; 2830 } 2831 /* This RCU critical section can be very long running. 2832 * When RCU reclaims in the code start to become numerous, 2833 * it will be necessary to reduce the granularity of this 2834 * critical section. 
2835 */ 2836 rcu_read_lock(); 2837 2838 if (postcopy_running) { 2839 ret = ram_load_postcopy(f); 2840 } 2841 2842 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { 2843 ram_addr_t addr, total_ram_bytes; 2844 void *host = NULL; 2845 uint8_t ch; 2846 2847 addr = qemu_get_be64(f); 2848 flags = addr & ~TARGET_PAGE_MASK; 2849 addr &= TARGET_PAGE_MASK; 2850 2851 if (flags & invalid_flags) { 2852 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { 2853 error_report("Received an unexpected compressed page"); 2854 } 2855 2856 ret = -EINVAL; 2857 break; 2858 } 2859 2860 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 2861 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { 2862 RAMBlock *block = ram_block_from_stream(f, flags); 2863 2864 host = host_from_ram_block_offset(block, addr); 2865 if (!host) { 2866 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 2867 ret = -EINVAL; 2868 break; 2869 } 2870 ramblock_recv_bitmap_set(block, host); 2871 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); 2872 } 2873 2874 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2875 case RAM_SAVE_FLAG_MEM_SIZE: 2876 /* Synchronize RAM block list */ 2877 total_ram_bytes = addr; 2878 while (!ret && total_ram_bytes) { 2879 RAMBlock *block; 2880 char id[256]; 2881 ram_addr_t length; 2882 2883 len = qemu_get_byte(f); 2884 qemu_get_buffer(f, (uint8_t *)id, len); 2885 id[len] = 0; 2886 length = qemu_get_be64(f); 2887 2888 block = qemu_ram_block_by_name(id); 2889 if (block) { 2890 if (length != block->used_length) { 2891 Error *local_err = NULL; 2892 2893 ret = qemu_ram_resize(block, length, 2894 &local_err); 2895 if (local_err) { 2896 error_report_err(local_err); 2897 } 2898 } 2899 /* For postcopy we need to check hugepage sizes match */ 2900 if (postcopy_advised && 2901 block->page_size != qemu_host_page_size) { 2902 uint64_t remote_page_size = qemu_get_be64(f); 2903 if (remote_page_size != block->page_size) { 2904 error_report("Mismatched RAM page size %s " 2905 "(local) %zd != %" PRId64, 2906 id, block->page_size, 2907 remote_page_size); 2908 ret = -EINVAL; 2909 } 2910 } 2911 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, 2912 block->idstr); 2913 } else { 2914 error_report("Unknown ramblock \"%s\", cannot " 2915 "accept migration", id); 2916 ret = -EINVAL; 2917 } 2918 2919 total_ram_bytes -= length; 2920 } 2921 break; 2922 2923 case RAM_SAVE_FLAG_ZERO: 2924 ch = qemu_get_byte(f); 2925 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 2926 break; 2927 2928 case RAM_SAVE_FLAG_PAGE: 2929 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 2930 break; 2931 2932 case RAM_SAVE_FLAG_COMPRESS_PAGE: 2933 len = qemu_get_be32(f); 2934 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 2935 error_report("Invalid compressed data length: %d", len); 2936 ret = -EINVAL; 2937 break; 2938 } 2939 decompress_data_with_multi_threads(f, host, len); 2940 break; 2941 2942 case RAM_SAVE_FLAG_XBZRLE: 2943 if (load_xbzrle(f, addr, host) < 0) { 2944 error_report("Failed to decompress XBZRLE page at " 2945 RAM_ADDR_FMT, addr); 2946 ret = -EINVAL; 2947 break; 2948 } 2949 break; 2950 case RAM_SAVE_FLAG_EOS: 2951 /* normal exit */ 2952 break; 2953 default: 2954 if (flags & RAM_SAVE_FLAG_HOOK) { 2955 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); 2956 } else { 2957 error_report("Unknown combination of migration flags: %#x", 2958 flags); 2959 ret = -EINVAL; 2960 } 2961 } 2962 if (!ret) { 2963 ret = qemu_file_get_error(f); 2964 } 2965 } 2966 2967 wait_for_decompress_done(); 2968 rcu_read_unlock(); 2969 
trace_ram_load_complete(ret, seq_iter); 2970 return ret; 2971 } 2972 2973 static bool ram_has_postcopy(void *opaque) 2974 { 2975 return migrate_postcopy_ram(); 2976 } 2977 2978 static SaveVMHandlers savevm_ram_handlers = { 2979 .save_setup = ram_save_setup, 2980 .save_live_iterate = ram_save_iterate, 2981 .save_live_complete_postcopy = ram_save_complete, 2982 .save_live_complete_precopy = ram_save_complete, 2983 .has_postcopy = ram_has_postcopy, 2984 .save_live_pending = ram_save_pending, 2985 .load_state = ram_load, 2986 .save_cleanup = ram_save_cleanup, 2987 .load_setup = ram_load_setup, 2988 .load_cleanup = ram_load_cleanup, 2989 }; 2990 2991 void ram_mig_init(void) 2992 { 2993 qemu_mutex_init(&XBZRLE.lock); 2994 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); 2995 } 2996