/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "migration/block.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  It was renamed to avoid
 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
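
/*
 * Illustrative note (not in the original source): the RAM_SAVE_FLAG_* values
 * above share a single be64 word with the page offset on the wire (see
 * save_page_header() below).  Page offsets are TARGET_PAGE_SIZE aligned, so
 * the low bits are always zero and can carry flags, e.g. with 4KiB target
 * pages:
 *
 *     offset 0x3000 | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE
 *         == 0x3000 | 0x08 | 0x20 == 0x3028
 *
 * The receiving side is then expected to mask off the low bits to recover
 * the offset and the flag bits to recover the page type.
 */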

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}
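
/*
 * Illustrative note (not part of the original file): receivedmap has one bit
 * per target page of the RAMBlock, indexed by the page's offset inside the
 * block.  A destination-side caller that has just placed a whole host page
 * could, for example, mark it received with something like:
 *
 *     size_t pages = qemu_ram_pagesize(rb) / qemu_target_page_size();
 *     ramblock_recv_bitmap_set_range(rb, host_addr, pages);
 *
 * (hypothetical usage sketch; the real callers live in the postcopy code)
 */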

/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
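
/*
 * Illustrative summary (not part of the original file) of the handshake
 * implemented by do_data_compress() above and compress_page_with_multi_thread()
 * further below, one CompressParam per worker thread:
 *
 *   migration thread                          compression thread idx
 *   ----------------                          ----------------------
 *   find an idx with comp_param[idx].done     sleeps in qemu_cond_wait(&param->cond)
 *   done = false, flush param->file into
 *   the migration stream, set block/offset,
 *   qemu_cond_signal(&param->cond)      --->  wakes, do_compress_ram_page()
 *                                             into param->file, done = true,
 *   qemu_cond_wait(&comp_done_cond)     <---  qemu_cond_signal(&comp_done_cond)
 *
 * param->mutex protects block/offset and quit; comp_done_lock/comp_done_cond
 * publish 'done' back to the migration thread.
 */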

static void compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/* Multiple fd's */

struct MultiFDSendParams {
    uint8_t id;
    char *name;
    QemuThread thread;
    QemuSemaphore sem;
    QemuMutex mutex;
    bool quit;
};
typedef struct MultiFDSendParams MultiFDSendParams;

struct {
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
} *multifd_send_state;

static void terminate_multifd_send_threads(Error *errp)
{
    int i;

    for (i = 0; i < multifd_send_state->count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}

int multifd_save_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    terminate_multifd_send_threads(NULL);
    for (i = 0; i < multifd_send_state->count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_thread_join(&p->thread);
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
    }
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
    return ret;
}

static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;

    while (true) {
        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
    }

    return NULL;
}

int multifd_save_setup(void)
{
    int thread_count;
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->count = 0;
    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->id = i;
        p->name = g_strdup_printf("multifdsend_%d", i);
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);

        multifd_send_state->count++;
    }
    return 0;
}

struct MultiFDRecvParams {
    uint8_t id;
    char *name;
    QemuThread thread;
    QemuSemaphore sem;
    QemuMutex mutex;
    bool quit;
};
typedef struct MultiFDRecvParams MultiFDRecvParams;

struct {
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
} *multifd_recv_state;

static void terminate_multifd_recv_threads(Error *errp)
{
    int i;

    for (i = 0; i < multifd_recv_state->count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
    }
}
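
/*
 * Illustrative note (not part of the original file): the multifd send and
 * receive sides follow the same skeleton.  The worker threads above and
 * below do no I/O yet; they simply sleep on their semaphore until 'quit'
 * is set.  A hypothetical caller is expected to pair setup and cleanup:
 *
 *     if (multifd_save_setup()) {        // spawns migrate_multifd_channels() threads
 *         // handle error
 *     }
 *     ...
 *     multifd_save_cleanup(&local_err);  // sets quit, posts sem, joins threads
 *
 * (usage sketch only; the real call sites live elsewhere in the migration code)
 */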

int multifd_load_cleanup(Error **errp)
{
    int i;
    int ret = 0;

    if (!migrate_use_multifd()) {
        return 0;
    }
    terminate_multifd_recv_threads(NULL);
    for (i = 0; i < multifd_recv_state->count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_thread_join(&p->thread);
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        g_free(p->name);
        p->name = NULL;
    }
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;

    return ret;
}

static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;

    while (true) {
        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
    }

    return NULL;
}

int multifd_load_setup(void)
{
    int thread_count;
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }
    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    multifd_recv_state->count = 0;
    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->quit = false;
        p->id = i;
        p->name = g_strdup_printf("multifdrecv_%d", i);
        qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                           QEMU_THREAD_JOINABLE);
        multifd_recv_state->count++;
    }
    return 0;
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
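
/*
 * Illustrative wire layout (not part of the original file), derived from
 * save_page_header() above.  For the first page sent from a block:
 *
 *     be64  offset | flags                 (8 bytes)
 *     u8    strlen(block->idstr)           (1 byte)
 *     bytes block->idstr                   (len bytes, no NUL)
 *     ...page payload depending on flags...
 *
 * For subsequent pages from the same block, RAM_SAVE_FLAG_CONTINUE is set
 * and only the 8-byte header word is emitted before the payload.
 */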

/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1
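
/*
 * Illustrative note (not part of the original file): an XBZRLE-encoded page
 * produced by save_xbzrle_page() below goes on the wire as
 *
 *     be64  offset | RAM_SAVE_FLAG_XBZRLE  (plus CONTINUE/idstr, see
 *                                           save_page_header())
 *     u8    ENCODING_FLAG_XBZRLE
 *     be16  encoded_len
 *     bytes encoded_buf                     (encoded_len bytes)
 *
 * so a page where only a few bytes changed costs a handful of bytes instead
 * of TARGET_PAGE_SIZE.
 */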

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}

/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the page offset within the memory region of the start of a dirty
 * page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 */
static inline
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                          unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long next;

    if (rs->ram_bulk_stage && start > 0) {
        next = start + 1;
    } else {
        next = find_next_bit(bitmap, size, start);
    }

    return next;
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    return ret;
}

static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                        ram_addr_t start, ram_addr_t length)
{
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
                                              &rs->num_dirty_pages_period);
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH(block) {
        summary |= block->page_size;
    }

    return summary;
}
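
/*
 * Illustrative example (not part of the original file): on an x86-64 host
 * where most RAM uses 4KiB pages and one RAMBlock is backed by 2MiB
 * hugepages, ram_pagesize_summary() returns
 *
 *     0x1000 | 0x200000 == 0x201000
 *
 * i.e. a bitmap with one bit per distinct page size in use.
 */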

static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;
    uint64_t bytes_xfer_now;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        migration_bitmap_sync_range(rs, block, 0, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&rs->bitmap_mutex);

    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        /* calculate period counters */
        ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        bytes_xfer_now = ram_counters.transferred;

        /* During block migration the auto-converge logic incorrectly detects
         * that ram migration makes no progress. Avoid this by disabling the
         * throttling logic during the bulk phase of block migration. */
        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes are 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */

            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (++rs->dirty_rate_high_cnt >= 2)) {
                    trace_migration_throttle();
                    rs->dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();
            }
        }

        if (migrate_use_xbzrle()) {
            if (rs->iterations_prev != rs->iterations) {
                xbzrle_counters.cache_miss_rate =
                   (double)(xbzrle_counters.cache_miss -
                            rs->xbzrle_cache_miss_prev) /
                   (rs->iterations - rs->iterations_prev);
            }
            rs->iterations_prev = rs->iterations;
            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        }

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = bytes_xfer_now;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
    }
}

/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    uint8_t *p = block->host + offset;
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        ram_counters.duplicate++;
        ram_counters.transferred +=
            save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(rs->f, 0);
        ram_counters.transferred += 1;
        pages = 1;
    }

    return pages;
}

static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy()) {
        return;
    }

    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
}
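
/*
 * Illustrative cost comparison (not part of the original file): a zero page
 * detected by save_zero_page() costs only the page header plus one zero byte
 * on the wire, e.g. with 4KiB target pages and a continued block:
 *
 *     8 (be64 header) + 1 (zero byte) = 9 bytes    vs    8 + 4096 bytes
 *
 * for a RAM_SAVE_FLAG_PAGE full copy of the same page.
 */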

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    /* When in doubt, send the page as a normal page */
    bytes_xmit = 0;
    ret = ram_control_save_page(rs->f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                ram_counters.normal++;
            } else if (bytes_xmit == 0) {
                ram_counters.duplicate++;
            }
        }
    } else {
        pages = save_zero_page(rs, block, offset);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(rs, current_addr);
            ram_release_pages(block->idstr, offset, pages);
        } else if (!rs->ram_bulk_stage &&
                   !migration_in_postcopy() && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(rs, &p, current_addr, block,
                                     offset, last_stage);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        ram_counters.transferred +=
            save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
                                  migrate_release_ram() &
                                  migration_in_postcopy());
        } else {
            qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
        }
        ram_counters.transferred += TARGET_PAGE_SIZE;
        pages = 1;
        ram_counters.normal++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    RAMState *rs = ram_state;
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(rs, f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}

static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            ram_counters.transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                ram_counters.normal++;
                ram_counters.transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
                                    bool last_stage)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    p = block->host + offset;

    ret = ram_control_save_page(rs->f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        ram_counters.transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                ram_counters.normal++;
            } else if (bytes_xmit == 0) {
                ram_counters.duplicate++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in the last block should have been
         * sent out; keeping this order is important, because the 'cont'
         * flag is used to avoid resending the block name.
         */
        if (block != rs->last_sent_block) {
            flush_compressed_data(rs);
            pages = save_zero_page(rs, block, offset);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(rs, rs->f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    ram_counters.transferred += bytes_xmit + blen;
                    ram_counters.normal++;
                    pages = 1;
                } else {
                    qemu_file_set_error(rs->f, blen);
                    error_report("compressed data failed!");
                }
            }
            if (pages > 0) {
                ram_release_pages(block->idstr, offset, pages);
            }
        } else {
            pages = save_zero_page(rs, block, offset);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(rs, block, offset);
            } else {
                ram_release_pages(block->idstr, offset, pages);
            }
        }
    }

    return pages;
}

/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 *
 * Returns whether a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(rs);
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * unqueue_page: gets a page off the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return block;
}

/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns whether a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock  *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                       page, test_bit(page, block->unsentmap));
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;
    }

    return !!block;
}
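
/*
 * Illustrative flow (not part of the original file): when the destination
 * faults on a not-yet-received page during postcopy, the source ends up in
 * ram_save_queue_pages() below, e.g. for one hypothetical 2MiB request with
 * 4KiB target pages:
 *
 *     ram_save_queue_pages("pc.ram", 0x200000, 0x200000);
 *
 * get_queued_page()/unqueue_page() above then hand the request back to the
 * scanner one target page at a time (512 calls in this example), skipping
 * any page whose dirty bit has already been cleared by the background scan.
 */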

/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there
 * may be some left.  In that case we drop them.
 *
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}

/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @rbname: Name of the RAMBlock of the request. NULL means the
 *          same as the last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    rcu_read_lock();
    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start + len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct RAMSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct RAMSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&rs->src_page_req_mutex);
    rcu_read_unlock();

    return 0;

err:
    rcu_read_unlock();
    return -1;
}

/**
 * ram_save_target_page: save one target page
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
                                bool last_stage)
{
    int res = 0;

    /* Check if the page is dirty and if it is, send it */
    if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
        /*
         * If xbzrle is on, stop using the data compression after first
         * round of migration even if compression is enabled. In theory,
         * xbzrle can do better than compression.
         */
        if (migrate_use_compression() &&
            (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
            res = ram_save_compressed_page(rs, pss, last_stage);
        } else {
            res = ram_save_page(rs, pss, last_stage);
        }

        if (res < 0) {
            return res;
        }
        if (pss->block->unsentmap) {
            clear_bit(pss->page, pss->block->unsentmap);
        }
    }

    return res;
}

/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at *offset send pages up to the end of the current host
 * page. It's valid for the initial offset to point into the middle of
 * a host page in which case the remainder of the hostpage is sent.
 * Only dirty target pages are sent. Note that the host page size may
 * be a huge page for this block.
 * The saving stops at the boundary of the used_length of the block
 * if the RAMBlock isn't a multiple of the host page size.
 *
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
                              bool last_stage)
{
    int tmppages, pages = 0;
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;

    do {
        tmppages = ram_save_target_page(rs, pss, last_stage);
        if (tmppages < 0) {
            return tmppages;
        }

        pages += tmppages;
        pss->page++;
    } while ((pss->page & (pagesize_bits - 1)) &&
             offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));

    /* The offset we leave with is the last one we looked at */
    pss->page--;
    return pages;
}
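
/*
 * Illustrative example (not part of the original file): for a RAMBlock
 * backed by 2MiB hugepages and 4KiB target pages, pagesize_bits above is
 * 512, so one call to ram_save_host_page() walks at most the 512 target
 * pages of the current hugepage, sending only the dirty ones, and stops as
 * soon as pss->page crosses the 2MiB boundary (or the block's used_length).
 */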

/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns the number of pages written where zero means no dirty pages
 *
 * @rs: current RAM state
 * @last_stage: if we are at the completion stage
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */

static int ram_find_and_save_block(RAMState *rs, bool last_stage)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {
        return pages;
    }

    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(rs, &pss);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(rs, &pss, &again);
        }

        if (found) {
            pages = ram_save_host_page(rs, &pss, last_stage);
        }
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;

    return pages;
}

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;

    if (zero) {
        ram_counters.duplicate += pages;
    } else {
        ram_counters.normal += pages;
        ram_counters.transferred += size;
        qemu_update_position(f, size);
    }
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    RAMBLOCK_FOREACH(block) {
        total += block->used_length;
    }
    rcu_read_unlock();
    return total;
}

static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}

static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}

static void ram_state_cleanup(RAMState **rsp)
{
    if (*rsp) {
        migration_page_queue_free(*rsp);
        qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
        qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
        g_free(*rsp);
        *rsp = NULL;
    }
}

static void xbzrle_cleanup(void)
{
    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.zero_target_page);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
        XBZRLE.zero_target_page = NULL;
    }
    XBZRLE_cache_unlock();
}

static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* The caller holds the iothread lock or is in a bh, so there is
     * no writing race against this migration_bitmap
     */
    memory_global_dirty_log_stop();

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        g_free(block->bmap);
        block->bmap = NULL;
        g_free(block->unsentmap);
        block->unsentmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}

static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    rs->ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

/*
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * If 'todump' is null the migration bitmap is dumped.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}

/* **** functions for postcopy ***** */

void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
                              (run_end - run_start) << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}

/**
 * postcopy_send_discard_bm_ram: discard a RAMBlock
 *
 * Returns zero on success
 *
 * Callback from postcopy_each_ram_send_discard for each RAMBlock
 * Note: At this point the 'unsentmap' is the processed bitmap combined
 *       with the dirtymap; so a '1' means it's either dirty or unsent.
 *
 * @ms: current migration state
 * @pds: state for postcopy
 * @block: RAMBlock to discard
 */
static int postcopy_send_discard_bm_ram(MigrationState *ms,
                                        PostcopyDiscardState *pds,
                                        RAMBlock *block)
{
    unsigned long end = block->used_length >> TARGET_PAGE_BITS;
    unsigned long current;
    unsigned long *unsentmap = block->unsentmap;

    for (current = 0; current < end; ) {
        unsigned long one = find_next_bit(unsentmap, end, current);

        if (one <= end) {
            unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
            unsigned long discard_length;

            if (zero >= end) {
                discard_length = end - one;
            } else {
                discard_length = zero - one;
            }
            if (discard_length) {
                postcopy_discard_send_range(ms, pds, one, discard_length);
            }
            current = one + discard_length;
        } else {
            current = one;
        }
    }

    return 0;
}
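
/*
 * Illustrative example (not part of the original file): with an unsentmap
 * of ...0011111000... where bits 3..7 are set, the loop above finds
 * one == 3, zero == 8, and emits a single
 *
 *     postcopy_discard_send_range(ms, pds, 3, 5);
 *
 * i.e. runs of '1' bits are coalesced into (start, length) discard ranges.
 */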

/**
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 *
 * Returns 0 for success or negative for error
 *
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 *
 * @ms: current migration state
 */
static int postcopy_each_ram_send_discard(MigrationState *ms)
{
    struct RAMBlock *block;
    int ret;

    RAMBLOCK_FOREACH(block) {
        PostcopyDiscardState *pds =
            postcopy_discard_send_init(ms, block->idstr);

        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */
        ret = postcopy_send_discard_bm_ram(ms, pds, block);
        postcopy_discard_send_finish(ms, pds);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
 * inverted.
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix.  This function canonicalizes the bitmaps.
 *
 * @ms: current migration state
 * @unsent_pass: if true we need to canonicalize partially unsent host pages
 *               otherwise we need to canonicalize partially dirty host pages
 * @block: block that contains the page we want to canonicalize
 * @pds: state for postcopy
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned long *unsentmap = block->unsentmap;
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    if (unsent_pass) {
        /* Find a sent page */
        run_start = find_next_zero_bit(unsentmap, pages, 0);
    } else {
        /* Find a dirty page */
        run_start = find_next_bit(bitmap, pages, 0);
    }

    while (run_start < pages) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* For the next pass */
            run_start = run_start + host_ratio;
        } else {
            /* Find the end of this run */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, pages, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
            }
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /*
                 * This host page has gone, the next loop iteration starts
                 * from after the fixup
                 */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * No discards on this iteration, next loop starts from
                 * next sent/dirty page
                 */
                run_start = run_end + 1;
            }
        }

        if (do_fixup) {
            unsigned long page;

            /* Tell the destination to discard this page */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* For the unsent_pass we:
                 *     discard partially sent pages
                 * For the !unsent_pass (dirty) we:
                 *     discard partially dirty pages that were sent
                 *     (any partially sent pages were already discarded
                 *     by the previous unsent_pass)
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page are now not sent */
                set_bit(page, unsentmap);

                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        if (unsent_pass) {
            /* Find the next sent page for the next iteration */
            run_start = find_next_zero_bit(unsentmap, pages, run_start);
        } else {
            /* Find the next dirty page for the next iteration */
            run_start = find_next_bit(bitmap, pages, run_start);
        }
    }
}
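
/*
 * Illustrative example (not part of the original file): with 2MiB host pages
 * and 4KiB target pages, host_ratio above is 512.  If one hugepage holds a
 * mix of dirty and already-sent clean target pages, the pass tells the
 * destination to discard the whole 512-page host page, marks all 512 pages
 * unsent and dirty again, and migration_dirty_pages grows by however many of
 * them were not already dirty.  Afterwards every host page is either fully
 * dirty or fully clean, which is what postcopy requires.
 */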
1951      */
1952     postcopy_chunk_hostpages_pass(ms, false, block, pds);
1953 
1954     postcopy_discard_send_finish(ms, pds);
1955     return 0;
1956 }
1957 
1958 /**
1959  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1960  *
1961  * Returns zero on success
1962  *
1963  * Transmit the set of pages to be discarded after precopy to the target;
1964  * these are pages that:
1965  *     a) Have been previously transmitted but are now dirty again
1966  *     b) Have never been transmitted; this ensures that
1967  *        any pages on the destination that have been mapped by background
1968  *        tasks get discarded (transparent huge pages is the specific concern)
1969  * Hopefully this is pretty sparse
1970  *
1971  * @ms: current migration state
1972  */
1973 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1974 {
1975     RAMState *rs = ram_state;
1976     RAMBlock *block;
1977     int ret;
1978 
1979     rcu_read_lock();
1980 
1981     /* This should be our last sync, the src is now paused */
1982     migration_bitmap_sync(rs);
1983 
1984     /* Easiest way to make sure we don't resume in the middle of a host-page */
1985     rs->last_seen_block = NULL;
1986     rs->last_sent_block = NULL;
1987     rs->last_page = 0;
1988 
1989     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1990         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1991         unsigned long *bitmap = block->bmap;
1992         unsigned long *unsentmap = block->unsentmap;
1993 
1994         if (!unsentmap) {
1995             /* We don't have a safe way to resize the sentmap, so
1996              * if the bitmap was resized it will be NULL at this
1997              * point.
1998              */
1999             error_report("migration ram resized during precopy phase");
2000             rcu_read_unlock();
2001             return -EINVAL;
2002         }
2003         /* Deal with TPS != HPS and huge pages */
2004         ret = postcopy_chunk_hostpages(ms, block);
2005         if (ret) {
2006             rcu_read_unlock();
2007             return ret;
2008         }
2009 
2010         /*
2011          * Update the unsentmap to be unsentmap = unsentmap | dirty
2012          */
2013         bitmap_or(unsentmap, unsentmap, bitmap, pages);
2014 #ifdef DEBUG_POSTCOPY
2015         ram_debug_dump_bitmap(unsentmap, true, pages);
2016 #endif
2017     }
2018     trace_ram_postcopy_send_discard_bitmap();
2019 
2020     ret = postcopy_each_ram_send_discard(ms);
2021     rcu_read_unlock();
2022 
2023     return ret;
2024 }
2025 
2026 /**
2027  * ram_discard_range: discard dirtied pages at the beginning of postcopy
2028  *
2029  * Returns zero on success
2030  *
2031  * @rbname: name of the RAMBlock of the request. NULL means the
2032  *          same as the last one.
2033  * @start: RAMBlock starting page
2034  * @length: RAMBlock size
2035  */
2036 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2037 {
2038     int ret = -1;
2039 
2040     trace_ram_discard_range(rbname, start, length);
2041 
2042     rcu_read_lock();
2043     RAMBlock *rb = qemu_ram_block_by_name(rbname);
2044 
2045     if (!rb) {
2046         error_report("ram_discard_range: Failed to find block '%s'", rbname);
2047         goto err;
2048     }
2049 
2050     bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2051                  length >> qemu_target_page_bits());
2052     ret = ram_block_discard_range(rb, start, length);
2053 
2054 err:
2055     rcu_read_unlock();
2056 
2057     return ret;
2058 }
2059 
2060 /*
2061  * For every allocation, we will try not to crash the VM if the
2062  * allocation fails.
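 * We use the g_try_* allocators here and unwind any partial setup
 * before returning -ENOMEM.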
2063 */ 2064 static int xbzrle_init(void) 2065 { 2066 Error *local_err = NULL; 2067 2068 if (!migrate_use_xbzrle()) { 2069 return 0; 2070 } 2071 2072 XBZRLE_cache_lock(); 2073 2074 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); 2075 if (!XBZRLE.zero_target_page) { 2076 error_report("%s: Error allocating zero page", __func__); 2077 goto err_out; 2078 } 2079 2080 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), 2081 TARGET_PAGE_SIZE, &local_err); 2082 if (!XBZRLE.cache) { 2083 error_report_err(local_err); 2084 goto free_zero_page; 2085 } 2086 2087 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); 2088 if (!XBZRLE.encoded_buf) { 2089 error_report("%s: Error allocating encoded_buf", __func__); 2090 goto free_cache; 2091 } 2092 2093 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); 2094 if (!XBZRLE.current_buf) { 2095 error_report("%s: Error allocating current_buf", __func__); 2096 goto free_encoded_buf; 2097 } 2098 2099 /* We are all good */ 2100 XBZRLE_cache_unlock(); 2101 return 0; 2102 2103 free_encoded_buf: 2104 g_free(XBZRLE.encoded_buf); 2105 XBZRLE.encoded_buf = NULL; 2106 free_cache: 2107 cache_fini(XBZRLE.cache); 2108 XBZRLE.cache = NULL; 2109 free_zero_page: 2110 g_free(XBZRLE.zero_target_page); 2111 XBZRLE.zero_target_page = NULL; 2112 err_out: 2113 XBZRLE_cache_unlock(); 2114 return -ENOMEM; 2115 } 2116 2117 static int ram_state_init(RAMState **rsp) 2118 { 2119 *rsp = g_try_new0(RAMState, 1); 2120 2121 if (!*rsp) { 2122 error_report("%s: Init ramstate fail", __func__); 2123 return -1; 2124 } 2125 2126 qemu_mutex_init(&(*rsp)->bitmap_mutex); 2127 qemu_mutex_init(&(*rsp)->src_page_req_mutex); 2128 QSIMPLEQ_INIT(&(*rsp)->src_page_requests); 2129 2130 /* 2131 * Count the total number of pages used by ram blocks not including any 2132 * gaps due to alignment or unplugs. 2133 */ 2134 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; 2135 2136 ram_state_reset(*rsp); 2137 2138 return 0; 2139 } 2140 2141 static void ram_list_init_bitmaps(void) 2142 { 2143 RAMBlock *block; 2144 unsigned long pages; 2145 2146 /* Skip setting bitmap if there is no RAM */ 2147 if (ram_bytes_total()) { 2148 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 2149 pages = block->max_length >> TARGET_PAGE_BITS; 2150 block->bmap = bitmap_new(pages); 2151 bitmap_set(block->bmap, 0, pages); 2152 if (migrate_postcopy_ram()) { 2153 block->unsentmap = bitmap_new(pages); 2154 bitmap_set(block->unsentmap, 0, pages); 2155 } 2156 } 2157 } 2158 } 2159 2160 static void ram_init_bitmaps(RAMState *rs) 2161 { 2162 /* For memory_global_dirty_log_start below. */ 2163 qemu_mutex_lock_iothread(); 2164 qemu_mutex_lock_ramlist(); 2165 rcu_read_lock(); 2166 2167 ram_list_init_bitmaps(); 2168 memory_global_dirty_log_start(); 2169 migration_bitmap_sync(rs); 2170 2171 rcu_read_unlock(); 2172 qemu_mutex_unlock_ramlist(); 2173 qemu_mutex_unlock_iothread(); 2174 } 2175 2176 static int ram_init_all(RAMState **rsp) 2177 { 2178 if (ram_state_init(rsp)) { 2179 return -1; 2180 } 2181 2182 if (xbzrle_init()) { 2183 ram_state_cleanup(rsp); 2184 return -1; 2185 } 2186 2187 ram_init_bitmaps(*rsp); 2188 2189 return 0; 2190 } 2191 2192 /* 2193 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has 2194 * long-running RCU critical section. When rcu-reclaims in the code 2195 * start to become numerous it will be necessary to reduce the 2196 * granularity of these critical sections. 
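 * (Each of them walks the RAMBlock list under rcu_read_lock() while
 * streaming pages.)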
2197  */
2198 
2199 /**
2200  * ram_save_setup: Setup RAM for migration
2201  *
2202  * Returns zero to indicate success and negative for error
2203  *
2204  * @f: QEMUFile where to send the data
2205  * @opaque: RAMState pointer
2206  */
2207 static int ram_save_setup(QEMUFile *f, void *opaque)
2208 {
2209     RAMState **rsp = opaque;
2210     RAMBlock *block;
2211 
2212     /* migration has already setup the bitmap, reuse it. */
2213     if (!migration_in_colo_state()) {
2214         if (ram_init_all(rsp) != 0) {
2215             return -1;
2216         }
2217     }
2218     (*rsp)->f = f;
2219 
2220     rcu_read_lock();
2221 
2222     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2223 
2224     RAMBLOCK_FOREACH(block) {
2225         qemu_put_byte(f, strlen(block->idstr));
2226         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2227         qemu_put_be64(f, block->used_length);
2228         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2229             qemu_put_be64(f, block->page_size);
2230         }
2231     }
2232 
2233     rcu_read_unlock();
2234     compress_threads_save_setup();
2235 
2236     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2237     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2238 
2239     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2240 
2241     return 0;
2242 }
2243 
2244 /**
2245  * ram_save_iterate: iterative stage for migration
2246  *
2247  * Returns zero to indicate success and negative for error
2248  *
2249  * @f: QEMUFile where to send the data
2250  * @opaque: RAMState pointer
2251  */
2252 static int ram_save_iterate(QEMUFile *f, void *opaque)
2253 {
2254     RAMState **temp = opaque;
2255     RAMState *rs = *temp;
2256     int ret;
2257     int i;
2258     int64_t t0;
2259     int done = 0;
2260 
2261     if (blk_mig_bulk_active()) {
2262         /* Avoid transferring ram during bulk phase of block migration as
2263          * the bulk phase will usually take a long time and transferring
2264          * ram updates during that time is pointless. */
2265         goto out;
2266     }
2267 
2268     rcu_read_lock();
2269     if (ram_list.version != rs->last_version) {
2270         ram_state_reset(rs);
2271     }
2272 
2273     /* Read version before ram_list.blocks */
2274     smp_rmb();
2275 
2276     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2277 
2278     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2279     i = 0;
2280     while ((ret = qemu_file_rate_limit(f)) == 0) {
2281         int pages;
2282 
2283         pages = ram_find_and_save_block(rs, false);
2284         /* no more pages to send */
2285         if (pages == 0) {
2286             done = 1;
2287             break;
2288         }
2289         rs->iterations++;
2290 
2291         /* we want to check in the 1st loop, just in case it was the 1st time
2292            and we had to sync the dirty bitmap.
2293            qemu_get_clock_ns() is a bit expensive, so we only check every few
2294            iterations
2295         */
2296         if ((i & 63) == 0) {
2297             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2298             if (t1 > MAX_WAIT) {
2299                 trace_ram_save_iterate_big_wait(t1, i);
2300                 break;
2301             }
2302         }
2303         i++;
2304     }
2305     flush_compressed_data(rs);
2306     rcu_read_unlock();
2307 
2308     /*
2309      * Must occur before EOS (or any QEMUFile operation)
2310      * because of RDMA protocol.
2311      */
2312     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2313 
2314 out:
2315     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2316     ram_counters.transferred += 8;
2317 
2318     ret = qemu_file_get_error(f);
2319     if (ret < 0) {
2320         return ret;
2321     }
2322 
2323     return done;
2324 }
2325 
2326 /**
2327  * ram_save_complete: function called to send the remaining amount of ram
2328  *
2329  * Returns zero to indicate success
2330  *
2331  * Called with iothread lock
2332  *
2333  * @f: QEMUFile where to send the data
2334  * @opaque: RAMState pointer
2335  */
2336 static int ram_save_complete(QEMUFile *f, void *opaque)
2337 {
2338     RAMState **temp = opaque;
2339     RAMState *rs = *temp;
2340 
2341     rcu_read_lock();
2342 
2343     if (!migration_in_postcopy()) {
2344         migration_bitmap_sync(rs);
2345     }
2346 
2347     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2348 
2349     /* try transferring iterative blocks of memory */
2350 
2351     /* flush all remaining blocks regardless of rate limiting */
2352     while (true) {
2353         int pages;
2354 
2355         pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2356         /* no more blocks to send */
2357         if (pages == 0) {
2358             break;
2359         }
2360     }
2361 
2362     flush_compressed_data(rs);
2363     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2364 
2365     rcu_read_unlock();
2366 
2367     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2368 
2369     return 0;
2370 }
2371 
2372 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2373                              uint64_t *res_precopy_only,
2374                              uint64_t *res_compatible,
2375                              uint64_t *res_postcopy_only)
2376 {
2377     RAMState **temp = opaque;
2378     RAMState *rs = *temp;
2379     uint64_t remaining_size;
2380 
2381     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2382 
2383     if (!migration_in_postcopy() &&
2384         remaining_size < max_size) {
2385         qemu_mutex_lock_iothread();
2386         rcu_read_lock();
2387         migration_bitmap_sync(rs);
2388         rcu_read_unlock();
2389         qemu_mutex_unlock_iothread();
2390         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2391     }
2392 
2393     if (migrate_postcopy_ram()) {
2394         /* We can do postcopy, and all the data is postcopiable */
2395         *res_compatible += remaining_size;
2396     } else {
2397         *res_precopy_only += remaining_size;
2398     }
2399 }
2400 
2401 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2402 {
2403     unsigned int xh_len;
2404     int xh_flags;
2405     uint8_t *loaded_data;
2406 
2407     /* extract RLE header */
2408     xh_flags = qemu_get_byte(f);
2409     xh_len = qemu_get_be16(f);
2410 
2411     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2412         error_report("Failed to load XBZRLE page - wrong compression!");
2413         return -1;
2414     }
2415 
2416     if (xh_len > TARGET_PAGE_SIZE) {
2417         error_report("Failed to load XBZRLE page - len overflow!");
2418         return -1;
2419     }
2420     loaded_data = XBZRLE.decoded_buf;
2421     /* load data and decode */
2422     /* it can change loaded_data to point to an internal buffer */
2423     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2424 
2425     /* decode RLE */
2426     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2427                              TARGET_PAGE_SIZE) == -1) {
2428         error_report("Failed to load XBZRLE page - decode error!");
2429         return -1;
2430     }
2431 
2432     return 0;
2433 }
2434 
2435 /**
2436  * ram_block_from_stream: read a RAMBlock id from the migration stream
2437  *
2438  * Must be called from within a rcu critical section.
2439  *
2440  * Returns a pointer from within the RCU-protected ram_list.
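 * The most recently used block is cached in a static variable so that
 * pages flagged RAM_SAVE_FLAG_CONTINUE can omit the block id.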
2441  *
2442  * @f: QEMUFile where to read the data from
2443  * @flags: Page flags (mostly to see if it's a continuation of previous block)
2444  */
2445 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2446 {
2447     static RAMBlock *block = NULL;
2448     char id[256];
2449     uint8_t len;
2450 
2451     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2452         if (!block) {
2453             error_report("Ack, bad migration stream!");
2454             return NULL;
2455         }
2456         return block;
2457     }
2458 
2459     len = qemu_get_byte(f);
2460     qemu_get_buffer(f, (uint8_t *)id, len);
2461     id[len] = 0;
2462 
2463     block = qemu_ram_block_by_name(id);
2464     if (!block) {
2465         error_report("Can't find block %s", id);
2466         return NULL;
2467     }
2468 
2469     return block;
2470 }
2471 
2472 static inline void *host_from_ram_block_offset(RAMBlock *block,
2473                                                ram_addr_t offset)
2474 {
2475     if (!offset_in_ramblock(block, offset)) {
2476         return NULL;
2477     }
2478 
2479     return block->host + offset;
2480 }
2481 
2482 /**
2483  * ram_handle_compressed: handle the zero page case
2484  *
2485  * If a page (or a whole RDMA chunk) has been
2486  * determined to be zero, then zap it.
2487  *
2488  * @host: host address for the zero page
2489  * @ch: what the page is filled from. We only support zero
2490  * @size: size of the zero page
2491  */
2492 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2493 {
2494     if (ch != 0 || !is_zero_range(host, size)) {
2495         memset(host, ch, size);
2496     }
2497 }
2498 
2499 static void *do_data_decompress(void *opaque)
2500 {
2501     DecompressParam *param = opaque;
2502     unsigned long pagesize;
2503     uint8_t *des;
2504     int len;
2505 
2506     qemu_mutex_lock(&param->mutex);
2507     while (!param->quit) {
2508         if (param->des) {
2509             des = param->des;
2510             len = param->len;
2511             param->des = 0;
2512             qemu_mutex_unlock(&param->mutex);
2513 
2514             pagesize = TARGET_PAGE_SIZE;
2515             /* uncompress() can fail in some cases, especially
2516              * when the page was dirtied while being compressed; that's
2517              * not a problem because the dirty page will be retransferred
2518              * and uncompress() won't break the data in other pages.
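             * (For that reason the return value of uncompress() is not
             * checked here.)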
2519              */
2520             uncompress((Bytef *)des, &pagesize,
2521                        (const Bytef *)param->compbuf, len);
2522 
2523             qemu_mutex_lock(&decomp_done_lock);
2524             param->done = true;
2525             qemu_cond_signal(&decomp_done_cond);
2526             qemu_mutex_unlock(&decomp_done_lock);
2527 
2528             qemu_mutex_lock(&param->mutex);
2529         } else {
2530             qemu_cond_wait(&param->cond, &param->mutex);
2531         }
2532     }
2533     qemu_mutex_unlock(&param->mutex);
2534 
2535     return NULL;
2536 }
2537 
2538 static void wait_for_decompress_done(void)
2539 {
2540     int idx, thread_count;
2541 
2542     if (!migrate_use_compression()) {
2543         return;
2544     }
2545 
2546     thread_count = migrate_decompress_threads();
2547     qemu_mutex_lock(&decomp_done_lock);
2548     for (idx = 0; idx < thread_count; idx++) {
2549         while (!decomp_param[idx].done) {
2550             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2551         }
2552     }
2553     qemu_mutex_unlock(&decomp_done_lock);
2554 }
2555 
2556 static void compress_threads_load_setup(void)
2557 {
2558     int i, thread_count;
2559 
2560     if (!migrate_use_compression()) {
2561         return;
2562     }
2563     thread_count = migrate_decompress_threads();
2564     decompress_threads = g_new0(QemuThread, thread_count);
2565     decomp_param = g_new0(DecompressParam, thread_count);
2566     qemu_mutex_init(&decomp_done_lock);
2567     qemu_cond_init(&decomp_done_cond);
2568     for (i = 0; i < thread_count; i++) {
2569         qemu_mutex_init(&decomp_param[i].mutex);
2570         qemu_cond_init(&decomp_param[i].cond);
2571         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2572         decomp_param[i].done = true;
2573         decomp_param[i].quit = false;
2574         qemu_thread_create(decompress_threads + i, "decompress",
2575                            do_data_decompress, decomp_param + i,
2576                            QEMU_THREAD_JOINABLE);
2577     }
2578 }
2579 
2580 static void compress_threads_load_cleanup(void)
2581 {
2582     int i, thread_count;
2583 
2584     if (!migrate_use_compression()) {
2585         return;
2586     }
2587     thread_count = migrate_decompress_threads();
2588     for (i = 0; i < thread_count; i++) {
2589         qemu_mutex_lock(&decomp_param[i].mutex);
2590         decomp_param[i].quit = true;
2591         qemu_cond_signal(&decomp_param[i].cond);
2592         qemu_mutex_unlock(&decomp_param[i].mutex);
2593     }
2594     for (i = 0; i < thread_count; i++) {
2595         qemu_thread_join(decompress_threads + i);
2596         qemu_mutex_destroy(&decomp_param[i].mutex);
2597         qemu_cond_destroy(&decomp_param[i].cond);
2598         g_free(decomp_param[i].compbuf);
2599     }
2600     g_free(decompress_threads);
2601     g_free(decomp_param);
2602     decompress_threads = NULL;
2603     decomp_param = NULL;
2604 }
2605 
2606 static void decompress_data_with_multi_threads(QEMUFile *f,
2607                                                void *host, int len)
2608 {
2609     int idx, thread_count;
2610 
2611     thread_count = migrate_decompress_threads();
2612     qemu_mutex_lock(&decomp_done_lock);
2613     while (true) {
2614         for (idx = 0; idx < thread_count; idx++) {
2615             if (decomp_param[idx].done) {
2616                 decomp_param[idx].done = false;
2617                 qemu_mutex_lock(&decomp_param[idx].mutex);
2618                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2619                 decomp_param[idx].des = host;
2620                 decomp_param[idx].len = len;
2621                 qemu_cond_signal(&decomp_param[idx].cond);
2622                 qemu_mutex_unlock(&decomp_param[idx].mutex);
2623                 break;
2624             }
2625         }
2626         if (idx < thread_count) {
2627             break;
2628         } else {
2629             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2630         }
2631     }
2632     qemu_mutex_unlock(&decomp_done_lock);
2633 }
2634 
2635 /**
2636  * ram_load_setup: Setup RAM for migration incoming side
2637  *
2638  * Returns zero to indicate success and negative for error
2639  *
2640  * @f: QEMUFile where to receive the data
2641  * @opaque: RAMState pointer
2642  */
2643 static int ram_load_setup(QEMUFile *f, void *opaque)
2644 {
2645     xbzrle_load_setup();
2646     compress_threads_load_setup();
2647     ramblock_recv_map_init();
2648     return 0;
2649 }
2650 
2651 static int ram_load_cleanup(void *opaque)
2652 {
2653     RAMBlock *rb;
2654     xbzrle_load_cleanup();
2655     compress_threads_load_cleanup();
2656 
2657     RAMBLOCK_FOREACH(rb) {
2658         g_free(rb->receivedmap);
2659         rb->receivedmap = NULL;
2660     }
2661     return 0;
2662 }
2663 
2664 /**
2665  * ram_postcopy_incoming_init: allocate postcopy data structures
2666  *
2667  * Returns 0 for success and negative if there was an error
2668  *
2669  * @mis: current migration incoming state
2670  *
2671  * Allocate data structures etc. needed by incoming migration with
2672  * postcopy-ram. postcopy-ram's similarly named
2673  * postcopy_ram_incoming_init does the work.
2674  */
2675 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2676 {
2677     unsigned long ram_pages = last_ram_page();
2678 
2679     return postcopy_ram_incoming_init(mis, ram_pages);
2680 }
2681 
2682 /**
2683  * ram_load_postcopy: load a page in postcopy case
2684  *
2685  * Returns 0 for success or -errno in case of error
2686  *
2687  * Called in postcopy mode by ram_load().
2688  * rcu_read_lock is taken prior to this being called.
2689  *
2690  * @f: QEMUFile where to receive the data
2691  */
2692 static int ram_load_postcopy(QEMUFile *f)
2693 {
2694     int flags = 0, ret = 0;
2695     bool place_needed = false;
2696     bool matching_page_sizes = false;
2697     MigrationIncomingState *mis = migration_incoming_get_current();
2698     /* Temporary page that is later 'placed' */
2699     void *postcopy_host_page = postcopy_get_tmp_page(mis);
2700     void *last_host = NULL;
2701     bool all_zero = false;
2702 
2703     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2704         ram_addr_t addr;
2705         void *host = NULL;
2706         void *page_buffer = NULL;
2707         void *place_source = NULL;
2708         RAMBlock *block = NULL;
2709         uint8_t ch;
2710 
2711         addr = qemu_get_be64(f);
2712 
2713         /*
2714          * If qemu file error, we should stop here, and then "addr"
2715          * may be invalid
2716          */
2717         ret = qemu_file_get_error(f);
2718         if (ret) {
2719             break;
2720         }
2721 
2722         flags = addr & ~TARGET_PAGE_MASK;
2723         addr &= TARGET_PAGE_MASK;
2724 
2725         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2726         place_needed = false;
2727         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
2728             block = ram_block_from_stream(f, flags);
2729 
2730             host = host_from_ram_block_offset(block, addr);
2731             if (!host) {
2732                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2733                 ret = -EINVAL;
2734                 break;
2735             }
2736             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2737             /*
2738              * Postcopy requires that we place whole host pages atomically;
2739              * these may be huge pages for RAMBlocks that are backed by
2740              * hugetlbfs.
2741              * To make it atomic, the data is read into a temporary page
2742              * that's moved into place later.
2743              * The migration protocol uses, possibly smaller, target-pages,
2744              * however the source ensures it always sends all the components
2745              * of a host page in order.
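             * (e.g. a 2MB hugetlbfs-backed page arrives as 512 consecutive
             * 4KB target pages and is placed only after the last of them
             * has been received)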
2746 */ 2747 page_buffer = postcopy_host_page + 2748 ((uintptr_t)host & (block->page_size - 1)); 2749 /* If all TP are zero then we can optimise the place */ 2750 if (!((uintptr_t)host & (block->page_size - 1))) { 2751 all_zero = true; 2752 } else { 2753 /* not the 1st TP within the HP */ 2754 if (host != (last_host + TARGET_PAGE_SIZE)) { 2755 error_report("Non-sequential target page %p/%p", 2756 host, last_host); 2757 ret = -EINVAL; 2758 break; 2759 } 2760 } 2761 2762 2763 /* 2764 * If it's the last part of a host page then we place the host 2765 * page 2766 */ 2767 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & 2768 (block->page_size - 1)) == 0; 2769 place_source = postcopy_host_page; 2770 } 2771 last_host = host; 2772 2773 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2774 case RAM_SAVE_FLAG_ZERO: 2775 ch = qemu_get_byte(f); 2776 memset(page_buffer, ch, TARGET_PAGE_SIZE); 2777 if (ch) { 2778 all_zero = false; 2779 } 2780 break; 2781 2782 case RAM_SAVE_FLAG_PAGE: 2783 all_zero = false; 2784 if (!place_needed || !matching_page_sizes) { 2785 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); 2786 } else { 2787 /* Avoids the qemu_file copy during postcopy, which is 2788 * going to do a copy later; can only do it when we 2789 * do this read in one go (matching page sizes) 2790 */ 2791 qemu_get_buffer_in_place(f, (uint8_t **)&place_source, 2792 TARGET_PAGE_SIZE); 2793 } 2794 break; 2795 case RAM_SAVE_FLAG_EOS: 2796 /* normal exit */ 2797 break; 2798 default: 2799 error_report("Unknown combination of migration flags: %#x" 2800 " (postcopy mode)", flags); 2801 ret = -EINVAL; 2802 break; 2803 } 2804 2805 /* Detect for any possible file errors */ 2806 if (!ret && qemu_file_get_error(f)) { 2807 ret = qemu_file_get_error(f); 2808 } 2809 2810 if (!ret && place_needed) { 2811 /* This gets called at the last target page in the host page */ 2812 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; 2813 2814 if (all_zero) { 2815 ret = postcopy_place_page_zero(mis, place_dest, 2816 block); 2817 } else { 2818 ret = postcopy_place_page(mis, place_dest, 2819 place_source, block); 2820 } 2821 } 2822 } 2823 2824 return ret; 2825 } 2826 2827 static bool postcopy_is_advised(void) 2828 { 2829 PostcopyState ps = postcopy_state_get(); 2830 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; 2831 } 2832 2833 static bool postcopy_is_running(void) 2834 { 2835 PostcopyState ps = postcopy_state_get(); 2836 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END; 2837 } 2838 2839 static int ram_load(QEMUFile *f, void *opaque, int version_id) 2840 { 2841 int flags = 0, ret = 0, invalid_flags = 0; 2842 static uint64_t seq_iter; 2843 int len = 0; 2844 /* 2845 * If system is running in postcopy mode, page inserts to host memory must 2846 * be atomic 2847 */ 2848 bool postcopy_running = postcopy_is_running(); 2849 /* ADVISE is earlier, it shows the source has the postcopy capability on */ 2850 bool postcopy_advised = postcopy_is_advised(); 2851 2852 seq_iter++; 2853 2854 if (version_id != 4) { 2855 ret = -EINVAL; 2856 } 2857 2858 if (!migrate_use_compression()) { 2859 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; 2860 } 2861 /* This RCU critical section can be very long running. 2862 * When RCU reclaims in the code start to become numerous, 2863 * it will be necessary to reduce the granularity of this 2864 * critical section. 
2865 */ 2866 rcu_read_lock(); 2867 2868 if (postcopy_running) { 2869 ret = ram_load_postcopy(f); 2870 } 2871 2872 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { 2873 ram_addr_t addr, total_ram_bytes; 2874 void *host = NULL; 2875 uint8_t ch; 2876 2877 addr = qemu_get_be64(f); 2878 flags = addr & ~TARGET_PAGE_MASK; 2879 addr &= TARGET_PAGE_MASK; 2880 2881 if (flags & invalid_flags) { 2882 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { 2883 error_report("Received an unexpected compressed page"); 2884 } 2885 2886 ret = -EINVAL; 2887 break; 2888 } 2889 2890 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 2891 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { 2892 RAMBlock *block = ram_block_from_stream(f, flags); 2893 2894 host = host_from_ram_block_offset(block, addr); 2895 if (!host) { 2896 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 2897 ret = -EINVAL; 2898 break; 2899 } 2900 ramblock_recv_bitmap_set(block, host); 2901 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); 2902 } 2903 2904 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2905 case RAM_SAVE_FLAG_MEM_SIZE: 2906 /* Synchronize RAM block list */ 2907 total_ram_bytes = addr; 2908 while (!ret && total_ram_bytes) { 2909 RAMBlock *block; 2910 char id[256]; 2911 ram_addr_t length; 2912 2913 len = qemu_get_byte(f); 2914 qemu_get_buffer(f, (uint8_t *)id, len); 2915 id[len] = 0; 2916 length = qemu_get_be64(f); 2917 2918 block = qemu_ram_block_by_name(id); 2919 if (block) { 2920 if (length != block->used_length) { 2921 Error *local_err = NULL; 2922 2923 ret = qemu_ram_resize(block, length, 2924 &local_err); 2925 if (local_err) { 2926 error_report_err(local_err); 2927 } 2928 } 2929 /* For postcopy we need to check hugepage sizes match */ 2930 if (postcopy_advised && 2931 block->page_size != qemu_host_page_size) { 2932 uint64_t remote_page_size = qemu_get_be64(f); 2933 if (remote_page_size != block->page_size) { 2934 error_report("Mismatched RAM page size %s " 2935 "(local) %zd != %" PRId64, 2936 id, block->page_size, 2937 remote_page_size); 2938 ret = -EINVAL; 2939 } 2940 } 2941 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, 2942 block->idstr); 2943 } else { 2944 error_report("Unknown ramblock \"%s\", cannot " 2945 "accept migration", id); 2946 ret = -EINVAL; 2947 } 2948 2949 total_ram_bytes -= length; 2950 } 2951 break; 2952 2953 case RAM_SAVE_FLAG_ZERO: 2954 ch = qemu_get_byte(f); 2955 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 2956 break; 2957 2958 case RAM_SAVE_FLAG_PAGE: 2959 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 2960 break; 2961 2962 case RAM_SAVE_FLAG_COMPRESS_PAGE: 2963 len = qemu_get_be32(f); 2964 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 2965 error_report("Invalid compressed data length: %d", len); 2966 ret = -EINVAL; 2967 break; 2968 } 2969 decompress_data_with_multi_threads(f, host, len); 2970 break; 2971 2972 case RAM_SAVE_FLAG_XBZRLE: 2973 if (load_xbzrle(f, addr, host) < 0) { 2974 error_report("Failed to decompress XBZRLE page at " 2975 RAM_ADDR_FMT, addr); 2976 ret = -EINVAL; 2977 break; 2978 } 2979 break; 2980 case RAM_SAVE_FLAG_EOS: 2981 /* normal exit */ 2982 break; 2983 default: 2984 if (flags & RAM_SAVE_FLAG_HOOK) { 2985 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); 2986 } else { 2987 error_report("Unknown combination of migration flags: %#x", 2988 flags); 2989 ret = -EINVAL; 2990 } 2991 } 2992 if (!ret) { 2993 ret = qemu_file_get_error(f); 2994 } 2995 } 2996 2997 wait_for_decompress_done(); 2998 rcu_read_unlock(); 2999 
trace_ram_load_complete(ret, seq_iter); 3000 return ret; 3001 } 3002 3003 static bool ram_has_postcopy(void *opaque) 3004 { 3005 return migrate_postcopy_ram(); 3006 } 3007 3008 static SaveVMHandlers savevm_ram_handlers = { 3009 .save_setup = ram_save_setup, 3010 .save_live_iterate = ram_save_iterate, 3011 .save_live_complete_postcopy = ram_save_complete, 3012 .save_live_complete_precopy = ram_save_complete, 3013 .has_postcopy = ram_has_postcopy, 3014 .save_live_pending = ram_save_pending, 3015 .load_state = ram_load, 3016 .save_cleanup = ram_save_cleanup, 3017 .load_setup = ram_load_setup, 3018 .load_cleanup = ram_load_cleanup, 3019 }; 3020 3021 void ram_mig_init(void) 3022 { 3023 qemu_mutex_init(&XBZRLE.lock); 3024 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); 3025 } 3026