/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "migration/block.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value, and renamed it to avoid
 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* This struct contains the XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock.
*/ 85 PageCache *cache; 86 QemuMutex lock; 87 /* it will store a page full of zeros */ 88 uint8_t *zero_target_page; 89 /* buffer used for XBZRLE decoding */ 90 uint8_t *decoded_buf; 91 } XBZRLE; 92 93 static void XBZRLE_cache_lock(void) 94 { 95 if (migrate_use_xbzrle()) 96 qemu_mutex_lock(&XBZRLE.lock); 97 } 98 99 static void XBZRLE_cache_unlock(void) 100 { 101 if (migrate_use_xbzrle()) 102 qemu_mutex_unlock(&XBZRLE.lock); 103 } 104 105 /** 106 * xbzrle_cache_resize: resize the xbzrle cache 107 * 108 * This function is called from qmp_migrate_set_cache_size in main 109 * thread, possibly while a migration is in progress. A running 110 * migration may be using the cache and might finish during this call, 111 * hence changes to the cache are protected by XBZRLE.lock(). 112 * 113 * Returns the new_size or negative in case of error. 114 * 115 * @new_size: new cache size 116 */ 117 int64_t xbzrle_cache_resize(int64_t new_size) 118 { 119 PageCache *new_cache; 120 int64_t ret; 121 122 if (new_size < TARGET_PAGE_SIZE) { 123 return -1; 124 } 125 126 XBZRLE_cache_lock(); 127 128 if (XBZRLE.cache != NULL) { 129 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { 130 goto out_new_size; 131 } 132 new_cache = cache_init(new_size / TARGET_PAGE_SIZE, 133 TARGET_PAGE_SIZE); 134 if (!new_cache) { 135 error_report("Error creating cache"); 136 ret = -1; 137 goto out; 138 } 139 140 cache_fini(XBZRLE.cache); 141 XBZRLE.cache = new_cache; 142 } 143 144 out_new_size: 145 ret = pow2floor(new_size); 146 out: 147 XBZRLE_cache_unlock(); 148 return ret; 149 } 150 151 /* 152 * An outstanding page request, on the source, having been received 153 * and queued 154 */ 155 struct RAMSrcPageRequest { 156 RAMBlock *rb; 157 hwaddr offset; 158 hwaddr len; 159 160 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; 161 }; 162 163 /* State of RAM for migration */ 164 struct RAMState { 165 /* QEMUFile used for this migration */ 166 QEMUFile *f; 167 /* Last block that we have visited searching for dirty pages */ 168 RAMBlock *last_seen_block; 169 /* Last block from where we have sent data */ 170 RAMBlock *last_sent_block; 171 /* Last dirty target page we have sent */ 172 ram_addr_t last_page; 173 /* last ram version we have seen */ 174 uint32_t last_version; 175 /* We are in the first round */ 176 bool ram_bulk_stage; 177 /* How many times we have dirty too many pages */ 178 int dirty_rate_high_cnt; 179 /* these variables are used for bitmap sync */ 180 /* last time we did a full bitmap_sync */ 181 int64_t time_last_bitmap_sync; 182 /* bytes transferred at start_time */ 183 uint64_t bytes_xfer_prev; 184 /* number of dirty pages since start_time */ 185 uint64_t num_dirty_pages_period; 186 /* xbzrle misses since the beginning of the period */ 187 uint64_t xbzrle_cache_miss_prev; 188 /* number of iterations at the beginning of period */ 189 uint64_t iterations_prev; 190 /* Iterations since start */ 191 uint64_t iterations; 192 /* number of dirty bits in the bitmap */ 193 uint64_t migration_dirty_pages; 194 /* protects modification of the bitmap */ 195 QemuMutex bitmap_mutex; 196 /* The RAMBlock used in the last src_page_requests */ 197 RAMBlock *last_req_rb; 198 /* Queue of outstanding page requests from the destination */ 199 QemuMutex src_page_req_mutex; 200 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests; 201 }; 202 typedef struct RAMState RAMState; 203 204 static RAMState *ram_state; 205 206 uint64_t ram_bytes_remaining(void) 207 { 208 return ram_state->migration_dirty_pages * TARGET_PAGE_SIZE; 209 } 210 211 
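/*
 * Illustrative sketch only (not part of the original file): how a caller
 * such as the qmp_migrate_set_cache_size() handler mentioned above might
 * use xbzrle_cache_resize().  The request is rounded down to a power of
 * two, so the size that actually takes effect can be smaller than asked
 * for:
 *
 *     int64_t applied = xbzrle_cache_resize(70 * 1024 * 1024);
 *     if (applied < 0) {
 *         // request below TARGET_PAGE_SIZE, or cache_init() failed
 *     } else {
 *         // applied == 64 * 1024 * 1024, i.e. pow2floor(70 MB)
 *     }
 */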
MigrationStats ram_counters; 212 213 /* used by the search for pages to send */ 214 struct PageSearchStatus { 215 /* Current block being searched */ 216 RAMBlock *block; 217 /* Current page to search from */ 218 unsigned long page; 219 /* Set once we wrap around */ 220 bool complete_round; 221 }; 222 typedef struct PageSearchStatus PageSearchStatus; 223 224 struct CompressParam { 225 bool done; 226 bool quit; 227 QEMUFile *file; 228 QemuMutex mutex; 229 QemuCond cond; 230 RAMBlock *block; 231 ram_addr_t offset; 232 }; 233 typedef struct CompressParam CompressParam; 234 235 struct DecompressParam { 236 bool done; 237 bool quit; 238 QemuMutex mutex; 239 QemuCond cond; 240 void *des; 241 uint8_t *compbuf; 242 int len; 243 }; 244 typedef struct DecompressParam DecompressParam; 245 246 static CompressParam *comp_param; 247 static QemuThread *compress_threads; 248 /* comp_done_cond is used to wake up the migration thread when 249 * one of the compression threads has finished the compression. 250 * comp_done_lock is used to co-work with comp_done_cond. 251 */ 252 static QemuMutex comp_done_lock; 253 static QemuCond comp_done_cond; 254 /* The empty QEMUFileOps will be used by file in CompressParam */ 255 static const QEMUFileOps empty_ops = { }; 256 257 static DecompressParam *decomp_param; 258 static QemuThread *decompress_threads; 259 static QemuMutex decomp_done_lock; 260 static QemuCond decomp_done_cond; 261 262 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, 263 ram_addr_t offset); 264 265 static void *do_data_compress(void *opaque) 266 { 267 CompressParam *param = opaque; 268 RAMBlock *block; 269 ram_addr_t offset; 270 271 qemu_mutex_lock(¶m->mutex); 272 while (!param->quit) { 273 if (param->block) { 274 block = param->block; 275 offset = param->offset; 276 param->block = NULL; 277 qemu_mutex_unlock(¶m->mutex); 278 279 do_compress_ram_page(param->file, block, offset); 280 281 qemu_mutex_lock(&comp_done_lock); 282 param->done = true; 283 qemu_cond_signal(&comp_done_cond); 284 qemu_mutex_unlock(&comp_done_lock); 285 286 qemu_mutex_lock(¶m->mutex); 287 } else { 288 qemu_cond_wait(¶m->cond, ¶m->mutex); 289 } 290 } 291 qemu_mutex_unlock(¶m->mutex); 292 293 return NULL; 294 } 295 296 static inline void terminate_compression_threads(void) 297 { 298 int idx, thread_count; 299 300 thread_count = migrate_compress_threads(); 301 302 for (idx = 0; idx < thread_count; idx++) { 303 qemu_mutex_lock(&comp_param[idx].mutex); 304 comp_param[idx].quit = true; 305 qemu_cond_signal(&comp_param[idx].cond); 306 qemu_mutex_unlock(&comp_param[idx].mutex); 307 } 308 } 309 310 static void compress_threads_save_cleanup(void) 311 { 312 int i, thread_count; 313 314 if (!migrate_use_compression()) { 315 return; 316 } 317 terminate_compression_threads(); 318 thread_count = migrate_compress_threads(); 319 for (i = 0; i < thread_count; i++) { 320 qemu_thread_join(compress_threads + i); 321 qemu_fclose(comp_param[i].file); 322 qemu_mutex_destroy(&comp_param[i].mutex); 323 qemu_cond_destroy(&comp_param[i].cond); 324 } 325 qemu_mutex_destroy(&comp_done_lock); 326 qemu_cond_destroy(&comp_done_cond); 327 g_free(compress_threads); 328 g_free(comp_param); 329 compress_threads = NULL; 330 comp_param = NULL; 331 } 332 333 static void compress_threads_save_setup(void) 334 { 335 int i, thread_count; 336 337 if (!migrate_use_compression()) { 338 return; 339 } 340 thread_count = migrate_compress_threads(); 341 compress_threads = g_new0(QemuThread, thread_count); 342 comp_param = g_new0(CompressParam, thread_count); 343 
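    /*
     * comp_done_lock/comp_done_cond implement the hand-off back to the
     * migration thread: a worker sets comp_param[i].done and signals the
     * condition once it has compressed its page into its buffer file,
     * and flush_compressed_data()/compress_page_with_multi_thread() wait
     * on it.
     */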
qemu_cond_init(&comp_done_cond); 344 qemu_mutex_init(&comp_done_lock); 345 for (i = 0; i < thread_count; i++) { 346 /* comp_param[i].file is just used as a dummy buffer to save data, 347 * set its ops to empty. 348 */ 349 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); 350 comp_param[i].done = true; 351 comp_param[i].quit = false; 352 qemu_mutex_init(&comp_param[i].mutex); 353 qemu_cond_init(&comp_param[i].cond); 354 qemu_thread_create(compress_threads + i, "compress", 355 do_data_compress, comp_param + i, 356 QEMU_THREAD_JOINABLE); 357 } 358 } 359 360 /* Multiple fd's */ 361 362 struct MultiFDSendParams { 363 uint8_t id; 364 char *name; 365 QemuThread thread; 366 QemuSemaphore sem; 367 QemuMutex mutex; 368 bool quit; 369 }; 370 typedef struct MultiFDSendParams MultiFDSendParams; 371 372 struct { 373 MultiFDSendParams *params; 374 /* number of created threads */ 375 int count; 376 } *multifd_send_state; 377 378 static void terminate_multifd_send_threads(Error *errp) 379 { 380 int i; 381 382 for (i = 0; i < multifd_send_state->count; i++) { 383 MultiFDSendParams *p = &multifd_send_state->params[i]; 384 385 qemu_mutex_lock(&p->mutex); 386 p->quit = true; 387 qemu_sem_post(&p->sem); 388 qemu_mutex_unlock(&p->mutex); 389 } 390 } 391 392 int multifd_save_cleanup(Error **errp) 393 { 394 int i; 395 int ret = 0; 396 397 if (!migrate_use_multifd()) { 398 return 0; 399 } 400 terminate_multifd_send_threads(NULL); 401 for (i = 0; i < multifd_send_state->count; i++) { 402 MultiFDSendParams *p = &multifd_send_state->params[i]; 403 404 qemu_thread_join(&p->thread); 405 qemu_mutex_destroy(&p->mutex); 406 qemu_sem_destroy(&p->sem); 407 g_free(p->name); 408 p->name = NULL; 409 } 410 g_free(multifd_send_state->params); 411 multifd_send_state->params = NULL; 412 g_free(multifd_send_state); 413 multifd_send_state = NULL; 414 return ret; 415 } 416 417 static void *multifd_send_thread(void *opaque) 418 { 419 MultiFDSendParams *p = opaque; 420 421 while (true) { 422 qemu_mutex_lock(&p->mutex); 423 if (p->quit) { 424 qemu_mutex_unlock(&p->mutex); 425 break; 426 } 427 qemu_mutex_unlock(&p->mutex); 428 qemu_sem_wait(&p->sem); 429 } 430 431 return NULL; 432 } 433 434 int multifd_save_setup(void) 435 { 436 int thread_count; 437 uint8_t i; 438 439 if (!migrate_use_multifd()) { 440 return 0; 441 } 442 thread_count = migrate_multifd_channels(); 443 multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); 444 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); 445 multifd_send_state->count = 0; 446 for (i = 0; i < thread_count; i++) { 447 MultiFDSendParams *p = &multifd_send_state->params[i]; 448 449 qemu_mutex_init(&p->mutex); 450 qemu_sem_init(&p->sem, 0); 451 p->quit = false; 452 p->id = i; 453 p->name = g_strdup_printf("multifdsend_%d", i); 454 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, 455 QEMU_THREAD_JOINABLE); 456 457 multifd_send_state->count++; 458 } 459 return 0; 460 } 461 462 struct MultiFDRecvParams { 463 uint8_t id; 464 char *name; 465 QemuThread thread; 466 QemuSemaphore sem; 467 QemuMutex mutex; 468 bool quit; 469 }; 470 typedef struct MultiFDRecvParams MultiFDRecvParams; 471 472 struct { 473 MultiFDRecvParams *params; 474 /* number of created threads */ 475 int count; 476 } *multifd_recv_state; 477 478 static void terminate_multifd_recv_threads(Error *errp) 479 { 480 int i; 481 482 for (i = 0; i < multifd_recv_state->count; i++) { 483 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 484 485 qemu_mutex_lock(&p->mutex); 486 p->quit = true; 487 
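        /* Wake the receive thread out of qemu_sem_wait() so that it can
         * observe p->quit and exit. */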
qemu_sem_post(&p->sem); 488 qemu_mutex_unlock(&p->mutex); 489 } 490 } 491 492 int multifd_load_cleanup(Error **errp) 493 { 494 int i; 495 int ret = 0; 496 497 if (!migrate_use_multifd()) { 498 return 0; 499 } 500 terminate_multifd_recv_threads(NULL); 501 for (i = 0; i < multifd_recv_state->count; i++) { 502 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 503 504 qemu_thread_join(&p->thread); 505 qemu_mutex_destroy(&p->mutex); 506 qemu_sem_destroy(&p->sem); 507 g_free(p->name); 508 p->name = NULL; 509 } 510 g_free(multifd_recv_state->params); 511 multifd_recv_state->params = NULL; 512 g_free(multifd_recv_state); 513 multifd_recv_state = NULL; 514 515 return ret; 516 } 517 518 static void *multifd_recv_thread(void *opaque) 519 { 520 MultiFDRecvParams *p = opaque; 521 522 while (true) { 523 qemu_mutex_lock(&p->mutex); 524 if (p->quit) { 525 qemu_mutex_unlock(&p->mutex); 526 break; 527 } 528 qemu_mutex_unlock(&p->mutex); 529 qemu_sem_wait(&p->sem); 530 } 531 532 return NULL; 533 } 534 535 int multifd_load_setup(void) 536 { 537 int thread_count; 538 uint8_t i; 539 540 if (!migrate_use_multifd()) { 541 return 0; 542 } 543 thread_count = migrate_multifd_channels(); 544 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); 545 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); 546 multifd_recv_state->count = 0; 547 for (i = 0; i < thread_count; i++) { 548 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 549 550 qemu_mutex_init(&p->mutex); 551 qemu_sem_init(&p->sem, 0); 552 p->quit = false; 553 p->id = i; 554 p->name = g_strdup_printf("multifdrecv_%d", i); 555 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, 556 QEMU_THREAD_JOINABLE); 557 multifd_recv_state->count++; 558 } 559 return 0; 560 } 561 562 /** 563 * save_page_header: write page header to wire 564 * 565 * If this is the 1st block, it also writes the block identification 566 * 567 * Returns the number of bytes written 568 * 569 * @f: QEMUFile where to send the data 570 * @block: block that contains the page we want to send 571 * @offset: offset inside the block for the page 572 * in the lower bits, it contains flags 573 */ 574 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, 575 ram_addr_t offset) 576 { 577 size_t size, len; 578 579 if (block == rs->last_sent_block) { 580 offset |= RAM_SAVE_FLAG_CONTINUE; 581 } 582 qemu_put_be64(f, offset); 583 size = 8; 584 585 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { 586 len = strlen(block->idstr); 587 qemu_put_byte(f, len); 588 qemu_put_buffer(f, (uint8_t *)block->idstr, len); 589 size += 1 + len; 590 rs->last_sent_block = block; 591 } 592 return size; 593 } 594 595 /** 596 * mig_throttle_guest_down: throotle down the guest 597 * 598 * Reduce amount of guest cpu execution to hopefully slow down memory 599 * writes. If guest dirty memory rate is reduced below the rate at 600 * which we can transfer pages to the destination then we should be 601 * able to complete migration. Some workloads dirty memory way too 602 * fast and will not effectively converge, even with auto-converge. 603 */ 604 static void mig_throttle_guest_down(void) 605 { 606 MigrationState *s = migrate_get_current(); 607 uint64_t pct_initial = s->parameters.cpu_throttle_initial; 608 uint64_t pct_icrement = s->parameters.cpu_throttle_increment; 609 610 /* We have not started throttling yet. Let's start it. 
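     * A worked illustration (20 and 10 are only the usual defaults,
     * assumed here for the example): with cpu_throttle_initial = 20 and
     * cpu_throttle_increment = 10, successive calls request 20%, 30%,
     * 40%, ... of vCPU time to be taken away until the dirty rate drops
     * far enough for migration to converge.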
*/ 611 if (!cpu_throttle_active()) { 612 cpu_throttle_set(pct_initial); 613 } else { 614 /* Throttling already on, just increase the rate */ 615 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement); 616 } 617 } 618 619 /** 620 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache 621 * 622 * @rs: current RAM state 623 * @current_addr: address for the zero page 624 * 625 * Update the xbzrle cache to reflect a page that's been sent as all 0. 626 * The important thing is that a stale (not-yet-0'd) page be replaced 627 * by the new data. 628 * As a bonus, if the page wasn't in the cache it gets added so that 629 * when a small write is made into the 0'd page it gets XBZRLE sent. 630 */ 631 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) 632 { 633 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { 634 return; 635 } 636 637 /* We don't care if this fails to allocate a new cache page 638 * as long as it updated an old one */ 639 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, 640 ram_counters.dirty_sync_count); 641 } 642 643 #define ENCODING_FLAG_XBZRLE 0x1 644 645 /** 646 * save_xbzrle_page: compress and send current page 647 * 648 * Returns: 1 means that we wrote the page 649 * 0 means that page is identical to the one already sent 650 * -1 means that xbzrle would be longer than normal 651 * 652 * @rs: current RAM state 653 * @current_data: pointer to the address of the page contents 654 * @current_addr: addr of the page 655 * @block: block that contains the page we want to send 656 * @offset: offset inside the block for the page 657 * @last_stage: if we are at the completion stage 658 */ 659 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, 660 ram_addr_t current_addr, RAMBlock *block, 661 ram_addr_t offset, bool last_stage) 662 { 663 int encoded_len = 0, bytes_xbzrle; 664 uint8_t *prev_cached_page; 665 666 if (!cache_is_cached(XBZRLE.cache, current_addr, 667 ram_counters.dirty_sync_count)) { 668 xbzrle_counters.cache_miss++; 669 if (!last_stage) { 670 if (cache_insert(XBZRLE.cache, current_addr, *current_data, 671 ram_counters.dirty_sync_count) == -1) { 672 return -1; 673 } else { 674 /* update *current_data when the page has been 675 inserted into cache */ 676 *current_data = get_cached_data(XBZRLE.cache, current_addr); 677 } 678 } 679 return -1; 680 } 681 682 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 683 684 /* save current buffer into memory */ 685 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); 686 687 /* XBZRLE encoding (if there is no overflow) */ 688 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 689 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 690 TARGET_PAGE_SIZE); 691 if (encoded_len == 0) { 692 trace_save_xbzrle_page_skipping(); 693 return 0; 694 } else if (encoded_len == -1) { 695 trace_save_xbzrle_page_overflow(); 696 xbzrle_counters.overflow++; 697 /* update data in the cache */ 698 if (!last_stage) { 699 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); 700 *current_data = prev_cached_page; 701 } 702 return -1; 703 } 704 705 /* we need to update the data in the cache, in order to get the same data */ 706 if (!last_stage) { 707 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 708 } 709 710 /* Send XBZRLE based compressed page */ 711 bytes_xbzrle = save_page_header(rs, rs->f, block, 712 offset | RAM_SAVE_FLAG_XBZRLE); 713 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); 714 qemu_put_be16(rs->f, encoded_len); 715 
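    /* Stream layout for an XBZRLE page: the usual page header (with
     * RAM_SAVE_FLAG_XBZRLE set), one ENCODING_FLAG_XBZRLE byte, a
     * big-endian 16-bit encoded length, then the encoded data written
     * just below -- hence the "+ 1 + 2" in the byte accounting. */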
qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); 716 bytes_xbzrle += encoded_len + 1 + 2; 717 xbzrle_counters.pages++; 718 xbzrle_counters.bytes += bytes_xbzrle; 719 ram_counters.transferred += bytes_xbzrle; 720 721 return 1; 722 } 723 724 /** 725 * migration_bitmap_find_dirty: find the next dirty page from start 726 * 727 * Called with rcu_read_lock() to protect migration_bitmap 728 * 729 * Returns the byte offset within memory region of the start of a dirty page 730 * 731 * @rs: current RAM state 732 * @rb: RAMBlock where to search for dirty pages 733 * @start: page where we start the search 734 */ 735 static inline 736 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, 737 unsigned long start) 738 { 739 unsigned long size = rb->used_length >> TARGET_PAGE_BITS; 740 unsigned long *bitmap = rb->bmap; 741 unsigned long next; 742 743 if (rs->ram_bulk_stage && start > 0) { 744 next = start + 1; 745 } else { 746 next = find_next_bit(bitmap, size, start); 747 } 748 749 return next; 750 } 751 752 static inline bool migration_bitmap_clear_dirty(RAMState *rs, 753 RAMBlock *rb, 754 unsigned long page) 755 { 756 bool ret; 757 758 ret = test_and_clear_bit(page, rb->bmap); 759 760 if (ret) { 761 rs->migration_dirty_pages--; 762 } 763 return ret; 764 } 765 766 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, 767 ram_addr_t start, ram_addr_t length) 768 { 769 rs->migration_dirty_pages += 770 cpu_physical_memory_sync_dirty_bitmap(rb, start, length, 771 &rs->num_dirty_pages_period); 772 } 773 774 /** 775 * ram_pagesize_summary: calculate all the pagesizes of a VM 776 * 777 * Returns a summary bitmap of the page sizes of all RAMBlocks 778 * 779 * For VMs with just normal pages this is equivalent to the host page 780 * size. If it's got some huge pages then it's the OR of all the 781 * different page sizes. 782 */ 783 uint64_t ram_pagesize_summary(void) 784 { 785 RAMBlock *block; 786 uint64_t summary = 0; 787 788 RAMBLOCK_FOREACH(block) { 789 summary |= block->page_size; 790 } 791 792 return summary; 793 } 794 795 static void migration_bitmap_sync(RAMState *rs) 796 { 797 RAMBlock *block; 798 int64_t end_time; 799 uint64_t bytes_xfer_now; 800 801 ram_counters.dirty_sync_count++; 802 803 if (!rs->time_last_bitmap_sync) { 804 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 805 } 806 807 trace_migration_bitmap_sync_start(); 808 memory_global_dirty_log_sync(); 809 810 qemu_mutex_lock(&rs->bitmap_mutex); 811 rcu_read_lock(); 812 RAMBLOCK_FOREACH(block) { 813 migration_bitmap_sync_range(rs, block, 0, block->used_length); 814 } 815 rcu_read_unlock(); 816 qemu_mutex_unlock(&rs->bitmap_mutex); 817 818 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); 819 820 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 821 822 /* more than 1 second = 1000 millisecons */ 823 if (end_time > rs->time_last_bitmap_sync + 1000) { 824 /* calculate period counters */ 825 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 826 / (end_time - rs->time_last_bitmap_sync); 827 bytes_xfer_now = ram_counters.transferred; 828 829 /* During block migration the auto-converge logic incorrectly detects 830 * that ram migration makes no progress. Avoid this by disabling the 831 * throttling logic during the bulk phase of block migration. */ 832 if (migrate_auto_converge() && !blk_mig_bulk_active()) { 833 /* The following detection logic can be refined later. For now: 834 Check to see if the dirtied bytes is 50% more than the approx. 
835 amount of bytes that just got transferred since the last time we 836 were in this routine. If that happens twice, start or increase 837 throttling */ 838 839 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE > 840 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && 841 (++rs->dirty_rate_high_cnt >= 2)) { 842 trace_migration_throttle(); 843 rs->dirty_rate_high_cnt = 0; 844 mig_throttle_guest_down(); 845 } 846 } 847 848 if (migrate_use_xbzrle()) { 849 if (rs->iterations_prev != rs->iterations) { 850 xbzrle_counters.cache_miss_rate = 851 (double)(xbzrle_counters.cache_miss - 852 rs->xbzrle_cache_miss_prev) / 853 (rs->iterations - rs->iterations_prev); 854 } 855 rs->iterations_prev = rs->iterations; 856 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; 857 } 858 859 /* reset period counters */ 860 rs->time_last_bitmap_sync = end_time; 861 rs->num_dirty_pages_period = 0; 862 rs->bytes_xfer_prev = bytes_xfer_now; 863 } 864 if (migrate_use_events()) { 865 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL); 866 } 867 } 868 869 /** 870 * save_zero_page: send the zero page to the stream 871 * 872 * Returns the number of pages written. 873 * 874 * @rs: current RAM state 875 * @block: block that contains the page we want to send 876 * @offset: offset inside the block for the page 877 * @p: pointer to the page 878 */ 879 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 880 uint8_t *p) 881 { 882 int pages = -1; 883 884 if (is_zero_range(p, TARGET_PAGE_SIZE)) { 885 ram_counters.duplicate++; 886 ram_counters.transferred += 887 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO); 888 qemu_put_byte(rs->f, 0); 889 ram_counters.transferred += 1; 890 pages = 1; 891 } 892 893 return pages; 894 } 895 896 static void ram_release_pages(const char *rbname, uint64_t offset, int pages) 897 { 898 if (!migrate_release_ram() || !migration_in_postcopy()) { 899 return; 900 } 901 902 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); 903 } 904 905 /** 906 * ram_save_page: send the given page to the stream 907 * 908 * Returns the number of pages written. 909 * < 0 - error 910 * >=0 - Number of pages written - this might legally be 0 911 * if xbzrle noticed the page was the same. 
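 * (Overview of the decision flow in the body below:
 *  ram_control_save_page() gets first refusal, e.g. for RDMA; failing
 *  that, the page is checked for being all zeroes; failing that, XBZRLE
 *  is tried when it is enabled, the bulk stage is over and we are not in
 *  postcopy; otherwise the whole page is sent as-is.)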
912 * 913 * @rs: current RAM state 914 * @block: block that contains the page we want to send 915 * @offset: offset inside the block for the page 916 * @last_stage: if we are at the completion stage 917 */ 918 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) 919 { 920 int pages = -1; 921 uint64_t bytes_xmit; 922 ram_addr_t current_addr; 923 uint8_t *p; 924 int ret; 925 bool send_async = true; 926 RAMBlock *block = pss->block; 927 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 928 929 p = block->host + offset; 930 trace_ram_save_page(block->idstr, (uint64_t)offset, p); 931 932 /* In doubt sent page as normal */ 933 bytes_xmit = 0; 934 ret = ram_control_save_page(rs->f, block->offset, 935 offset, TARGET_PAGE_SIZE, &bytes_xmit); 936 if (bytes_xmit) { 937 ram_counters.transferred += bytes_xmit; 938 pages = 1; 939 } 940 941 XBZRLE_cache_lock(); 942 943 current_addr = block->offset + offset; 944 945 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 946 if (ret != RAM_SAVE_CONTROL_DELAYED) { 947 if (bytes_xmit > 0) { 948 ram_counters.normal++; 949 } else if (bytes_xmit == 0) { 950 ram_counters.duplicate++; 951 } 952 } 953 } else { 954 pages = save_zero_page(rs, block, offset, p); 955 if (pages > 0) { 956 /* Must let xbzrle know, otherwise a previous (now 0'd) cached 957 * page would be stale 958 */ 959 xbzrle_cache_zero_page(rs, current_addr); 960 ram_release_pages(block->idstr, offset, pages); 961 } else if (!rs->ram_bulk_stage && 962 !migration_in_postcopy() && migrate_use_xbzrle()) { 963 pages = save_xbzrle_page(rs, &p, current_addr, block, 964 offset, last_stage); 965 if (!last_stage) { 966 /* Can't send this cached data async, since the cache page 967 * might get updated before it gets to the wire 968 */ 969 send_async = false; 970 } 971 } 972 } 973 974 /* XBZRLE overflow or normal page */ 975 if (pages == -1) { 976 ram_counters.transferred += 977 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE); 978 if (send_async) { 979 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE, 980 migrate_release_ram() & 981 migration_in_postcopy()); 982 } else { 983 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE); 984 } 985 ram_counters.transferred += TARGET_PAGE_SIZE; 986 pages = 1; 987 ram_counters.normal++; 988 } 989 990 XBZRLE_cache_unlock(); 991 992 return pages; 993 } 994 995 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, 996 ram_addr_t offset) 997 { 998 RAMState *rs = ram_state; 999 int bytes_sent, blen; 1000 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); 1001 1002 bytes_sent = save_page_header(rs, f, block, offset | 1003 RAM_SAVE_FLAG_COMPRESS_PAGE); 1004 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, 1005 migrate_compress_level()); 1006 if (blen < 0) { 1007 bytes_sent = 0; 1008 qemu_file_set_error(migrate_get_current()->to_dst_file, blen); 1009 error_report("compressed data failed!"); 1010 } else { 1011 bytes_sent += blen; 1012 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); 1013 } 1014 1015 return bytes_sent; 1016 } 1017 1018 static void flush_compressed_data(RAMState *rs) 1019 { 1020 int idx, len, thread_count; 1021 1022 if (!migrate_use_compression()) { 1023 return; 1024 } 1025 thread_count = migrate_compress_threads(); 1026 1027 qemu_mutex_lock(&comp_done_lock); 1028 for (idx = 0; idx < thread_count; idx++) { 1029 while (!comp_param[idx].done) { 1030 qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1031 } 1032 } 1033 qemu_mutex_unlock(&comp_done_lock); 1034 1035 for (idx = 0; idx < thread_count; idx++) { 1036 
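        /* Forward whatever each (now idle) worker left in its per-thread
         * buffer file to the migration stream; workers that are quitting
         * are skipped. */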
qemu_mutex_lock(&comp_param[idx].mutex); 1037 if (!comp_param[idx].quit) { 1038 len = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1039 ram_counters.transferred += len; 1040 } 1041 qemu_mutex_unlock(&comp_param[idx].mutex); 1042 } 1043 } 1044 1045 static inline void set_compress_params(CompressParam *param, RAMBlock *block, 1046 ram_addr_t offset) 1047 { 1048 param->block = block; 1049 param->offset = offset; 1050 } 1051 1052 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, 1053 ram_addr_t offset) 1054 { 1055 int idx, thread_count, bytes_xmit = -1, pages = -1; 1056 1057 thread_count = migrate_compress_threads(); 1058 qemu_mutex_lock(&comp_done_lock); 1059 while (true) { 1060 for (idx = 0; idx < thread_count; idx++) { 1061 if (comp_param[idx].done) { 1062 comp_param[idx].done = false; 1063 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1064 qemu_mutex_lock(&comp_param[idx].mutex); 1065 set_compress_params(&comp_param[idx], block, offset); 1066 qemu_cond_signal(&comp_param[idx].cond); 1067 qemu_mutex_unlock(&comp_param[idx].mutex); 1068 pages = 1; 1069 ram_counters.normal++; 1070 ram_counters.transferred += bytes_xmit; 1071 break; 1072 } 1073 } 1074 if (pages > 0) { 1075 break; 1076 } else { 1077 qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1078 } 1079 } 1080 qemu_mutex_unlock(&comp_done_lock); 1081 1082 return pages; 1083 } 1084 1085 /** 1086 * ram_save_compressed_page: compress the given page and send it to the stream 1087 * 1088 * Returns the number of pages written. 1089 * 1090 * @rs: current RAM state 1091 * @block: block that contains the page we want to send 1092 * @offset: offset inside the block for the page 1093 * @last_stage: if we are at the completion stage 1094 */ 1095 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss, 1096 bool last_stage) 1097 { 1098 int pages = -1; 1099 uint64_t bytes_xmit = 0; 1100 uint8_t *p; 1101 int ret, blen; 1102 RAMBlock *block = pss->block; 1103 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 1104 1105 p = block->host + offset; 1106 1107 ret = ram_control_save_page(rs->f, block->offset, 1108 offset, TARGET_PAGE_SIZE, &bytes_xmit); 1109 if (bytes_xmit) { 1110 ram_counters.transferred += bytes_xmit; 1111 pages = 1; 1112 } 1113 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { 1114 if (ret != RAM_SAVE_CONTROL_DELAYED) { 1115 if (bytes_xmit > 0) { 1116 ram_counters.normal++; 1117 } else if (bytes_xmit == 0) { 1118 ram_counters.duplicate++; 1119 } 1120 } 1121 } else { 1122 /* When starting the process of a new block, the first page of 1123 * the block should be sent out before other pages in the same 1124 * block, and all the pages in last block should have been sent 1125 * out, keeping this order is important, because the 'cont' flag 1126 * is used to avoid resending the block name. 
1127 */ 1128 if (block != rs->last_sent_block) { 1129 flush_compressed_data(rs); 1130 pages = save_zero_page(rs, block, offset, p); 1131 if (pages == -1) { 1132 /* Make sure the first page is sent out before other pages */ 1133 bytes_xmit = save_page_header(rs, rs->f, block, offset | 1134 RAM_SAVE_FLAG_COMPRESS_PAGE); 1135 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE, 1136 migrate_compress_level()); 1137 if (blen > 0) { 1138 ram_counters.transferred += bytes_xmit + blen; 1139 ram_counters.normal++; 1140 pages = 1; 1141 } else { 1142 qemu_file_set_error(rs->f, blen); 1143 error_report("compressed data failed!"); 1144 } 1145 } 1146 if (pages > 0) { 1147 ram_release_pages(block->idstr, offset, pages); 1148 } 1149 } else { 1150 pages = save_zero_page(rs, block, offset, p); 1151 if (pages == -1) { 1152 pages = compress_page_with_multi_thread(rs, block, offset); 1153 } else { 1154 ram_release_pages(block->idstr, offset, pages); 1155 } 1156 } 1157 } 1158 1159 return pages; 1160 } 1161 1162 /** 1163 * find_dirty_block: find the next dirty page and update any state 1164 * associated with the search process. 1165 * 1166 * Returns if a page is found 1167 * 1168 * @rs: current RAM state 1169 * @pss: data about the state of the current dirty page scan 1170 * @again: set to false if the search has scanned the whole of RAM 1171 */ 1172 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) 1173 { 1174 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); 1175 if (pss->complete_round && pss->block == rs->last_seen_block && 1176 pss->page >= rs->last_page) { 1177 /* 1178 * We've been once around the RAM and haven't found anything. 1179 * Give up. 1180 */ 1181 *again = false; 1182 return false; 1183 } 1184 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { 1185 /* Didn't find anything in this RAM Block */ 1186 pss->page = 0; 1187 pss->block = QLIST_NEXT_RCU(pss->block, next); 1188 if (!pss->block) { 1189 /* Hit the end of the list */ 1190 pss->block = QLIST_FIRST_RCU(&ram_list.blocks); 1191 /* Flag that we've looped */ 1192 pss->complete_round = true; 1193 rs->ram_bulk_stage = false; 1194 if (migrate_use_xbzrle()) { 1195 /* If xbzrle is on, stop using the data compression at this 1196 * point. In theory, xbzrle can do better than compression. 1197 */ 1198 flush_compressed_data(rs); 1199 } 1200 } 1201 /* Didn't find anything this time, but try again on the new block */ 1202 *again = true; 1203 return false; 1204 } else { 1205 /* Can go around again, but... 
*/ 1206 *again = true; 1207 /* We've found something so probably don't need to */ 1208 return true; 1209 } 1210 } 1211 1212 /** 1213 * unqueue_page: gets a page of the queue 1214 * 1215 * Helper for 'get_queued_page' - gets a page off the queue 1216 * 1217 * Returns the block of the page (or NULL if none available) 1218 * 1219 * @rs: current RAM state 1220 * @offset: used to return the offset within the RAMBlock 1221 */ 1222 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) 1223 { 1224 RAMBlock *block = NULL; 1225 1226 qemu_mutex_lock(&rs->src_page_req_mutex); 1227 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) { 1228 struct RAMSrcPageRequest *entry = 1229 QSIMPLEQ_FIRST(&rs->src_page_requests); 1230 block = entry->rb; 1231 *offset = entry->offset; 1232 1233 if (entry->len > TARGET_PAGE_SIZE) { 1234 entry->len -= TARGET_PAGE_SIZE; 1235 entry->offset += TARGET_PAGE_SIZE; 1236 } else { 1237 memory_region_unref(block->mr); 1238 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1239 g_free(entry); 1240 } 1241 } 1242 qemu_mutex_unlock(&rs->src_page_req_mutex); 1243 1244 return block; 1245 } 1246 1247 /** 1248 * get_queued_page: unqueue a page from the postocpy requests 1249 * 1250 * Skips pages that are already sent (!dirty) 1251 * 1252 * Returns if a queued page is found 1253 * 1254 * @rs: current RAM state 1255 * @pss: data about the state of the current dirty page scan 1256 */ 1257 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) 1258 { 1259 RAMBlock *block; 1260 ram_addr_t offset; 1261 bool dirty; 1262 1263 do { 1264 block = unqueue_page(rs, &offset); 1265 /* 1266 * We're sending this page, and since it's postcopy nothing else 1267 * will dirty it, and we must make sure it doesn't get sent again 1268 * even if this queue request was received after the background 1269 * search already sent it. 1270 */ 1271 if (block) { 1272 unsigned long page; 1273 1274 page = offset >> TARGET_PAGE_BITS; 1275 dirty = test_bit(page, block->bmap); 1276 if (!dirty) { 1277 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, 1278 page, test_bit(page, block->unsentmap)); 1279 } else { 1280 trace_get_queued_page(block->idstr, (uint64_t)offset, page); 1281 } 1282 } 1283 1284 } while (block && !dirty); 1285 1286 if (block) { 1287 /* 1288 * As soon as we start servicing pages out of order, then we have 1289 * to kill the bulk stage, since the bulk stage assumes 1290 * in (migration_bitmap_find_and_reset_dirty) that every page is 1291 * dirty, that's no longer true. 1292 */ 1293 rs->ram_bulk_stage = false; 1294 1295 /* 1296 * We want the background search to continue from the queued page 1297 * since the guest is likely to want other pages near to the page 1298 * it just requested. 1299 */ 1300 pss->block = block; 1301 pss->page = offset >> TARGET_PAGE_BITS; 1302 } 1303 1304 return !!block; 1305 } 1306 1307 /** 1308 * migration_page_queue_free: drop any remaining pages in the ram 1309 * request queue 1310 * 1311 * It should be empty at the end anyway, but in error cases there may 1312 * be some left. in case that there is any page left, we drop it. 1313 * 1314 */ 1315 static void migration_page_queue_free(RAMState *rs) 1316 { 1317 struct RAMSrcPageRequest *mspr, *next_mspr; 1318 /* This queue generally should be empty - but in the case of a failed 1319 * migration might have some droppings in. 
1320 */ 1321 rcu_read_lock(); 1322 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { 1323 memory_region_unref(mspr->rb->mr); 1324 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1325 g_free(mspr); 1326 } 1327 rcu_read_unlock(); 1328 } 1329 1330 /** 1331 * ram_save_queue_pages: queue the page for transmission 1332 * 1333 * A request from postcopy destination for example. 1334 * 1335 * Returns zero on success or negative on error 1336 * 1337 * @rbname: Name of the RAMBLock of the request. NULL means the 1338 * same that last one. 1339 * @start: starting address from the start of the RAMBlock 1340 * @len: length (in bytes) to send 1341 */ 1342 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) 1343 { 1344 RAMBlock *ramblock; 1345 RAMState *rs = ram_state; 1346 1347 ram_counters.postcopy_requests++; 1348 rcu_read_lock(); 1349 if (!rbname) { 1350 /* Reuse last RAMBlock */ 1351 ramblock = rs->last_req_rb; 1352 1353 if (!ramblock) { 1354 /* 1355 * Shouldn't happen, we can't reuse the last RAMBlock if 1356 * it's the 1st request. 1357 */ 1358 error_report("ram_save_queue_pages no previous block"); 1359 goto err; 1360 } 1361 } else { 1362 ramblock = qemu_ram_block_by_name(rbname); 1363 1364 if (!ramblock) { 1365 /* We shouldn't be asked for a non-existent RAMBlock */ 1366 error_report("ram_save_queue_pages no block '%s'", rbname); 1367 goto err; 1368 } 1369 rs->last_req_rb = ramblock; 1370 } 1371 trace_ram_save_queue_pages(ramblock->idstr, start, len); 1372 if (start+len > ramblock->used_length) { 1373 error_report("%s request overrun start=" RAM_ADDR_FMT " len=" 1374 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, 1375 __func__, start, len, ramblock->used_length); 1376 goto err; 1377 } 1378 1379 struct RAMSrcPageRequest *new_entry = 1380 g_malloc0(sizeof(struct RAMSrcPageRequest)); 1381 new_entry->rb = ramblock; 1382 new_entry->offset = start; 1383 new_entry->len = len; 1384 1385 memory_region_ref(ramblock->mr); 1386 qemu_mutex_lock(&rs->src_page_req_mutex); 1387 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); 1388 qemu_mutex_unlock(&rs->src_page_req_mutex); 1389 rcu_read_unlock(); 1390 1391 return 0; 1392 1393 err: 1394 rcu_read_unlock(); 1395 return -1; 1396 } 1397 1398 /** 1399 * ram_save_target_page: save one target page 1400 * 1401 * Returns the number of pages written 1402 * 1403 * @rs: current RAM state 1404 * @ms: current migration state 1405 * @pss: data about the page we want to send 1406 * @last_stage: if we are at the completion stage 1407 */ 1408 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, 1409 bool last_stage) 1410 { 1411 int res = 0; 1412 1413 /* Check the pages is dirty and if it is send it */ 1414 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { 1415 /* 1416 * If xbzrle is on, stop using the data compression after first 1417 * round of migration even if compression is enabled. In theory, 1418 * xbzrle can do better than compression. 
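         * Concretely, given the condition just below: compressed pages
         * are only produced while ram_bulk_stage is still set, or for the
         * whole migration when XBZRLE is disabled; once the first round
         * completes with XBZRLE enabled, pages go through ram_save_page()
         * instead.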
1419 */ 1420 if (migrate_use_compression() && 1421 (rs->ram_bulk_stage || !migrate_use_xbzrle())) { 1422 res = ram_save_compressed_page(rs, pss, last_stage); 1423 } else { 1424 res = ram_save_page(rs, pss, last_stage); 1425 } 1426 1427 if (res < 0) { 1428 return res; 1429 } 1430 if (pss->block->unsentmap) { 1431 clear_bit(pss->page, pss->block->unsentmap); 1432 } 1433 } 1434 1435 return res; 1436 } 1437 1438 /** 1439 * ram_save_host_page: save a whole host page 1440 * 1441 * Starting at *offset send pages up to the end of the current host 1442 * page. It's valid for the initial offset to point into the middle of 1443 * a host page in which case the remainder of the hostpage is sent. 1444 * Only dirty target pages are sent. Note that the host page size may 1445 * be a huge page for this block. 1446 * The saving stops at the boundary of the used_length of the block 1447 * if the RAMBlock isn't a multiple of the host page size. 1448 * 1449 * Returns the number of pages written or negative on error 1450 * 1451 * @rs: current RAM state 1452 * @ms: current migration state 1453 * @pss: data about the page we want to send 1454 * @last_stage: if we are at the completion stage 1455 */ 1456 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, 1457 bool last_stage) 1458 { 1459 int tmppages, pages = 0; 1460 size_t pagesize_bits = 1461 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; 1462 1463 do { 1464 tmppages = ram_save_target_page(rs, pss, last_stage); 1465 if (tmppages < 0) { 1466 return tmppages; 1467 } 1468 1469 pages += tmppages; 1470 pss->page++; 1471 } while ((pss->page & (pagesize_bits - 1)) && 1472 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); 1473 1474 /* The offset we leave with is the last one we looked at */ 1475 pss->page--; 1476 return pages; 1477 } 1478 1479 /** 1480 * ram_find_and_save_block: finds a dirty page and sends it to f 1481 * 1482 * Called within an RCU critical section. 1483 * 1484 * Returns the number of pages written where zero means no dirty pages 1485 * 1486 * @rs: current RAM state 1487 * @last_stage: if we are at the completion stage 1488 * 1489 * On systems where host-page-size > target-page-size it will send all the 1490 * pages in a host page that are dirty. 
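 * (For example, on a RAMBlock backed by 2 MiB huge pages with 4 KiB
 * target pages -- sizes assumed purely for illustration -- one call can
 * send up to 512 target pages via ram_save_host_page().)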
1491 */ 1492 1493 static int ram_find_and_save_block(RAMState *rs, bool last_stage) 1494 { 1495 PageSearchStatus pss; 1496 int pages = 0; 1497 bool again, found; 1498 1499 /* No dirty page as there is zero RAM */ 1500 if (!ram_bytes_total()) { 1501 return pages; 1502 } 1503 1504 pss.block = rs->last_seen_block; 1505 pss.page = rs->last_page; 1506 pss.complete_round = false; 1507 1508 if (!pss.block) { 1509 pss.block = QLIST_FIRST_RCU(&ram_list.blocks); 1510 } 1511 1512 do { 1513 again = true; 1514 found = get_queued_page(rs, &pss); 1515 1516 if (!found) { 1517 /* priority queue empty, so just search for something dirty */ 1518 found = find_dirty_block(rs, &pss, &again); 1519 } 1520 1521 if (found) { 1522 pages = ram_save_host_page(rs, &pss, last_stage); 1523 } 1524 } while (!pages && again); 1525 1526 rs->last_seen_block = pss.block; 1527 rs->last_page = pss.page; 1528 1529 return pages; 1530 } 1531 1532 void acct_update_position(QEMUFile *f, size_t size, bool zero) 1533 { 1534 uint64_t pages = size / TARGET_PAGE_SIZE; 1535 1536 if (zero) { 1537 ram_counters.duplicate += pages; 1538 } else { 1539 ram_counters.normal += pages; 1540 ram_counters.transferred += size; 1541 qemu_update_position(f, size); 1542 } 1543 } 1544 1545 uint64_t ram_bytes_total(void) 1546 { 1547 RAMBlock *block; 1548 uint64_t total = 0; 1549 1550 rcu_read_lock(); 1551 RAMBLOCK_FOREACH(block) { 1552 total += block->used_length; 1553 } 1554 rcu_read_unlock(); 1555 return total; 1556 } 1557 1558 static void xbzrle_load_setup(void) 1559 { 1560 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 1561 } 1562 1563 static void xbzrle_load_cleanup(void) 1564 { 1565 g_free(XBZRLE.decoded_buf); 1566 XBZRLE.decoded_buf = NULL; 1567 } 1568 1569 static void ram_save_cleanup(void *opaque) 1570 { 1571 RAMState **rsp = opaque; 1572 RAMBlock *block; 1573 1574 /* caller have hold iothread lock or is in a bh, so there is 1575 * no writing race against this migration_bitmap 1576 */ 1577 memory_global_dirty_log_stop(); 1578 1579 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 1580 g_free(block->bmap); 1581 block->bmap = NULL; 1582 g_free(block->unsentmap); 1583 block->unsentmap = NULL; 1584 } 1585 1586 XBZRLE_cache_lock(); 1587 if (XBZRLE.cache) { 1588 cache_fini(XBZRLE.cache); 1589 g_free(XBZRLE.encoded_buf); 1590 g_free(XBZRLE.current_buf); 1591 g_free(XBZRLE.zero_target_page); 1592 XBZRLE.cache = NULL; 1593 XBZRLE.encoded_buf = NULL; 1594 XBZRLE.current_buf = NULL; 1595 XBZRLE.zero_target_page = NULL; 1596 } 1597 XBZRLE_cache_unlock(); 1598 migration_page_queue_free(*rsp); 1599 compress_threads_save_cleanup(); 1600 g_free(*rsp); 1601 *rsp = NULL; 1602 } 1603 1604 static void ram_state_reset(RAMState *rs) 1605 { 1606 rs->last_seen_block = NULL; 1607 rs->last_sent_block = NULL; 1608 rs->last_page = 0; 1609 rs->last_version = ram_list.version; 1610 rs->ram_bulk_stage = true; 1611 } 1612 1613 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 1614 1615 /* 1616 * 'expected' is the value you expect the bitmap mostly to be full 1617 * of; it won't bother printing lines that are all this value. 1618 * If 'todump' is null the migration bitmap is dumped. 
1619 */ 1620 void ram_debug_dump_bitmap(unsigned long *todump, bool expected, 1621 unsigned long pages) 1622 { 1623 int64_t cur; 1624 int64_t linelen = 128; 1625 char linebuf[129]; 1626 1627 for (cur = 0; cur < pages; cur += linelen) { 1628 int64_t curb; 1629 bool found = false; 1630 /* 1631 * Last line; catch the case where the line length 1632 * is longer than remaining ram 1633 */ 1634 if (cur + linelen > pages) { 1635 linelen = pages - cur; 1636 } 1637 for (curb = 0; curb < linelen; curb++) { 1638 bool thisbit = test_bit(cur + curb, todump); 1639 linebuf[curb] = thisbit ? '1' : '.'; 1640 found = found || (thisbit != expected); 1641 } 1642 if (found) { 1643 linebuf[curb] = '\0'; 1644 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); 1645 } 1646 } 1647 } 1648 1649 /* **** functions for postcopy ***** */ 1650 1651 void ram_postcopy_migrated_memory_release(MigrationState *ms) 1652 { 1653 struct RAMBlock *block; 1654 1655 RAMBLOCK_FOREACH(block) { 1656 unsigned long *bitmap = block->bmap; 1657 unsigned long range = block->used_length >> TARGET_PAGE_BITS; 1658 unsigned long run_start = find_next_zero_bit(bitmap, range, 0); 1659 1660 while (run_start < range) { 1661 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); 1662 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, 1663 (run_end - run_start) << TARGET_PAGE_BITS); 1664 run_start = find_next_zero_bit(bitmap, range, run_end + 1); 1665 } 1666 } 1667 } 1668 1669 /** 1670 * postcopy_send_discard_bm_ram: discard a RAMBlock 1671 * 1672 * Returns zero on success 1673 * 1674 * Callback from postcopy_each_ram_send_discard for each RAMBlock 1675 * Note: At this point the 'unsentmap' is the processed bitmap combined 1676 * with the dirtymap; so a '1' means it's either dirty or unsent. 1677 * 1678 * @ms: current migration state 1679 * @pds: state for postcopy 1680 * @start: RAMBlock starting page 1681 * @length: RAMBlock size 1682 */ 1683 static int postcopy_send_discard_bm_ram(MigrationState *ms, 1684 PostcopyDiscardState *pds, 1685 RAMBlock *block) 1686 { 1687 unsigned long end = block->used_length >> TARGET_PAGE_BITS; 1688 unsigned long current; 1689 unsigned long *unsentmap = block->unsentmap; 1690 1691 for (current = 0; current < end; ) { 1692 unsigned long one = find_next_bit(unsentmap, end, current); 1693 1694 if (one <= end) { 1695 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); 1696 unsigned long discard_length; 1697 1698 if (zero >= end) { 1699 discard_length = end - one; 1700 } else { 1701 discard_length = zero - one; 1702 } 1703 if (discard_length) { 1704 postcopy_discard_send_range(ms, pds, one, discard_length); 1705 } 1706 current = one + discard_length; 1707 } else { 1708 current = one; 1709 } 1710 } 1711 1712 return 0; 1713 } 1714 1715 /** 1716 * postcopy_each_ram_send_discard: discard all RAMBlocks 1717 * 1718 * Returns 0 for success or negative for error 1719 * 1720 * Utility for the outgoing postcopy code. 1721 * Calls postcopy_send_discard_bm_ram for each RAMBlock 1722 * passing it bitmap indexes and name. 
1723 * (qemu_ram_foreach_block ends up passing unscaled lengths 1724 * which would mean postcopy code would have to deal with target page) 1725 * 1726 * @ms: current migration state 1727 */ 1728 static int postcopy_each_ram_send_discard(MigrationState *ms) 1729 { 1730 struct RAMBlock *block; 1731 int ret; 1732 1733 RAMBLOCK_FOREACH(block) { 1734 PostcopyDiscardState *pds = 1735 postcopy_discard_send_init(ms, block->idstr); 1736 1737 /* 1738 * Postcopy sends chunks of bitmap over the wire, but it 1739 * just needs indexes at this point, avoids it having 1740 * target page specific code. 1741 */ 1742 ret = postcopy_send_discard_bm_ram(ms, pds, block); 1743 postcopy_discard_send_finish(ms, pds); 1744 if (ret) { 1745 return ret; 1746 } 1747 } 1748 1749 return 0; 1750 } 1751 1752 /** 1753 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages 1754 * 1755 * Helper for postcopy_chunk_hostpages; it's called twice to 1756 * canonicalize the two bitmaps, that are similar, but one is 1757 * inverted. 1758 * 1759 * Postcopy requires that all target pages in a hostpage are dirty or 1760 * clean, not a mix. This function canonicalizes the bitmaps. 1761 * 1762 * @ms: current migration state 1763 * @unsent_pass: if true we need to canonicalize partially unsent host pages 1764 * otherwise we need to canonicalize partially dirty host pages 1765 * @block: block that contains the page we want to canonicalize 1766 * @pds: state for postcopy 1767 */ 1768 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, 1769 RAMBlock *block, 1770 PostcopyDiscardState *pds) 1771 { 1772 RAMState *rs = ram_state; 1773 unsigned long *bitmap = block->bmap; 1774 unsigned long *unsentmap = block->unsentmap; 1775 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; 1776 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 1777 unsigned long run_start; 1778 1779 if (block->page_size == TARGET_PAGE_SIZE) { 1780 /* Easy case - TPS==HPS for a non-huge page RAMBlock */ 1781 return; 1782 } 1783 1784 if (unsent_pass) { 1785 /* Find a sent page */ 1786 run_start = find_next_zero_bit(unsentmap, pages, 0); 1787 } else { 1788 /* Find a dirty page */ 1789 run_start = find_next_bit(bitmap, pages, 0); 1790 } 1791 1792 while (run_start < pages) { 1793 bool do_fixup = false; 1794 unsigned long fixup_start_addr; 1795 unsigned long host_offset; 1796 1797 /* 1798 * If the start of this run of pages is in the middle of a host 1799 * page, then we need to fixup this host page. 1800 */ 1801 host_offset = run_start % host_ratio; 1802 if (host_offset) { 1803 do_fixup = true; 1804 run_start -= host_offset; 1805 fixup_start_addr = run_start; 1806 /* For the next pass */ 1807 run_start = run_start + host_ratio; 1808 } else { 1809 /* Find the end of this run */ 1810 unsigned long run_end; 1811 if (unsent_pass) { 1812 run_end = find_next_bit(unsentmap, pages, run_start + 1); 1813 } else { 1814 run_end = find_next_zero_bit(bitmap, pages, run_start + 1); 1815 } 1816 /* 1817 * If the end isn't at the start of a host page, then the 1818 * run doesn't finish at the end of a host page 1819 * and we need to discard. 
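             * Worked example (2 MiB host pages and 4 KiB target pages are
             * assumed, i.e. host_ratio == 512): a run ending at target
             * page 1000 gives host_offset = 1000 % 512 = 488, so
             * fixup_start_addr becomes 512 and the whole host page
             * [512, 1024) is handled as a single unit below.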
1820 */ 1821 host_offset = run_end % host_ratio; 1822 if (host_offset) { 1823 do_fixup = true; 1824 fixup_start_addr = run_end - host_offset; 1825 /* 1826 * This host page has gone, the next loop iteration starts 1827 * from after the fixup 1828 */ 1829 run_start = fixup_start_addr + host_ratio; 1830 } else { 1831 /* 1832 * No discards on this iteration, next loop starts from 1833 * next sent/dirty page 1834 */ 1835 run_start = run_end + 1; 1836 } 1837 } 1838 1839 if (do_fixup) { 1840 unsigned long page; 1841 1842 /* Tell the destination to discard this page */ 1843 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { 1844 /* For the unsent_pass we: 1845 * discard partially sent pages 1846 * For the !unsent_pass (dirty) we: 1847 * discard partially dirty pages that were sent 1848 * (any partially sent pages were already discarded 1849 * by the previous unsent_pass) 1850 */ 1851 postcopy_discard_send_range(ms, pds, fixup_start_addr, 1852 host_ratio); 1853 } 1854 1855 /* Clean up the bitmap */ 1856 for (page = fixup_start_addr; 1857 page < fixup_start_addr + host_ratio; page++) { 1858 /* All pages in this host page are now not sent */ 1859 set_bit(page, unsentmap); 1860 1861 /* 1862 * Remark them as dirty, updating the count for any pages 1863 * that weren't previously dirty. 1864 */ 1865 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap); 1866 } 1867 } 1868 1869 if (unsent_pass) { 1870 /* Find the next sent page for the next iteration */ 1871 run_start = find_next_zero_bit(unsentmap, pages, run_start); 1872 } else { 1873 /* Find the next dirty page for the next iteration */ 1874 run_start = find_next_bit(bitmap, pages, run_start); 1875 } 1876 } 1877 } 1878 1879 /** 1880 * postcopy_chuck_hostpages: discrad any partially sent host page 1881 * 1882 * Utility for the outgoing postcopy code. 1883 * 1884 * Discard any partially sent host-page size chunks, mark any partially 1885 * dirty host-page size chunks as all dirty. In this case the host-page 1886 * is the host-page for the particular RAMBlock, i.e. it might be a huge page 1887 * 1888 * Returns zero on success 1889 * 1890 * @ms: current migration state 1891 * @block: block we want to work with 1892 */ 1893 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) 1894 { 1895 PostcopyDiscardState *pds = 1896 postcopy_discard_send_init(ms, block->idstr); 1897 1898 /* First pass: Discard all partially sent host pages */ 1899 postcopy_chunk_hostpages_pass(ms, true, block, pds); 1900 /* 1901 * Second pass: Ensure that all partially dirty host pages are made 1902 * fully dirty. 
1903 */ 1904 postcopy_chunk_hostpages_pass(ms, false, block, pds); 1905 1906 postcopy_discard_send_finish(ms, pds); 1907 return 0; 1908 } 1909 1910 /** 1911 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap 1912 * 1913 * Returns zero on success 1914 * 1915 * Transmit the set of pages to be discarded after precopy to the target 1916 * these are pages that: 1917 * a) Have been previously transmitted but are now dirty again 1918 * b) Pages that have never been transmitted, this ensures that 1919 * any pages on the destination that have been mapped by background 1920 * tasks get discarded (transparent huge pages is the specific concern) 1921 * Hopefully this is pretty sparse 1922 * 1923 * @ms: current migration state 1924 */ 1925 int ram_postcopy_send_discard_bitmap(MigrationState *ms) 1926 { 1927 RAMState *rs = ram_state; 1928 RAMBlock *block; 1929 int ret; 1930 1931 rcu_read_lock(); 1932 1933 /* This should be our last sync, the src is now paused */ 1934 migration_bitmap_sync(rs); 1935 1936 /* Easiest way to make sure we don't resume in the middle of a host-page */ 1937 rs->last_seen_block = NULL; 1938 rs->last_sent_block = NULL; 1939 rs->last_page = 0; 1940 1941 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 1942 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 1943 unsigned long *bitmap = block->bmap; 1944 unsigned long *unsentmap = block->unsentmap; 1945 1946 if (!unsentmap) { 1947 /* We don't have a safe way to resize the sentmap, so 1948 * if the bitmap was resized it will be NULL at this 1949 * point. 1950 */ 1951 error_report("migration ram resized during precopy phase"); 1952 rcu_read_unlock(); 1953 return -EINVAL; 1954 } 1955 /* Deal with TPS != HPS and huge pages */ 1956 ret = postcopy_chunk_hostpages(ms, block); 1957 if (ret) { 1958 rcu_read_unlock(); 1959 return ret; 1960 } 1961 1962 /* 1963 * Update the unsentmap to be unsentmap = unsentmap | dirty 1964 */ 1965 bitmap_or(unsentmap, unsentmap, bitmap, pages); 1966 #ifdef DEBUG_POSTCOPY 1967 ram_debug_dump_bitmap(unsentmap, true, pages); 1968 #endif 1969 } 1970 trace_ram_postcopy_send_discard_bitmap(); 1971 1972 ret = postcopy_each_ram_send_discard(ms); 1973 rcu_read_unlock(); 1974 1975 return ret; 1976 } 1977 1978 /** 1979 * ram_discard_range: discard dirtied pages at the beginning of postcopy 1980 * 1981 * Returns zero on success 1982 * 1983 * @rbname: name of the RAMBlock of the request. NULL means the 1984 * same that last one. 
1985 * @start: RAMBlock starting page 1986 * @length: RAMBlock size 1987 */ 1988 int ram_discard_range(const char *rbname, uint64_t start, size_t length) 1989 { 1990 int ret = -1; 1991 1992 trace_ram_discard_range(rbname, start, length); 1993 1994 rcu_read_lock(); 1995 RAMBlock *rb = qemu_ram_block_by_name(rbname); 1996 1997 if (!rb) { 1998 error_report("ram_discard_range: Failed to find block '%s'", rbname); 1999 goto err; 2000 } 2001 2002 ret = ram_block_discard_range(rb, start, length); 2003 2004 err: 2005 rcu_read_unlock(); 2006 2007 return ret; 2008 } 2009 2010 static int ram_state_init(RAMState **rsp) 2011 { 2012 *rsp = g_new0(RAMState, 1); 2013 2014 qemu_mutex_init(&(*rsp)->bitmap_mutex); 2015 qemu_mutex_init(&(*rsp)->src_page_req_mutex); 2016 QSIMPLEQ_INIT(&(*rsp)->src_page_requests); 2017 2018 if (migrate_use_xbzrle()) { 2019 XBZRLE_cache_lock(); 2020 XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE); 2021 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / 2022 TARGET_PAGE_SIZE, 2023 TARGET_PAGE_SIZE); 2024 if (!XBZRLE.cache) { 2025 XBZRLE_cache_unlock(); 2026 error_report("Error creating cache"); 2027 g_free(*rsp); 2028 *rsp = NULL; 2029 return -1; 2030 } 2031 XBZRLE_cache_unlock(); 2032 2033 /* We prefer not to abort if there is no memory */ 2034 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); 2035 if (!XBZRLE.encoded_buf) { 2036 error_report("Error allocating encoded_buf"); 2037 g_free(*rsp); 2038 *rsp = NULL; 2039 return -1; 2040 } 2041 2042 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); 2043 if (!XBZRLE.current_buf) { 2044 error_report("Error allocating current_buf"); 2045 g_free(XBZRLE.encoded_buf); 2046 XBZRLE.encoded_buf = NULL; 2047 g_free(*rsp); 2048 *rsp = NULL; 2049 return -1; 2050 } 2051 } 2052 2053 /* For memory_global_dirty_log_start below. */ 2054 qemu_mutex_lock_iothread(); 2055 2056 qemu_mutex_lock_ramlist(); 2057 rcu_read_lock(); 2058 ram_state_reset(*rsp); 2059 2060 /* Skip setting bitmap if there is no RAM */ 2061 if (ram_bytes_total()) { 2062 RAMBlock *block; 2063 2064 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 2065 unsigned long pages = block->max_length >> TARGET_PAGE_BITS; 2066 2067 block->bmap = bitmap_new(pages); 2068 bitmap_set(block->bmap, 0, pages); 2069 if (migrate_postcopy_ram()) { 2070 block->unsentmap = bitmap_new(pages); 2071 bitmap_set(block->unsentmap, 0, pages); 2072 } 2073 } 2074 } 2075 2076 /* 2077 * Count the total number of pages used by ram blocks not including any 2078 * gaps due to alignment or unplugs. 2079 */ 2080 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; 2081 2082 memory_global_dirty_log_start(); 2083 migration_bitmap_sync(*rsp); 2084 qemu_mutex_unlock_ramlist(); 2085 qemu_mutex_unlock_iothread(); 2086 rcu_read_unlock(); 2087 2088 return 0; 2089 } 2090 2091 /* 2092 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has 2093 * long-running RCU critical section. When rcu-reclaims in the code 2094 * start to become numerous it will be necessary to reduce the 2095 * granularity of these critical sections. 2096 */ 2097 2098 /** 2099 * ram_save_setup: Setup RAM for migration 2100 * 2101 * Returns zero to indicate success and negative for error 2102 * 2103 * @f: QEMUFile where to send the data 2104 * @opaque: RAMState pointer 2105 */ 2106 static int ram_save_setup(QEMUFile *f, void *opaque) 2107 { 2108 RAMState **rsp = opaque; 2109 RAMBlock *block; 2110 2111 /* migration has already setup the bitmap, reuse it. 
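 * (When we are already in COLO state the RAMState and the dirty bitmap set up by the earlier pass are still live, so ram_state_init() is skipped and they are simply reused.)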
*/ 2112 if (!migration_in_colo_state()) { 2113 if (ram_state_init(rsp) != 0) { 2114 return -1; 2115 } 2116 } 2117 (*rsp)->f = f; 2118 2119 rcu_read_lock(); 2120 2121 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 2122 2123 RAMBLOCK_FOREACH(block) { 2124 qemu_put_byte(f, strlen(block->idstr)); 2125 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 2126 qemu_put_be64(f, block->used_length); 2127 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { 2128 qemu_put_be64(f, block->page_size); 2129 } 2130 } 2131 2132 rcu_read_unlock(); 2133 compress_threads_save_setup(); 2134 2135 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 2136 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 2137 2138 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2139 2140 return 0; 2141 } 2142 2143 /** 2144 * ram_save_iterate: iterative stage for migration 2145 * 2146 * Returns zero to indicate success and negative for error 2147 * 2148 * @f: QEMUFile where to send the data 2149 * @opaque: RAMState pointer 2150 */ 2151 static int ram_save_iterate(QEMUFile *f, void *opaque) 2152 { 2153 RAMState **temp = opaque; 2154 RAMState *rs = *temp; 2155 int ret; 2156 int i; 2157 int64_t t0; 2158 int done = 0; 2159 2160 rcu_read_lock(); 2161 if (ram_list.version != rs->last_version) { 2162 ram_state_reset(rs); 2163 } 2164 2165 /* Read version before ram_list.blocks */ 2166 smp_rmb(); 2167 2168 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 2169 2170 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2171 i = 0; 2172 while ((ret = qemu_file_rate_limit(f)) == 0) { 2173 int pages; 2174 2175 pages = ram_find_and_save_block(rs, false); 2176 /* no more pages to send */ 2177 if (pages == 0) { 2178 done = 1; 2179 break; 2180 } 2181 rs->iterations++; 2182 2183 /* We want to check in the 1st loop, just in case it was the 1st time 2184 and we had to sync the dirty bitmap. 2185 qemu_clock_get_ns() is a bit expensive, so we only check every few 2186 iterations. 2187 */ 2188 if ((i & 63) == 0) { 2189 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 2190 if (t1 > MAX_WAIT) { 2191 trace_ram_save_iterate_big_wait(t1, i); 2192 break; 2193 } 2194 } 2195 i++; 2196 } 2197 flush_compressed_data(rs); 2198 rcu_read_unlock(); 2199 2200 /* 2201 * Must occur before EOS (or any QEMUFile operation) 2202 * because of RDMA protocol.
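 * (The RAM_SAVE_FLAG_EOS marker written just below is a single be64 word on the wire, which is why ram_counters.transferred is incremented by 8 right after it.)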
2203 */ 2204 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 2205 2206 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2207 ram_counters.transferred += 8; 2208 2209 ret = qemu_file_get_error(f); 2210 if (ret < 0) { 2211 return ret; 2212 } 2213 2214 return done; 2215 } 2216 2217 /** 2218 * ram_save_complete: function called to send the remaining amount of ram 2219 * 2220 * Returns zero to indicate success 2221 * 2222 * Called with iothread lock 2223 * 2224 * @f: QEMUFile where to send the data 2225 * @opaque: RAMState pointer 2226 */ 2227 static int ram_save_complete(QEMUFile *f, void *opaque) 2228 { 2229 RAMState **temp = opaque; 2230 RAMState *rs = *temp; 2231 2232 rcu_read_lock(); 2233 2234 if (!migration_in_postcopy()) { 2235 migration_bitmap_sync(rs); 2236 } 2237 2238 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 2239 2240 /* try transferring iterative blocks of memory */ 2241 2242 /* flush all remaining blocks regardless of rate limiting */ 2243 while (true) { 2244 int pages; 2245 2246 pages = ram_find_and_save_block(rs, !migration_in_colo_state()); 2247 /* no more blocks to sent */ 2248 if (pages == 0) { 2249 break; 2250 } 2251 } 2252 2253 flush_compressed_data(rs); 2254 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 2255 2256 rcu_read_unlock(); 2257 2258 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2259 2260 return 0; 2261 } 2262 2263 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, 2264 uint64_t *non_postcopiable_pending, 2265 uint64_t *postcopiable_pending) 2266 { 2267 RAMState **temp = opaque; 2268 RAMState *rs = *temp; 2269 uint64_t remaining_size; 2270 2271 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2272 2273 if (!migration_in_postcopy() && 2274 remaining_size < max_size) { 2275 qemu_mutex_lock_iothread(); 2276 rcu_read_lock(); 2277 migration_bitmap_sync(rs); 2278 rcu_read_unlock(); 2279 qemu_mutex_unlock_iothread(); 2280 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2281 } 2282 2283 if (migrate_postcopy_ram()) { 2284 /* We can do postcopy, and all the data is postcopiable */ 2285 *postcopiable_pending += remaining_size; 2286 } else { 2287 *non_postcopiable_pending += remaining_size; 2288 } 2289 } 2290 2291 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 2292 { 2293 unsigned int xh_len; 2294 int xh_flags; 2295 uint8_t *loaded_data; 2296 2297 /* extract RLE header */ 2298 xh_flags = qemu_get_byte(f); 2299 xh_len = qemu_get_be16(f); 2300 2301 if (xh_flags != ENCODING_FLAG_XBZRLE) { 2302 error_report("Failed to load XBZRLE page - wrong compression!"); 2303 return -1; 2304 } 2305 2306 if (xh_len > TARGET_PAGE_SIZE) { 2307 error_report("Failed to load XBZRLE page - len overflow!"); 2308 return -1; 2309 } 2310 loaded_data = XBZRLE.decoded_buf; 2311 /* load data and decode */ 2312 /* it can change loaded_data to point to an internal buffer */ 2313 qemu_get_buffer_in_place(f, &loaded_data, xh_len); 2314 2315 /* decode RLE */ 2316 if (xbzrle_decode_buffer(loaded_data, xh_len, host, 2317 TARGET_PAGE_SIZE) == -1) { 2318 error_report("Failed to load XBZRLE page - decode error!"); 2319 return -1; 2320 } 2321 2322 return 0; 2323 } 2324 2325 /** 2326 * ram_block_from_stream: read a RAMBlock id from the migration stream 2327 * 2328 * Must be called from within a rcu critical section. 2329 * 2330 * Returns a pointer from within the RCU-protected ram_list. 
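 * When RAM_SAVE_FLAG_CONTINUE is set the record carries no block id and the block cached from the previous record is reused; otherwise the id is read as a one-byte length followed by the idstr bytes, mirroring how the sender emits it (see for instance ram_save_setup() above): qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));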
2331 * 2332 * @f: QEMUFile where to read the data from 2333 * @flags: Page flags (mostly to see if it's a continuation of previous block) 2334 */ 2335 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) 2336 { 2337 static RAMBlock *block = NULL; 2338 char id[256]; 2339 uint8_t len; 2340 2341 if (flags & RAM_SAVE_FLAG_CONTINUE) { 2342 if (!block) { 2343 error_report("Ack, bad migration stream!"); 2344 return NULL; 2345 } 2346 return block; 2347 } 2348 2349 len = qemu_get_byte(f); 2350 qemu_get_buffer(f, (uint8_t *)id, len); 2351 id[len] = 0; 2352 2353 block = qemu_ram_block_by_name(id); 2354 if (!block) { 2355 error_report("Can't find block %s", id); 2356 return NULL; 2357 } 2358 2359 return block; 2360 } 2361 2362 static inline void *host_from_ram_block_offset(RAMBlock *block, 2363 ram_addr_t offset) 2364 { 2365 if (!offset_in_ramblock(block, offset)) { 2366 return NULL; 2367 } 2368 2369 return block->host + offset; 2370 } 2371 2372 /** 2373 * ram_handle_compressed: handle the zero page case 2374 * 2375 * If a page (or a whole RDMA chunk) has been 2376 * determined to be zero, then zap it. 2377 * 2378 * @host: host address for the zero page 2379 * @ch: what the page is filled from. We only support zero 2380 * @size: size of the zero page 2381 */ 2382 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 2383 { 2384 if (ch != 0 || !is_zero_range(host, size)) { 2385 memset(host, ch, size); 2386 } 2387 } 2388 2389 static void *do_data_decompress(void *opaque) 2390 { 2391 DecompressParam *param = opaque; 2392 unsigned long pagesize; 2393 uint8_t *des; 2394 int len; 2395 2396 qemu_mutex_lock(&param->mutex); 2397 while (!param->quit) { 2398 if (param->des) { 2399 des = param->des; 2400 len = param->len; 2401 param->des = 0; 2402 qemu_mutex_unlock(&param->mutex); 2403 2404 pagesize = TARGET_PAGE_SIZE; 2405 /* uncompress() can fail in some cases, especially 2406 * when the page was dirtied while it was being compressed; that's 2407 * not a problem because the dirty page will be retransferred 2408 * and uncompress() won't break the data in other pages.
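 * (That is also why the return value of uncompress() is deliberately not checked below.)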
2409 */ 2410 uncompress((Bytef *)des, &pagesize, 2411 (const Bytef *)param->compbuf, len); 2412 2413 qemu_mutex_lock(&decomp_done_lock); 2414 param->done = true; 2415 qemu_cond_signal(&decomp_done_cond); 2416 qemu_mutex_unlock(&decomp_done_lock); 2417 2418 qemu_mutex_lock(&param->mutex); 2419 } else { 2420 qemu_cond_wait(&param->cond, &param->mutex); 2421 } 2422 } 2423 qemu_mutex_unlock(&param->mutex); 2424 2425 return NULL; 2426 } 2427 2428 static void wait_for_decompress_done(void) 2429 { 2430 int idx, thread_count; 2431 2432 if (!migrate_use_compression()) { 2433 return; 2434 } 2435 2436 thread_count = migrate_decompress_threads(); 2437 qemu_mutex_lock(&decomp_done_lock); 2438 for (idx = 0; idx < thread_count; idx++) { 2439 while (!decomp_param[idx].done) { 2440 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2441 } 2442 } 2443 qemu_mutex_unlock(&decomp_done_lock); 2444 } 2445 2446 static void compress_threads_load_setup(void) 2447 { 2448 int i, thread_count; 2449 2450 if (!migrate_use_compression()) { 2451 return; 2452 } 2453 thread_count = migrate_decompress_threads(); 2454 decompress_threads = g_new0(QemuThread, thread_count); 2455 decomp_param = g_new0(DecompressParam, thread_count); 2456 qemu_mutex_init(&decomp_done_lock); 2457 qemu_cond_init(&decomp_done_cond); 2458 for (i = 0; i < thread_count; i++) { 2459 qemu_mutex_init(&decomp_param[i].mutex); 2460 qemu_cond_init(&decomp_param[i].cond); 2461 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); 2462 decomp_param[i].done = true; 2463 decomp_param[i].quit = false; 2464 qemu_thread_create(decompress_threads + i, "decompress", 2465 do_data_decompress, decomp_param + i, 2466 QEMU_THREAD_JOINABLE); 2467 } 2468 } 2469 2470 static void compress_threads_load_cleanup(void) 2471 { 2472 int i, thread_count; 2473 2474 if (!migrate_use_compression()) { 2475 return; 2476 } 2477 thread_count = migrate_decompress_threads(); 2478 for (i = 0; i < thread_count; i++) { 2479 qemu_mutex_lock(&decomp_param[i].mutex); 2480 decomp_param[i].quit = true; 2481 qemu_cond_signal(&decomp_param[i].cond); 2482 qemu_mutex_unlock(&decomp_param[i].mutex); 2483 } 2484 for (i = 0; i < thread_count; i++) { 2485 qemu_thread_join(decompress_threads + i); 2486 qemu_mutex_destroy(&decomp_param[i].mutex); 2487 qemu_cond_destroy(&decomp_param[i].cond); 2488 g_free(decomp_param[i].compbuf); 2489 } 2490 g_free(decompress_threads); 2491 g_free(decomp_param); 2492 decompress_threads = NULL; 2493 decomp_param = NULL; 2494 } 2495 2496 static void decompress_data_with_multi_threads(QEMUFile *f, 2497 void *host, int len) 2498 { 2499 int idx, thread_count; 2500 2501 thread_count = migrate_decompress_threads(); 2502 qemu_mutex_lock(&decomp_done_lock); 2503 while (true) { 2504 for (idx = 0; idx < thread_count; idx++) { 2505 if (decomp_param[idx].done) { 2506 decomp_param[idx].done = false; 2507 qemu_mutex_lock(&decomp_param[idx].mutex); 2508 qemu_get_buffer(f, decomp_param[idx].compbuf, len); 2509 decomp_param[idx].des = host; 2510 decomp_param[idx].len = len; 2511 qemu_cond_signal(&decomp_param[idx].cond); 2512 qemu_mutex_unlock(&decomp_param[idx].mutex); 2513 break; 2514 } 2515 } 2516 if (idx < thread_count) { 2517 break; 2518 } else { 2519 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2520 } 2521 } 2522 qemu_mutex_unlock(&decomp_done_lock); 2523 } 2524 2525 /** 2526 * ram_load_setup: Setup RAM for migration incoming side 2527 * 2528 * Returns zero to indicate success and negative for error 2529 * 2530 * @f: QEMUFile where to receive the data 2531 * @opaque: RAMState
pointer 2532 */ 2533 static int ram_load_setup(QEMUFile *f, void *opaque) 2534 { 2535 xbzrle_load_setup(); 2536 compress_threads_load_setup(); 2537 return 0; 2538 } 2539 2540 static int ram_load_cleanup(void *opaque) 2541 { 2542 xbzrle_load_cleanup(); 2543 compress_threads_load_cleanup(); 2544 return 0; 2545 } 2546 2547 /** 2548 * ram_postcopy_incoming_init: allocate postcopy data structures 2549 * 2550 * Returns 0 for success and negative in case of error 2551 * 2552 * @mis: current migration incoming state 2553 * 2554 * Allocate data structures etc. needed by incoming migration with 2555 * postcopy-ram. postcopy-ram's similarly named 2556 * postcopy_ram_incoming_init does the work. 2557 */ 2558 int ram_postcopy_incoming_init(MigrationIncomingState *mis) 2559 { 2560 unsigned long ram_pages = last_ram_page(); 2561 2562 return postcopy_ram_incoming_init(mis, ram_pages); 2563 } 2564 2565 /** 2566 * ram_load_postcopy: load a page in postcopy case 2567 * 2568 * Returns 0 for success or -errno in case of error 2569 * 2570 * Called in postcopy mode by ram_load(). 2571 * rcu_read_lock is taken prior to this being called. 2572 * 2573 * @f: QEMUFile where to receive the data 2574 */ 2575 static int ram_load_postcopy(QEMUFile *f) 2576 { 2577 int flags = 0, ret = 0; 2578 bool place_needed = false; 2579 bool matching_page_sizes = false; 2580 MigrationIncomingState *mis = migration_incoming_get_current(); 2581 /* Temporary page that is later 'placed' */ 2582 void *postcopy_host_page = postcopy_get_tmp_page(mis); 2583 void *last_host = NULL; 2584 bool all_zero = false; 2585 2586 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { 2587 ram_addr_t addr; 2588 void *host = NULL; 2589 void *page_buffer = NULL; 2590 void *place_source = NULL; 2591 RAMBlock *block = NULL; 2592 uint8_t ch; 2593 2594 addr = qemu_get_be64(f); 2595 flags = addr & ~TARGET_PAGE_MASK; 2596 addr &= TARGET_PAGE_MASK; 2597 2598 trace_ram_load_postcopy_loop((uint64_t)addr, flags); 2599 place_needed = false; 2600 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) { 2601 block = ram_block_from_stream(f, flags); 2602 2603 host = host_from_ram_block_offset(block, addr); 2604 if (!host) { 2605 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 2606 ret = -EINVAL; 2607 break; 2608 } 2609 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE; 2610 /* 2611 * Postcopy requires that we place whole host pages atomically; 2612 * these may be huge pages for RAMBlocks that are backed by 2613 * hugetlbfs. 2614 * To make it atomic, the data is read into a temporary page 2615 * that's moved into place later. 2616 * The migration protocol uses, possibly smaller, target-pages; 2617 * however the source ensures it always sends all the components 2618 * of a host page in order.
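 * For example (illustrative numbers only): with a 2MB huge-page backed RAMBlock and 4KB target pages, 512 consecutive target pages are accumulated in postcopy_host_page and only the last of them triggers the atomic placement of the whole host page.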
2619 */ 2620 page_buffer = postcopy_host_page + 2621 ((uintptr_t)host & (block->page_size - 1)); 2622 /* If all TP are zero then we can optimise the place */ 2623 if (!((uintptr_t)host & (block->page_size - 1))) { 2624 all_zero = true; 2625 } else { 2626 /* not the 1st TP within the HP */ 2627 if (host != (last_host + TARGET_PAGE_SIZE)) { 2628 error_report("Non-sequential target page %p/%p", 2629 host, last_host); 2630 ret = -EINVAL; 2631 break; 2632 } 2633 } 2634 2635 2636 /* 2637 * If it's the last part of a host page then we place the host 2638 * page 2639 */ 2640 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & 2641 (block->page_size - 1)) == 0; 2642 place_source = postcopy_host_page; 2643 } 2644 last_host = host; 2645 2646 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2647 case RAM_SAVE_FLAG_ZERO: 2648 ch = qemu_get_byte(f); 2649 memset(page_buffer, ch, TARGET_PAGE_SIZE); 2650 if (ch) { 2651 all_zero = false; 2652 } 2653 break; 2654 2655 case RAM_SAVE_FLAG_PAGE: 2656 all_zero = false; 2657 if (!place_needed || !matching_page_sizes) { 2658 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); 2659 } else { 2660 /* Avoids the qemu_file copy during postcopy, which is 2661 * going to do a copy later; can only do it when we 2662 * do this read in one go (matching page sizes) 2663 */ 2664 qemu_get_buffer_in_place(f, (uint8_t **)&place_source, 2665 TARGET_PAGE_SIZE); 2666 } 2667 break; 2668 case RAM_SAVE_FLAG_EOS: 2669 /* normal exit */ 2670 break; 2671 default: 2672 error_report("Unknown combination of migration flags: %#x" 2673 " (postcopy mode)", flags); 2674 ret = -EINVAL; 2675 } 2676 2677 if (place_needed) { 2678 /* This gets called at the last target page in the host page */ 2679 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; 2680 2681 if (all_zero) { 2682 ret = postcopy_place_page_zero(mis, place_dest, 2683 block->page_size); 2684 } else { 2685 ret = postcopy_place_page(mis, place_dest, 2686 place_source, block->page_size); 2687 } 2688 } 2689 if (!ret) { 2690 ret = qemu_file_get_error(f); 2691 } 2692 } 2693 2694 return ret; 2695 } 2696 2697 static int ram_load(QEMUFile *f, void *opaque, int version_id) 2698 { 2699 int flags = 0, ret = 0, invalid_flags = 0; 2700 static uint64_t seq_iter; 2701 int len = 0; 2702 /* 2703 * If system is running in postcopy mode, page inserts to host memory must 2704 * be atomic 2705 */ 2706 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; 2707 /* ADVISE is earlier, it shows the source has the postcopy capability on */ 2708 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE; 2709 2710 seq_iter++; 2711 2712 if (version_id != 4) { 2713 ret = -EINVAL; 2714 } 2715 2716 if (!migrate_use_compression()) { 2717 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; 2718 } 2719 /* This RCU critical section can be very long running. 2720 * When RCU reclaims in the code start to become numerous, 2721 * it will be necessary to reduce the granularity of this 2722 * critical section. 
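 * Each record parsed by the loop below starts with a be64 word: the bits below the target page size carry the RAM_SAVE_FLAG_* flags and the page-aligned remainder is the offset of the page within its RAMBlock. The payload that follows depends on the flags: a single fill byte for ZERO, a raw target page for PAGE, a be32 length plus compressed data for COMPRESS_PAGE, and so on.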
2723 */ 2724 rcu_read_lock(); 2725 2726 if (postcopy_running) { 2727 ret = ram_load_postcopy(f); 2728 } 2729 2730 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { 2731 ram_addr_t addr, total_ram_bytes; 2732 void *host = NULL; 2733 uint8_t ch; 2734 2735 addr = qemu_get_be64(f); 2736 flags = addr & ~TARGET_PAGE_MASK; 2737 addr &= TARGET_PAGE_MASK; 2738 2739 if (flags & invalid_flags) { 2740 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { 2741 error_report("Received an unexpected compressed page"); 2742 } 2743 2744 ret = -EINVAL; 2745 break; 2746 } 2747 2748 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 2749 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { 2750 RAMBlock *block = ram_block_from_stream(f, flags); 2751 2752 host = host_from_ram_block_offset(block, addr); 2753 if (!host) { 2754 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 2755 ret = -EINVAL; 2756 break; 2757 } 2758 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); 2759 } 2760 2761 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2762 case RAM_SAVE_FLAG_MEM_SIZE: 2763 /* Synchronize RAM block list */ 2764 total_ram_bytes = addr; 2765 while (!ret && total_ram_bytes) { 2766 RAMBlock *block; 2767 char id[256]; 2768 ram_addr_t length; 2769 2770 len = qemu_get_byte(f); 2771 qemu_get_buffer(f, (uint8_t *)id, len); 2772 id[len] = 0; 2773 length = qemu_get_be64(f); 2774 2775 block = qemu_ram_block_by_name(id); 2776 if (block) { 2777 if (length != block->used_length) { 2778 Error *local_err = NULL; 2779 2780 ret = qemu_ram_resize(block, length, 2781 &local_err); 2782 if (local_err) { 2783 error_report_err(local_err); 2784 } 2785 } 2786 /* For postcopy we need to check hugepage sizes match */ 2787 if (postcopy_advised && 2788 block->page_size != qemu_host_page_size) { 2789 uint64_t remote_page_size = qemu_get_be64(f); 2790 if (remote_page_size != block->page_size) { 2791 error_report("Mismatched RAM page size %s " 2792 "(local) %zd != %" PRId64, 2793 id, block->page_size, 2794 remote_page_size); 2795 ret = -EINVAL; 2796 } 2797 } 2798 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, 2799 block->idstr); 2800 } else { 2801 error_report("Unknown ramblock \"%s\", cannot " 2802 "accept migration", id); 2803 ret = -EINVAL; 2804 } 2805 2806 total_ram_bytes -= length; 2807 } 2808 break; 2809 2810 case RAM_SAVE_FLAG_ZERO: 2811 ch = qemu_get_byte(f); 2812 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 2813 break; 2814 2815 case RAM_SAVE_FLAG_PAGE: 2816 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 2817 break; 2818 2819 case RAM_SAVE_FLAG_COMPRESS_PAGE: 2820 len = qemu_get_be32(f); 2821 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 2822 error_report("Invalid compressed data length: %d", len); 2823 ret = -EINVAL; 2824 break; 2825 } 2826 decompress_data_with_multi_threads(f, host, len); 2827 break; 2828 2829 case RAM_SAVE_FLAG_XBZRLE: 2830 if (load_xbzrle(f, addr, host) < 0) { 2831 error_report("Failed to decompress XBZRLE page at " 2832 RAM_ADDR_FMT, addr); 2833 ret = -EINVAL; 2834 break; 2835 } 2836 break; 2837 case RAM_SAVE_FLAG_EOS: 2838 /* normal exit */ 2839 break; 2840 default: 2841 if (flags & RAM_SAVE_FLAG_HOOK) { 2842 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); 2843 } else { 2844 error_report("Unknown combination of migration flags: %#x", 2845 flags); 2846 ret = -EINVAL; 2847 } 2848 } 2849 if (!ret) { 2850 ret = qemu_file_get_error(f); 2851 } 2852 } 2853 2854 wait_for_decompress_done(); 2855 rcu_read_unlock(); 2856 trace_ram_load_complete(ret, seq_iter); 2857 
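/* All decompress worker threads have drained their work by this point (see wait_for_decompress_done() above), so every decompressed page has already been written into guest RAM. */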
return ret; 2858 } 2859 2860 static bool ram_has_postcopy(void *opaque) 2861 { 2862 return migrate_postcopy_ram(); 2863 } 2864 2865 static SaveVMHandlers savevm_ram_handlers = { 2866 .save_setup = ram_save_setup, 2867 .save_live_iterate = ram_save_iterate, 2868 .save_live_complete_postcopy = ram_save_complete, 2869 .save_live_complete_precopy = ram_save_complete, 2870 .has_postcopy = ram_has_postcopy, 2871 .save_live_pending = ram_save_pending, 2872 .load_state = ram_load, 2873 .save_cleanup = ram_save_cleanup, 2874 .load_setup = ram_load_setup, 2875 .load_cleanup = ram_load_cleanup, 2876 }; 2877 2878 void ram_mig_init(void) 2879 { 2880 qemu_mutex_init(&XBZRLE.lock); 2881 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); 2882 } 2883
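/*
 * Illustrative sketch only (not part of the original code): another savevm
 * user would hook into the migration stream the same way ram_mig_init()
 * does above, i.e. by filling a SaveVMHandlers table and registering it.
 * All names prefixed with foo_ here are hypothetical and used purely for
 * illustration.
 *
 *     static SaveVMHandlers savevm_foo_handlers = {
 *         .save_setup = foo_save_setup,
 *         .save_live_iterate = foo_save_iterate,
 *         .load_state = foo_load,
 *     };
 *
 *     void foo_mig_init(void)
 *     {
 *         register_savevm_live(NULL, "foo", 0, 1, &savevm_foo_handlers,
 *                              &foo_state);
 *     }
 */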