/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
*/

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "block.h"
#include "sysemu/cpu-throttle.h"
#include "savevm.h"
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"

#if defined(__linux__)
#include "qemu/userfaultfd.h"
#endif /* defined(__linux__) */

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
70 */ 71 72 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */ 73 #define RAM_SAVE_FLAG_ZERO 0x02 74 #define RAM_SAVE_FLAG_MEM_SIZE 0x04 75 #define RAM_SAVE_FLAG_PAGE 0x08 76 #define RAM_SAVE_FLAG_EOS 0x10 77 #define RAM_SAVE_FLAG_CONTINUE 0x20 78 #define RAM_SAVE_FLAG_XBZRLE 0x40 79 /* 0x80 is reserved in migration.h start with 0x100 next */ 80 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 81 82 static inline bool is_zero_range(uint8_t *p, uint64_t size) 83 { 84 return buffer_is_zero(p, size); 85 } 86 87 XBZRLECacheStats xbzrle_counters; 88 89 /* struct contains XBZRLE cache and a static page 90 used by the compression */ 91 static struct { 92 /* buffer used for XBZRLE encoding */ 93 uint8_t *encoded_buf; 94 /* buffer for storing page content */ 95 uint8_t *current_buf; 96 /* Cache for XBZRLE, Protected by lock. */ 97 PageCache *cache; 98 QemuMutex lock; 99 /* it will store a page full of zeros */ 100 uint8_t *zero_target_page; 101 /* buffer used for XBZRLE decoding */ 102 uint8_t *decoded_buf; 103 } XBZRLE; 104 105 static void XBZRLE_cache_lock(void) 106 { 107 if (migrate_use_xbzrle()) { 108 qemu_mutex_lock(&XBZRLE.lock); 109 } 110 } 111 112 static void XBZRLE_cache_unlock(void) 113 { 114 if (migrate_use_xbzrle()) { 115 qemu_mutex_unlock(&XBZRLE.lock); 116 } 117 } 118 119 /** 120 * xbzrle_cache_resize: resize the xbzrle cache 121 * 122 * This function is called from migrate_params_apply in main 123 * thread, possibly while a migration is in progress. A running 124 * migration may be using the cache and might finish during this call, 125 * hence changes to the cache are protected by XBZRLE.lock(). 
126 * 127 * Returns 0 for success or -1 for error 128 * 129 * @new_size: new cache size 130 * @errp: set *errp if the check failed, with reason 131 */ 132 int xbzrle_cache_resize(uint64_t new_size, Error **errp) 133 { 134 PageCache *new_cache; 135 int64_t ret = 0; 136 137 /* Check for truncation */ 138 if (new_size != (size_t)new_size) { 139 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 140 "exceeding address space"); 141 return -1; 142 } 143 144 if (new_size == migrate_xbzrle_cache_size()) { 145 /* nothing to do */ 146 return 0; 147 } 148 149 XBZRLE_cache_lock(); 150 151 if (XBZRLE.cache != NULL) { 152 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp); 153 if (!new_cache) { 154 ret = -1; 155 goto out; 156 } 157 158 cache_fini(XBZRLE.cache); 159 XBZRLE.cache = new_cache; 160 } 161 out: 162 XBZRLE_cache_unlock(); 163 return ret; 164 } 165 166 bool ramblock_is_ignored(RAMBlock *block) 167 { 168 return !qemu_ram_is_migratable(block) || 169 (migrate_ignore_shared() && qemu_ram_is_shared(block)); 170 } 171 172 #undef RAMBLOCK_FOREACH 173 174 int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) 175 { 176 RAMBlock *block; 177 int ret = 0; 178 179 RCU_READ_LOCK_GUARD(); 180 181 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 182 ret = func(block, opaque); 183 if (ret) { 184 break; 185 } 186 } 187 return ret; 188 } 189 190 static void ramblock_recv_map_init(void) 191 { 192 RAMBlock *rb; 193 194 RAMBLOCK_FOREACH_NOT_IGNORED(rb) { 195 assert(!rb->receivedmap); 196 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); 197 } 198 } 199 200 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr) 201 { 202 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), 203 rb->receivedmap); 204 } 205 206 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset) 207 { 208 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); 209 } 210 211 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) 212 
{ 213 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); 214 } 215 216 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, 217 size_t nr) 218 { 219 bitmap_set_atomic(rb->receivedmap, 220 ramblock_recv_bitmap_offset(host_addr, rb), 221 nr); 222 } 223 224 #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL) 225 226 /* 227 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes). 228 * 229 * Returns >0 if success with sent bytes, or <0 if error. 230 */ 231 int64_t ramblock_recv_bitmap_send(QEMUFile *file, 232 const char *block_name) 233 { 234 RAMBlock *block = qemu_ram_block_by_name(block_name); 235 unsigned long *le_bitmap, nbits; 236 uint64_t size; 237 238 if (!block) { 239 error_report("%s: invalid block name: %s", __func__, block_name); 240 return -1; 241 } 242 243 nbits = block->postcopy_length >> TARGET_PAGE_BITS; 244 245 /* 246 * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit 247 * machines we may need 4 more bytes for padding (see below 248 * comment). So extend it a bit before hand. 249 */ 250 le_bitmap = bitmap_new(nbits + BITS_PER_LONG); 251 252 /* 253 * Always use little endian when sending the bitmap. This is 254 * required that when source and destination VMs are not using the 255 * same endianness. (Note: big endian won't work.) 256 */ 257 bitmap_to_le(le_bitmap, block->receivedmap, nbits); 258 259 /* Size of the bitmap, in bytes */ 260 size = DIV_ROUND_UP(nbits, 8); 261 262 /* 263 * size is always aligned to 8 bytes for 64bit machines, but it 264 * may not be true for 32bit machines. We need this padding to 265 * make sure the migration can survive even between 32bit and 266 * 64bit machines. 267 */ 268 size = ROUND_UP(size, 8); 269 270 qemu_put_be64(file, size); 271 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size); 272 /* 273 * Mark as an end, in case the middle part is screwed up due to 274 * some "mysterious" reason. 
275 */ 276 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING); 277 qemu_fflush(file); 278 279 g_free(le_bitmap); 280 281 if (qemu_file_get_error(file)) { 282 return qemu_file_get_error(file); 283 } 284 285 return size + sizeof(size); 286 } 287 288 /* 289 * An outstanding page request, on the source, having been received 290 * and queued 291 */ 292 struct RAMSrcPageRequest { 293 RAMBlock *rb; 294 hwaddr offset; 295 hwaddr len; 296 297 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; 298 }; 299 300 /* State of RAM for migration */ 301 struct RAMState { 302 /* QEMUFile used for this migration */ 303 QEMUFile *f; 304 /* UFFD file descriptor, used in 'write-tracking' migration */ 305 int uffdio_fd; 306 /* Last block that we have visited searching for dirty pages */ 307 RAMBlock *last_seen_block; 308 /* Last block from where we have sent data */ 309 RAMBlock *last_sent_block; 310 /* Last dirty target page we have sent */ 311 ram_addr_t last_page; 312 /* last ram version we have seen */ 313 uint32_t last_version; 314 /* How many times we have dirty too many pages */ 315 int dirty_rate_high_cnt; 316 /* these variables are used for bitmap sync */ 317 /* last time we did a full bitmap_sync */ 318 int64_t time_last_bitmap_sync; 319 /* bytes transferred at start_time */ 320 uint64_t bytes_xfer_prev; 321 /* number of dirty pages since start_time */ 322 uint64_t num_dirty_pages_period; 323 /* xbzrle misses since the beginning of the period */ 324 uint64_t xbzrle_cache_miss_prev; 325 /* Amount of xbzrle pages since the beginning of the period */ 326 uint64_t xbzrle_pages_prev; 327 /* Amount of xbzrle encoded bytes since the beginning of the period */ 328 uint64_t xbzrle_bytes_prev; 329 /* Start using XBZRLE (e.g., after the first round). 
*/ 330 bool xbzrle_enabled; 331 332 /* compression statistics since the beginning of the period */ 333 /* amount of count that no free thread to compress data */ 334 uint64_t compress_thread_busy_prev; 335 /* amount bytes after compression */ 336 uint64_t compressed_size_prev; 337 /* amount of compressed pages */ 338 uint64_t compress_pages_prev; 339 340 /* total handled target pages at the beginning of period */ 341 uint64_t target_page_count_prev; 342 /* total handled target pages since start */ 343 uint64_t target_page_count; 344 /* number of dirty bits in the bitmap */ 345 uint64_t migration_dirty_pages; 346 /* Protects modification of the bitmap and migration dirty pages */ 347 QemuMutex bitmap_mutex; 348 /* The RAMBlock used in the last src_page_requests */ 349 RAMBlock *last_req_rb; 350 /* Queue of outstanding page requests from the destination */ 351 QemuMutex src_page_req_mutex; 352 QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests; 353 }; 354 typedef struct RAMState RAMState; 355 356 static RAMState *ram_state; 357 358 static NotifierWithReturnList precopy_notifier_list; 359 360 void precopy_infrastructure_init(void) 361 { 362 notifier_with_return_list_init(&precopy_notifier_list); 363 } 364 365 void precopy_add_notifier(NotifierWithReturn *n) 366 { 367 notifier_with_return_list_add(&precopy_notifier_list, n); 368 } 369 370 void precopy_remove_notifier(NotifierWithReturn *n) 371 { 372 notifier_with_return_remove(n); 373 } 374 375 int precopy_notify(PrecopyNotifyReason reason, Error **errp) 376 { 377 PrecopyNotifyData pnd; 378 pnd.reason = reason; 379 pnd.errp = errp; 380 381 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd); 382 } 383 384 uint64_t ram_bytes_remaining(void) 385 { 386 return ram_state ? 
(ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) : 387 0; 388 } 389 390 MigrationStats ram_counters; 391 392 /* used by the search for pages to send */ 393 struct PageSearchStatus { 394 /* Current block being searched */ 395 RAMBlock *block; 396 /* Current page to search from */ 397 unsigned long page; 398 /* Set once we wrap around */ 399 bool complete_round; 400 }; 401 typedef struct PageSearchStatus PageSearchStatus; 402 403 CompressionStats compression_counters; 404 405 struct CompressParam { 406 bool done; 407 bool quit; 408 bool zero_page; 409 QEMUFile *file; 410 QemuMutex mutex; 411 QemuCond cond; 412 RAMBlock *block; 413 ram_addr_t offset; 414 415 /* internally used fields */ 416 z_stream stream; 417 uint8_t *originbuf; 418 }; 419 typedef struct CompressParam CompressParam; 420 421 struct DecompressParam { 422 bool done; 423 bool quit; 424 QemuMutex mutex; 425 QemuCond cond; 426 void *des; 427 uint8_t *compbuf; 428 int len; 429 z_stream stream; 430 }; 431 typedef struct DecompressParam DecompressParam; 432 433 static CompressParam *comp_param; 434 static QemuThread *compress_threads; 435 /* comp_done_cond is used to wake up the migration thread when 436 * one of the compression threads has finished the compression. 437 * comp_done_lock is used to co-work with comp_done_cond. 
438 */ 439 static QemuMutex comp_done_lock; 440 static QemuCond comp_done_cond; 441 /* The empty QEMUFileOps will be used by file in CompressParam */ 442 static const QEMUFileOps empty_ops = { }; 443 444 static QEMUFile *decomp_file; 445 static DecompressParam *decomp_param; 446 static QemuThread *decompress_threads; 447 static QemuMutex decomp_done_lock; 448 static QemuCond decomp_done_cond; 449 450 static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, 451 ram_addr_t offset, uint8_t *source_buf); 452 453 static void *do_data_compress(void *opaque) 454 { 455 CompressParam *param = opaque; 456 RAMBlock *block; 457 ram_addr_t offset; 458 bool zero_page; 459 460 qemu_mutex_lock(¶m->mutex); 461 while (!param->quit) { 462 if (param->block) { 463 block = param->block; 464 offset = param->offset; 465 param->block = NULL; 466 qemu_mutex_unlock(¶m->mutex); 467 468 zero_page = do_compress_ram_page(param->file, ¶m->stream, 469 block, offset, param->originbuf); 470 471 qemu_mutex_lock(&comp_done_lock); 472 param->done = true; 473 param->zero_page = zero_page; 474 qemu_cond_signal(&comp_done_cond); 475 qemu_mutex_unlock(&comp_done_lock); 476 477 qemu_mutex_lock(¶m->mutex); 478 } else { 479 qemu_cond_wait(¶m->cond, ¶m->mutex); 480 } 481 } 482 qemu_mutex_unlock(¶m->mutex); 483 484 return NULL; 485 } 486 487 static void compress_threads_save_cleanup(void) 488 { 489 int i, thread_count; 490 491 if (!migrate_use_compression() || !comp_param) { 492 return; 493 } 494 495 thread_count = migrate_compress_threads(); 496 for (i = 0; i < thread_count; i++) { 497 /* 498 * we use it as a indicator which shows if the thread is 499 * properly init'd or not 500 */ 501 if (!comp_param[i].file) { 502 break; 503 } 504 505 qemu_mutex_lock(&comp_param[i].mutex); 506 comp_param[i].quit = true; 507 qemu_cond_signal(&comp_param[i].cond); 508 qemu_mutex_unlock(&comp_param[i].mutex); 509 510 qemu_thread_join(compress_threads + i); 511 qemu_mutex_destroy(&comp_param[i].mutex); 
512 qemu_cond_destroy(&comp_param[i].cond); 513 deflateEnd(&comp_param[i].stream); 514 g_free(comp_param[i].originbuf); 515 qemu_fclose(comp_param[i].file); 516 comp_param[i].file = NULL; 517 } 518 qemu_mutex_destroy(&comp_done_lock); 519 qemu_cond_destroy(&comp_done_cond); 520 g_free(compress_threads); 521 g_free(comp_param); 522 compress_threads = NULL; 523 comp_param = NULL; 524 } 525 526 static int compress_threads_save_setup(void) 527 { 528 int i, thread_count; 529 530 if (!migrate_use_compression()) { 531 return 0; 532 } 533 thread_count = migrate_compress_threads(); 534 compress_threads = g_new0(QemuThread, thread_count); 535 comp_param = g_new0(CompressParam, thread_count); 536 qemu_cond_init(&comp_done_cond); 537 qemu_mutex_init(&comp_done_lock); 538 for (i = 0; i < thread_count; i++) { 539 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE); 540 if (!comp_param[i].originbuf) { 541 goto exit; 542 } 543 544 if (deflateInit(&comp_param[i].stream, 545 migrate_compress_level()) != Z_OK) { 546 g_free(comp_param[i].originbuf); 547 goto exit; 548 } 549 550 /* comp_param[i].file is just used as a dummy buffer to save data, 551 * set its ops to empty. 
552 */ 553 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); 554 comp_param[i].done = true; 555 comp_param[i].quit = false; 556 qemu_mutex_init(&comp_param[i].mutex); 557 qemu_cond_init(&comp_param[i].cond); 558 qemu_thread_create(compress_threads + i, "compress", 559 do_data_compress, comp_param + i, 560 QEMU_THREAD_JOINABLE); 561 } 562 return 0; 563 564 exit: 565 compress_threads_save_cleanup(); 566 return -1; 567 } 568 569 /** 570 * save_page_header: write page header to wire 571 * 572 * If this is the 1st block, it also writes the block identification 573 * 574 * Returns the number of bytes written 575 * 576 * @f: QEMUFile where to send the data 577 * @block: block that contains the page we want to send 578 * @offset: offset inside the block for the page 579 * in the lower bits, it contains flags 580 */ 581 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, 582 ram_addr_t offset) 583 { 584 size_t size, len; 585 586 if (block == rs->last_sent_block) { 587 offset |= RAM_SAVE_FLAG_CONTINUE; 588 } 589 qemu_put_be64(f, offset); 590 size = 8; 591 592 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { 593 len = strlen(block->idstr); 594 qemu_put_byte(f, len); 595 qemu_put_buffer(f, (uint8_t *)block->idstr, len); 596 size += 1 + len; 597 rs->last_sent_block = block; 598 } 599 return size; 600 } 601 602 /** 603 * mig_throttle_guest_down: throotle down the guest 604 * 605 * Reduce amount of guest cpu execution to hopefully slow down memory 606 * writes. If guest dirty memory rate is reduced below the rate at 607 * which we can transfer pages to the destination then we should be 608 * able to complete migration. Some workloads dirty memory way too 609 * fast and will not effectively converge, even with auto-converge. 
610 */ 611 static void mig_throttle_guest_down(uint64_t bytes_dirty_period, 612 uint64_t bytes_dirty_threshold) 613 { 614 MigrationState *s = migrate_get_current(); 615 uint64_t pct_initial = s->parameters.cpu_throttle_initial; 616 uint64_t pct_increment = s->parameters.cpu_throttle_increment; 617 bool pct_tailslow = s->parameters.cpu_throttle_tailslow; 618 int pct_max = s->parameters.max_cpu_throttle; 619 620 uint64_t throttle_now = cpu_throttle_get_percentage(); 621 uint64_t cpu_now, cpu_ideal, throttle_inc; 622 623 /* We have not started throttling yet. Let's start it. */ 624 if (!cpu_throttle_active()) { 625 cpu_throttle_set(pct_initial); 626 } else { 627 /* Throttling already on, just increase the rate */ 628 if (!pct_tailslow) { 629 throttle_inc = pct_increment; 630 } else { 631 /* Compute the ideal CPU percentage used by Guest, which may 632 * make the dirty rate match the dirty rate threshold. */ 633 cpu_now = 100 - throttle_now; 634 cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 / 635 bytes_dirty_period); 636 throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment); 637 } 638 cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max)); 639 } 640 } 641 642 /** 643 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache 644 * 645 * @rs: current RAM state 646 * @current_addr: address for the zero page 647 * 648 * Update the xbzrle cache to reflect a page that's been sent as all 0. 649 * The important thing is that a stale (not-yet-0'd) page be replaced 650 * by the new data. 651 * As a bonus, if the page wasn't in the cache it gets added so that 652 * when a small write is made into the 0'd page it gets XBZRLE sent. 
653 */ 654 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) 655 { 656 if (!rs->xbzrle_enabled) { 657 return; 658 } 659 660 /* We don't care if this fails to allocate a new cache page 661 * as long as it updated an old one */ 662 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, 663 ram_counters.dirty_sync_count); 664 } 665 666 #define ENCODING_FLAG_XBZRLE 0x1 667 668 /** 669 * save_xbzrle_page: compress and send current page 670 * 671 * Returns: 1 means that we wrote the page 672 * 0 means that page is identical to the one already sent 673 * -1 means that xbzrle would be longer than normal 674 * 675 * @rs: current RAM state 676 * @current_data: pointer to the address of the page contents 677 * @current_addr: addr of the page 678 * @block: block that contains the page we want to send 679 * @offset: offset inside the block for the page 680 * @last_stage: if we are at the completion stage 681 */ 682 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, 683 ram_addr_t current_addr, RAMBlock *block, 684 ram_addr_t offset, bool last_stage) 685 { 686 int encoded_len = 0, bytes_xbzrle; 687 uint8_t *prev_cached_page; 688 689 if (!cache_is_cached(XBZRLE.cache, current_addr, 690 ram_counters.dirty_sync_count)) { 691 xbzrle_counters.cache_miss++; 692 if (!last_stage) { 693 if (cache_insert(XBZRLE.cache, current_addr, *current_data, 694 ram_counters.dirty_sync_count) == -1) { 695 return -1; 696 } else { 697 /* update *current_data when the page has been 698 inserted into cache */ 699 *current_data = get_cached_data(XBZRLE.cache, current_addr); 700 } 701 } 702 return -1; 703 } 704 705 /* 706 * Reaching here means the page has hit the xbzrle cache, no matter what 707 * encoding result it is (normal encoding, overflow or skipping the page), 708 * count the page as encoded. This is used to calculate the encoding rate. 
709 * 710 * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB, 711 * 2nd page turns out to be skipped (i.e. no new bytes written to the 712 * page), the overall encoding rate will be 8KB / 2KB = 4, which has the 713 * skipped page included. In this way, the encoding rate can tell if the 714 * guest page is good for xbzrle encoding. 715 */ 716 xbzrle_counters.pages++; 717 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 718 719 /* save current buffer into memory */ 720 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); 721 722 /* XBZRLE encoding (if there is no overflow) */ 723 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 724 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 725 TARGET_PAGE_SIZE); 726 727 /* 728 * Update the cache contents, so that it corresponds to the data 729 * sent, in all cases except where we skip the page. 730 */ 731 if (!last_stage && encoded_len != 0) { 732 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 733 /* 734 * In the case where we couldn't compress, ensure that the caller 735 * sends the data from the cache, since the guest might have 736 * changed the RAM since we copied it. 
737 */ 738 *current_data = prev_cached_page; 739 } 740 741 if (encoded_len == 0) { 742 trace_save_xbzrle_page_skipping(); 743 return 0; 744 } else if (encoded_len == -1) { 745 trace_save_xbzrle_page_overflow(); 746 xbzrle_counters.overflow++; 747 xbzrle_counters.bytes += TARGET_PAGE_SIZE; 748 return -1; 749 } 750 751 /* Send XBZRLE based compressed page */ 752 bytes_xbzrle = save_page_header(rs, rs->f, block, 753 offset | RAM_SAVE_FLAG_XBZRLE); 754 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); 755 qemu_put_be16(rs->f, encoded_len); 756 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); 757 bytes_xbzrle += encoded_len + 1 + 2; 758 /* 759 * Like compressed_size (please see update_compress_thread_counts), 760 * the xbzrle encoded bytes don't count the 8 byte header with 761 * RAM_SAVE_FLAG_CONTINUE. 762 */ 763 xbzrle_counters.bytes += bytes_xbzrle - 8; 764 ram_counters.transferred += bytes_xbzrle; 765 766 return 1; 767 } 768 769 /** 770 * migration_bitmap_find_dirty: find the next dirty page from start 771 * 772 * Returns the page offset within memory region of the start of a dirty page 773 * 774 * @rs: current RAM state 775 * @rb: RAMBlock where to search for dirty pages 776 * @start: page where we start the search 777 */ 778 static inline 779 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, 780 unsigned long start) 781 { 782 unsigned long size = rb->used_length >> TARGET_PAGE_BITS; 783 unsigned long *bitmap = rb->bmap; 784 785 if (ramblock_is_ignored(rb)) { 786 return size; 787 } 788 789 return find_next_bit(bitmap, size, start); 790 } 791 792 static inline bool migration_bitmap_clear_dirty(RAMState *rs, 793 RAMBlock *rb, 794 unsigned long page) 795 { 796 bool ret; 797 798 QEMU_LOCK_GUARD(&rs->bitmap_mutex); 799 800 /* 801 * Clear dirty bitmap if needed. 
This _must_ be called before we 802 * send any of the page in the chunk because we need to make sure 803 * we can capture further page content changes when we sync dirty 804 * log the next time. So as long as we are going to send any of 805 * the page in the chunk we clear the remote dirty bitmap for all. 806 * Clearing it earlier won't be a problem, but too late will. 807 */ 808 if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { 809 uint8_t shift = rb->clear_bmap_shift; 810 hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); 811 hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); 812 813 /* 814 * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this 815 * can make things easier sometimes since then start address 816 * of the small chunk will always be 64 pages aligned so the 817 * bitmap will always be aligned to unsigned long. We should 818 * even be able to remove this restriction but I'm simply 819 * keeping it. 820 */ 821 assert(shift >= 6); 822 trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); 823 memory_region_clear_dirty_bitmap(rb->mr, start, size); 824 } 825 826 ret = test_and_clear_bit(page, rb->bmap); 827 828 if (ret) { 829 rs->migration_dirty_pages--; 830 } 831 832 return ret; 833 } 834 835 /* Called with RCU critical section */ 836 static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb) 837 { 838 uint64_t new_dirty_pages = 839 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length); 840 841 rs->migration_dirty_pages += new_dirty_pages; 842 rs->num_dirty_pages_period += new_dirty_pages; 843 } 844 845 /** 846 * ram_pagesize_summary: calculate all the pagesizes of a VM 847 * 848 * Returns a summary bitmap of the page sizes of all RAMBlocks 849 * 850 * For VMs with just normal pages this is equivalent to the host page 851 * size. If it's got some huge pages then it's the OR of all the 852 * different page sizes. 
853 */ 854 uint64_t ram_pagesize_summary(void) 855 { 856 RAMBlock *block; 857 uint64_t summary = 0; 858 859 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 860 summary |= block->page_size; 861 } 862 863 return summary; 864 } 865 866 uint64_t ram_get_total_transferred_pages(void) 867 { 868 return ram_counters.normal + ram_counters.duplicate + 869 compression_counters.pages + xbzrle_counters.pages; 870 } 871 872 static void migration_update_rates(RAMState *rs, int64_t end_time) 873 { 874 uint64_t page_count = rs->target_page_count - rs->target_page_count_prev; 875 double compressed_size; 876 877 /* calculate period counters */ 878 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 879 / (end_time - rs->time_last_bitmap_sync); 880 881 if (!page_count) { 882 return; 883 } 884 885 if (migrate_use_xbzrle()) { 886 double encoded_size, unencoded_size; 887 888 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - 889 rs->xbzrle_cache_miss_prev) / page_count; 890 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; 891 unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) * 892 TARGET_PAGE_SIZE; 893 encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev; 894 if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) { 895 xbzrle_counters.encoding_rate = 0; 896 } else { 897 xbzrle_counters.encoding_rate = unencoded_size / encoded_size; 898 } 899 rs->xbzrle_pages_prev = xbzrle_counters.pages; 900 rs->xbzrle_bytes_prev = xbzrle_counters.bytes; 901 } 902 903 if (migrate_use_compression()) { 904 compression_counters.busy_rate = (double)(compression_counters.busy - 905 rs->compress_thread_busy_prev) / page_count; 906 rs->compress_thread_busy_prev = compression_counters.busy; 907 908 compressed_size = compression_counters.compressed_size - 909 rs->compressed_size_prev; 910 if (compressed_size) { 911 double uncompressed_size = (compression_counters.pages - 912 rs->compress_pages_prev) * TARGET_PAGE_SIZE; 913 914 /* 
Compression-Ratio = Uncompressed-size / Compressed-size */ 915 compression_counters.compression_rate = 916 uncompressed_size / compressed_size; 917 918 rs->compress_pages_prev = compression_counters.pages; 919 rs->compressed_size_prev = compression_counters.compressed_size; 920 } 921 } 922 } 923 924 static void migration_trigger_throttle(RAMState *rs) 925 { 926 MigrationState *s = migrate_get_current(); 927 uint64_t threshold = s->parameters.throttle_trigger_threshold; 928 929 uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev; 930 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; 931 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; 932 933 /* During block migration the auto-converge logic incorrectly detects 934 * that ram migration makes no progress. Avoid this by disabling the 935 * throttling logic during the bulk phase of block migration. */ 936 if (migrate_auto_converge() && !blk_mig_bulk_active()) { 937 /* The following detection logic can be refined later. For now: 938 Check to see if the ratio between dirtied bytes and the approx. 939 amount of bytes that just got transferred since the last time 940 we were in this routine reaches the threshold. If that happens 941 twice, start or increase throttling. 
*/ 942 943 if ((bytes_dirty_period > bytes_dirty_threshold) && 944 (++rs->dirty_rate_high_cnt >= 2)) { 945 trace_migration_throttle(); 946 rs->dirty_rate_high_cnt = 0; 947 mig_throttle_guest_down(bytes_dirty_period, 948 bytes_dirty_threshold); 949 } 950 } 951 } 952 953 static void migration_bitmap_sync(RAMState *rs) 954 { 955 RAMBlock *block; 956 int64_t end_time; 957 958 ram_counters.dirty_sync_count++; 959 960 if (!rs->time_last_bitmap_sync) { 961 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 962 } 963 964 trace_migration_bitmap_sync_start(); 965 memory_global_dirty_log_sync(); 966 967 qemu_mutex_lock(&rs->bitmap_mutex); 968 WITH_RCU_READ_LOCK_GUARD() { 969 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 970 ramblock_sync_dirty_bitmap(rs, block); 971 } 972 ram_counters.remaining = ram_bytes_remaining(); 973 } 974 qemu_mutex_unlock(&rs->bitmap_mutex); 975 976 memory_global_after_dirty_log_sync(); 977 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); 978 979 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 980 981 /* more than 1 second = 1000 millisecons */ 982 if (end_time > rs->time_last_bitmap_sync + 1000) { 983 migration_trigger_throttle(rs); 984 985 migration_update_rates(rs, end_time); 986 987 rs->target_page_count_prev = rs->target_page_count; 988 989 /* reset period counters */ 990 rs->time_last_bitmap_sync = end_time; 991 rs->num_dirty_pages_period = 0; 992 rs->bytes_xfer_prev = ram_counters.transferred; 993 } 994 if (migrate_use_events()) { 995 qapi_event_send_migration_pass(ram_counters.dirty_sync_count); 996 } 997 } 998 999 static void migration_bitmap_sync_precopy(RAMState *rs) 1000 { 1001 Error *local_err = NULL; 1002 1003 /* 1004 * The current notifier usage is just an optimization to migration, so we 1005 * don't stop the normal migration process in the error case. 
1006 */ 1007 if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) { 1008 error_report_err(local_err); 1009 local_err = NULL; 1010 } 1011 1012 migration_bitmap_sync(rs); 1013 1014 if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) { 1015 error_report_err(local_err); 1016 } 1017 } 1018 1019 /** 1020 * save_zero_page_to_file: send the zero page to the file 1021 * 1022 * Returns the size of data written to the file, 0 means the page is not 1023 * a zero page 1024 * 1025 * @rs: current RAM state 1026 * @file: the file where the data is saved 1027 * @block: block that contains the page we want to send 1028 * @offset: offset inside the block for the page 1029 */ 1030 static int save_zero_page_to_file(RAMState *rs, QEMUFile *file, 1031 RAMBlock *block, ram_addr_t offset) 1032 { 1033 uint8_t *p = block->host + offset; 1034 int len = 0; 1035 1036 if (is_zero_range(p, TARGET_PAGE_SIZE)) { 1037 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO); 1038 qemu_put_byte(file, 0); 1039 len += 1; 1040 } 1041 return len; 1042 } 1043 1044 /** 1045 * save_zero_page: send the zero page to the stream 1046 * 1047 * Returns the number of pages written. 
1048 * 1049 * @rs: current RAM state 1050 * @block: block that contains the page we want to send 1051 * @offset: offset inside the block for the page 1052 */ 1053 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) 1054 { 1055 int len = save_zero_page_to_file(rs, rs->f, block, offset); 1056 1057 if (len) { 1058 ram_counters.duplicate++; 1059 ram_counters.transferred += len; 1060 return 1; 1061 } 1062 return -1; 1063 } 1064 1065 static void ram_release_pages(const char *rbname, uint64_t offset, int pages) 1066 { 1067 if (!migrate_release_ram() || !migration_in_postcopy()) { 1068 return; 1069 } 1070 1071 ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS); 1072 } 1073 1074 /* 1075 * @pages: the number of pages written by the control path, 1076 * < 0 - error 1077 * > 0 - number of pages written 1078 * 1079 * Return true if the pages has been saved, otherwise false is returned. 1080 */ 1081 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 1082 int *pages) 1083 { 1084 uint64_t bytes_xmit = 0; 1085 int ret; 1086 1087 *pages = -1; 1088 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, 1089 &bytes_xmit); 1090 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { 1091 return false; 1092 } 1093 1094 if (bytes_xmit) { 1095 ram_counters.transferred += bytes_xmit; 1096 *pages = 1; 1097 } 1098 1099 if (ret == RAM_SAVE_CONTROL_DELAYED) { 1100 return true; 1101 } 1102 1103 if (bytes_xmit > 0) { 1104 ram_counters.normal++; 1105 } else if (bytes_xmit == 0) { 1106 ram_counters.duplicate++; 1107 } 1108 1109 return true; 1110 } 1111 1112 /* 1113 * directly send the page to the stream 1114 * 1115 * Returns the number of pages written. 
1116 * 1117 * @rs: current RAM state 1118 * @block: block that contains the page we want to send 1119 * @offset: offset inside the block for the page 1120 * @buf: the page to be sent 1121 * @async: send to page asyncly 1122 */ 1123 static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 1124 uint8_t *buf, bool async) 1125 { 1126 ram_counters.transferred += save_page_header(rs, rs->f, block, 1127 offset | RAM_SAVE_FLAG_PAGE); 1128 if (async) { 1129 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, 1130 migrate_release_ram() & 1131 migration_in_postcopy()); 1132 } else { 1133 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); 1134 } 1135 ram_counters.transferred += TARGET_PAGE_SIZE; 1136 ram_counters.normal++; 1137 return 1; 1138 } 1139 1140 /** 1141 * ram_save_page: send the given page to the stream 1142 * 1143 * Returns the number of pages written. 1144 * < 0 - error 1145 * >=0 - Number of pages written - this might legally be 0 1146 * if xbzrle noticed the page was the same. 
*
 * @rs: current RAM state
 * @pss: page search status; pss->block and pss->page identify the page
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (rs->xbzrle_enabled && !migration_in_postcopy()) {
        /* p is passed by address, so save_xbzrle_page() may redirect it */
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}

/*
 * Queue the page on the multifd channels.
 *
 * Returns 1 when the page has been queued, -1 if multifd_queue_page() failed.
 */
static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                 ram_addr_t offset)
{
    if (multifd_queue_page(rs->f, block, offset) < 0) {
        return -1;
    }
    ram_counters.normal++;

    return 1;
}

/*
 * Write one page to @f, either as a zero page or as compressed data.
 *
 * Returns true only if the page was sent as a zero page. false means it was
 * sent compressed, or that compression failed (in which case the error is
 * recorded on the migration's to_dst_file and the page is not released).
 */
static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
    bool zero_page = false;
    int ret;

    if (save_zero_page_to_file(rs, f, block, offset)) {
        zero_page = true;
        goto exit;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * copy it to a internal buffer to avoid it being modified by VM
     * so that we can catch up the error during compression and
     * decompression
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
        return false;
    }

exit:
    ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    return zero_page;
}

/*
 * Account @bytes_xmit bytes produced by a compression thread: always added
 * to ram_counters.transferred, then counted either as a duplicate (zero)
 * page or as a compressed page.
 */
static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_counters.transferred += bytes_xmit;

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}

static bool save_page_use_compression(RAMState *rs);

/*
 * Wait until every compression thread has its 'done' flag set, then pass
 * each thread's file to qemu_put_qemu_file() on rs->f and update the
 * counters. Does nothing when save_page_use_compression() is false.
 */
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    /* Phase 1: block until all threads report done */
    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    /* Phase 2: collect each thread's output, skipping quitting threads */
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * it's safe to fetch zero_page without holding comp_done_lock
             * as there is no further request submitted to the thread,
             * i.e, the thread should be waiting for a request at this point.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

/* Record which page (@block, @offset) a compression thread should handle */
static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

/*
 * Hand the page to the first compression thread whose 'done' flag is set,
 * after collecting that thread's previous output into rs->f.
 *
 * Returns 1 if the page was handed to a thread, -1 if no thread was free
 * and migrate_compress_wait_thread() is false.
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            /* Collect the previous result before issuing the new request */
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * wait for the free thread if the user specifies 'compress-wait-thread',
     * otherwise we will post the page out in the main thread as normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
*
 * Returns true if a page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
 */
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
{
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (!offset_in_ramblock(pss->block,
                            ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
        /* Didn't find anything in this RAM Block */
        pss->page = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /*
             * If memory migration starts over, we will meet a dirtied page
             * which may still exist in the compression threads' ring, so we
             * should flush the compressed data to make sure the new page
             * is not overwritten by the old one in the destination.
             *
             * Also If xbzrle is on, stop using the data compression at this
             * point. In theory, xbzrle can do better than compression.
             */
            flush_compressed_data(rs);

            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            /* After the first round, enable XBZRLE. */
            if (migrate_use_xbzrle()) {
                rs->xbzrle_enabled = true;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * unqueue_page: gets a page of the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    /* Cheap check before taking the mutex */
    if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
        return NULL;
    }

    QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            /* Multi-page request: consume one page, keep the entry queued */
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            /* Last page of the request: drop the entry and its mr reference */
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
            migration_consume_urgent_request();
        }
    }

    return block;
}

#if defined(__linux__)
/**
 * poll_fault_page: try to get next UFFD write fault page and, if pending fault
 *   is found, return RAM block pointer and page offset
 *
 * Returns pointer to the RAMBlock containing faulting page,
 *   NULL if no write faults are pending
 *
 * @rs: current RAM state
 * @offset: page offset from the beginning of the block
 */
static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    struct uffd_msg uffd_msg;
    void *page_address;
    RAMBlock *block;
    int res;

    if (!migrate_background_snapshot()) {
        return NULL;
    }

    /* Read at most one event; a non-positive result is treated as "none" */
    res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
    if (res <= 0) {
        return NULL;
    }

    page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
    block = qemu_ram_block_from_host(page_address, false, offset);
    assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
    return block;
}

/**
 * ram_save_release_protection: release UFFD write protection after
 *   a range of pages has been saved
 *
 * @rs: current RAM state
 * @pss: page-search-status structure
 * @start_page: index of the first page in the range relative to pss->block
 *
 * Returns 0 on success, negative value in case of an error
 */
static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
        unsigned long start_page)
{
    int res = 0;

    /* Check if page is from UFFD-managed region. */
    if (pss->block->flags & RAM_UF_WRITEPROTECT) {
        void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
        /* Range is [start_page, pss->page], both ends inclusive */
        uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS;

        /* Flush async buffers before un-protect. */
        qemu_fflush(rs->f);
        /* Un-protect memory range. */
        res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
                false, false);
    }

    return res;
}

/* ram_write_tracking_available: check if kernel supports required UFFD features
 *
 * Returns true if supports, false otherwise
 */
bool ram_write_tracking_available(void)
{
    uint64_t uffd_features;
    int res;

    res = uffd_query_features(&uffd_features);
    return (res == 0 &&
            (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
}

/* ram_write_tracking_compatible: check if guest configuration is
 *   compatible with 'write-tracking'
 *
 * Opens a temporary UFFD descriptor, tries to register every non-ignored,
 * writable RAM block in write-protect mode and checks that the
 * _UFFDIO_WRITEPROTECT ioctl is reported for it. The descriptor is closed
 * again before returning.
 *
 * Returns true if compatible, false otherwise
 */
bool ram_write_tracking_compatible(void)
{
    const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
    int uffd_fd;
    RAMBlock *block;
    bool ret = false;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
    if (uffd_fd < 0) {
        return false;
    }

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        uint64_t uffd_ioctls;

        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }
        /* Try to register block memory via UFFD-IO to track writes */
        if (uffd_register_memory(uffd_fd, block->host, block->max_length,
                UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
            goto out;
        }
        if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
            goto out;
        }
    }
    ret = true;

out:
    uffd_close_fd(uffd_fd);
    return ret;
}

/*
 * ram_block_populate_pages: populate memory in the RAM block by reading
 *   an integer from the beginning of each page.
 *
 * Since it's solely used for userfault_fd WP feature, here we just
 *   hardcode page size to qemu_real_host_page_size.
*
 * @block: RAM block to populate
 */
static void ram_block_populate_pages(RAMBlock *block)
{
    char *ptr = (char *) block->host;

    /* Touch one byte per host page across the used part of the block */
    for (ram_addr_t offset = 0; offset < block->used_length;
            offset += qemu_real_host_page_size) {
        char tmp = *(ptr + offset);

        /* Don't optimize the read out */
        asm volatile("" : "+r" (tmp));
    }
}

/*
 * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
 *
 * Populates the pages of every non-ignored RAM block that is neither
 * read-only nor a ROM device.
 */
void ram_write_tracking_prepare(void)
{
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /*
         * Populate pages of the RAM block before enabling userfault_fd
         * write protection.
         *
         * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
         * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
         * pages with pte_none() entries in page table.
         */
        ram_block_populate_pages(block);
    }
}

/*
 * ram_write_tracking_start: start UFFD-WP memory tracking
 *
 * On success every tracked block has RAM_UF_WRITEPROTECT set and holds an
 * extra reference on its memory region; rs->uffdio_fd holds the descriptor.
 * On failure the blocks already flagged are rolled back, the descriptor is
 * closed and rs->uffdio_fd is set to -1.
 *
 * Returns 0 for success or negative value in case of error
 */
int ram_write_tracking_start(void)
{
    int uffd_fd;
    RAMState *rs = ram_state;
    RAMBlock *block;

    /* Open UFFD file descriptor */
    uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
    if (uffd_fd < 0) {
        return uffd_fd;
    }
    rs->uffdio_fd = uffd_fd;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Nothing to do with read-only and MMIO-writable regions */
        if (block->mr->readonly || block->mr->rom_device) {
            continue;
        }

        /* Register block memory with UFFD to track writes */
        if (uffd_register_memory(rs->uffdio_fd, block->host,
                block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
            goto fail;
        }
        /* Apply UFFD write protection to the block memory range */
        if (uffd_change_protection(rs->uffdio_fd, block->host,
                block->max_length, true, false)) {
            goto fail;
        }
        /* The flag marks the block for rollback here and for ..._stop() */
        block->flags |= RAM_UF_WRITEPROTECT;
        memory_region_ref(block->mr);

        trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
                block->host, block->max_length);
    }

    return 0;

fail:
    error_report("ram_write_tracking_start() failed: restoring initial memory state");

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /*
         * In case some memory block failed to be write-protected
         * remove protection and unregister all succeeded RAM blocks
         */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
        /* Cleanup flags and remove reference */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    uffd_close_fd(uffd_fd);
    rs->uffdio_fd = -1;
    return -1;
}

/**
 * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
 */
void ram_write_tracking_stop(void)
{
    RAMState *rs = ram_state;
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        /* Only blocks flagged by ram_write_tracking_start() are touched */
        if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
            continue;
        }
        /* Remove protection and unregister all affected RAM blocks */
        uffd_change_protection(rs->uffdio_fd, block->host, block->max_length,
                false, false);
        uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);

        trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
                block->host, block->max_length);

        /* Cleanup flags and remove reference */
        block->flags &= ~RAM_UF_WRITEPROTECT;
        memory_region_unref(block->mr);
    }

    /* Finally close UFFD file descriptor */
    uffd_close_fd(rs->uffdio_fd);
    rs->uffdio_fd = -1;
}

#else
/* No target OS support, stubs just fail or ignore */

static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
{
    (void) rs;
    (void) offset;

    return NULL;
}

static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
        unsigned long start_page)
{
    (void) rs;
    (void) pss;
    (void) start_page;

    return 0;
}

bool ram_write_tracking_available(void)
{
    return false;
}

/* The stubs below assert: they are not expected to be called here */
bool ram_write_tracking_compatible(void)
{
    assert(0);
    return false;
}

int ram_write_tracking_start(void)
{
    assert(0);
    return -1;
}

void ram_write_tracking_stop(void)
{
    assert(0);
}
#endif /* defined(__linux__) */

/**
 * get_queued_page: unqueue a page from the postcopy
requests 1737 * 1738 * Skips pages that are already sent (!dirty) 1739 * 1740 * Returns true if a queued page is found 1741 * 1742 * @rs: current RAM state 1743 * @pss: data about the state of the current dirty page scan 1744 */ 1745 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) 1746 { 1747 RAMBlock *block; 1748 ram_addr_t offset; 1749 bool dirty; 1750 1751 do { 1752 block = unqueue_page(rs, &offset); 1753 /* 1754 * We're sending this page, and since it's postcopy nothing else 1755 * will dirty it, and we must make sure it doesn't get sent again 1756 * even if this queue request was received after the background 1757 * search already sent it. 1758 */ 1759 if (block) { 1760 unsigned long page; 1761 1762 page = offset >> TARGET_PAGE_BITS; 1763 dirty = test_bit(page, block->bmap); 1764 if (!dirty) { 1765 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, 1766 page); 1767 } else { 1768 trace_get_queued_page(block->idstr, (uint64_t)offset, page); 1769 } 1770 } 1771 1772 } while (block && !dirty); 1773 1774 if (!block) { 1775 /* 1776 * Poll write faults too if background snapshot is enabled; that's 1777 * when we have vcpus got blocked by the write protected pages. 1778 */ 1779 block = poll_fault_page(rs, &offset); 1780 } 1781 1782 if (block) { 1783 /* 1784 * We want the background search to continue from the queued page 1785 * since the guest is likely to want other pages near to the page 1786 * it just requested. 1787 */ 1788 pss->block = block; 1789 pss->page = offset >> TARGET_PAGE_BITS; 1790 1791 /* 1792 * This unqueued page would break the "one round" check, even is 1793 * really rare. 1794 */ 1795 pss->complete_round = false; 1796 } 1797 1798 return !!block; 1799 } 1800 1801 /** 1802 * migration_page_queue_free: drop any remaining pages in the ram 1803 * request queue 1804 * 1805 * It should be empty at the end anyway, but in error cases there may 1806 * be some left. in case that there is any page left, we drop it. 
*
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    RCU_READ_LOCK_GUARD();
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        /* Drop the reference taken when the request was queued */
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
        g_free(mspr);
    }
}

/**
 * ram_save_queue_pages: queue the page for transmission
 *
 * A request from postcopy destination for example.
 *
 * Returns zero on success or negative on error
 *
 * @rbname: Name of the RAMBlock of the request. NULL means the
 *          same that last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
 */
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
    RCU_READ_LOCK_GUARD();

    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            return -1;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            return -1;
        }
        rs->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    /* The last byte of the request must still lie inside the block */
    if (!offset_in_ramblock(ramblock, start + len - 1)) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        return -1;
    }

    struct RAMSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct RAMSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    /* Reference is dropped when the entry is dequeued or the queue freed */
    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    migration_make_urgent_request();
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return 0;
}

/*
 * Returns true if pages should currently go through the compression path:
 * compression is enabled and rs->xbzrle_enabled is not set.
 */
static bool save_page_use_compression(RAMState *rs)
{
    if (!migrate_use_compression()) {
        return false;
    }

    /*
     * If xbzrle is enabled (e.g., after first round of migration), stop
     * using the data compression. In theory, xbzrle can do better than
     * compression.
     */
    if (rs->xbzrle_enabled) {
        return false;
    }

    return true;
}

/*
 * try to compress the page before posting it out, return true if the page
 * has been properly handled by compression, otherwise needs other
 * paths to handle it
 */
static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
{
    if (!save_page_use_compression(rs)) {
        return false;
    }

    /*
     * When starting the process of a new block, the first page of
     * the block should be sent out before other pages in the same
     * block, and all the pages in last block should have been sent
     * out, keeping this order is important, because the 'cont' flag
     * is used to avoid resending the block name.
     *
     * We post the first page as normal page as compression will take
     * much CPU resource.
     */
    if (block != rs->last_sent_block) {
        flush_compressed_data(rs);
        return false;
    }

    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
        return true;
    }

    /* No compression thread took the page: count it and fall back */
    compression_counters.busy++;
    return false;
}

/**
 * ram_save_target_page: save one target page
 *
 * Tries, in order: the control path, the compression threads, the zero-page
 * shortcut, multifd, and finally ram_save_page().
 *
 * Returns the number of pages written
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
 */
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
                                bool last_stage)
{
    RAMBlock *block = pss->block;
    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
    int res;

    if (control_save_page(rs, block, offset, &res)) {
        return res;
    }

    if (save_compress_page(rs, block, offset)) {
        return 1;
    }

    res = save_zero_page(rs, block, offset);
    if (res > 0) {
        /* Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        }
        ram_release_pages(block->idstr, offset, res);
        return res;
    }

    /*
     * Do not use multifd for:
     * 1. Compression as the first page in the new block should be posted out
     *    before sending the compressed page
     * 2. In postcopy as one whole host page should be placed
     */
    if (!save_page_use_compression(rs) && migrate_use_multifd()
        && !migration_in_postcopy()) {
        return ram_save_multifd_page(rs, block, offset);
    }

    return ram_save_page(rs, pss, last_stage);
}

/**
 * ram_save_host_page: save a whole host page
 *
 * Starting at *offset send pages up to the end of the current host
 * page. It's valid for the initial offset to point into the middle of
 * a host page in which case the remainder of the hostpage is sent.
 * Only dirty target pages are sent. Note that the host page size may
 * be a huge page for this block.
 * The saving stops at the boundary of the used_length of the block
 * if the RAMBlock isn't a multiple of the host page size.
*
 * Returns the number of pages written or negative on error
 *
 * @rs: current RAM state
 * @pss: data about the page we want to send; on return pss->page is the
 *       last target page index covered by this call
 * @last_stage: if we are at the completion stage
 */
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
                              bool last_stage)
{
    int tmppages, pages = 0;
    /* Number of target pages per host page of this block */
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
    /* First target page index that belongs to the next host page */
    unsigned long hostpage_boundary =
        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
    unsigned long start_page = pss->page;
    int res;

    if (ramblock_is_ignored(pss->block)) {
        error_report("block %s should not be migrated !", pss->block->idstr);
        return 0;
    }

    do {
        /* Check the pages is dirty and if it is send it */
        if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
            tmppages = ram_save_target_page(rs, pss, last_stage);
            if (tmppages < 0) {
                /*
                 * NOTE(review): this early return skips the
                 * ram_save_release_protection() call below - confirm that
                 * is intended on the error path.
                 */
                return tmppages;
            }

            pages += tmppages;
            /*
             * Allow rate limiting to happen in the middle of huge pages if
             * something is sent in the current iteration.
             */
            if (pagesize_bits > 1 && tmppages > 0) {
                migration_rate_limit();
            }
        }
        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    } while ((pss->page < hostpage_boundary) &&
             offset_in_ramblock(pss->block,
                                ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
    /* The offset we leave with is the min boundary of host page and block */
    pss->page = MIN(pss->page, hostpage_boundary) - 1;

    /* An un-protect failure takes precedence over the page count */
    res = ram_save_release_protection(rs, pss, start_page);
    return (res < 0 ? res : pages);
}

/**
 * ram_find_and_save_block: finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
2058 * 2059 * Returns the number of pages written where zero means no dirty pages, 2060 * or negative on error 2061 * 2062 * @rs: current RAM state 2063 * @last_stage: if we are at the completion stage 2064 * 2065 * On systems where host-page-size > target-page-size it will send all the 2066 * pages in a host page that are dirty. 2067 */ 2068 2069 static int ram_find_and_save_block(RAMState *rs, bool last_stage) 2070 { 2071 PageSearchStatus pss; 2072 int pages = 0; 2073 bool again, found; 2074 2075 /* No dirty page as there is zero RAM */ 2076 if (!ram_bytes_total()) { 2077 return pages; 2078 } 2079 2080 pss.block = rs->last_seen_block; 2081 pss.page = rs->last_page; 2082 pss.complete_round = false; 2083 2084 if (!pss.block) { 2085 pss.block = QLIST_FIRST_RCU(&ram_list.blocks); 2086 } 2087 2088 do { 2089 again = true; 2090 found = get_queued_page(rs, &pss); 2091 2092 if (!found) { 2093 /* priority queue empty, so just search for something dirty */ 2094 found = find_dirty_block(rs, &pss, &again); 2095 } 2096 2097 if (found) { 2098 pages = ram_save_host_page(rs, &pss, last_stage); 2099 } 2100 } while (!pages && again); 2101 2102 rs->last_seen_block = pss.block; 2103 rs->last_page = pss.page; 2104 2105 return pages; 2106 } 2107 2108 void acct_update_position(QEMUFile *f, size_t size, bool zero) 2109 { 2110 uint64_t pages = size / TARGET_PAGE_SIZE; 2111 2112 if (zero) { 2113 ram_counters.duplicate += pages; 2114 } else { 2115 ram_counters.normal += pages; 2116 ram_counters.transferred += size; 2117 qemu_update_position(f, size); 2118 } 2119 } 2120 2121 static uint64_t ram_bytes_total_common(bool count_ignored) 2122 { 2123 RAMBlock *block; 2124 uint64_t total = 0; 2125 2126 RCU_READ_LOCK_GUARD(); 2127 2128 if (count_ignored) { 2129 RAMBLOCK_FOREACH_MIGRATABLE(block) { 2130 total += block->used_length; 2131 } 2132 } else { 2133 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2134 total += block->used_length; 2135 } 2136 } 2137 return total; 2138 } 2139 2140 uint64_t 
ram_bytes_total(void) 2141 { 2142 return ram_bytes_total_common(false); 2143 } 2144 2145 static void xbzrle_load_setup(void) 2146 { 2147 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 2148 } 2149 2150 static void xbzrle_load_cleanup(void) 2151 { 2152 g_free(XBZRLE.decoded_buf); 2153 XBZRLE.decoded_buf = NULL; 2154 } 2155 2156 static void ram_state_cleanup(RAMState **rsp) 2157 { 2158 if (*rsp) { 2159 migration_page_queue_free(*rsp); 2160 qemu_mutex_destroy(&(*rsp)->bitmap_mutex); 2161 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex); 2162 g_free(*rsp); 2163 *rsp = NULL; 2164 } 2165 } 2166 2167 static void xbzrle_cleanup(void) 2168 { 2169 XBZRLE_cache_lock(); 2170 if (XBZRLE.cache) { 2171 cache_fini(XBZRLE.cache); 2172 g_free(XBZRLE.encoded_buf); 2173 g_free(XBZRLE.current_buf); 2174 g_free(XBZRLE.zero_target_page); 2175 XBZRLE.cache = NULL; 2176 XBZRLE.encoded_buf = NULL; 2177 XBZRLE.current_buf = NULL; 2178 XBZRLE.zero_target_page = NULL; 2179 } 2180 XBZRLE_cache_unlock(); 2181 } 2182 2183 static void ram_save_cleanup(void *opaque) 2184 { 2185 RAMState **rsp = opaque; 2186 RAMBlock *block; 2187 2188 /* We don't use dirty log with background snapshots */ 2189 if (!migrate_background_snapshot()) { 2190 /* caller have hold iothread lock or is in a bh, so there is 2191 * no writing race against the migration bitmap 2192 */ 2193 memory_global_dirty_log_stop(); 2194 } 2195 2196 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2197 g_free(block->clear_bmap); 2198 block->clear_bmap = NULL; 2199 g_free(block->bmap); 2200 block->bmap = NULL; 2201 } 2202 2203 xbzrle_cleanup(); 2204 compress_threads_save_cleanup(); 2205 ram_state_cleanup(rsp); 2206 } 2207 2208 static void ram_state_reset(RAMState *rs) 2209 { 2210 rs->last_seen_block = NULL; 2211 rs->last_sent_block = NULL; 2212 rs->last_page = 0; 2213 rs->last_version = ram_list.version; 2214 rs->xbzrle_enabled = false; 2215 } 2216 2217 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 2218 2219 /* 2220 * 'expected' is the value 
you expect the bitmap mostly to be full 2221 * of; it won't bother printing lines that are all this value. 2222 * If 'todump' is null the migration bitmap is dumped. 2223 */ 2224 void ram_debug_dump_bitmap(unsigned long *todump, bool expected, 2225 unsigned long pages) 2226 { 2227 int64_t cur; 2228 int64_t linelen = 128; 2229 char linebuf[129]; 2230 2231 for (cur = 0; cur < pages; cur += linelen) { 2232 int64_t curb; 2233 bool found = false; 2234 /* 2235 * Last line; catch the case where the line length 2236 * is longer than remaining ram 2237 */ 2238 if (cur + linelen > pages) { 2239 linelen = pages - cur; 2240 } 2241 for (curb = 0; curb < linelen; curb++) { 2242 bool thisbit = test_bit(cur + curb, todump); 2243 linebuf[curb] = thisbit ? '1' : '.'; 2244 found = found || (thisbit != expected); 2245 } 2246 if (found) { 2247 linebuf[curb] = '\0'; 2248 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); 2249 } 2250 } 2251 } 2252 2253 /* **** functions for postcopy ***** */ 2254 2255 void ram_postcopy_migrated_memory_release(MigrationState *ms) 2256 { 2257 struct RAMBlock *block; 2258 2259 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2260 unsigned long *bitmap = block->bmap; 2261 unsigned long range = block->used_length >> TARGET_PAGE_BITS; 2262 unsigned long run_start = find_next_zero_bit(bitmap, range, 0); 2263 2264 while (run_start < range) { 2265 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); 2266 ram_discard_range(block->idstr, 2267 ((ram_addr_t)run_start) << TARGET_PAGE_BITS, 2268 ((ram_addr_t)(run_end - run_start)) 2269 << TARGET_PAGE_BITS); 2270 run_start = find_next_zero_bit(bitmap, range, run_end + 1); 2271 } 2272 } 2273 } 2274 2275 /** 2276 * postcopy_send_discard_bm_ram: discard a RAMBlock 2277 * 2278 * Returns zero on success 2279 * 2280 * Callback from postcopy_each_ram_send_discard for each RAMBlock 2281 * 2282 * @ms: current migration state 2283 * @block: RAMBlock to discard 2284 */ 2285 static int 
postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) 2286 { 2287 unsigned long end = block->used_length >> TARGET_PAGE_BITS; 2288 unsigned long current; 2289 unsigned long *bitmap = block->bmap; 2290 2291 for (current = 0; current < end; ) { 2292 unsigned long one = find_next_bit(bitmap, end, current); 2293 unsigned long zero, discard_length; 2294 2295 if (one >= end) { 2296 break; 2297 } 2298 2299 zero = find_next_zero_bit(bitmap, end, one + 1); 2300 2301 if (zero >= end) { 2302 discard_length = end - one; 2303 } else { 2304 discard_length = zero - one; 2305 } 2306 postcopy_discard_send_range(ms, one, discard_length); 2307 current = one + discard_length; 2308 } 2309 2310 return 0; 2311 } 2312 2313 /** 2314 * postcopy_each_ram_send_discard: discard all RAMBlocks 2315 * 2316 * Returns 0 for success or negative for error 2317 * 2318 * Utility for the outgoing postcopy code. 2319 * Calls postcopy_send_discard_bm_ram for each RAMBlock 2320 * passing it bitmap indexes and name. 2321 * (qemu_ram_foreach_block ends up passing unscaled lengths 2322 * which would mean postcopy code would have to deal with target page) 2323 * 2324 * @ms: current migration state 2325 */ 2326 static int postcopy_each_ram_send_discard(MigrationState *ms) 2327 { 2328 struct RAMBlock *block; 2329 int ret; 2330 2331 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2332 postcopy_discard_send_init(ms, block->idstr); 2333 2334 /* 2335 * Postcopy sends chunks of bitmap over the wire, but it 2336 * just needs indexes at this point, avoids it having 2337 * target page specific code. 2338 */ 2339 ret = postcopy_send_discard_bm_ram(ms, block); 2340 postcopy_discard_send_finish(ms); 2341 if (ret) { 2342 return ret; 2343 } 2344 } 2345 2346 return 0; 2347 } 2348 2349 /** 2350 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages 2351 * 2352 * Helper for postcopy_chunk_hostpages; it's called twice to 2353 * canonicalize the two bitmaps, that are similar, but one is 2354 * inverted. 
2355 * 2356 * Postcopy requires that all target pages in a hostpage are dirty or 2357 * clean, not a mix. This function canonicalizes the bitmaps. 2358 * 2359 * @ms: current migration state 2360 * @block: block that contains the page we want to canonicalize 2361 */ 2362 static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block) 2363 { 2364 RAMState *rs = ram_state; 2365 unsigned long *bitmap = block->bmap; 2366 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; 2367 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 2368 unsigned long run_start; 2369 2370 if (block->page_size == TARGET_PAGE_SIZE) { 2371 /* Easy case - TPS==HPS for a non-huge page RAMBlock */ 2372 return; 2373 } 2374 2375 /* Find a dirty page */ 2376 run_start = find_next_bit(bitmap, pages, 0); 2377 2378 while (run_start < pages) { 2379 2380 /* 2381 * If the start of this run of pages is in the middle of a host 2382 * page, then we need to fixup this host page. 2383 */ 2384 if (QEMU_IS_ALIGNED(run_start, host_ratio)) { 2385 /* Find the end of this run */ 2386 run_start = find_next_zero_bit(bitmap, pages, run_start + 1); 2387 /* 2388 * If the end isn't at the start of a host page, then the 2389 * run doesn't finish at the end of a host page 2390 * and we need to discard. 2391 */ 2392 } 2393 2394 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) { 2395 unsigned long page; 2396 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start, 2397 host_ratio); 2398 run_start = QEMU_ALIGN_UP(run_start, host_ratio); 2399 2400 /* Clean up the bitmap */ 2401 for (page = fixup_start_addr; 2402 page < fixup_start_addr + host_ratio; page++) { 2403 /* 2404 * Remark them as dirty, updating the count for any pages 2405 * that weren't previously dirty. 
2406 */ 2407 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap); 2408 } 2409 } 2410 2411 /* Find the next dirty page for the next iteration */ 2412 run_start = find_next_bit(bitmap, pages, run_start); 2413 } 2414 } 2415 2416 /** 2417 * postcopy_chunk_hostpages: discard any partially sent host page 2418 * 2419 * Utility for the outgoing postcopy code. 2420 * 2421 * Discard any partially sent host-page size chunks, mark any partially 2422 * dirty host-page size chunks as all dirty. In this case the host-page 2423 * is the host-page for the particular RAMBlock, i.e. it might be a huge page 2424 * 2425 * Returns zero on success 2426 * 2427 * @ms: current migration state 2428 * @block: block we want to work with 2429 */ 2430 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) 2431 { 2432 postcopy_discard_send_init(ms, block->idstr); 2433 2434 /* 2435 * Ensure that all partially dirty host pages are made fully dirty. 2436 */ 2437 postcopy_chunk_hostpages_pass(ms, block); 2438 2439 postcopy_discard_send_finish(ms); 2440 return 0; 2441 } 2442 2443 /** 2444 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap 2445 * 2446 * Returns zero on success 2447 * 2448 * Transmit the set of pages to be discarded after precopy to the target 2449 * these are pages that: 2450 * a) Have been previously transmitted but are now dirty again 2451 * b) Pages that have never been transmitted, this ensures that 2452 * any pages on the destination that have been mapped by background 2453 * tasks get discarded (transparent huge pages is the specific concern) 2454 * Hopefully this is pretty sparse 2455 * 2456 * @ms: current migration state 2457 */ 2458 int ram_postcopy_send_discard_bitmap(MigrationState *ms) 2459 { 2460 RAMState *rs = ram_state; 2461 RAMBlock *block; 2462 int ret; 2463 2464 RCU_READ_LOCK_GUARD(); 2465 2466 /* This should be our last sync, the src is now paused */ 2467 migration_bitmap_sync(rs); 2468 2469 /* Easiest way to make sure we don't 
resume in the middle of a host-page */ 2470 rs->last_seen_block = NULL; 2471 rs->last_sent_block = NULL; 2472 rs->last_page = 0; 2473 2474 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2475 /* Deal with TPS != HPS and huge pages */ 2476 ret = postcopy_chunk_hostpages(ms, block); 2477 if (ret) { 2478 return ret; 2479 } 2480 2481 #ifdef DEBUG_POSTCOPY 2482 ram_debug_dump_bitmap(block->bmap, true, 2483 block->used_length >> TARGET_PAGE_BITS); 2484 #endif 2485 } 2486 trace_ram_postcopy_send_discard_bitmap(); 2487 2488 return postcopy_each_ram_send_discard(ms); 2489 } 2490 2491 /** 2492 * ram_discard_range: discard dirtied pages at the beginning of postcopy 2493 * 2494 * Returns zero on success 2495 * 2496 * @rbname: name of the RAMBlock of the request. NULL means the 2497 * same that last one. 2498 * @start: RAMBlock starting page 2499 * @length: RAMBlock size 2500 */ 2501 int ram_discard_range(const char *rbname, uint64_t start, size_t length) 2502 { 2503 trace_ram_discard_range(rbname, start, length); 2504 2505 RCU_READ_LOCK_GUARD(); 2506 RAMBlock *rb = qemu_ram_block_by_name(rbname); 2507 2508 if (!rb) { 2509 error_report("ram_discard_range: Failed to find block '%s'", rbname); 2510 return -1; 2511 } 2512 2513 /* 2514 * On source VM, we don't need to update the received bitmap since 2515 * we don't even have one. 2516 */ 2517 if (rb->receivedmap) { 2518 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(), 2519 length >> qemu_target_page_bits()); 2520 } 2521 2522 return ram_block_discard_range(rb, start, length); 2523 } 2524 2525 /* 2526 * For every allocation, we will try not to crash the VM if the 2527 * allocation failed. 
2528 */ 2529 static int xbzrle_init(void) 2530 { 2531 Error *local_err = NULL; 2532 2533 if (!migrate_use_xbzrle()) { 2534 return 0; 2535 } 2536 2537 XBZRLE_cache_lock(); 2538 2539 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); 2540 if (!XBZRLE.zero_target_page) { 2541 error_report("%s: Error allocating zero page", __func__); 2542 goto err_out; 2543 } 2544 2545 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), 2546 TARGET_PAGE_SIZE, &local_err); 2547 if (!XBZRLE.cache) { 2548 error_report_err(local_err); 2549 goto free_zero_page; 2550 } 2551 2552 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); 2553 if (!XBZRLE.encoded_buf) { 2554 error_report("%s: Error allocating encoded_buf", __func__); 2555 goto free_cache; 2556 } 2557 2558 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); 2559 if (!XBZRLE.current_buf) { 2560 error_report("%s: Error allocating current_buf", __func__); 2561 goto free_encoded_buf; 2562 } 2563 2564 /* We are all good */ 2565 XBZRLE_cache_unlock(); 2566 return 0; 2567 2568 free_encoded_buf: 2569 g_free(XBZRLE.encoded_buf); 2570 XBZRLE.encoded_buf = NULL; 2571 free_cache: 2572 cache_fini(XBZRLE.cache); 2573 XBZRLE.cache = NULL; 2574 free_zero_page: 2575 g_free(XBZRLE.zero_target_page); 2576 XBZRLE.zero_target_page = NULL; 2577 err_out: 2578 XBZRLE_cache_unlock(); 2579 return -ENOMEM; 2580 } 2581 2582 static int ram_state_init(RAMState **rsp) 2583 { 2584 *rsp = g_try_new0(RAMState, 1); 2585 2586 if (!*rsp) { 2587 error_report("%s: Init ramstate fail", __func__); 2588 return -1; 2589 } 2590 2591 qemu_mutex_init(&(*rsp)->bitmap_mutex); 2592 qemu_mutex_init(&(*rsp)->src_page_req_mutex); 2593 QSIMPLEQ_INIT(&(*rsp)->src_page_requests); 2594 2595 /* 2596 * Count the total number of pages used by ram blocks not including any 2597 * gaps due to alignment or unplugs. 2598 * This must match with the initial values of dirty bitmap. 
2599 */ 2600 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; 2601 ram_state_reset(*rsp); 2602 2603 return 0; 2604 } 2605 2606 static void ram_list_init_bitmaps(void) 2607 { 2608 MigrationState *ms = migrate_get_current(); 2609 RAMBlock *block; 2610 unsigned long pages; 2611 uint8_t shift; 2612 2613 /* Skip setting bitmap if there is no RAM */ 2614 if (ram_bytes_total()) { 2615 shift = ms->clear_bitmap_shift; 2616 if (shift > CLEAR_BITMAP_SHIFT_MAX) { 2617 error_report("clear_bitmap_shift (%u) too big, using " 2618 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX); 2619 shift = CLEAR_BITMAP_SHIFT_MAX; 2620 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) { 2621 error_report("clear_bitmap_shift (%u) too small, using " 2622 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN); 2623 shift = CLEAR_BITMAP_SHIFT_MIN; 2624 } 2625 2626 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2627 pages = block->max_length >> TARGET_PAGE_BITS; 2628 /* 2629 * The initial dirty bitmap for migration must be set with all 2630 * ones to make sure we'll migrate every guest RAM page to 2631 * destination. 2632 * Here we set RAMBlock.bmap all to 1 because when rebegin a 2633 * new migration after a failed migration, ram_list. 2634 * dirty_memory[DIRTY_MEMORY_MIGRATION] don't include the whole 2635 * guest memory. 2636 */ 2637 block->bmap = bitmap_new(pages); 2638 bitmap_set(block->bmap, 0, pages); 2639 block->clear_bmap_shift = shift; 2640 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift)); 2641 } 2642 } 2643 } 2644 2645 static void ram_init_bitmaps(RAMState *rs) 2646 { 2647 /* For memory_global_dirty_log_start below. 
*/ 2648 qemu_mutex_lock_iothread(); 2649 qemu_mutex_lock_ramlist(); 2650 2651 WITH_RCU_READ_LOCK_GUARD() { 2652 ram_list_init_bitmaps(); 2653 /* We don't use dirty log with background snapshots */ 2654 if (!migrate_background_snapshot()) { 2655 memory_global_dirty_log_start(); 2656 migration_bitmap_sync_precopy(rs); 2657 } 2658 } 2659 qemu_mutex_unlock_ramlist(); 2660 qemu_mutex_unlock_iothread(); 2661 } 2662 2663 static int ram_init_all(RAMState **rsp) 2664 { 2665 if (ram_state_init(rsp)) { 2666 return -1; 2667 } 2668 2669 if (xbzrle_init()) { 2670 ram_state_cleanup(rsp); 2671 return -1; 2672 } 2673 2674 ram_init_bitmaps(*rsp); 2675 2676 return 0; 2677 } 2678 2679 static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out) 2680 { 2681 RAMBlock *block; 2682 uint64_t pages = 0; 2683 2684 /* 2685 * Postcopy is not using xbzrle/compression, so no need for that. 2686 * Also, since source are already halted, we don't need to care 2687 * about dirty page logging as well. 2688 */ 2689 2690 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 2691 pages += bitmap_count_one(block->bmap, 2692 block->used_length >> TARGET_PAGE_BITS); 2693 } 2694 2695 /* This may not be aligned with current bitmaps. Recalculate. */ 2696 rs->migration_dirty_pages = pages; 2697 2698 ram_state_reset(rs); 2699 2700 /* Update RAMState cache of output QEMUFile */ 2701 rs->f = out; 2702 2703 trace_ram_state_resume_prepare(pages); 2704 } 2705 2706 /* 2707 * This function clears bits of the free pages reported by the caller from the 2708 * migration dirty bitmap. @addr is the host address corresponding to the 2709 * start of the continuous guest free pages, and @len is the total bytes of 2710 * those pages. 
2711 */ 2712 void qemu_guest_free_page_hint(void *addr, size_t len) 2713 { 2714 RAMBlock *block; 2715 ram_addr_t offset; 2716 size_t used_len, start, npages; 2717 MigrationState *s = migrate_get_current(); 2718 2719 /* This function is currently expected to be used during live migration */ 2720 if (!migration_is_setup_or_active(s->state)) { 2721 return; 2722 } 2723 2724 for (; len > 0; len -= used_len, addr += used_len) { 2725 block = qemu_ram_block_from_host(addr, false, &offset); 2726 if (unlikely(!block || offset >= block->used_length)) { 2727 /* 2728 * The implementation might not support RAMBlock resize during 2729 * live migration, but it could happen in theory with future 2730 * updates. So we add a check here to capture that case. 2731 */ 2732 error_report_once("%s unexpected error", __func__); 2733 return; 2734 } 2735 2736 if (len <= block->used_length - offset) { 2737 used_len = len; 2738 } else { 2739 used_len = block->used_length - offset; 2740 } 2741 2742 start = offset >> TARGET_PAGE_BITS; 2743 npages = used_len >> TARGET_PAGE_BITS; 2744 2745 qemu_mutex_lock(&ram_state->bitmap_mutex); 2746 ram_state->migration_dirty_pages -= 2747 bitmap_count_one_with_offset(block->bmap, start, npages); 2748 bitmap_clear(block->bmap, start, npages); 2749 qemu_mutex_unlock(&ram_state->bitmap_mutex); 2750 } 2751 } 2752 2753 /* 2754 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has 2755 * long-running RCU critical section. When rcu-reclaims in the code 2756 * start to become numerous it will be necessary to reduce the 2757 * granularity of these critical sections. 
2758 */ 2759 2760 /** 2761 * ram_save_setup: Setup RAM for migration 2762 * 2763 * Returns zero to indicate success and negative for error 2764 * 2765 * @f: QEMUFile where to send the data 2766 * @opaque: RAMState pointer 2767 */ 2768 static int ram_save_setup(QEMUFile *f, void *opaque) 2769 { 2770 RAMState **rsp = opaque; 2771 RAMBlock *block; 2772 2773 if (compress_threads_save_setup()) { 2774 return -1; 2775 } 2776 2777 /* migration has already setup the bitmap, reuse it. */ 2778 if (!migration_in_colo_state()) { 2779 if (ram_init_all(rsp) != 0) { 2780 compress_threads_save_cleanup(); 2781 return -1; 2782 } 2783 } 2784 (*rsp)->f = f; 2785 2786 WITH_RCU_READ_LOCK_GUARD() { 2787 qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); 2788 2789 RAMBLOCK_FOREACH_MIGRATABLE(block) { 2790 qemu_put_byte(f, strlen(block->idstr)); 2791 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 2792 qemu_put_be64(f, block->used_length); 2793 if (migrate_postcopy_ram() && block->page_size != 2794 qemu_host_page_size) { 2795 qemu_put_be64(f, block->page_size); 2796 } 2797 if (migrate_ignore_shared()) { 2798 qemu_put_be64(f, block->mr->addr); 2799 } 2800 } 2801 } 2802 2803 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 2804 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 2805 2806 multifd_send_sync_main(f); 2807 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2808 qemu_fflush(f); 2809 2810 return 0; 2811 } 2812 2813 /** 2814 * ram_save_iterate: iterative stage for migration 2815 * 2816 * Returns zero to indicate success and negative for error 2817 * 2818 * @f: QEMUFile where to send the data 2819 * @opaque: RAMState pointer 2820 */ 2821 static int ram_save_iterate(QEMUFile *f, void *opaque) 2822 { 2823 RAMState **temp = opaque; 2824 RAMState *rs = *temp; 2825 int ret = 0; 2826 int i; 2827 int64_t t0; 2828 int done = 0; 2829 2830 if (blk_mig_bulk_active()) { 2831 /* Avoid transferring ram during bulk phase of block migration as 2832 * the bulk phase will 
usually take a long time and transferring 2833 * ram updates during that time is pointless. */ 2834 goto out; 2835 } 2836 2837 WITH_RCU_READ_LOCK_GUARD() { 2838 if (ram_list.version != rs->last_version) { 2839 ram_state_reset(rs); 2840 } 2841 2842 /* Read version before ram_list.blocks */ 2843 smp_rmb(); 2844 2845 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 2846 2847 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2848 i = 0; 2849 while ((ret = qemu_file_rate_limit(f)) == 0 || 2850 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { 2851 int pages; 2852 2853 if (qemu_file_get_error(f)) { 2854 break; 2855 } 2856 2857 pages = ram_find_and_save_block(rs, false); 2858 /* no more pages to sent */ 2859 if (pages == 0) { 2860 done = 1; 2861 break; 2862 } 2863 2864 if (pages < 0) { 2865 qemu_file_set_error(f, pages); 2866 break; 2867 } 2868 2869 rs->target_page_count += pages; 2870 2871 /* 2872 * During postcopy, it is necessary to make sure one whole host 2873 * page is sent in one chunk. 2874 */ 2875 if (migrate_postcopy_ram()) { 2876 flush_compressed_data(rs); 2877 } 2878 2879 /* 2880 * we want to check in the 1st loop, just in case it was the 1st 2881 * time and we had to sync the dirty bitmap. 2882 * qemu_clock_get_ns() is a bit expensive, so we only check each 2883 * some iterations 2884 */ 2885 if ((i & 63) == 0) { 2886 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 2887 1000000; 2888 if (t1 > MAX_WAIT) { 2889 trace_ram_save_iterate_big_wait(t1, i); 2890 break; 2891 } 2892 } 2893 i++; 2894 } 2895 } 2896 2897 /* 2898 * Must occur before EOS (or any QEMUFile operation) 2899 * because of RDMA protocol. 
2900 */ 2901 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 2902 2903 out: 2904 if (ret >= 0 2905 && migration_is_setup_or_active(migrate_get_current()->state)) { 2906 multifd_send_sync_main(rs->f); 2907 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2908 qemu_fflush(f); 2909 ram_counters.transferred += 8; 2910 2911 ret = qemu_file_get_error(f); 2912 } 2913 if (ret < 0) { 2914 return ret; 2915 } 2916 2917 return done; 2918 } 2919 2920 /** 2921 * ram_save_complete: function called to send the remaining amount of ram 2922 * 2923 * Returns zero to indicate success or negative on error 2924 * 2925 * Called with iothread lock 2926 * 2927 * @f: QEMUFile where to send the data 2928 * @opaque: RAMState pointer 2929 */ 2930 static int ram_save_complete(QEMUFile *f, void *opaque) 2931 { 2932 RAMState **temp = opaque; 2933 RAMState *rs = *temp; 2934 int ret = 0; 2935 2936 WITH_RCU_READ_LOCK_GUARD() { 2937 if (!migration_in_postcopy()) { 2938 migration_bitmap_sync_precopy(rs); 2939 } 2940 2941 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 2942 2943 /* try transferring iterative blocks of memory */ 2944 2945 /* flush all remaining blocks regardless of rate limiting */ 2946 while (true) { 2947 int pages; 2948 2949 pages = ram_find_and_save_block(rs, !migration_in_colo_state()); 2950 /* no more blocks to sent */ 2951 if (pages == 0) { 2952 break; 2953 } 2954 if (pages < 0) { 2955 ret = pages; 2956 break; 2957 } 2958 } 2959 2960 flush_compressed_data(rs); 2961 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 2962 } 2963 2964 if (ret >= 0) { 2965 multifd_send_sync_main(rs->f); 2966 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2967 qemu_fflush(f); 2968 } 2969 2970 return ret; 2971 } 2972 2973 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, 2974 uint64_t *res_precopy_only, 2975 uint64_t *res_compatible, 2976 uint64_t *res_postcopy_only) 2977 { 2978 RAMState **temp = opaque; 2979 RAMState *rs = *temp; 2980 uint64_t remaining_size; 2981 2982 remaining_size = 
rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2983 2984 if (!migration_in_postcopy() && 2985 remaining_size < max_size) { 2986 qemu_mutex_lock_iothread(); 2987 WITH_RCU_READ_LOCK_GUARD() { 2988 migration_bitmap_sync_precopy(rs); 2989 } 2990 qemu_mutex_unlock_iothread(); 2991 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2992 } 2993 2994 if (migrate_postcopy_ram()) { 2995 /* We can do postcopy, and all the data is postcopiable */ 2996 *res_compatible += remaining_size; 2997 } else { 2998 *res_precopy_only += remaining_size; 2999 } 3000 } 3001 3002 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 3003 { 3004 unsigned int xh_len; 3005 int xh_flags; 3006 uint8_t *loaded_data; 3007 3008 /* extract RLE header */ 3009 xh_flags = qemu_get_byte(f); 3010 xh_len = qemu_get_be16(f); 3011 3012 if (xh_flags != ENCODING_FLAG_XBZRLE) { 3013 error_report("Failed to load XBZRLE page - wrong compression!"); 3014 return -1; 3015 } 3016 3017 if (xh_len > TARGET_PAGE_SIZE) { 3018 error_report("Failed to load XBZRLE page - len overflow!"); 3019 return -1; 3020 } 3021 loaded_data = XBZRLE.decoded_buf; 3022 /* load data and decode */ 3023 /* it can change loaded_data to point to an internal buffer */ 3024 qemu_get_buffer_in_place(f, &loaded_data, xh_len); 3025 3026 /* decode RLE */ 3027 if (xbzrle_decode_buffer(loaded_data, xh_len, host, 3028 TARGET_PAGE_SIZE) == -1) { 3029 error_report("Failed to load XBZRLE page - decode error!"); 3030 return -1; 3031 } 3032 3033 return 0; 3034 } 3035 3036 /** 3037 * ram_block_from_stream: read a RAMBlock id from the migration stream 3038 * 3039 * Must be called from within a rcu critical section. 3040 * 3041 * Returns a pointer from within the RCU-protected ram_list. 
3042 * 3043 * @f: QEMUFile where to read the data from 3044 * @flags: Page flags (mostly to see if it's a continuation of previous block) 3045 */ 3046 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) 3047 { 3048 static RAMBlock *block; 3049 char id[256]; 3050 uint8_t len; 3051 3052 if (flags & RAM_SAVE_FLAG_CONTINUE) { 3053 if (!block) { 3054 error_report("Ack, bad migration stream!"); 3055 return NULL; 3056 } 3057 return block; 3058 } 3059 3060 len = qemu_get_byte(f); 3061 qemu_get_buffer(f, (uint8_t *)id, len); 3062 id[len] = 0; 3063 3064 block = qemu_ram_block_by_name(id); 3065 if (!block) { 3066 error_report("Can't find block %s", id); 3067 return NULL; 3068 } 3069 3070 if (ramblock_is_ignored(block)) { 3071 error_report("block %s should not be migrated !", id); 3072 return NULL; 3073 } 3074 3075 return block; 3076 } 3077 3078 static inline void *host_from_ram_block_offset(RAMBlock *block, 3079 ram_addr_t offset) 3080 { 3081 if (!offset_in_ramblock(block, offset)) { 3082 return NULL; 3083 } 3084 3085 return block->host + offset; 3086 } 3087 3088 static void *host_page_from_ram_block_offset(RAMBlock *block, 3089 ram_addr_t offset) 3090 { 3091 /* Note: Explicitly no check against offset_in_ramblock(). */ 3092 return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset), 3093 block->page_size); 3094 } 3095 3096 static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block, 3097 ram_addr_t offset) 3098 { 3099 return ((uintptr_t)block->host + offset) & (block->page_size - 1); 3100 } 3101 3102 static inline void *colo_cache_from_block_offset(RAMBlock *block, 3103 ram_addr_t offset, bool record_bitmap) 3104 { 3105 if (!offset_in_ramblock(block, offset)) { 3106 return NULL; 3107 } 3108 if (!block->colo_cache) { 3109 error_report("%s: colo_cache is NULL in block :%s", 3110 __func__, block->idstr); 3111 return NULL; 3112 } 3113 3114 /* 3115 * During colo checkpoint, we need bitmap of these migrated pages. 
3116 * It help us to decide which pages in ram cache should be flushed 3117 * into VM's RAM later. 3118 */ 3119 if (record_bitmap && 3120 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) { 3121 ram_state->migration_dirty_pages++; 3122 } 3123 return block->colo_cache + offset; 3124 } 3125 3126 /** 3127 * ram_handle_compressed: handle the zero page case 3128 * 3129 * If a page (or a whole RDMA chunk) has been 3130 * determined to be zero, then zap it. 3131 * 3132 * @host: host address for the zero page 3133 * @ch: what the page is filled from. We only support zero 3134 * @size: size of the zero page 3135 */ 3136 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 3137 { 3138 if (ch != 0 || !is_zero_range(host, size)) { 3139 memset(host, ch, size); 3140 } 3141 } 3142 3143 /* return the size after decompression, or negative value on error */ 3144 static int 3145 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, 3146 const uint8_t *source, size_t source_len) 3147 { 3148 int err; 3149 3150 err = inflateReset(stream); 3151 if (err != Z_OK) { 3152 return -1; 3153 } 3154 3155 stream->avail_in = source_len; 3156 stream->next_in = (uint8_t *)source; 3157 stream->avail_out = dest_len; 3158 stream->next_out = dest; 3159 3160 err = inflate(stream, Z_NO_FLUSH); 3161 if (err != Z_STREAM_END) { 3162 return -1; 3163 } 3164 3165 return stream->total_out; 3166 } 3167 3168 static void *do_data_decompress(void *opaque) 3169 { 3170 DecompressParam *param = opaque; 3171 unsigned long pagesize; 3172 uint8_t *des; 3173 int len, ret; 3174 3175 qemu_mutex_lock(¶m->mutex); 3176 while (!param->quit) { 3177 if (param->des) { 3178 des = param->des; 3179 len = param->len; 3180 param->des = 0; 3181 qemu_mutex_unlock(¶m->mutex); 3182 3183 pagesize = TARGET_PAGE_SIZE; 3184 3185 ret = qemu_uncompress_data(¶m->stream, des, pagesize, 3186 param->compbuf, len); 3187 if (ret < 0 && migrate_get_current()->decompress_error_check) { 3188 error_report("decompress 
data failed"); 3189 qemu_file_set_error(decomp_file, ret); 3190 } 3191 3192 qemu_mutex_lock(&decomp_done_lock); 3193 param->done = true; 3194 qemu_cond_signal(&decomp_done_cond); 3195 qemu_mutex_unlock(&decomp_done_lock); 3196 3197 qemu_mutex_lock(¶m->mutex); 3198 } else { 3199 qemu_cond_wait(¶m->cond, ¶m->mutex); 3200 } 3201 } 3202 qemu_mutex_unlock(¶m->mutex); 3203 3204 return NULL; 3205 } 3206 3207 static int wait_for_decompress_done(void) 3208 { 3209 int idx, thread_count; 3210 3211 if (!migrate_use_compression()) { 3212 return 0; 3213 } 3214 3215 thread_count = migrate_decompress_threads(); 3216 qemu_mutex_lock(&decomp_done_lock); 3217 for (idx = 0; idx < thread_count; idx++) { 3218 while (!decomp_param[idx].done) { 3219 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 3220 } 3221 } 3222 qemu_mutex_unlock(&decomp_done_lock); 3223 return qemu_file_get_error(decomp_file); 3224 } 3225 3226 static void compress_threads_load_cleanup(void) 3227 { 3228 int i, thread_count; 3229 3230 if (!migrate_use_compression()) { 3231 return; 3232 } 3233 thread_count = migrate_decompress_threads(); 3234 for (i = 0; i < thread_count; i++) { 3235 /* 3236 * we use it as a indicator which shows if the thread is 3237 * properly init'd or not 3238 */ 3239 if (!decomp_param[i].compbuf) { 3240 break; 3241 } 3242 3243 qemu_mutex_lock(&decomp_param[i].mutex); 3244 decomp_param[i].quit = true; 3245 qemu_cond_signal(&decomp_param[i].cond); 3246 qemu_mutex_unlock(&decomp_param[i].mutex); 3247 } 3248 for (i = 0; i < thread_count; i++) { 3249 if (!decomp_param[i].compbuf) { 3250 break; 3251 } 3252 3253 qemu_thread_join(decompress_threads + i); 3254 qemu_mutex_destroy(&decomp_param[i].mutex); 3255 qemu_cond_destroy(&decomp_param[i].cond); 3256 inflateEnd(&decomp_param[i].stream); 3257 g_free(decomp_param[i].compbuf); 3258 decomp_param[i].compbuf = NULL; 3259 } 3260 g_free(decompress_threads); 3261 g_free(decomp_param); 3262 decompress_threads = NULL; 3263 decomp_param = NULL; 3264 
decomp_file = NULL; 3265 } 3266 3267 static int compress_threads_load_setup(QEMUFile *f) 3268 { 3269 int i, thread_count; 3270 3271 if (!migrate_use_compression()) { 3272 return 0; 3273 } 3274 3275 thread_count = migrate_decompress_threads(); 3276 decompress_threads = g_new0(QemuThread, thread_count); 3277 decomp_param = g_new0(DecompressParam, thread_count); 3278 qemu_mutex_init(&decomp_done_lock); 3279 qemu_cond_init(&decomp_done_cond); 3280 decomp_file = f; 3281 for (i = 0; i < thread_count; i++) { 3282 if (inflateInit(&decomp_param[i].stream) != Z_OK) { 3283 goto exit; 3284 } 3285 3286 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); 3287 qemu_mutex_init(&decomp_param[i].mutex); 3288 qemu_cond_init(&decomp_param[i].cond); 3289 decomp_param[i].done = true; 3290 decomp_param[i].quit = false; 3291 qemu_thread_create(decompress_threads + i, "decompress", 3292 do_data_decompress, decomp_param + i, 3293 QEMU_THREAD_JOINABLE); 3294 } 3295 return 0; 3296 exit: 3297 compress_threads_load_cleanup(); 3298 return -1; 3299 } 3300 3301 static void decompress_data_with_multi_threads(QEMUFile *f, 3302 void *host, int len) 3303 { 3304 int idx, thread_count; 3305 3306 thread_count = migrate_decompress_threads(); 3307 QEMU_LOCK_GUARD(&decomp_done_lock); 3308 while (true) { 3309 for (idx = 0; idx < thread_count; idx++) { 3310 if (decomp_param[idx].done) { 3311 decomp_param[idx].done = false; 3312 qemu_mutex_lock(&decomp_param[idx].mutex); 3313 qemu_get_buffer(f, decomp_param[idx].compbuf, len); 3314 decomp_param[idx].des = host; 3315 decomp_param[idx].len = len; 3316 qemu_cond_signal(&decomp_param[idx].cond); 3317 qemu_mutex_unlock(&decomp_param[idx].mutex); 3318 break; 3319 } 3320 } 3321 if (idx < thread_count) { 3322 break; 3323 } else { 3324 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 3325 } 3326 } 3327 } 3328 3329 static void colo_init_ram_state(void) 3330 { 3331 ram_state_init(&ram_state); 3332 } 3333 3334 /* 3335 * colo cache: this is for 
secondary VM, we cache the whole 3336 * memory of the secondary VM, it is need to hold the global lock 3337 * to call this helper. 3338 */ 3339 int colo_init_ram_cache(void) 3340 { 3341 RAMBlock *block; 3342 3343 WITH_RCU_READ_LOCK_GUARD() { 3344 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3345 block->colo_cache = qemu_anon_ram_alloc(block->used_length, 3346 NULL, false, false); 3347 if (!block->colo_cache) { 3348 error_report("%s: Can't alloc memory for COLO cache of block %s," 3349 "size 0x" RAM_ADDR_FMT, __func__, block->idstr, 3350 block->used_length); 3351 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3352 if (block->colo_cache) { 3353 qemu_anon_ram_free(block->colo_cache, block->used_length); 3354 block->colo_cache = NULL; 3355 } 3356 } 3357 return -errno; 3358 } 3359 } 3360 } 3361 3362 /* 3363 * Record the dirty pages that sent by PVM, we use this dirty bitmap together 3364 * with to decide which page in cache should be flushed into SVM's RAM. Here 3365 * we use the same name 'ram_bitmap' as for migration. 3366 */ 3367 if (ram_bytes_total()) { 3368 RAMBlock *block; 3369 3370 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3371 unsigned long pages = block->max_length >> TARGET_PAGE_BITS; 3372 block->bmap = bitmap_new(pages); 3373 } 3374 } 3375 3376 colo_init_ram_state(); 3377 return 0; 3378 } 3379 3380 /* TODO: duplicated with ram_init_bitmaps */ 3381 void colo_incoming_start_dirty_log(void) 3382 { 3383 RAMBlock *block = NULL; 3384 /* For memory_global_dirty_log_start below. 
*/ 3385 qemu_mutex_lock_iothread(); 3386 qemu_mutex_lock_ramlist(); 3387 3388 memory_global_dirty_log_sync(); 3389 WITH_RCU_READ_LOCK_GUARD() { 3390 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3391 ramblock_sync_dirty_bitmap(ram_state, block); 3392 /* Discard this dirty bitmap record */ 3393 bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS); 3394 } 3395 memory_global_dirty_log_start(); 3396 } 3397 ram_state->migration_dirty_pages = 0; 3398 qemu_mutex_unlock_ramlist(); 3399 qemu_mutex_unlock_iothread(); 3400 } 3401 3402 /* It is need to hold the global lock to call this helper */ 3403 void colo_release_ram_cache(void) 3404 { 3405 RAMBlock *block; 3406 3407 memory_global_dirty_log_stop(); 3408 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3409 g_free(block->bmap); 3410 block->bmap = NULL; 3411 } 3412 3413 WITH_RCU_READ_LOCK_GUARD() { 3414 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3415 if (block->colo_cache) { 3416 qemu_anon_ram_free(block->colo_cache, block->used_length); 3417 block->colo_cache = NULL; 3418 } 3419 } 3420 } 3421 ram_state_cleanup(&ram_state); 3422 } 3423 3424 /** 3425 * ram_load_setup: Setup RAM for migration incoming side 3426 * 3427 * Returns zero to indicate success and negative for error 3428 * 3429 * @f: QEMUFile where to receive the data 3430 * @opaque: RAMState pointer 3431 */ 3432 static int ram_load_setup(QEMUFile *f, void *opaque) 3433 { 3434 if (compress_threads_load_setup(f)) { 3435 return -1; 3436 } 3437 3438 xbzrle_load_setup(); 3439 ramblock_recv_map_init(); 3440 3441 return 0; 3442 } 3443 3444 static int ram_load_cleanup(void *opaque) 3445 { 3446 RAMBlock *rb; 3447 3448 RAMBLOCK_FOREACH_NOT_IGNORED(rb) { 3449 qemu_ram_block_writeback(rb); 3450 } 3451 3452 xbzrle_load_cleanup(); 3453 compress_threads_load_cleanup(); 3454 3455 RAMBLOCK_FOREACH_NOT_IGNORED(rb) { 3456 g_free(rb->receivedmap); 3457 rb->receivedmap = NULL; 3458 } 3459 3460 return 0; 3461 } 3462 3463 /** 3464 * ram_postcopy_incoming_init: allocate postcopy data structures 3465 * 
3466 * Returns 0 for success and negative if there was one error 3467 * 3468 * @mis: current migration incoming state 3469 * 3470 * Allocate data structures etc needed by incoming migration with 3471 * postcopy-ram. postcopy-ram's similarly names 3472 * postcopy_ram_incoming_init does the work. 3473 */ 3474 int ram_postcopy_incoming_init(MigrationIncomingState *mis) 3475 { 3476 return postcopy_ram_incoming_init(mis); 3477 } 3478 3479 /** 3480 * ram_load_postcopy: load a page in postcopy case 3481 * 3482 * Returns 0 for success or -errno in case of error 3483 * 3484 * Called in postcopy mode by ram_load(). 3485 * rcu_read_lock is taken prior to this being called. 3486 * 3487 * @f: QEMUFile where to send the data 3488 */ 3489 static int ram_load_postcopy(QEMUFile *f) 3490 { 3491 int flags = 0, ret = 0; 3492 bool place_needed = false; 3493 bool matches_target_page_size = false; 3494 MigrationIncomingState *mis = migration_incoming_get_current(); 3495 /* Temporary page that is later 'placed' */ 3496 void *postcopy_host_page = mis->postcopy_tmp_page; 3497 void *host_page = NULL; 3498 bool all_zero = true; 3499 int target_pages = 0; 3500 3501 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { 3502 ram_addr_t addr; 3503 void *page_buffer = NULL; 3504 void *place_source = NULL; 3505 RAMBlock *block = NULL; 3506 uint8_t ch; 3507 int len; 3508 3509 addr = qemu_get_be64(f); 3510 3511 /* 3512 * If qemu file error, we should stop here, and then "addr" 3513 * may be invalid 3514 */ 3515 ret = qemu_file_get_error(f); 3516 if (ret) { 3517 break; 3518 } 3519 3520 flags = addr & ~TARGET_PAGE_MASK; 3521 addr &= TARGET_PAGE_MASK; 3522 3523 trace_ram_load_postcopy_loop((uint64_t)addr, flags); 3524 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 3525 RAM_SAVE_FLAG_COMPRESS_PAGE)) { 3526 block = ram_block_from_stream(f, flags); 3527 if (!block) { 3528 ret = -EINVAL; 3529 break; 3530 } 3531 3532 /* 3533 * Relying on used_length is racy and can result in false positives. 
3534 * We might place pages beyond used_length in case RAM was shrunk 3535 * while in postcopy, which is fine - trying to place via 3536 * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault. 3537 */ 3538 if (!block->host || addr >= block->postcopy_length) { 3539 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 3540 ret = -EINVAL; 3541 break; 3542 } 3543 target_pages++; 3544 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE; 3545 /* 3546 * Postcopy requires that we place whole host pages atomically; 3547 * these may be huge pages for RAMBlocks that are backed by 3548 * hugetlbfs. 3549 * To make it atomic, the data is read into a temporary page 3550 * that's moved into place later. 3551 * The migration protocol uses, possibly smaller, target-pages 3552 * however the source ensures it always sends all the components 3553 * of a host page in one chunk. 3554 */ 3555 page_buffer = postcopy_host_page + 3556 host_page_offset_from_ram_block_offset(block, addr); 3557 /* If all TP are zero then we can optimise the place */ 3558 if (target_pages == 1) { 3559 host_page = host_page_from_ram_block_offset(block, addr); 3560 } else if (host_page != host_page_from_ram_block_offset(block, 3561 addr)) { 3562 /* not the 1st TP within the HP */ 3563 error_report("Non-same host page %p/%p", host_page, 3564 host_page_from_ram_block_offset(block, addr)); 3565 ret = -EINVAL; 3566 break; 3567 } 3568 3569 /* 3570 * If it's the last part of a host page then we place the host 3571 * page 3572 */ 3573 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { 3574 place_needed = true; 3575 } 3576 place_source = postcopy_host_page; 3577 } 3578 3579 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 3580 case RAM_SAVE_FLAG_ZERO: 3581 ch = qemu_get_byte(f); 3582 /* 3583 * Can skip to set page_buffer when 3584 * this is a zero page and (block->page_size == TARGET_PAGE_SIZE). 
3585 */ 3586 if (ch || !matches_target_page_size) { 3587 memset(page_buffer, ch, TARGET_PAGE_SIZE); 3588 } 3589 if (ch) { 3590 all_zero = false; 3591 } 3592 break; 3593 3594 case RAM_SAVE_FLAG_PAGE: 3595 all_zero = false; 3596 if (!matches_target_page_size) { 3597 /* For huge pages, we always use temporary buffer */ 3598 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); 3599 } else { 3600 /* 3601 * For small pages that matches target page size, we 3602 * avoid the qemu_file copy. Instead we directly use 3603 * the buffer of QEMUFile to place the page. Note: we 3604 * cannot do any QEMUFile operation before using that 3605 * buffer to make sure the buffer is valid when 3606 * placing the page. 3607 */ 3608 qemu_get_buffer_in_place(f, (uint8_t **)&place_source, 3609 TARGET_PAGE_SIZE); 3610 } 3611 break; 3612 case RAM_SAVE_FLAG_COMPRESS_PAGE: 3613 all_zero = false; 3614 len = qemu_get_be32(f); 3615 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 3616 error_report("Invalid compressed data length: %d", len); 3617 ret = -EINVAL; 3618 break; 3619 } 3620 decompress_data_with_multi_threads(f, page_buffer, len); 3621 break; 3622 3623 case RAM_SAVE_FLAG_EOS: 3624 /* normal exit */ 3625 multifd_recv_sync_main(); 3626 break; 3627 default: 3628 error_report("Unknown combination of migration flags: 0x%x" 3629 " (postcopy mode)", flags); 3630 ret = -EINVAL; 3631 break; 3632 } 3633 3634 /* Got the whole host page, wait for decompress before placing. 
*/ 3635 if (place_needed) { 3636 ret |= wait_for_decompress_done(); 3637 } 3638 3639 /* Detect for any possible file errors */ 3640 if (!ret && qemu_file_get_error(f)) { 3641 ret = qemu_file_get_error(f); 3642 } 3643 3644 if (!ret && place_needed) { 3645 if (all_zero) { 3646 ret = postcopy_place_page_zero(mis, host_page, block); 3647 } else { 3648 ret = postcopy_place_page(mis, host_page, place_source, 3649 block); 3650 } 3651 place_needed = false; 3652 target_pages = 0; 3653 /* Assume we have a zero page until we detect something different */ 3654 all_zero = true; 3655 } 3656 } 3657 3658 return ret; 3659 } 3660 3661 static bool postcopy_is_advised(void) 3662 { 3663 PostcopyState ps = postcopy_state_get(); 3664 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; 3665 } 3666 3667 static bool postcopy_is_running(void) 3668 { 3669 PostcopyState ps = postcopy_state_get(); 3670 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END; 3671 } 3672 3673 /* 3674 * Flush content of RAM cache into SVM's memory. 3675 * Only flush the pages that be dirtied by PVM or SVM or both. 
3676 */ 3677 void colo_flush_ram_cache(void) 3678 { 3679 RAMBlock *block = NULL; 3680 void *dst_host; 3681 void *src_host; 3682 unsigned long offset = 0; 3683 3684 memory_global_dirty_log_sync(); 3685 WITH_RCU_READ_LOCK_GUARD() { 3686 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3687 ramblock_sync_dirty_bitmap(ram_state, block); 3688 } 3689 } 3690 3691 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages); 3692 WITH_RCU_READ_LOCK_GUARD() { 3693 block = QLIST_FIRST_RCU(&ram_list.blocks); 3694 3695 while (block) { 3696 offset = migration_bitmap_find_dirty(ram_state, block, offset); 3697 3698 if (!offset_in_ramblock(block, 3699 ((ram_addr_t)offset) << TARGET_PAGE_BITS)) { 3700 offset = 0; 3701 block = QLIST_NEXT_RCU(block, next); 3702 } else { 3703 migration_bitmap_clear_dirty(ram_state, block, offset); 3704 dst_host = block->host 3705 + (((ram_addr_t)offset) << TARGET_PAGE_BITS); 3706 src_host = block->colo_cache 3707 + (((ram_addr_t)offset) << TARGET_PAGE_BITS); 3708 memcpy(dst_host, src_host, TARGET_PAGE_SIZE); 3709 } 3710 } 3711 } 3712 trace_colo_flush_ram_cache_end(); 3713 } 3714 3715 /** 3716 * ram_load_precopy: load pages in precopy case 3717 * 3718 * Returns 0 for success or -errno in case of error 3719 * 3720 * Called in precopy mode by ram_load(). 3721 * rcu_read_lock is taken prior to this being called. 
3722 * 3723 * @f: QEMUFile where to send the data 3724 */ 3725 static int ram_load_precopy(QEMUFile *f) 3726 { 3727 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; 3728 /* ADVISE is earlier, it shows the source has the postcopy capability on */ 3729 bool postcopy_advised = postcopy_is_advised(); 3730 if (!migrate_use_compression()) { 3731 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; 3732 } 3733 3734 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { 3735 ram_addr_t addr, total_ram_bytes; 3736 void *host = NULL, *host_bak = NULL; 3737 uint8_t ch; 3738 3739 /* 3740 * Yield periodically to let main loop run, but an iteration of 3741 * the main loop is expensive, so do it each some iterations 3742 */ 3743 if ((i & 32767) == 0 && qemu_in_coroutine()) { 3744 aio_co_schedule(qemu_get_current_aio_context(), 3745 qemu_coroutine_self()); 3746 qemu_coroutine_yield(); 3747 } 3748 i++; 3749 3750 addr = qemu_get_be64(f); 3751 flags = addr & ~TARGET_PAGE_MASK; 3752 addr &= TARGET_PAGE_MASK; 3753 3754 if (flags & invalid_flags) { 3755 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { 3756 error_report("Received an unexpected compressed page"); 3757 } 3758 3759 ret = -EINVAL; 3760 break; 3761 } 3762 3763 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 3764 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { 3765 RAMBlock *block = ram_block_from_stream(f, flags); 3766 3767 host = host_from_ram_block_offset(block, addr); 3768 /* 3769 * After going into COLO stage, we should not load the page 3770 * into SVM's memory directly, we put them into colo_cache firstly. 3771 * NOTE: We need to keep a copy of SVM's ram in colo_cache. 3772 * Previously, we copied all these memory in preparing stage of COLO 3773 * while we need to stop VM, which is a time-consuming process. 
3774 * Here we optimize it by a trick, back-up every page while in 3775 * migration process while COLO is enabled, though it affects the 3776 * speed of the migration, but it obviously reduce the downtime of 3777 * back-up all SVM'S memory in COLO preparing stage. 3778 */ 3779 if (migration_incoming_colo_enabled()) { 3780 if (migration_incoming_in_colo_state()) { 3781 /* In COLO stage, put all pages into cache temporarily */ 3782 host = colo_cache_from_block_offset(block, addr, true); 3783 } else { 3784 /* 3785 * In migration stage but before COLO stage, 3786 * Put all pages into both cache and SVM's memory. 3787 */ 3788 host_bak = colo_cache_from_block_offset(block, addr, false); 3789 } 3790 } 3791 if (!host) { 3792 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 3793 ret = -EINVAL; 3794 break; 3795 } 3796 if (!migration_incoming_in_colo_state()) { 3797 ramblock_recv_bitmap_set(block, host); 3798 } 3799 3800 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); 3801 } 3802 3803 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 3804 case RAM_SAVE_FLAG_MEM_SIZE: 3805 /* Synchronize RAM block list */ 3806 total_ram_bytes = addr; 3807 while (!ret && total_ram_bytes) { 3808 RAMBlock *block; 3809 char id[256]; 3810 ram_addr_t length; 3811 3812 len = qemu_get_byte(f); 3813 qemu_get_buffer(f, (uint8_t *)id, len); 3814 id[len] = 0; 3815 length = qemu_get_be64(f); 3816 3817 block = qemu_ram_block_by_name(id); 3818 if (block && !qemu_ram_is_migratable(block)) { 3819 error_report("block %s should not be migrated !", id); 3820 ret = -EINVAL; 3821 } else if (block) { 3822 if (length != block->used_length) { 3823 Error *local_err = NULL; 3824 3825 ret = qemu_ram_resize(block, length, 3826 &local_err); 3827 if (local_err) { 3828 error_report_err(local_err); 3829 } 3830 } 3831 /* For postcopy we need to check hugepage sizes match */ 3832 if (postcopy_advised && migrate_postcopy_ram() && 3833 block->page_size != qemu_host_page_size) { 3834 uint64_t remote_page_size = 
qemu_get_be64(f); 3835 if (remote_page_size != block->page_size) { 3836 error_report("Mismatched RAM page size %s " 3837 "(local) %zd != %" PRId64, 3838 id, block->page_size, 3839 remote_page_size); 3840 ret = -EINVAL; 3841 } 3842 } 3843 if (migrate_ignore_shared()) { 3844 hwaddr addr = qemu_get_be64(f); 3845 if (ramblock_is_ignored(block) && 3846 block->mr->addr != addr) { 3847 error_report("Mismatched GPAs for block %s " 3848 "%" PRId64 "!= %" PRId64, 3849 id, (uint64_t)addr, 3850 (uint64_t)block->mr->addr); 3851 ret = -EINVAL; 3852 } 3853 } 3854 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, 3855 block->idstr); 3856 } else { 3857 error_report("Unknown ramblock \"%s\", cannot " 3858 "accept migration", id); 3859 ret = -EINVAL; 3860 } 3861 3862 total_ram_bytes -= length; 3863 } 3864 break; 3865 3866 case RAM_SAVE_FLAG_ZERO: 3867 ch = qemu_get_byte(f); 3868 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 3869 break; 3870 3871 case RAM_SAVE_FLAG_PAGE: 3872 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 3873 break; 3874 3875 case RAM_SAVE_FLAG_COMPRESS_PAGE: 3876 len = qemu_get_be32(f); 3877 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 3878 error_report("Invalid compressed data length: %d", len); 3879 ret = -EINVAL; 3880 break; 3881 } 3882 decompress_data_with_multi_threads(f, host, len); 3883 break; 3884 3885 case RAM_SAVE_FLAG_XBZRLE: 3886 if (load_xbzrle(f, addr, host) < 0) { 3887 error_report("Failed to decompress XBZRLE page at " 3888 RAM_ADDR_FMT, addr); 3889 ret = -EINVAL; 3890 break; 3891 } 3892 break; 3893 case RAM_SAVE_FLAG_EOS: 3894 /* normal exit */ 3895 multifd_recv_sync_main(); 3896 break; 3897 default: 3898 if (flags & RAM_SAVE_FLAG_HOOK) { 3899 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); 3900 } else { 3901 error_report("Unknown combination of migration flags: 0x%x", 3902 flags); 3903 ret = -EINVAL; 3904 } 3905 } 3906 if (!ret) { 3907 ret = qemu_file_get_error(f); 3908 } 3909 if (!ret && host_bak) { 3910 memcpy(host_bak, host, 
TARGET_PAGE_SIZE); 3911 } 3912 } 3913 3914 ret |= wait_for_decompress_done(); 3915 return ret; 3916 } 3917 3918 static int ram_load(QEMUFile *f, void *opaque, int version_id) 3919 { 3920 int ret = 0; 3921 static uint64_t seq_iter; 3922 /* 3923 * If system is running in postcopy mode, page inserts to host memory must 3924 * be atomic 3925 */ 3926 bool postcopy_running = postcopy_is_running(); 3927 3928 seq_iter++; 3929 3930 if (version_id != 4) { 3931 return -EINVAL; 3932 } 3933 3934 /* 3935 * This RCU critical section can be very long running. 3936 * When RCU reclaims in the code start to become numerous, 3937 * it will be necessary to reduce the granularity of this 3938 * critical section. 3939 */ 3940 WITH_RCU_READ_LOCK_GUARD() { 3941 if (postcopy_running) { 3942 ret = ram_load_postcopy(f); 3943 } else { 3944 ret = ram_load_precopy(f); 3945 } 3946 } 3947 trace_ram_load_complete(ret, seq_iter); 3948 3949 return ret; 3950 } 3951 3952 static bool ram_has_postcopy(void *opaque) 3953 { 3954 RAMBlock *rb; 3955 RAMBLOCK_FOREACH_NOT_IGNORED(rb) { 3956 if (ramblock_is_pmem(rb)) { 3957 info_report("Block: %s, host: %p is a nvdimm memory, postcopy" 3958 "is not supported now!", rb->idstr, rb->host); 3959 return false; 3960 } 3961 } 3962 3963 return migrate_postcopy_ram(); 3964 } 3965 3966 /* Sync all the dirty bitmap with destination VM. 
*/ 3967 static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) 3968 { 3969 RAMBlock *block; 3970 QEMUFile *file = s->to_dst_file; 3971 int ramblock_count = 0; 3972 3973 trace_ram_dirty_bitmap_sync_start(); 3974 3975 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 3976 qemu_savevm_send_recv_bitmap(file, block->idstr); 3977 trace_ram_dirty_bitmap_request(block->idstr); 3978 ramblock_count++; 3979 } 3980 3981 trace_ram_dirty_bitmap_sync_wait(); 3982 3983 /* Wait until all the ramblocks' dirty bitmap synced */ 3984 while (ramblock_count--) { 3985 qemu_sem_wait(&s->rp_state.rp_sem); 3986 } 3987 3988 trace_ram_dirty_bitmap_sync_complete(); 3989 3990 return 0; 3991 } 3992 3993 static void ram_dirty_bitmap_reload_notify(MigrationState *s) 3994 { 3995 qemu_sem_post(&s->rp_state.rp_sem); 3996 } 3997 3998 /* 3999 * Read the received bitmap, revert it as the initial dirty bitmap. 4000 * This is only used when the postcopy migration is paused but wants 4001 * to resume from a middle point. 4002 */ 4003 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) 4004 { 4005 int ret = -EINVAL; 4006 QEMUFile *file = s->rp_state.from_dst_file; 4007 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; 4008 uint64_t local_size = DIV_ROUND_UP(nbits, 8); 4009 uint64_t size, end_mark; 4010 4011 trace_ram_dirty_bitmap_reload_begin(block->idstr); 4012 4013 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { 4014 error_report("%s: incorrect state %s", __func__, 4015 MigrationStatus_str(s->state)); 4016 return -EINVAL; 4017 } 4018 4019 /* 4020 * Note: see comments in ramblock_recv_bitmap_send() on why we 4021 * need the endianness conversion, and the paddings. 
4022 */ 4023 local_size = ROUND_UP(local_size, 8); 4024 4025 /* Add paddings */ 4026 le_bitmap = bitmap_new(nbits + BITS_PER_LONG); 4027 4028 size = qemu_get_be64(file); 4029 4030 /* The size of the bitmap should match with our ramblock */ 4031 if (size != local_size) { 4032 error_report("%s: ramblock '%s' bitmap size mismatch " 4033 "(0x%"PRIx64" != 0x%"PRIx64")", __func__, 4034 block->idstr, size, local_size); 4035 ret = -EINVAL; 4036 goto out; 4037 } 4038 4039 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size); 4040 end_mark = qemu_get_be64(file); 4041 4042 ret = qemu_file_get_error(file); 4043 if (ret || size != local_size) { 4044 error_report("%s: read bitmap failed for ramblock '%s': %d" 4045 " (size 0x%"PRIx64", got: 0x%"PRIx64")", 4046 __func__, block->idstr, ret, local_size, size); 4047 ret = -EIO; 4048 goto out; 4049 } 4050 4051 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) { 4052 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64, 4053 __func__, block->idstr, end_mark); 4054 ret = -EINVAL; 4055 goto out; 4056 } 4057 4058 /* 4059 * Endianness conversion. We are during postcopy (though paused). 4060 * The dirty bitmap won't change. We can directly modify it. 4061 */ 4062 bitmap_from_le(block->bmap, le_bitmap, nbits); 4063 4064 /* 4065 * What we received is "received bitmap". Revert it as the initial 4066 * dirty bitmap for this ramblock. 4067 */ 4068 bitmap_complement(block->bmap, block->bmap, nbits); 4069 4070 trace_ram_dirty_bitmap_reload_complete(block->idstr); 4071 4072 /* 4073 * We succeeded to sync bitmap for current ramblock. If this is 4074 * the last one to sync, we need to notify the main send thread. 
4075 */ 4076 ram_dirty_bitmap_reload_notify(s); 4077 4078 ret = 0; 4079 out: 4080 g_free(le_bitmap); 4081 return ret; 4082 } 4083 4084 static int ram_resume_prepare(MigrationState *s, void *opaque) 4085 { 4086 RAMState *rs = *(RAMState **)opaque; 4087 int ret; 4088 4089 ret = ram_dirty_bitmap_sync_all(s, rs); 4090 if (ret) { 4091 return ret; 4092 } 4093 4094 ram_state_resume_prepare(rs, s->to_dst_file); 4095 4096 return 0; 4097 } 4098 4099 static SaveVMHandlers savevm_ram_handlers = { 4100 .save_setup = ram_save_setup, 4101 .save_live_iterate = ram_save_iterate, 4102 .save_live_complete_postcopy = ram_save_complete, 4103 .save_live_complete_precopy = ram_save_complete, 4104 .has_postcopy = ram_has_postcopy, 4105 .save_live_pending = ram_save_pending, 4106 .load_state = ram_load, 4107 .save_cleanup = ram_save_cleanup, 4108 .load_setup = ram_load_setup, 4109 .load_cleanup = ram_load_cleanup, 4110 .resume_prepare = ram_resume_prepare, 4111 }; 4112 4113 static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, 4114 size_t old_size, size_t new_size) 4115 { 4116 PostcopyState ps = postcopy_state_get(); 4117 ram_addr_t offset; 4118 RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset); 4119 Error *err = NULL; 4120 4121 if (ramblock_is_ignored(rb)) { 4122 return; 4123 } 4124 4125 if (!migration_is_idle()) { 4126 /* 4127 * Precopy code on the source cannot deal with the size of RAM blocks 4128 * changing at random points in time - especially after sending the 4129 * RAM block sizes in the migration stream, they must no longer change. 4130 * Abort and indicate a proper reason. 4131 */ 4132 error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr); 4133 migrate_set_error(migrate_get_current(), err); 4134 error_free(err); 4135 migration_cancel(); 4136 } 4137 4138 switch (ps) { 4139 case POSTCOPY_INCOMING_ADVISE: 4140 /* 4141 * Update what ram_postcopy_incoming_init()->init_range() does at the 4142 * time postcopy was advised. 
Syncing RAM blocks with the source will 4143 * result in RAM resizes. 4144 */ 4145 if (old_size < new_size) { 4146 if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) { 4147 error_report("RAM block '%s' discard of resized RAM failed", 4148 rb->idstr); 4149 } 4150 } 4151 rb->postcopy_length = new_size; 4152 break; 4153 case POSTCOPY_INCOMING_NONE: 4154 case POSTCOPY_INCOMING_RUNNING: 4155 case POSTCOPY_INCOMING_END: 4156 /* 4157 * Once our guest is running, postcopy does no longer care about 4158 * resizes. When growing, the new memory was not available on the 4159 * source, no handler needed. 4160 */ 4161 break; 4162 default: 4163 error_report("RAM block '%s' resized during postcopy state: %d", 4164 rb->idstr, ps); 4165 exit(-1); 4166 } 4167 } 4168 4169 static RAMBlockNotifier ram_mig_ram_notifier = { 4170 .ram_block_resized = ram_mig_ram_block_resized, 4171 }; 4172 4173 void ram_mig_init(void) 4174 { 4175 qemu_mutex_init(&XBZRLE.lock); 4176 register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state); 4177 ram_block_notifier_add(&ram_mig_ram_notifier); 4178 } 4179