/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include <zlib.h>
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "migration/block.h"

/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value, and it was renamed to avoid
 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct containing the XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock.
*/ 89 PageCache *cache; 90 QemuMutex lock; 91 /* it will store a page full of zeros */ 92 uint8_t *zero_target_page; 93 /* buffer used for XBZRLE decoding */ 94 uint8_t *decoded_buf; 95 } XBZRLE; 96 97 static void XBZRLE_cache_lock(void) 98 { 99 if (migrate_use_xbzrle()) 100 qemu_mutex_lock(&XBZRLE.lock); 101 } 102 103 static void XBZRLE_cache_unlock(void) 104 { 105 if (migrate_use_xbzrle()) 106 qemu_mutex_unlock(&XBZRLE.lock); 107 } 108 109 /** 110 * xbzrle_cache_resize: resize the xbzrle cache 111 * 112 * This function is called from qmp_migrate_set_cache_size in main 113 * thread, possibly while a migration is in progress. A running 114 * migration may be using the cache and might finish during this call, 115 * hence changes to the cache are protected by XBZRLE.lock(). 116 * 117 * Returns 0 for success or -1 for error 118 * 119 * @new_size: new cache size 120 * @errp: set *errp if the check failed, with reason 121 */ 122 int xbzrle_cache_resize(int64_t new_size, Error **errp) 123 { 124 PageCache *new_cache; 125 int64_t ret = 0; 126 127 /* Check for truncation */ 128 if (new_size != (size_t)new_size) { 129 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", 130 "exceeding address space"); 131 return -1; 132 } 133 134 if (new_size == migrate_xbzrle_cache_size()) { 135 /* nothing to do */ 136 return 0; 137 } 138 139 XBZRLE_cache_lock(); 140 141 if (XBZRLE.cache != NULL) { 142 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp); 143 if (!new_cache) { 144 ret = -1; 145 goto out; 146 } 147 148 cache_fini(XBZRLE.cache); 149 XBZRLE.cache = new_cache; 150 } 151 out: 152 XBZRLE_cache_unlock(); 153 return ret; 154 } 155 156 static void ramblock_recv_map_init(void) 157 { 158 RAMBlock *rb; 159 160 RAMBLOCK_FOREACH(rb) { 161 assert(!rb->receivedmap); 162 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits()); 163 } 164 } 165 166 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr) 167 { 168 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb), 169 rb->receivedmap); 170 } 171 172 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset) 173 { 174 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); 175 } 176 177 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) 178 { 179 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); 180 } 181 182 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, 183 size_t nr) 184 { 185 bitmap_set_atomic(rb->receivedmap, 186 ramblock_recv_bitmap_offset(host_addr, rb), 187 nr); 188 } 189 190 /* 191 * An outstanding page request, on the source, having been received 192 * and queued 193 */ 194 struct RAMSrcPageRequest { 195 RAMBlock *rb; 196 hwaddr offset; 197 hwaddr len; 198 199 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; 200 }; 201 202 /* State of RAM for migration */ 203 struct RAMState { 204 /* QEMUFile used for this migration */ 205 QEMUFile *f; 206 /* Last block that we have visited searching for dirty pages */ 207 RAMBlock *last_seen_block; 208 /* Last block from where we have sent data */ 209 RAMBlock *last_sent_block; 210 /* Last dirty target page we have sent */ 211 ram_addr_t last_page; 212 /* last ram version we have seen */ 213 uint32_t last_version; 214 /* We are in the first round */ 215 bool ram_bulk_stage; 216 /* How many times we have dirty too many pages */ 217 int dirty_rate_high_cnt; 218 /* these variables are used for bitmap sync */ 219 /* last time we did a full bitmap_sync */ 220 int64_t time_last_bitmap_sync; 221 /* bytes 
transferred at start_time */ 222 uint64_t bytes_xfer_prev; 223 /* number of dirty pages since start_time */ 224 uint64_t num_dirty_pages_period; 225 /* xbzrle misses since the beginning of the period */ 226 uint64_t xbzrle_cache_miss_prev; 227 /* number of iterations at the beginning of period */ 228 uint64_t iterations_prev; 229 /* Iterations since start */ 230 uint64_t iterations; 231 /* number of dirty bits in the bitmap */ 232 uint64_t migration_dirty_pages; 233 /* protects modification of the bitmap */ 234 QemuMutex bitmap_mutex; 235 /* The RAMBlock used in the last src_page_requests */ 236 RAMBlock *last_req_rb; 237 /* Queue of outstanding page requests from the destination */ 238 QemuMutex src_page_req_mutex; 239 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests; 240 }; 241 typedef struct RAMState RAMState; 242 243 static RAMState *ram_state; 244 245 uint64_t ram_bytes_remaining(void) 246 { 247 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) : 248 0; 249 } 250 251 MigrationStats ram_counters; 252 253 /* used by the search for pages to send */ 254 struct PageSearchStatus { 255 /* Current block being searched */ 256 RAMBlock *block; 257 /* Current page to search from */ 258 unsigned long page; 259 /* Set once we wrap around */ 260 bool complete_round; 261 }; 262 typedef struct PageSearchStatus PageSearchStatus; 263 264 struct CompressParam { 265 bool done; 266 bool quit; 267 QEMUFile *file; 268 QemuMutex mutex; 269 QemuCond cond; 270 RAMBlock *block; 271 ram_addr_t offset; 272 273 /* internally used fields */ 274 z_stream stream; 275 uint8_t *originbuf; 276 }; 277 typedef struct CompressParam CompressParam; 278 279 struct DecompressParam { 280 bool done; 281 bool quit; 282 QemuMutex mutex; 283 QemuCond cond; 284 void *des; 285 uint8_t *compbuf; 286 int len; 287 z_stream stream; 288 }; 289 typedef struct DecompressParam DecompressParam; 290 291 static CompressParam *comp_param; 292 static QemuThread *compress_threads; 293 /* comp_done_cond is used to wake up the migration thread when 294 * one of the compression threads has finished the compression. 295 * comp_done_lock is used to co-work with comp_done_cond. 
296 */ 297 static QemuMutex comp_done_lock; 298 static QemuCond comp_done_cond; 299 /* The empty QEMUFileOps will be used by file in CompressParam */ 300 static const QEMUFileOps empty_ops = { }; 301 302 static QEMUFile *decomp_file; 303 static DecompressParam *decomp_param; 304 static QemuThread *decompress_threads; 305 static QemuMutex decomp_done_lock; 306 static QemuCond decomp_done_cond; 307 308 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, 309 ram_addr_t offset, uint8_t *source_buf); 310 311 static void *do_data_compress(void *opaque) 312 { 313 CompressParam *param = opaque; 314 RAMBlock *block; 315 ram_addr_t offset; 316 317 qemu_mutex_lock(¶m->mutex); 318 while (!param->quit) { 319 if (param->block) { 320 block = param->block; 321 offset = param->offset; 322 param->block = NULL; 323 qemu_mutex_unlock(¶m->mutex); 324 325 do_compress_ram_page(param->file, ¶m->stream, block, offset, 326 param->originbuf); 327 328 qemu_mutex_lock(&comp_done_lock); 329 param->done = true; 330 qemu_cond_signal(&comp_done_cond); 331 qemu_mutex_unlock(&comp_done_lock); 332 333 qemu_mutex_lock(¶m->mutex); 334 } else { 335 qemu_cond_wait(¶m->cond, ¶m->mutex); 336 } 337 } 338 qemu_mutex_unlock(¶m->mutex); 339 340 return NULL; 341 } 342 343 static inline void terminate_compression_threads(void) 344 { 345 int idx, thread_count; 346 347 thread_count = migrate_compress_threads(); 348 349 for (idx = 0; idx < thread_count; idx++) { 350 qemu_mutex_lock(&comp_param[idx].mutex); 351 comp_param[idx].quit = true; 352 qemu_cond_signal(&comp_param[idx].cond); 353 qemu_mutex_unlock(&comp_param[idx].mutex); 354 } 355 } 356 357 static void compress_threads_save_cleanup(void) 358 { 359 int i, thread_count; 360 361 if (!migrate_use_compression()) { 362 return; 363 } 364 terminate_compression_threads(); 365 thread_count = migrate_compress_threads(); 366 for (i = 0; i < thread_count; i++) { 367 /* 368 * we use it as a indicator which shows if the thread is 369 * properly init'd or not 370 */ 371 if (!comp_param[i].file) { 372 break; 373 } 374 qemu_thread_join(compress_threads + i); 375 qemu_mutex_destroy(&comp_param[i].mutex); 376 qemu_cond_destroy(&comp_param[i].cond); 377 deflateEnd(&comp_param[i].stream); 378 g_free(comp_param[i].originbuf); 379 qemu_fclose(comp_param[i].file); 380 comp_param[i].file = NULL; 381 } 382 qemu_mutex_destroy(&comp_done_lock); 383 qemu_cond_destroy(&comp_done_cond); 384 g_free(compress_threads); 385 g_free(comp_param); 386 compress_threads = NULL; 387 comp_param = NULL; 388 } 389 390 static int compress_threads_save_setup(void) 391 { 392 int i, thread_count; 393 394 if (!migrate_use_compression()) { 395 return 0; 396 } 397 thread_count = migrate_compress_threads(); 398 compress_threads = g_new0(QemuThread, thread_count); 399 comp_param = g_new0(CompressParam, thread_count); 400 qemu_cond_init(&comp_done_cond); 401 qemu_mutex_init(&comp_done_lock); 402 for (i = 0; i < thread_count; i++) { 403 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE); 404 if (!comp_param[i].originbuf) { 405 goto exit; 406 } 407 408 if (deflateInit(&comp_param[i].stream, 409 migrate_compress_level()) != Z_OK) { 410 g_free(comp_param[i].originbuf); 411 goto exit; 412 } 413 414 /* comp_param[i].file is just used as a dummy buffer to save data, 415 * set its ops to empty. 
416 */ 417 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); 418 comp_param[i].done = true; 419 comp_param[i].quit = false; 420 qemu_mutex_init(&comp_param[i].mutex); 421 qemu_cond_init(&comp_param[i].cond); 422 qemu_thread_create(compress_threads + i, "compress", 423 do_data_compress, comp_param + i, 424 QEMU_THREAD_JOINABLE); 425 } 426 return 0; 427 428 exit: 429 compress_threads_save_cleanup(); 430 return -1; 431 } 432 433 /* Multiple fd's */ 434 435 struct MultiFDSendParams { 436 uint8_t id; 437 char *name; 438 QemuThread thread; 439 QemuSemaphore sem; 440 QemuMutex mutex; 441 bool quit; 442 }; 443 typedef struct MultiFDSendParams MultiFDSendParams; 444 445 struct { 446 MultiFDSendParams *params; 447 /* number of created threads */ 448 int count; 449 } *multifd_send_state; 450 451 static void terminate_multifd_send_threads(Error *errp) 452 { 453 int i; 454 455 for (i = 0; i < multifd_send_state->count; i++) { 456 MultiFDSendParams *p = &multifd_send_state->params[i]; 457 458 qemu_mutex_lock(&p->mutex); 459 p->quit = true; 460 qemu_sem_post(&p->sem); 461 qemu_mutex_unlock(&p->mutex); 462 } 463 } 464 465 int multifd_save_cleanup(Error **errp) 466 { 467 int i; 468 int ret = 0; 469 470 if (!migrate_use_multifd()) { 471 return 0; 472 } 473 terminate_multifd_send_threads(NULL); 474 for (i = 0; i < multifd_send_state->count; i++) { 475 MultiFDSendParams *p = &multifd_send_state->params[i]; 476 477 qemu_thread_join(&p->thread); 478 qemu_mutex_destroy(&p->mutex); 479 qemu_sem_destroy(&p->sem); 480 g_free(p->name); 481 p->name = NULL; 482 } 483 g_free(multifd_send_state->params); 484 multifd_send_state->params = NULL; 485 g_free(multifd_send_state); 486 multifd_send_state = NULL; 487 return ret; 488 } 489 490 static void *multifd_send_thread(void *opaque) 491 { 492 MultiFDSendParams *p = opaque; 493 494 while (true) { 495 qemu_mutex_lock(&p->mutex); 496 if (p->quit) { 497 qemu_mutex_unlock(&p->mutex); 498 break; 499 } 500 qemu_mutex_unlock(&p->mutex); 501 qemu_sem_wait(&p->sem); 502 } 503 504 return NULL; 505 } 506 507 int multifd_save_setup(void) 508 { 509 int thread_count; 510 uint8_t i; 511 512 if (!migrate_use_multifd()) { 513 return 0; 514 } 515 thread_count = migrate_multifd_channels(); 516 multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); 517 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); 518 multifd_send_state->count = 0; 519 for (i = 0; i < thread_count; i++) { 520 MultiFDSendParams *p = &multifd_send_state->params[i]; 521 522 qemu_mutex_init(&p->mutex); 523 qemu_sem_init(&p->sem, 0); 524 p->quit = false; 525 p->id = i; 526 p->name = g_strdup_printf("multifdsend_%d", i); 527 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, 528 QEMU_THREAD_JOINABLE); 529 530 multifd_send_state->count++; 531 } 532 return 0; 533 } 534 535 struct MultiFDRecvParams { 536 uint8_t id; 537 char *name; 538 QemuThread thread; 539 QemuSemaphore sem; 540 QemuMutex mutex; 541 bool quit; 542 }; 543 typedef struct MultiFDRecvParams MultiFDRecvParams; 544 545 struct { 546 MultiFDRecvParams *params; 547 /* number of created threads */ 548 int count; 549 } *multifd_recv_state; 550 551 static void terminate_multifd_recv_threads(Error *errp) 552 { 553 int i; 554 555 for (i = 0; i < multifd_recv_state->count; i++) { 556 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 557 558 qemu_mutex_lock(&p->mutex); 559 p->quit = true; 560 qemu_sem_post(&p->sem); 561 qemu_mutex_unlock(&p->mutex); 562 } 563 } 564 565 int multifd_load_cleanup(Error **errp) 566 { 567 int i; 568 
int ret = 0; 569 570 if (!migrate_use_multifd()) { 571 return 0; 572 } 573 terminate_multifd_recv_threads(NULL); 574 for (i = 0; i < multifd_recv_state->count; i++) { 575 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 576 577 qemu_thread_join(&p->thread); 578 qemu_mutex_destroy(&p->mutex); 579 qemu_sem_destroy(&p->sem); 580 g_free(p->name); 581 p->name = NULL; 582 } 583 g_free(multifd_recv_state->params); 584 multifd_recv_state->params = NULL; 585 g_free(multifd_recv_state); 586 multifd_recv_state = NULL; 587 588 return ret; 589 } 590 591 static void *multifd_recv_thread(void *opaque) 592 { 593 MultiFDRecvParams *p = opaque; 594 595 while (true) { 596 qemu_mutex_lock(&p->mutex); 597 if (p->quit) { 598 qemu_mutex_unlock(&p->mutex); 599 break; 600 } 601 qemu_mutex_unlock(&p->mutex); 602 qemu_sem_wait(&p->sem); 603 } 604 605 return NULL; 606 } 607 608 int multifd_load_setup(void) 609 { 610 int thread_count; 611 uint8_t i; 612 613 if (!migrate_use_multifd()) { 614 return 0; 615 } 616 thread_count = migrate_multifd_channels(); 617 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); 618 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); 619 multifd_recv_state->count = 0; 620 for (i = 0; i < thread_count; i++) { 621 MultiFDRecvParams *p = &multifd_recv_state->params[i]; 622 623 qemu_mutex_init(&p->mutex); 624 qemu_sem_init(&p->sem, 0); 625 p->quit = false; 626 p->id = i; 627 p->name = g_strdup_printf("multifdrecv_%d", i); 628 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, 629 QEMU_THREAD_JOINABLE); 630 multifd_recv_state->count++; 631 } 632 return 0; 633 } 634 635 /** 636 * save_page_header: write page header to wire 637 * 638 * If this is the 1st block, it also writes the block identification 639 * 640 * Returns the number of bytes written 641 * 642 * @f: QEMUFile where to send the data 643 * @block: block that contains the page we want to send 644 * @offset: offset inside the block for the page 645 * in the lower bits, it contains flags 646 */ 647 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, 648 ram_addr_t offset) 649 { 650 size_t size, len; 651 652 if (block == rs->last_sent_block) { 653 offset |= RAM_SAVE_FLAG_CONTINUE; 654 } 655 qemu_put_be64(f, offset); 656 size = 8; 657 658 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { 659 len = strlen(block->idstr); 660 qemu_put_byte(f, len); 661 qemu_put_buffer(f, (uint8_t *)block->idstr, len); 662 size += 1 + len; 663 rs->last_sent_block = block; 664 } 665 return size; 666 } 667 668 /** 669 * mig_throttle_guest_down: throotle down the guest 670 * 671 * Reduce amount of guest cpu execution to hopefully slow down memory 672 * writes. If guest dirty memory rate is reduced below the rate at 673 * which we can transfer pages to the destination then we should be 674 * able to complete migration. Some workloads dirty memory way too 675 * fast and will not effectively converge, even with auto-converge. 676 */ 677 static void mig_throttle_guest_down(void) 678 { 679 MigrationState *s = migrate_get_current(); 680 uint64_t pct_initial = s->parameters.cpu_throttle_initial; 681 uint64_t pct_icrement = s->parameters.cpu_throttle_increment; 682 683 /* We have not started throttling yet. Let's start it. 
*/ 684 if (!cpu_throttle_active()) { 685 cpu_throttle_set(pct_initial); 686 } else { 687 /* Throttling already on, just increase the rate */ 688 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement); 689 } 690 } 691 692 /** 693 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache 694 * 695 * @rs: current RAM state 696 * @current_addr: address for the zero page 697 * 698 * Update the xbzrle cache to reflect a page that's been sent as all 0. 699 * The important thing is that a stale (not-yet-0'd) page be replaced 700 * by the new data. 701 * As a bonus, if the page wasn't in the cache it gets added so that 702 * when a small write is made into the 0'd page it gets XBZRLE sent. 703 */ 704 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) 705 { 706 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { 707 return; 708 } 709 710 /* We don't care if this fails to allocate a new cache page 711 * as long as it updated an old one */ 712 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, 713 ram_counters.dirty_sync_count); 714 } 715 716 #define ENCODING_FLAG_XBZRLE 0x1 717 718 /** 719 * save_xbzrle_page: compress and send current page 720 * 721 * Returns: 1 means that we wrote the page 722 * 0 means that page is identical to the one already sent 723 * -1 means that xbzrle would be longer than normal 724 * 725 * @rs: current RAM state 726 * @current_data: pointer to the address of the page contents 727 * @current_addr: addr of the page 728 * @block: block that contains the page we want to send 729 * @offset: offset inside the block for the page 730 * @last_stage: if we are at the completion stage 731 */ 732 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, 733 ram_addr_t current_addr, RAMBlock *block, 734 ram_addr_t offset, bool last_stage) 735 { 736 int encoded_len = 0, bytes_xbzrle; 737 uint8_t *prev_cached_page; 738 739 if (!cache_is_cached(XBZRLE.cache, current_addr, 740 ram_counters.dirty_sync_count)) { 741 xbzrle_counters.cache_miss++; 742 if (!last_stage) { 743 if (cache_insert(XBZRLE.cache, current_addr, *current_data, 744 ram_counters.dirty_sync_count) == -1) { 745 return -1; 746 } else { 747 /* update *current_data when the page has been 748 inserted into cache */ 749 *current_data = get_cached_data(XBZRLE.cache, current_addr); 750 } 751 } 752 return -1; 753 } 754 755 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); 756 757 /* save current buffer into memory */ 758 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); 759 760 /* XBZRLE encoding (if there is no overflow) */ 761 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, 762 TARGET_PAGE_SIZE, XBZRLE.encoded_buf, 763 TARGET_PAGE_SIZE); 764 if (encoded_len == 0) { 765 trace_save_xbzrle_page_skipping(); 766 return 0; 767 } else if (encoded_len == -1) { 768 trace_save_xbzrle_page_overflow(); 769 xbzrle_counters.overflow++; 770 /* update data in the cache */ 771 if (!last_stage) { 772 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); 773 *current_data = prev_cached_page; 774 } 775 return -1; 776 } 777 778 /* we need to update the data in the cache, in order to get the same data */ 779 if (!last_stage) { 780 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); 781 } 782 783 /* Send XBZRLE based compressed page */ 784 bytes_xbzrle = save_page_header(rs, rs->f, block, 785 offset | RAM_SAVE_FLAG_XBZRLE); 786 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); 787 qemu_put_be16(rs->f, encoded_len); 788 
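    /*
     * An XBZRLE page on the wire is: the page header written above by
     * save_page_header() (offset with RAM_SAVE_FLAG_XBZRLE set), a one-byte
     * ENCODING_FLAG_XBZRLE marker and a big-endian 16-bit encoded length
     * (also written above), then the encoded data put below -- hence the
     * "encoded_len + 1 + 2" accounting that follows.
     */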
qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); 789 bytes_xbzrle += encoded_len + 1 + 2; 790 xbzrle_counters.pages++; 791 xbzrle_counters.bytes += bytes_xbzrle; 792 ram_counters.transferred += bytes_xbzrle; 793 794 return 1; 795 } 796 797 /** 798 * migration_bitmap_find_dirty: find the next dirty page from start 799 * 800 * Called with rcu_read_lock() to protect migration_bitmap 801 * 802 * Returns the byte offset within memory region of the start of a dirty page 803 * 804 * @rs: current RAM state 805 * @rb: RAMBlock where to search for dirty pages 806 * @start: page where we start the search 807 */ 808 static inline 809 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, 810 unsigned long start) 811 { 812 unsigned long size = rb->used_length >> TARGET_PAGE_BITS; 813 unsigned long *bitmap = rb->bmap; 814 unsigned long next; 815 816 if (rs->ram_bulk_stage && start > 0) { 817 next = start + 1; 818 } else { 819 next = find_next_bit(bitmap, size, start); 820 } 821 822 return next; 823 } 824 825 static inline bool migration_bitmap_clear_dirty(RAMState *rs, 826 RAMBlock *rb, 827 unsigned long page) 828 { 829 bool ret; 830 831 ret = test_and_clear_bit(page, rb->bmap); 832 833 if (ret) { 834 rs->migration_dirty_pages--; 835 } 836 return ret; 837 } 838 839 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, 840 ram_addr_t start, ram_addr_t length) 841 { 842 rs->migration_dirty_pages += 843 cpu_physical_memory_sync_dirty_bitmap(rb, start, length, 844 &rs->num_dirty_pages_period); 845 } 846 847 /** 848 * ram_pagesize_summary: calculate all the pagesizes of a VM 849 * 850 * Returns a summary bitmap of the page sizes of all RAMBlocks 851 * 852 * For VMs with just normal pages this is equivalent to the host page 853 * size. If it's got some huge pages then it's the OR of all the 854 * different page sizes. 855 */ 856 uint64_t ram_pagesize_summary(void) 857 { 858 RAMBlock *block; 859 uint64_t summary = 0; 860 861 RAMBLOCK_FOREACH(block) { 862 summary |= block->page_size; 863 } 864 865 return summary; 866 } 867 868 static void migration_bitmap_sync(RAMState *rs) 869 { 870 RAMBlock *block; 871 int64_t end_time; 872 uint64_t bytes_xfer_now; 873 874 ram_counters.dirty_sync_count++; 875 876 if (!rs->time_last_bitmap_sync) { 877 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 878 } 879 880 trace_migration_bitmap_sync_start(); 881 memory_global_dirty_log_sync(); 882 883 qemu_mutex_lock(&rs->bitmap_mutex); 884 rcu_read_lock(); 885 RAMBLOCK_FOREACH(block) { 886 migration_bitmap_sync_range(rs, block, 0, block->used_length); 887 } 888 rcu_read_unlock(); 889 qemu_mutex_unlock(&rs->bitmap_mutex); 890 891 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); 892 893 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); 894 895 /* more than 1 second = 1000 millisecons */ 896 if (end_time > rs->time_last_bitmap_sync + 1000) { 897 /* calculate period counters */ 898 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 899 / (end_time - rs->time_last_bitmap_sync); 900 bytes_xfer_now = ram_counters.transferred; 901 902 /* During block migration the auto-converge logic incorrectly detects 903 * that ram migration makes no progress. Avoid this by disabling the 904 * throttling logic during the bulk phase of block migration. */ 905 if (migrate_auto_converge() && !blk_mig_bulk_active()) { 906 /* The following detection logic can be refined later. For now: 907 Check to see if the dirtied bytes is 50% more than the approx. 
908 amount of bytes that just got transferred since the last time we 909 were in this routine. If that happens twice, start or increase 910 throttling */ 911 912 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE > 913 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && 914 (++rs->dirty_rate_high_cnt >= 2)) { 915 trace_migration_throttle(); 916 rs->dirty_rate_high_cnt = 0; 917 mig_throttle_guest_down(); 918 } 919 } 920 921 if (migrate_use_xbzrle()) { 922 if (rs->iterations_prev != rs->iterations) { 923 xbzrle_counters.cache_miss_rate = 924 (double)(xbzrle_counters.cache_miss - 925 rs->xbzrle_cache_miss_prev) / 926 (rs->iterations - rs->iterations_prev); 927 } 928 rs->iterations_prev = rs->iterations; 929 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; 930 } 931 932 /* reset period counters */ 933 rs->time_last_bitmap_sync = end_time; 934 rs->num_dirty_pages_period = 0; 935 rs->bytes_xfer_prev = bytes_xfer_now; 936 } 937 if (migrate_use_events()) { 938 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL); 939 } 940 } 941 942 /** 943 * save_zero_page: send the zero page to the stream 944 * 945 * Returns the number of pages written. 946 * 947 * @rs: current RAM state 948 * @block: block that contains the page we want to send 949 * @offset: offset inside the block for the page 950 */ 951 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) 952 { 953 uint8_t *p = block->host + offset; 954 int pages = -1; 955 956 if (is_zero_range(p, TARGET_PAGE_SIZE)) { 957 ram_counters.duplicate++; 958 ram_counters.transferred += 959 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO); 960 qemu_put_byte(rs->f, 0); 961 ram_counters.transferred += 1; 962 pages = 1; 963 } 964 965 return pages; 966 } 967 968 static void ram_release_pages(const char *rbname, uint64_t offset, int pages) 969 { 970 if (!migrate_release_ram() || !migration_in_postcopy()) { 971 return; 972 } 973 974 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); 975 } 976 977 /* 978 * @pages: the number of pages written by the control path, 979 * < 0 - error 980 * > 0 - number of pages written 981 * 982 * Return true if the pages has been saved, otherwise false is returned. 983 */ 984 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, 985 int *pages) 986 { 987 uint64_t bytes_xmit = 0; 988 int ret; 989 990 *pages = -1; 991 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, 992 &bytes_xmit); 993 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { 994 return false; 995 } 996 997 if (bytes_xmit) { 998 ram_counters.transferred += bytes_xmit; 999 *pages = 1; 1000 } 1001 1002 if (ret == RAM_SAVE_CONTROL_DELAYED) { 1003 return true; 1004 } 1005 1006 if (bytes_xmit > 0) { 1007 ram_counters.normal++; 1008 } else if (bytes_xmit == 0) { 1009 ram_counters.duplicate++; 1010 } 1011 1012 return true; 1013 } 1014 1015 /* 1016 * directly send the page to the stream 1017 * 1018 * Returns the number of pages written. 
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @buf: the page to be sent
 * @async: send the page asynchronously
 */
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                            uint8_t *buf, bool async)
{
    ram_counters.transferred += save_page_header(rs, rs->f, block,
                                                 offset | RAM_SAVE_FLAG_PAGE);
    if (async) {
        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
                              migrate_release_ram() &
                              migration_in_postcopy());
    } else {
        qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
    }
    ram_counters.transferred += TARGET_PAGE_SIZE;
    ram_counters.normal++;
    return 1;
}

/**
 * ram_save_page: send the given page to the stream
 *
 * Returns the number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 */
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
{
    int pages = -1;
    uint8_t *p;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
        migrate_use_xbzrle()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
        if (!last_stage) {
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */
            send_async = false;
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        pages = save_normal_page(rs, block, offset, p, send_async);
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(rs, f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * Copy the page to an internal buffer to avoid it being modified by the
     * VM, so that we can catch any error during compression and
     * decompression.
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}

static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
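            /*
             * The worker in do_data_compress() sets ->done back to true under
             * comp_done_lock and signals comp_done_cond when it finishes, so
             * waiting here cannot miss the completion.
             */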
qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1133 } 1134 } 1135 qemu_mutex_unlock(&comp_done_lock); 1136 1137 for (idx = 0; idx < thread_count; idx++) { 1138 qemu_mutex_lock(&comp_param[idx].mutex); 1139 if (!comp_param[idx].quit) { 1140 len = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1141 ram_counters.transferred += len; 1142 } 1143 qemu_mutex_unlock(&comp_param[idx].mutex); 1144 } 1145 } 1146 1147 static inline void set_compress_params(CompressParam *param, RAMBlock *block, 1148 ram_addr_t offset) 1149 { 1150 param->block = block; 1151 param->offset = offset; 1152 } 1153 1154 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, 1155 ram_addr_t offset) 1156 { 1157 int idx, thread_count, bytes_xmit = -1, pages = -1; 1158 1159 thread_count = migrate_compress_threads(); 1160 qemu_mutex_lock(&comp_done_lock); 1161 while (true) { 1162 for (idx = 0; idx < thread_count; idx++) { 1163 if (comp_param[idx].done) { 1164 comp_param[idx].done = false; 1165 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); 1166 qemu_mutex_lock(&comp_param[idx].mutex); 1167 set_compress_params(&comp_param[idx], block, offset); 1168 qemu_cond_signal(&comp_param[idx].cond); 1169 qemu_mutex_unlock(&comp_param[idx].mutex); 1170 pages = 1; 1171 ram_counters.normal++; 1172 ram_counters.transferred += bytes_xmit; 1173 break; 1174 } 1175 } 1176 if (pages > 0) { 1177 break; 1178 } else { 1179 qemu_cond_wait(&comp_done_cond, &comp_done_lock); 1180 } 1181 } 1182 qemu_mutex_unlock(&comp_done_lock); 1183 1184 return pages; 1185 } 1186 1187 /** 1188 * find_dirty_block: find the next dirty page and update any state 1189 * associated with the search process. 1190 * 1191 * Returns if a page is found 1192 * 1193 * @rs: current RAM state 1194 * @pss: data about the state of the current dirty page scan 1195 * @again: set to false if the search has scanned the whole of RAM 1196 */ 1197 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) 1198 { 1199 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); 1200 if (pss->complete_round && pss->block == rs->last_seen_block && 1201 pss->page >= rs->last_page) { 1202 /* 1203 * We've been once around the RAM and haven't found anything. 1204 * Give up. 1205 */ 1206 *again = false; 1207 return false; 1208 } 1209 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { 1210 /* Didn't find anything in this RAM Block */ 1211 pss->page = 0; 1212 pss->block = QLIST_NEXT_RCU(pss->block, next); 1213 if (!pss->block) { 1214 /* Hit the end of the list */ 1215 pss->block = QLIST_FIRST_RCU(&ram_list.blocks); 1216 /* Flag that we've looped */ 1217 pss->complete_round = true; 1218 rs->ram_bulk_stage = false; 1219 if (migrate_use_xbzrle()) { 1220 /* If xbzrle is on, stop using the data compression at this 1221 * point. In theory, xbzrle can do better than compression. 1222 */ 1223 flush_compressed_data(rs); 1224 } 1225 } 1226 /* Didn't find anything this time, but try again on the new block */ 1227 *again = true; 1228 return false; 1229 } else { 1230 /* Can go around again, but... 
         */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * unqueue_page: gets a page off the queue
 *
 * Helper for 'get_queued_page' - gets a page off the queue
 *
 * Returns the block of the page (or NULL if none available)
 *
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
 */
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&rs->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&rs->src_page_req_mutex);

    return block;
}

/**
 * get_queued_page: unqueue a page from the postcopy requests
 *
 * Skips pages that are already sent (!dirty)
 *
 * Returns true if a queued page is found
 *
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 */
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(rs, &offset);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long page;

            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                       page, test_bit(page, block->unsentmap));
            } else {
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, we have to
         * kill the bulk stage, since the bulk stage assumes
         * (in migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, which is no longer true.
         */
        rs->ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->page = offset >> TARGET_PAGE_BITS;
    }

    return !!block;
}

/**
 * migration_page_queue_free: drop any remaining pages in the ram
 * request queue
 *
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  If any pages are left, we drop them.
 *
 */
static void migration_page_queue_free(RAMState *rs)
{
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
1345 */ 1346 rcu_read_lock(); 1347 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { 1348 memory_region_unref(mspr->rb->mr); 1349 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); 1350 g_free(mspr); 1351 } 1352 rcu_read_unlock(); 1353 } 1354 1355 /** 1356 * ram_save_queue_pages: queue the page for transmission 1357 * 1358 * A request from postcopy destination for example. 1359 * 1360 * Returns zero on success or negative on error 1361 * 1362 * @rbname: Name of the RAMBLock of the request. NULL means the 1363 * same that last one. 1364 * @start: starting address from the start of the RAMBlock 1365 * @len: length (in bytes) to send 1366 */ 1367 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) 1368 { 1369 RAMBlock *ramblock; 1370 RAMState *rs = ram_state; 1371 1372 ram_counters.postcopy_requests++; 1373 rcu_read_lock(); 1374 if (!rbname) { 1375 /* Reuse last RAMBlock */ 1376 ramblock = rs->last_req_rb; 1377 1378 if (!ramblock) { 1379 /* 1380 * Shouldn't happen, we can't reuse the last RAMBlock if 1381 * it's the 1st request. 1382 */ 1383 error_report("ram_save_queue_pages no previous block"); 1384 goto err; 1385 } 1386 } else { 1387 ramblock = qemu_ram_block_by_name(rbname); 1388 1389 if (!ramblock) { 1390 /* We shouldn't be asked for a non-existent RAMBlock */ 1391 error_report("ram_save_queue_pages no block '%s'", rbname); 1392 goto err; 1393 } 1394 rs->last_req_rb = ramblock; 1395 } 1396 trace_ram_save_queue_pages(ramblock->idstr, start, len); 1397 if (start+len > ramblock->used_length) { 1398 error_report("%s request overrun start=" RAM_ADDR_FMT " len=" 1399 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, 1400 __func__, start, len, ramblock->used_length); 1401 goto err; 1402 } 1403 1404 struct RAMSrcPageRequest *new_entry = 1405 g_malloc0(sizeof(struct RAMSrcPageRequest)); 1406 new_entry->rb = ramblock; 1407 new_entry->offset = start; 1408 new_entry->len = len; 1409 1410 memory_region_ref(ramblock->mr); 1411 qemu_mutex_lock(&rs->src_page_req_mutex); 1412 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); 1413 qemu_mutex_unlock(&rs->src_page_req_mutex); 1414 rcu_read_unlock(); 1415 1416 return 0; 1417 1418 err: 1419 rcu_read_unlock(); 1420 return -1; 1421 } 1422 1423 static bool save_page_use_compression(RAMState *rs) 1424 { 1425 if (!migrate_use_compression()) { 1426 return false; 1427 } 1428 1429 /* 1430 * If xbzrle is on, stop using the data compression after first 1431 * round of migration even if compression is enabled. In theory, 1432 * xbzrle can do better than compression. 
1433 */ 1434 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { 1435 return true; 1436 } 1437 1438 return false; 1439 } 1440 1441 /** 1442 * ram_save_target_page: save one target page 1443 * 1444 * Returns the number of pages written 1445 * 1446 * @rs: current RAM state 1447 * @pss: data about the page we want to send 1448 * @last_stage: if we are at the completion stage 1449 */ 1450 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, 1451 bool last_stage) 1452 { 1453 RAMBlock *block = pss->block; 1454 ram_addr_t offset = pss->page << TARGET_PAGE_BITS; 1455 int res; 1456 1457 if (control_save_page(rs, block, offset, &res)) { 1458 return res; 1459 } 1460 1461 /* 1462 * When starting the process of a new block, the first page of 1463 * the block should be sent out before other pages in the same 1464 * block, and all the pages in last block should have been sent 1465 * out, keeping this order is important, because the 'cont' flag 1466 * is used to avoid resending the block name. 1467 */ 1468 if (block != rs->last_sent_block && save_page_use_compression(rs)) { 1469 flush_compressed_data(rs); 1470 } 1471 1472 res = save_zero_page(rs, block, offset); 1473 if (res > 0) { 1474 /* Must let xbzrle know, otherwise a previous (now 0'd) cached 1475 * page would be stale 1476 */ 1477 if (!save_page_use_compression(rs)) { 1478 XBZRLE_cache_lock(); 1479 xbzrle_cache_zero_page(rs, block->offset + offset); 1480 XBZRLE_cache_unlock(); 1481 } 1482 ram_release_pages(block->idstr, offset, res); 1483 return res; 1484 } 1485 1486 /* 1487 * Make sure the first page is sent out before other pages. 1488 * 1489 * we post it as normal page as compression will take much 1490 * CPU resource. 1491 */ 1492 if (block == rs->last_sent_block && save_page_use_compression(rs)) { 1493 res = compress_page_with_multi_thread(rs, block, offset); 1494 } 1495 1496 return ram_save_page(rs, pss, last_stage); 1497 } 1498 1499 /** 1500 * ram_save_host_page: save a whole host page 1501 * 1502 * Starting at *offset send pages up to the end of the current host 1503 * page. It's valid for the initial offset to point into the middle of 1504 * a host page in which case the remainder of the hostpage is sent. 1505 * Only dirty target pages are sent. Note that the host page size may 1506 * be a huge page for this block. 1507 * The saving stops at the boundary of the used_length of the block 1508 * if the RAMBlock isn't a multiple of the host page size. 
1509 * 1510 * Returns the number of pages written or negative on error 1511 * 1512 * @rs: current RAM state 1513 * @ms: current migration state 1514 * @pss: data about the page we want to send 1515 * @last_stage: if we are at the completion stage 1516 */ 1517 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, 1518 bool last_stage) 1519 { 1520 int tmppages, pages = 0; 1521 size_t pagesize_bits = 1522 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; 1523 1524 do { 1525 /* Check the pages is dirty and if it is send it */ 1526 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { 1527 pss->page++; 1528 continue; 1529 } 1530 1531 tmppages = ram_save_target_page(rs, pss, last_stage); 1532 if (tmppages < 0) { 1533 return tmppages; 1534 } 1535 1536 pages += tmppages; 1537 if (pss->block->unsentmap) { 1538 clear_bit(pss->page, pss->block->unsentmap); 1539 } 1540 1541 pss->page++; 1542 } while ((pss->page & (pagesize_bits - 1)) && 1543 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); 1544 1545 /* The offset we leave with is the last one we looked at */ 1546 pss->page--; 1547 return pages; 1548 } 1549 1550 /** 1551 * ram_find_and_save_block: finds a dirty page and sends it to f 1552 * 1553 * Called within an RCU critical section. 1554 * 1555 * Returns the number of pages written where zero means no dirty pages 1556 * 1557 * @rs: current RAM state 1558 * @last_stage: if we are at the completion stage 1559 * 1560 * On systems where host-page-size > target-page-size it will send all the 1561 * pages in a host page that are dirty. 1562 */ 1563 1564 static int ram_find_and_save_block(RAMState *rs, bool last_stage) 1565 { 1566 PageSearchStatus pss; 1567 int pages = 0; 1568 bool again, found; 1569 1570 /* No dirty page as there is zero RAM */ 1571 if (!ram_bytes_total()) { 1572 return pages; 1573 } 1574 1575 pss.block = rs->last_seen_block; 1576 pss.page = rs->last_page; 1577 pss.complete_round = false; 1578 1579 if (!pss.block) { 1580 pss.block = QLIST_FIRST_RCU(&ram_list.blocks); 1581 } 1582 1583 do { 1584 again = true; 1585 found = get_queued_page(rs, &pss); 1586 1587 if (!found) { 1588 /* priority queue empty, so just search for something dirty */ 1589 found = find_dirty_block(rs, &pss, &again); 1590 } 1591 1592 if (found) { 1593 pages = ram_save_host_page(rs, &pss, last_stage); 1594 } 1595 } while (!pages && again); 1596 1597 rs->last_seen_block = pss.block; 1598 rs->last_page = pss.page; 1599 1600 return pages; 1601 } 1602 1603 void acct_update_position(QEMUFile *f, size_t size, bool zero) 1604 { 1605 uint64_t pages = size / TARGET_PAGE_SIZE; 1606 1607 if (zero) { 1608 ram_counters.duplicate += pages; 1609 } else { 1610 ram_counters.normal += pages; 1611 ram_counters.transferred += size; 1612 qemu_update_position(f, size); 1613 } 1614 } 1615 1616 uint64_t ram_bytes_total(void) 1617 { 1618 RAMBlock *block; 1619 uint64_t total = 0; 1620 1621 rcu_read_lock(); 1622 RAMBLOCK_FOREACH(block) { 1623 total += block->used_length; 1624 } 1625 rcu_read_unlock(); 1626 return total; 1627 } 1628 1629 static void xbzrle_load_setup(void) 1630 { 1631 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); 1632 } 1633 1634 static void xbzrle_load_cleanup(void) 1635 { 1636 g_free(XBZRLE.decoded_buf); 1637 XBZRLE.decoded_buf = NULL; 1638 } 1639 1640 static void ram_state_cleanup(RAMState **rsp) 1641 { 1642 if (*rsp) { 1643 migration_page_queue_free(*rsp); 1644 qemu_mutex_destroy(&(*rsp)->bitmap_mutex); 1645 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex); 1646 g_free(*rsp); 1647 *rsp = 
NULL; 1648 } 1649 } 1650 1651 static void xbzrle_cleanup(void) 1652 { 1653 XBZRLE_cache_lock(); 1654 if (XBZRLE.cache) { 1655 cache_fini(XBZRLE.cache); 1656 g_free(XBZRLE.encoded_buf); 1657 g_free(XBZRLE.current_buf); 1658 g_free(XBZRLE.zero_target_page); 1659 XBZRLE.cache = NULL; 1660 XBZRLE.encoded_buf = NULL; 1661 XBZRLE.current_buf = NULL; 1662 XBZRLE.zero_target_page = NULL; 1663 } 1664 XBZRLE_cache_unlock(); 1665 } 1666 1667 static void ram_save_cleanup(void *opaque) 1668 { 1669 RAMState **rsp = opaque; 1670 RAMBlock *block; 1671 1672 /* caller have hold iothread lock or is in a bh, so there is 1673 * no writing race against this migration_bitmap 1674 */ 1675 memory_global_dirty_log_stop(); 1676 1677 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 1678 g_free(block->bmap); 1679 block->bmap = NULL; 1680 g_free(block->unsentmap); 1681 block->unsentmap = NULL; 1682 } 1683 1684 xbzrle_cleanup(); 1685 compress_threads_save_cleanup(); 1686 ram_state_cleanup(rsp); 1687 } 1688 1689 static void ram_state_reset(RAMState *rs) 1690 { 1691 rs->last_seen_block = NULL; 1692 rs->last_sent_block = NULL; 1693 rs->last_page = 0; 1694 rs->last_version = ram_list.version; 1695 rs->ram_bulk_stage = true; 1696 } 1697 1698 #define MAX_WAIT 50 /* ms, half buffered_file limit */ 1699 1700 /* 1701 * 'expected' is the value you expect the bitmap mostly to be full 1702 * of; it won't bother printing lines that are all this value. 1703 * If 'todump' is null the migration bitmap is dumped. 1704 */ 1705 void ram_debug_dump_bitmap(unsigned long *todump, bool expected, 1706 unsigned long pages) 1707 { 1708 int64_t cur; 1709 int64_t linelen = 128; 1710 char linebuf[129]; 1711 1712 for (cur = 0; cur < pages; cur += linelen) { 1713 int64_t curb; 1714 bool found = false; 1715 /* 1716 * Last line; catch the case where the line length 1717 * is longer than remaining ram 1718 */ 1719 if (cur + linelen > pages) { 1720 linelen = pages - cur; 1721 } 1722 for (curb = 0; curb < linelen; curb++) { 1723 bool thisbit = test_bit(cur + curb, todump); 1724 linebuf[curb] = thisbit ? '1' : '.'; 1725 found = found || (thisbit != expected); 1726 } 1727 if (found) { 1728 linebuf[curb] = '\0'; 1729 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); 1730 } 1731 } 1732 } 1733 1734 /* **** functions for postcopy ***** */ 1735 1736 void ram_postcopy_migrated_memory_release(MigrationState *ms) 1737 { 1738 struct RAMBlock *block; 1739 1740 RAMBLOCK_FOREACH(block) { 1741 unsigned long *bitmap = block->bmap; 1742 unsigned long range = block->used_length >> TARGET_PAGE_BITS; 1743 unsigned long run_start = find_next_zero_bit(bitmap, range, 0); 1744 1745 while (run_start < range) { 1746 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); 1747 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, 1748 (run_end - run_start) << TARGET_PAGE_BITS); 1749 run_start = find_next_zero_bit(bitmap, range, run_end + 1); 1750 } 1751 } 1752 } 1753 1754 /** 1755 * postcopy_send_discard_bm_ram: discard a RAMBlock 1756 * 1757 * Returns zero on success 1758 * 1759 * Callback from postcopy_each_ram_send_discard for each RAMBlock 1760 * Note: At this point the 'unsentmap' is the processed bitmap combined 1761 * with the dirtymap; so a '1' means it's either dirty or unsent. 
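 * (block->unsentmap is only allocated when postcopy is enabled; see
 * ram_list_init_bitmaps().)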
1762 * 1763 * @ms: current migration state 1764 * @pds: state for postcopy 1765 * @start: RAMBlock starting page 1766 * @length: RAMBlock size 1767 */ 1768 static int postcopy_send_discard_bm_ram(MigrationState *ms, 1769 PostcopyDiscardState *pds, 1770 RAMBlock *block) 1771 { 1772 unsigned long end = block->used_length >> TARGET_PAGE_BITS; 1773 unsigned long current; 1774 unsigned long *unsentmap = block->unsentmap; 1775 1776 for (current = 0; current < end; ) { 1777 unsigned long one = find_next_bit(unsentmap, end, current); 1778 1779 if (one <= end) { 1780 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1); 1781 unsigned long discard_length; 1782 1783 if (zero >= end) { 1784 discard_length = end - one; 1785 } else { 1786 discard_length = zero - one; 1787 } 1788 if (discard_length) { 1789 postcopy_discard_send_range(ms, pds, one, discard_length); 1790 } 1791 current = one + discard_length; 1792 } else { 1793 current = one; 1794 } 1795 } 1796 1797 return 0; 1798 } 1799 1800 /** 1801 * postcopy_each_ram_send_discard: discard all RAMBlocks 1802 * 1803 * Returns 0 for success or negative for error 1804 * 1805 * Utility for the outgoing postcopy code. 1806 * Calls postcopy_send_discard_bm_ram for each RAMBlock 1807 * passing it bitmap indexes and name. 1808 * (qemu_ram_foreach_block ends up passing unscaled lengths 1809 * which would mean postcopy code would have to deal with target page) 1810 * 1811 * @ms: current migration state 1812 */ 1813 static int postcopy_each_ram_send_discard(MigrationState *ms) 1814 { 1815 struct RAMBlock *block; 1816 int ret; 1817 1818 RAMBLOCK_FOREACH(block) { 1819 PostcopyDiscardState *pds = 1820 postcopy_discard_send_init(ms, block->idstr); 1821 1822 /* 1823 * Postcopy sends chunks of bitmap over the wire, but it 1824 * just needs indexes at this point, avoids it having 1825 * target page specific code. 1826 */ 1827 ret = postcopy_send_discard_bm_ram(ms, pds, block); 1828 postcopy_discard_send_finish(ms, pds); 1829 if (ret) { 1830 return ret; 1831 } 1832 } 1833 1834 return 0; 1835 } 1836 1837 /** 1838 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages 1839 * 1840 * Helper for postcopy_chunk_hostpages; it's called twice to 1841 * canonicalize the two bitmaps, that are similar, but one is 1842 * inverted. 1843 * 1844 * Postcopy requires that all target pages in a hostpage are dirty or 1845 * clean, not a mix. This function canonicalizes the bitmaps. 
1846 * 1847 * @ms: current migration state 1848 * @unsent_pass: if true we need to canonicalize partially unsent host pages 1849 * otherwise we need to canonicalize partially dirty host pages 1850 * @block: block that contains the page we want to canonicalize 1851 * @pds: state for postcopy 1852 */ 1853 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, 1854 RAMBlock *block, 1855 PostcopyDiscardState *pds) 1856 { 1857 RAMState *rs = ram_state; 1858 unsigned long *bitmap = block->bmap; 1859 unsigned long *unsentmap = block->unsentmap; 1860 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; 1861 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 1862 unsigned long run_start; 1863 1864 if (block->page_size == TARGET_PAGE_SIZE) { 1865 /* Easy case - TPS==HPS for a non-huge page RAMBlock */ 1866 return; 1867 } 1868 1869 if (unsent_pass) { 1870 /* Find a sent page */ 1871 run_start = find_next_zero_bit(unsentmap, pages, 0); 1872 } else { 1873 /* Find a dirty page */ 1874 run_start = find_next_bit(bitmap, pages, 0); 1875 } 1876 1877 while (run_start < pages) { 1878 bool do_fixup = false; 1879 unsigned long fixup_start_addr; 1880 unsigned long host_offset; 1881 1882 /* 1883 * If the start of this run of pages is in the middle of a host 1884 * page, then we need to fixup this host page. 1885 */ 1886 host_offset = run_start % host_ratio; 1887 if (host_offset) { 1888 do_fixup = true; 1889 run_start -= host_offset; 1890 fixup_start_addr = run_start; 1891 /* For the next pass */ 1892 run_start = run_start + host_ratio; 1893 } else { 1894 /* Find the end of this run */ 1895 unsigned long run_end; 1896 if (unsent_pass) { 1897 run_end = find_next_bit(unsentmap, pages, run_start + 1); 1898 } else { 1899 run_end = find_next_zero_bit(bitmap, pages, run_start + 1); 1900 } 1901 /* 1902 * If the end isn't at the start of a host page, then the 1903 * run doesn't finish at the end of a host page 1904 * and we need to discard. 1905 */ 1906 host_offset = run_end % host_ratio; 1907 if (host_offset) { 1908 do_fixup = true; 1909 fixup_start_addr = run_end - host_offset; 1910 /* 1911 * This host page has gone, the next loop iteration starts 1912 * from after the fixup 1913 */ 1914 run_start = fixup_start_addr + host_ratio; 1915 } else { 1916 /* 1917 * No discards on this iteration, next loop starts from 1918 * next sent/dirty page 1919 */ 1920 run_start = run_end + 1; 1921 } 1922 } 1923 1924 if (do_fixup) { 1925 unsigned long page; 1926 1927 /* Tell the destination to discard this page */ 1928 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) { 1929 /* For the unsent_pass we: 1930 * discard partially sent pages 1931 * For the !unsent_pass (dirty) we: 1932 * discard partially dirty pages that were sent 1933 * (any partially sent pages were already discarded 1934 * by the previous unsent_pass) 1935 */ 1936 postcopy_discard_send_range(ms, pds, fixup_start_addr, 1937 host_ratio); 1938 } 1939 1940 /* Clean up the bitmap */ 1941 for (page = fixup_start_addr; 1942 page < fixup_start_addr + host_ratio; page++) { 1943 /* All pages in this host page are now not sent */ 1944 set_bit(page, unsentmap); 1945 1946 /* 1947 * Remark them as dirty, updating the count for any pages 1948 * that weren't previously dirty. 
1949 */ 1950 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap); 1951 } 1952 } 1953 1954 if (unsent_pass) { 1955 /* Find the next sent page for the next iteration */ 1956 run_start = find_next_zero_bit(unsentmap, pages, run_start); 1957 } else { 1958 /* Find the next dirty page for the next iteration */ 1959 run_start = find_next_bit(bitmap, pages, run_start); 1960 } 1961 } 1962 } 1963 1964 /** 1965 * postcopy_chuck_hostpages: discrad any partially sent host page 1966 * 1967 * Utility for the outgoing postcopy code. 1968 * 1969 * Discard any partially sent host-page size chunks, mark any partially 1970 * dirty host-page size chunks as all dirty. In this case the host-page 1971 * is the host-page for the particular RAMBlock, i.e. it might be a huge page 1972 * 1973 * Returns zero on success 1974 * 1975 * @ms: current migration state 1976 * @block: block we want to work with 1977 */ 1978 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) 1979 { 1980 PostcopyDiscardState *pds = 1981 postcopy_discard_send_init(ms, block->idstr); 1982 1983 /* First pass: Discard all partially sent host pages */ 1984 postcopy_chunk_hostpages_pass(ms, true, block, pds); 1985 /* 1986 * Second pass: Ensure that all partially dirty host pages are made 1987 * fully dirty. 1988 */ 1989 postcopy_chunk_hostpages_pass(ms, false, block, pds); 1990 1991 postcopy_discard_send_finish(ms, pds); 1992 return 0; 1993 } 1994 1995 /** 1996 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap 1997 * 1998 * Returns zero on success 1999 * 2000 * Transmit the set of pages to be discarded after precopy to the target 2001 * these are pages that: 2002 * a) Have been previously transmitted but are now dirty again 2003 * b) Pages that have never been transmitted, this ensures that 2004 * any pages on the destination that have been mapped by background 2005 * tasks get discarded (transparent huge pages is the specific concern) 2006 * Hopefully this is pretty sparse 2007 * 2008 * @ms: current migration state 2009 */ 2010 int ram_postcopy_send_discard_bitmap(MigrationState *ms) 2011 { 2012 RAMState *rs = ram_state; 2013 RAMBlock *block; 2014 int ret; 2015 2016 rcu_read_lock(); 2017 2018 /* This should be our last sync, the src is now paused */ 2019 migration_bitmap_sync(rs); 2020 2021 /* Easiest way to make sure we don't resume in the middle of a host-page */ 2022 rs->last_seen_block = NULL; 2023 rs->last_sent_block = NULL; 2024 rs->last_page = 0; 2025 2026 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 2027 unsigned long pages = block->used_length >> TARGET_PAGE_BITS; 2028 unsigned long *bitmap = block->bmap; 2029 unsigned long *unsentmap = block->unsentmap; 2030 2031 if (!unsentmap) { 2032 /* We don't have a safe way to resize the sentmap, so 2033 * if the bitmap was resized it will be NULL at this 2034 * point. 
2035 */ 2036 error_report("migration ram resized during precopy phase"); 2037 rcu_read_unlock(); 2038 return -EINVAL; 2039 } 2040 /* Deal with TPS != HPS and huge pages */ 2041 ret = postcopy_chunk_hostpages(ms, block); 2042 if (ret) { 2043 rcu_read_unlock(); 2044 return ret; 2045 } 2046 2047 /* 2048 * Update the unsentmap to be unsentmap = unsentmap | dirty 2049 */ 2050 bitmap_or(unsentmap, unsentmap, bitmap, pages); 2051 #ifdef DEBUG_POSTCOPY 2052 ram_debug_dump_bitmap(unsentmap, true, pages); 2053 #endif 2054 } 2055 trace_ram_postcopy_send_discard_bitmap(); 2056 2057 ret = postcopy_each_ram_send_discard(ms); 2058 rcu_read_unlock(); 2059 2060 return ret; 2061 } 2062 2063 /** 2064 * ram_discard_range: discard dirtied pages at the beginning of postcopy 2065 * 2066 * Returns zero on success 2067 * 2068 * @rbname: name of the RAMBlock of the request. NULL means the 2069 * same that last one. 2070 * @start: RAMBlock starting page 2071 * @length: RAMBlock size 2072 */ 2073 int ram_discard_range(const char *rbname, uint64_t start, size_t length) 2074 { 2075 int ret = -1; 2076 2077 trace_ram_discard_range(rbname, start, length); 2078 2079 rcu_read_lock(); 2080 RAMBlock *rb = qemu_ram_block_by_name(rbname); 2081 2082 if (!rb) { 2083 error_report("ram_discard_range: Failed to find block '%s'", rbname); 2084 goto err; 2085 } 2086 2087 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(), 2088 length >> qemu_target_page_bits()); 2089 ret = ram_block_discard_range(rb, start, length); 2090 2091 err: 2092 rcu_read_unlock(); 2093 2094 return ret; 2095 } 2096 2097 /* 2098 * For every allocation, we will try not to crash the VM if the 2099 * allocation failed. 2100 */ 2101 static int xbzrle_init(void) 2102 { 2103 Error *local_err = NULL; 2104 2105 if (!migrate_use_xbzrle()) { 2106 return 0; 2107 } 2108 2109 XBZRLE_cache_lock(); 2110 2111 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); 2112 if (!XBZRLE.zero_target_page) { 2113 error_report("%s: Error allocating zero page", __func__); 2114 goto err_out; 2115 } 2116 2117 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), 2118 TARGET_PAGE_SIZE, &local_err); 2119 if (!XBZRLE.cache) { 2120 error_report_err(local_err); 2121 goto free_zero_page; 2122 } 2123 2124 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); 2125 if (!XBZRLE.encoded_buf) { 2126 error_report("%s: Error allocating encoded_buf", __func__); 2127 goto free_cache; 2128 } 2129 2130 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); 2131 if (!XBZRLE.current_buf) { 2132 error_report("%s: Error allocating current_buf", __func__); 2133 goto free_encoded_buf; 2134 } 2135 2136 /* We are all good */ 2137 XBZRLE_cache_unlock(); 2138 return 0; 2139 2140 free_encoded_buf: 2141 g_free(XBZRLE.encoded_buf); 2142 XBZRLE.encoded_buf = NULL; 2143 free_cache: 2144 cache_fini(XBZRLE.cache); 2145 XBZRLE.cache = NULL; 2146 free_zero_page: 2147 g_free(XBZRLE.zero_target_page); 2148 XBZRLE.zero_target_page = NULL; 2149 err_out: 2150 XBZRLE_cache_unlock(); 2151 return -ENOMEM; 2152 } 2153 2154 static int ram_state_init(RAMState **rsp) 2155 { 2156 *rsp = g_try_new0(RAMState, 1); 2157 2158 if (!*rsp) { 2159 error_report("%s: Init ramstate fail", __func__); 2160 return -1; 2161 } 2162 2163 qemu_mutex_init(&(*rsp)->bitmap_mutex); 2164 qemu_mutex_init(&(*rsp)->src_page_req_mutex); 2165 QSIMPLEQ_INIT(&(*rsp)->src_page_requests); 2166 2167 /* 2168 * Count the total number of pages used by ram blocks not including any 2169 * gaps due to alignment or unplugs. 
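     *
     * For example (purely illustrative): a guest with 4GiB of RAM and 4KiB
     * target pages starts out with migration_dirty_pages == 1048576, i.e.
     * every target page is initially considered dirty and will be sent at
     * least once.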
2170 */ 2171 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; 2172 2173 ram_state_reset(*rsp); 2174 2175 return 0; 2176 } 2177 2178 static void ram_list_init_bitmaps(void) 2179 { 2180 RAMBlock *block; 2181 unsigned long pages; 2182 2183 /* Skip setting bitmap if there is no RAM */ 2184 if (ram_bytes_total()) { 2185 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { 2186 pages = block->max_length >> TARGET_PAGE_BITS; 2187 block->bmap = bitmap_new(pages); 2188 bitmap_set(block->bmap, 0, pages); 2189 if (migrate_postcopy_ram()) { 2190 block->unsentmap = bitmap_new(pages); 2191 bitmap_set(block->unsentmap, 0, pages); 2192 } 2193 } 2194 } 2195 } 2196 2197 static void ram_init_bitmaps(RAMState *rs) 2198 { 2199 /* For memory_global_dirty_log_start below. */ 2200 qemu_mutex_lock_iothread(); 2201 qemu_mutex_lock_ramlist(); 2202 rcu_read_lock(); 2203 2204 ram_list_init_bitmaps(); 2205 memory_global_dirty_log_start(); 2206 migration_bitmap_sync(rs); 2207 2208 rcu_read_unlock(); 2209 qemu_mutex_unlock_ramlist(); 2210 qemu_mutex_unlock_iothread(); 2211 } 2212 2213 static int ram_init_all(RAMState **rsp) 2214 { 2215 if (ram_state_init(rsp)) { 2216 return -1; 2217 } 2218 2219 if (xbzrle_init()) { 2220 ram_state_cleanup(rsp); 2221 return -1; 2222 } 2223 2224 ram_init_bitmaps(*rsp); 2225 2226 return 0; 2227 } 2228 2229 /* 2230 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has 2231 * long-running RCU critical section. When rcu-reclaims in the code 2232 * start to become numerous it will be necessary to reduce the 2233 * granularity of these critical sections. 2234 */ 2235 2236 /** 2237 * ram_save_setup: Setup RAM for migration 2238 * 2239 * Returns zero to indicate success and negative for error 2240 * 2241 * @f: QEMUFile where to send the data 2242 * @opaque: RAMState pointer 2243 */ 2244 static int ram_save_setup(QEMUFile *f, void *opaque) 2245 { 2246 RAMState **rsp = opaque; 2247 RAMBlock *block; 2248 2249 if (compress_threads_save_setup()) { 2250 return -1; 2251 } 2252 2253 /* migration has already setup the bitmap, reuse it. */ 2254 if (!migration_in_colo_state()) { 2255 if (ram_init_all(rsp) != 0) { 2256 compress_threads_save_cleanup(); 2257 return -1; 2258 } 2259 } 2260 (*rsp)->f = f; 2261 2262 rcu_read_lock(); 2263 2264 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); 2265 2266 RAMBLOCK_FOREACH(block) { 2267 qemu_put_byte(f, strlen(block->idstr)); 2268 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); 2269 qemu_put_be64(f, block->used_length); 2270 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { 2271 qemu_put_be64(f, block->page_size); 2272 } 2273 } 2274 2275 rcu_read_unlock(); 2276 2277 ram_control_before_iterate(f, RAM_CONTROL_SETUP); 2278 ram_control_after_iterate(f, RAM_CONTROL_SETUP); 2279 2280 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2281 2282 return 0; 2283 } 2284 2285 /** 2286 * ram_save_iterate: iterative stage for migration 2287 * 2288 * Returns zero to indicate success and negative for error 2289 * 2290 * @f: QEMUFile where to send the data 2291 * @opaque: RAMState pointer 2292 */ 2293 static int ram_save_iterate(QEMUFile *f, void *opaque) 2294 { 2295 RAMState **temp = opaque; 2296 RAMState *rs = *temp; 2297 int ret; 2298 int i; 2299 int64_t t0; 2300 int done = 0; 2301 2302 if (blk_mig_bulk_active()) { 2303 /* Avoid transferring ram during bulk phase of block migration as 2304 * the bulk phase will usually take a long time and transferring 2305 * ram updates during that time is pointless. 
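         *
         * Note that we still jump to the "out" label below and emit a
         * RAM_SAVE_FLAG_EOS marker, so the stream stays well formed and
         * the destination's ram_load() simply sees an empty round for
         * this iteration.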
*/ 2306 goto out; 2307 } 2308 2309 rcu_read_lock(); 2310 if (ram_list.version != rs->last_version) { 2311 ram_state_reset(rs); 2312 } 2313 2314 /* Read version before ram_list.blocks */ 2315 smp_rmb(); 2316 2317 ram_control_before_iterate(f, RAM_CONTROL_ROUND); 2318 2319 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2320 i = 0; 2321 while ((ret = qemu_file_rate_limit(f)) == 0) { 2322 int pages; 2323 2324 pages = ram_find_and_save_block(rs, false); 2325 /* no more pages to sent */ 2326 if (pages == 0) { 2327 done = 1; 2328 break; 2329 } 2330 rs->iterations++; 2331 2332 /* we want to check in the 1st loop, just in case it was the 1st time 2333 and we had to sync the dirty bitmap. 2334 qemu_get_clock_ns() is a bit expensive, so we only check each some 2335 iterations 2336 */ 2337 if ((i & 63) == 0) { 2338 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000; 2339 if (t1 > MAX_WAIT) { 2340 trace_ram_save_iterate_big_wait(t1, i); 2341 break; 2342 } 2343 } 2344 i++; 2345 } 2346 flush_compressed_data(rs); 2347 rcu_read_unlock(); 2348 2349 /* 2350 * Must occur before EOS (or any QEMUFile operation) 2351 * because of RDMA protocol. 2352 */ 2353 ram_control_after_iterate(f, RAM_CONTROL_ROUND); 2354 2355 out: 2356 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2357 ram_counters.transferred += 8; 2358 2359 ret = qemu_file_get_error(f); 2360 if (ret < 0) { 2361 return ret; 2362 } 2363 2364 return done; 2365 } 2366 2367 /** 2368 * ram_save_complete: function called to send the remaining amount of ram 2369 * 2370 * Returns zero to indicate success 2371 * 2372 * Called with iothread lock 2373 * 2374 * @f: QEMUFile where to send the data 2375 * @opaque: RAMState pointer 2376 */ 2377 static int ram_save_complete(QEMUFile *f, void *opaque) 2378 { 2379 RAMState **temp = opaque; 2380 RAMState *rs = *temp; 2381 2382 rcu_read_lock(); 2383 2384 if (!migration_in_postcopy()) { 2385 migration_bitmap_sync(rs); 2386 } 2387 2388 ram_control_before_iterate(f, RAM_CONTROL_FINISH); 2389 2390 /* try transferring iterative blocks of memory */ 2391 2392 /* flush all remaining blocks regardless of rate limiting */ 2393 while (true) { 2394 int pages; 2395 2396 pages = ram_find_and_save_block(rs, !migration_in_colo_state()); 2397 /* no more blocks to sent */ 2398 if (pages == 0) { 2399 break; 2400 } 2401 } 2402 2403 flush_compressed_data(rs); 2404 ram_control_after_iterate(f, RAM_CONTROL_FINISH); 2405 2406 rcu_read_unlock(); 2407 2408 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); 2409 2410 return 0; 2411 } 2412 2413 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, 2414 uint64_t *res_precopy_only, 2415 uint64_t *res_compatible, 2416 uint64_t *res_postcopy_only) 2417 { 2418 RAMState **temp = opaque; 2419 RAMState *rs = *temp; 2420 uint64_t remaining_size; 2421 2422 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2423 2424 if (!migration_in_postcopy() && 2425 remaining_size < max_size) { 2426 qemu_mutex_lock_iothread(); 2427 rcu_read_lock(); 2428 migration_bitmap_sync(rs); 2429 rcu_read_unlock(); 2430 qemu_mutex_unlock_iothread(); 2431 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; 2432 } 2433 2434 if (migrate_postcopy_ram()) { 2435 /* We can do postcopy, and all the data is postcopiable */ 2436 *res_compatible += remaining_size; 2437 } else { 2438 *res_precopy_only += remaining_size; 2439 } 2440 } 2441 2442 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) 2443 { 2444 unsigned int xh_len; 2445 int xh_flags; 2446 uint8_t *loaded_data; 2447 2448 /* extract RLE 
header */ 2449 xh_flags = qemu_get_byte(f); 2450 xh_len = qemu_get_be16(f); 2451 2452 if (xh_flags != ENCODING_FLAG_XBZRLE) { 2453 error_report("Failed to load XBZRLE page - wrong compression!"); 2454 return -1; 2455 } 2456 2457 if (xh_len > TARGET_PAGE_SIZE) { 2458 error_report("Failed to load XBZRLE page - len overflow!"); 2459 return -1; 2460 } 2461 loaded_data = XBZRLE.decoded_buf; 2462 /* load data and decode */ 2463 /* it can change loaded_data to point to an internal buffer */ 2464 qemu_get_buffer_in_place(f, &loaded_data, xh_len); 2465 2466 /* decode RLE */ 2467 if (xbzrle_decode_buffer(loaded_data, xh_len, host, 2468 TARGET_PAGE_SIZE) == -1) { 2469 error_report("Failed to load XBZRLE page - decode error!"); 2470 return -1; 2471 } 2472 2473 return 0; 2474 } 2475 2476 /** 2477 * ram_block_from_stream: read a RAMBlock id from the migration stream 2478 * 2479 * Must be called from within a rcu critical section. 2480 * 2481 * Returns a pointer from within the RCU-protected ram_list. 2482 * 2483 * @f: QEMUFile where to read the data from 2484 * @flags: Page flags (mostly to see if it's a continuation of previous block) 2485 */ 2486 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) 2487 { 2488 static RAMBlock *block = NULL; 2489 char id[256]; 2490 uint8_t len; 2491 2492 if (flags & RAM_SAVE_FLAG_CONTINUE) { 2493 if (!block) { 2494 error_report("Ack, bad migration stream!"); 2495 return NULL; 2496 } 2497 return block; 2498 } 2499 2500 len = qemu_get_byte(f); 2501 qemu_get_buffer(f, (uint8_t *)id, len); 2502 id[len] = 0; 2503 2504 block = qemu_ram_block_by_name(id); 2505 if (!block) { 2506 error_report("Can't find block %s", id); 2507 return NULL; 2508 } 2509 2510 return block; 2511 } 2512 2513 static inline void *host_from_ram_block_offset(RAMBlock *block, 2514 ram_addr_t offset) 2515 { 2516 if (!offset_in_ramblock(block, offset)) { 2517 return NULL; 2518 } 2519 2520 return block->host + offset; 2521 } 2522 2523 /** 2524 * ram_handle_compressed: handle the zero page case 2525 * 2526 * If a page (or a whole RDMA chunk) has been 2527 * determined to be zero, then zap it. 2528 * 2529 * @host: host address for the zero page 2530 * @ch: what the page is filled from. 
We only support zero 2531 * @size: size of the zero page 2532 */ 2533 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) 2534 { 2535 if (ch != 0 || !is_zero_range(host, size)) { 2536 memset(host, ch, size); 2537 } 2538 } 2539 2540 /* return the size after decompression, or negative value on error */ 2541 static int 2542 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, 2543 const uint8_t *source, size_t source_len) 2544 { 2545 int err; 2546 2547 err = inflateReset(stream); 2548 if (err != Z_OK) { 2549 return -1; 2550 } 2551 2552 stream->avail_in = source_len; 2553 stream->next_in = (uint8_t *)source; 2554 stream->avail_out = dest_len; 2555 stream->next_out = dest; 2556 2557 err = inflate(stream, Z_NO_FLUSH); 2558 if (err != Z_STREAM_END) { 2559 return -1; 2560 } 2561 2562 return stream->total_out; 2563 } 2564 2565 static void *do_data_decompress(void *opaque) 2566 { 2567 DecompressParam *param = opaque; 2568 unsigned long pagesize; 2569 uint8_t *des; 2570 int len, ret; 2571 2572 qemu_mutex_lock(¶m->mutex); 2573 while (!param->quit) { 2574 if (param->des) { 2575 des = param->des; 2576 len = param->len; 2577 param->des = 0; 2578 qemu_mutex_unlock(¶m->mutex); 2579 2580 pagesize = TARGET_PAGE_SIZE; 2581 2582 ret = qemu_uncompress_data(¶m->stream, des, pagesize, 2583 param->compbuf, len); 2584 if (ret < 0) { 2585 error_report("decompress data failed"); 2586 qemu_file_set_error(decomp_file, ret); 2587 } 2588 2589 qemu_mutex_lock(&decomp_done_lock); 2590 param->done = true; 2591 qemu_cond_signal(&decomp_done_cond); 2592 qemu_mutex_unlock(&decomp_done_lock); 2593 2594 qemu_mutex_lock(¶m->mutex); 2595 } else { 2596 qemu_cond_wait(¶m->cond, ¶m->mutex); 2597 } 2598 } 2599 qemu_mutex_unlock(¶m->mutex); 2600 2601 return NULL; 2602 } 2603 2604 static int wait_for_decompress_done(void) 2605 { 2606 int idx, thread_count; 2607 2608 if (!migrate_use_compression()) { 2609 return 0; 2610 } 2611 2612 thread_count = migrate_decompress_threads(); 2613 qemu_mutex_lock(&decomp_done_lock); 2614 for (idx = 0; idx < thread_count; idx++) { 2615 while (!decomp_param[idx].done) { 2616 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2617 } 2618 } 2619 qemu_mutex_unlock(&decomp_done_lock); 2620 return qemu_file_get_error(decomp_file); 2621 } 2622 2623 static void compress_threads_load_cleanup(void) 2624 { 2625 int i, thread_count; 2626 2627 if (!migrate_use_compression()) { 2628 return; 2629 } 2630 thread_count = migrate_decompress_threads(); 2631 for (i = 0; i < thread_count; i++) { 2632 /* 2633 * we use it as a indicator which shows if the thread is 2634 * properly init'd or not 2635 */ 2636 if (!decomp_param[i].compbuf) { 2637 break; 2638 } 2639 2640 qemu_mutex_lock(&decomp_param[i].mutex); 2641 decomp_param[i].quit = true; 2642 qemu_cond_signal(&decomp_param[i].cond); 2643 qemu_mutex_unlock(&decomp_param[i].mutex); 2644 } 2645 for (i = 0; i < thread_count; i++) { 2646 if (!decomp_param[i].compbuf) { 2647 break; 2648 } 2649 2650 qemu_thread_join(decompress_threads + i); 2651 qemu_mutex_destroy(&decomp_param[i].mutex); 2652 qemu_cond_destroy(&decomp_param[i].cond); 2653 inflateEnd(&decomp_param[i].stream); 2654 g_free(decomp_param[i].compbuf); 2655 decomp_param[i].compbuf = NULL; 2656 } 2657 g_free(decompress_threads); 2658 g_free(decomp_param); 2659 decompress_threads = NULL; 2660 decomp_param = NULL; 2661 decomp_file = NULL; 2662 } 2663 2664 static int compress_threads_load_setup(QEMUFile *f) 2665 { 2666 int i, thread_count; 2667 2668 if (!migrate_use_compression()) { 2669 
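        /* Compression is not in use, so no decompression threads or
         * buffers need to be set up on this side. */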
return 0; 2670 } 2671 2672 thread_count = migrate_decompress_threads(); 2673 decompress_threads = g_new0(QemuThread, thread_count); 2674 decomp_param = g_new0(DecompressParam, thread_count); 2675 qemu_mutex_init(&decomp_done_lock); 2676 qemu_cond_init(&decomp_done_cond); 2677 decomp_file = f; 2678 for (i = 0; i < thread_count; i++) { 2679 if (inflateInit(&decomp_param[i].stream) != Z_OK) { 2680 goto exit; 2681 } 2682 2683 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); 2684 qemu_mutex_init(&decomp_param[i].mutex); 2685 qemu_cond_init(&decomp_param[i].cond); 2686 decomp_param[i].done = true; 2687 decomp_param[i].quit = false; 2688 qemu_thread_create(decompress_threads + i, "decompress", 2689 do_data_decompress, decomp_param + i, 2690 QEMU_THREAD_JOINABLE); 2691 } 2692 return 0; 2693 exit: 2694 compress_threads_load_cleanup(); 2695 return -1; 2696 } 2697 2698 static void decompress_data_with_multi_threads(QEMUFile *f, 2699 void *host, int len) 2700 { 2701 int idx, thread_count; 2702 2703 thread_count = migrate_decompress_threads(); 2704 qemu_mutex_lock(&decomp_done_lock); 2705 while (true) { 2706 for (idx = 0; idx < thread_count; idx++) { 2707 if (decomp_param[idx].done) { 2708 decomp_param[idx].done = false; 2709 qemu_mutex_lock(&decomp_param[idx].mutex); 2710 qemu_get_buffer(f, decomp_param[idx].compbuf, len); 2711 decomp_param[idx].des = host; 2712 decomp_param[idx].len = len; 2713 qemu_cond_signal(&decomp_param[idx].cond); 2714 qemu_mutex_unlock(&decomp_param[idx].mutex); 2715 break; 2716 } 2717 } 2718 if (idx < thread_count) { 2719 break; 2720 } else { 2721 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); 2722 } 2723 } 2724 qemu_mutex_unlock(&decomp_done_lock); 2725 } 2726 2727 /** 2728 * ram_load_setup: Setup RAM for migration incoming side 2729 * 2730 * Returns zero to indicate success and negative for error 2731 * 2732 * @f: QEMUFile where to receive the data 2733 * @opaque: RAMState pointer 2734 */ 2735 static int ram_load_setup(QEMUFile *f, void *opaque) 2736 { 2737 if (compress_threads_load_setup(f)) { 2738 return -1; 2739 } 2740 2741 xbzrle_load_setup(); 2742 ramblock_recv_map_init(); 2743 return 0; 2744 } 2745 2746 static int ram_load_cleanup(void *opaque) 2747 { 2748 RAMBlock *rb; 2749 xbzrle_load_cleanup(); 2750 compress_threads_load_cleanup(); 2751 2752 RAMBLOCK_FOREACH(rb) { 2753 g_free(rb->receivedmap); 2754 rb->receivedmap = NULL; 2755 } 2756 return 0; 2757 } 2758 2759 /** 2760 * ram_postcopy_incoming_init: allocate postcopy data structures 2761 * 2762 * Returns 0 for success and negative if there was one error 2763 * 2764 * @mis: current migration incoming state 2765 * 2766 * Allocate data structures etc needed by incoming migration with 2767 * postcopy-ram. postcopy-ram's similarly names 2768 * postcopy_ram_incoming_init does the work. 2769 */ 2770 int ram_postcopy_incoming_init(MigrationIncomingState *mis) 2771 { 2772 unsigned long ram_pages = last_ram_page(); 2773 2774 return postcopy_ram_incoming_init(mis, ram_pages); 2775 } 2776 2777 /** 2778 * ram_load_postcopy: load a page in postcopy case 2779 * 2780 * Returns 0 for success or -errno in case of error 2781 * 2782 * Called in postcopy mode by ram_load(). 2783 * rcu_read_lock is taken prior to this being called. 
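 *
 * Target pages that belong to the same host page are accumulated in a
 * temporary page and only placed into guest memory once the last target
 * page of that host page has been read; see the comments in the body
 * below for the details of that assembly.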
 *
 * @f: QEMUFile where to receive the data
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);

        /*
         * If the qemu file has hit an error we should stop here, and
         * then "addr" may be invalid.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target pages;
             * however, the source ensures it always sends all the components
             * of a host page in order.
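             *
             * Worked example (assuming a 2MiB hugetlbfs-backed block and
             * 4KiB target pages): the 512 target pages that make up one
             * huge page arrive back to back; each is copied into
             * postcopy_host_page at its offset within the huge page
             * (page_buffer below), and only once the 512th one has been
             * read is the whole 2MiB page placed into guest memory.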
2841 */ 2842 page_buffer = postcopy_host_page + 2843 ((uintptr_t)host & (block->page_size - 1)); 2844 /* If all TP are zero then we can optimise the place */ 2845 if (!((uintptr_t)host & (block->page_size - 1))) { 2846 all_zero = true; 2847 } else { 2848 /* not the 1st TP within the HP */ 2849 if (host != (last_host + TARGET_PAGE_SIZE)) { 2850 error_report("Non-sequential target page %p/%p", 2851 host, last_host); 2852 ret = -EINVAL; 2853 break; 2854 } 2855 } 2856 2857 2858 /* 2859 * If it's the last part of a host page then we place the host 2860 * page 2861 */ 2862 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & 2863 (block->page_size - 1)) == 0; 2864 place_source = postcopy_host_page; 2865 } 2866 last_host = host; 2867 2868 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 2869 case RAM_SAVE_FLAG_ZERO: 2870 ch = qemu_get_byte(f); 2871 memset(page_buffer, ch, TARGET_PAGE_SIZE); 2872 if (ch) { 2873 all_zero = false; 2874 } 2875 break; 2876 2877 case RAM_SAVE_FLAG_PAGE: 2878 all_zero = false; 2879 if (!place_needed || !matching_page_sizes) { 2880 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); 2881 } else { 2882 /* Avoids the qemu_file copy during postcopy, which is 2883 * going to do a copy later; can only do it when we 2884 * do this read in one go (matching page sizes) 2885 */ 2886 qemu_get_buffer_in_place(f, (uint8_t **)&place_source, 2887 TARGET_PAGE_SIZE); 2888 } 2889 break; 2890 case RAM_SAVE_FLAG_EOS: 2891 /* normal exit */ 2892 break; 2893 default: 2894 error_report("Unknown combination of migration flags: %#x" 2895 " (postcopy mode)", flags); 2896 ret = -EINVAL; 2897 break; 2898 } 2899 2900 /* Detect for any possible file errors */ 2901 if (!ret && qemu_file_get_error(f)) { 2902 ret = qemu_file_get_error(f); 2903 } 2904 2905 if (!ret && place_needed) { 2906 /* This gets called at the last target page in the host page */ 2907 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; 2908 2909 if (all_zero) { 2910 ret = postcopy_place_page_zero(mis, place_dest, 2911 block); 2912 } else { 2913 ret = postcopy_place_page(mis, place_dest, 2914 place_source, block); 2915 } 2916 } 2917 } 2918 2919 return ret; 2920 } 2921 2922 static bool postcopy_is_advised(void) 2923 { 2924 PostcopyState ps = postcopy_state_get(); 2925 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END; 2926 } 2927 2928 static bool postcopy_is_running(void) 2929 { 2930 PostcopyState ps = postcopy_state_get(); 2931 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END; 2932 } 2933 2934 static int ram_load(QEMUFile *f, void *opaque, int version_id) 2935 { 2936 int flags = 0, ret = 0, invalid_flags = 0; 2937 static uint64_t seq_iter; 2938 int len = 0; 2939 /* 2940 * If system is running in postcopy mode, page inserts to host memory must 2941 * be atomic 2942 */ 2943 bool postcopy_running = postcopy_is_running(); 2944 /* ADVISE is earlier, it shows the source has the postcopy capability on */ 2945 bool postcopy_advised = postcopy_is_advised(); 2946 2947 seq_iter++; 2948 2949 if (version_id != 4) { 2950 ret = -EINVAL; 2951 } 2952 2953 if (!migrate_use_compression()) { 2954 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; 2955 } 2956 /* This RCU critical section can be very long running. 2957 * When RCU reclaims in the code start to become numerous, 2958 * it will be necessary to reduce the granularity of this 2959 * critical section. 
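     *
     * Each iteration of the loop below starts by reading a be64 value
     * whose low bits carry the RAM_SAVE_FLAG_* bits and whose
     * TARGET_PAGE_MASK-aligned part is either the offset of the page
     * within its RAMBlock or, for RAM_SAVE_FLAG_MEM_SIZE, the total
     * RAM size announced by ram_save_setup().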
2960 */ 2961 rcu_read_lock(); 2962 2963 if (postcopy_running) { 2964 ret = ram_load_postcopy(f); 2965 } 2966 2967 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) { 2968 ram_addr_t addr, total_ram_bytes; 2969 void *host = NULL; 2970 uint8_t ch; 2971 2972 addr = qemu_get_be64(f); 2973 flags = addr & ~TARGET_PAGE_MASK; 2974 addr &= TARGET_PAGE_MASK; 2975 2976 if (flags & invalid_flags) { 2977 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { 2978 error_report("Received an unexpected compressed page"); 2979 } 2980 2981 ret = -EINVAL; 2982 break; 2983 } 2984 2985 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | 2986 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { 2987 RAMBlock *block = ram_block_from_stream(f, flags); 2988 2989 host = host_from_ram_block_offset(block, addr); 2990 if (!host) { 2991 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr); 2992 ret = -EINVAL; 2993 break; 2994 } 2995 ramblock_recv_bitmap_set(block, host); 2996 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); 2997 } 2998 2999 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { 3000 case RAM_SAVE_FLAG_MEM_SIZE: 3001 /* Synchronize RAM block list */ 3002 total_ram_bytes = addr; 3003 while (!ret && total_ram_bytes) { 3004 RAMBlock *block; 3005 char id[256]; 3006 ram_addr_t length; 3007 3008 len = qemu_get_byte(f); 3009 qemu_get_buffer(f, (uint8_t *)id, len); 3010 id[len] = 0; 3011 length = qemu_get_be64(f); 3012 3013 block = qemu_ram_block_by_name(id); 3014 if (block) { 3015 if (length != block->used_length) { 3016 Error *local_err = NULL; 3017 3018 ret = qemu_ram_resize(block, length, 3019 &local_err); 3020 if (local_err) { 3021 error_report_err(local_err); 3022 } 3023 } 3024 /* For postcopy we need to check hugepage sizes match */ 3025 if (postcopy_advised && 3026 block->page_size != qemu_host_page_size) { 3027 uint64_t remote_page_size = qemu_get_be64(f); 3028 if (remote_page_size != block->page_size) { 3029 error_report("Mismatched RAM page size %s " 3030 "(local) %zd != %" PRId64, 3031 id, block->page_size, 3032 remote_page_size); 3033 ret = -EINVAL; 3034 } 3035 } 3036 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, 3037 block->idstr); 3038 } else { 3039 error_report("Unknown ramblock \"%s\", cannot " 3040 "accept migration", id); 3041 ret = -EINVAL; 3042 } 3043 3044 total_ram_bytes -= length; 3045 } 3046 break; 3047 3048 case RAM_SAVE_FLAG_ZERO: 3049 ch = qemu_get_byte(f); 3050 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); 3051 break; 3052 3053 case RAM_SAVE_FLAG_PAGE: 3054 qemu_get_buffer(f, host, TARGET_PAGE_SIZE); 3055 break; 3056 3057 case RAM_SAVE_FLAG_COMPRESS_PAGE: 3058 len = qemu_get_be32(f); 3059 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { 3060 error_report("Invalid compressed data length: %d", len); 3061 ret = -EINVAL; 3062 break; 3063 } 3064 decompress_data_with_multi_threads(f, host, len); 3065 break; 3066 3067 case RAM_SAVE_FLAG_XBZRLE: 3068 if (load_xbzrle(f, addr, host) < 0) { 3069 error_report("Failed to decompress XBZRLE page at " 3070 RAM_ADDR_FMT, addr); 3071 ret = -EINVAL; 3072 break; 3073 } 3074 break; 3075 case RAM_SAVE_FLAG_EOS: 3076 /* normal exit */ 3077 break; 3078 default: 3079 if (flags & RAM_SAVE_FLAG_HOOK) { 3080 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); 3081 } else { 3082 error_report("Unknown combination of migration flags: %#x", 3083 flags); 3084 ret = -EINVAL; 3085 } 3086 } 3087 if (!ret) { 3088 ret = qemu_file_get_error(f); 3089 } 3090 } 3091 3092 ret |= wait_for_decompress_done(); 3093 rcu_read_unlock(); 3094 
    trace_ram_load_complete(ret, seq_iter);
    return ret;
}

static bool ram_has_postcopy(void *opaque)
{
    return migrate_postcopy_ram();
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
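
/*
 * Informal summary of the RAM section wire format, derived from
 * ram_save_setup()/ram_save_iterate() above and the parsing in ram_load();
 * the code remains the authoritative reference:
 *
 *   setup section:
 *     be64   ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *     for each RAMBlock:
 *       byte   strlen(idstr)
 *       bytes  idstr
 *       be64   used_length
 *       be64   page_size      (only when postcopy is enabled and the
 *                              block's page size differs from the host
 *                              page size)
 *     be64   RAM_SAVE_FLAG_EOS
 *
 *   page records (iterate/complete sections):
 *     be64   page offset within the block, ORed with RAM_SAVE_FLAG_* bits
 *     byte   strlen(idstr), bytes idstr   (omitted when
 *                                          RAM_SAVE_FLAG_CONTINUE is set,
 *                                          meaning "same block as before")
 *     followed by, depending on the flag:
 *       RAM_SAVE_FLAG_ZERO:          byte  fill value (expected to be 0)
 *       RAM_SAVE_FLAG_PAGE:          TARGET_PAGE_SIZE bytes of page data
 *       RAM_SAVE_FLAG_COMPRESS_PAGE: be32 length, zlib-compressed data
 *       RAM_SAVE_FLAG_XBZRLE:        byte ENCODING_FLAG_XBZRLE, be16 length,
 *                                    XBZRLE-encoded data
 *
 *   Each section produced by ram_save_setup()/ram_save_iterate()/
 *   ram_save_complete() is terminated by a be64 RAM_SAVE_FLAG_EOS marker.
 */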