/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <zlib.h>
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"

#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

static int dirty_rate_high_cnt;

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_find_nonzero_offset(p, size) == size;
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/*
 * Called from qmp_migrate_set_cache_size in the main thread, possibly while
 * a migration is in progress.
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock.
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

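/*
 * Worked example (illustrative only, not part of the migration code): the
 * effective cache size is rounded down to a power of two by pow2floor(), so
 *
 *     xbzrle_cache_resize(5 * 1024 * 1024);    // ask for 5 MiB
 *
 * returns 4 MiB, which is the value the caller is expected to store as the
 * new cache size.  Requests smaller than one target page fail with -1.
 */
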
/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    unsigned long *bmap;
} *migration_bitmap_rcu;

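/*
 * The dirty bitmap is replaced RCU-style: readers fetch it with
 * atomic_rcu_read() inside an rcu_read_lock()/rcu_read_unlock() pair, while
 * the writer publishes a new copy with atomic_rcu_set() and reclaims the old
 * one with call_rcu().  A minimal sketch of the reader side (illustrative
 * only; the real readers are migration_bitmap_find_and_reset_dirty() and
 * migration_bitmap_sync_range() below):
 *
 *     rcu_read_lock();
 *     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
 *     ... test_bit()/clear_bit() on bitmap ...
 *     rcu_read_unlock();
 *
 * See migration_bitmap_extend() for the writer side.
 */
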
struct CompressParam {
    bool start;
    bool done;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool start;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex *comp_done_lock;
static QemuCond *comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static bool quit_comp_thread;
static bool quit_decomp_thread;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static uint8_t *compressed_data_buf;

static int do_compress_ram_page(CompressParam *param);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;

    while (!quit_comp_thread) {
        qemu_mutex_lock(&param->mutex);
        /* Re-check quit_comp_thread in case terminate_compression_threads()
         * was called just after the while (!quit_comp_thread) test and just
         * before qemu_mutex_lock(&param->mutex); re-checking it here makes
         * sure the compression thread terminates as expected.
         */
        while (!param->start && !quit_comp_thread) {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
        if (!quit_comp_thread) {
            do_compress_ram_page(param);
        }
        param->start = false;
        qemu_mutex_unlock(&param->mutex);

        qemu_mutex_lock(comp_done_lock);
        param->done = true;
        qemu_cond_signal(comp_done_cond);
        qemu_mutex_unlock(comp_done_lock);
    }

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();
    quit_comp_thread = true;
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(comp_done_lock);
    qemu_cond_destroy(comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    g_free(comp_done_cond);
    g_free(comp_done_lock);
    compress_threads = NULL;
    comp_param = NULL;
    comp_done_cond = NULL;
    comp_done_lock = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    quit_comp_thread = false;
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    comp_done_cond = g_new0(QemuCond, 1);
    comp_done_lock = g_new0(QemuMutex, 1);
    qemu_cond_init(comp_done_cond);
    qemu_mutex_init(comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: Write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns: Number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}

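/*
 * Illustrative wire layout (derived from save_page_header() above, not an
 * independent format definition).  Page offsets are TARGET_PAGE_SIZE
 * aligned, so the low bits of the be64 word double as RAM_SAVE_FLAG_* bits.
 * For the first page sent from a block:
 *
 *     be64   offset | flags          (RAM_SAVE_FLAG_CONTINUE clear)
 *     u8     strlen(block->idstr)
 *     bytes  block->idstr            (not NUL terminated)
 *
 * Subsequent pages from the same block set RAM_SAVE_FLAG_CONTINUE and omit
 * the two idstr fields, which is what the returned size accounts for.
 */
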
/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
 * If guest dirty memory rate is reduced below the rate at which we can
 * transfer pages to the destination then we should be able to complete
 * migration. Some workloads dirty memory way too fast and will not effectively
 * converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
    uint64_t pct_increment =
            s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}

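/*
 * Example progression (illustrative numbers; the real values come from the
 * x-cpu-throttle-initial and x-cpu-throttle-increment migration parameters):
 * with an initial value of 20 and an increment of 10, successive calls to
 * mig_throttle_guest_down() ask cpu_throttle_set() for 20%, 30%, 40%, ...
 * until the dirty rate finally drops below the transfer rate.
 */
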
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @f: QEMUFile where to send the data
 * @current_data:
 * @current_addr:
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}

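/*
 * Sketch of how a caller interprets save_xbzrle_page() (this mirrors its use
 * in ram_save_page() below and is shown only to summarise the return values):
 *
 *     ret = save_xbzrle_page(f, &p, addr, block, offset, last_stage, &bytes);
 *     ret == 1   encoded page was written to the stream
 *     ret == 0   page identical to the cached copy, nothing sent
 *     ret == -1  cache miss or encoding overflow; the caller sends the page
 *                as a normal RAM_SAVE_FLAG_PAGE page instead
 *
 * Note that on a miss or overflow (outside the last stage) the cache is
 * refreshed and *p may be redirected to the cached copy.
 */
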
/* Called with rcu_read_lock() to protect migration_bitmap */
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
                                                 ram_addr_t start)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}

/* Called with rcu_read_lock() to protect migration_bitmap */
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}

/* Fix me: there are too many global variables used in the migration process. */
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;

static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}

/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    qemu_mutex_lock(&migration_bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->offset, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&migration_bitmap_mutex);

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time
               we were in this routine. If that happens twice, start or
               increase throttling */
            bytes_xfer_now = ram_bytes_transferred();

            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ >= 2)) {
                trace_migration_throttle();
                dirty_rate_high_cnt = 0;
                mig_throttle_guest_down();
            }
            bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                    (double)(acct_info.xbzrle_cache_miss -
                             xbzrle_cache_miss_prev) /
                    (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = bitmap_sync_count;
}

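/*
 * Worked example of the auto-converge check above (made-up numbers): if the
 * last ~1s window transferred 400 MB while the guest dirtied 250 MB, then
 * 250 MB > 400 MB / 2, so the window counts as "high".  The counter
 * dirty_rate_high_cnt is incremented on each such window (the check is
 * dirty_rate_high_cnt++ >= 2); once it has counted enough of them,
 * mig_throttle_guest_down() is called and the counter is reset.
 */
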
/**
 * save_zero_page: Send the zero page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

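/*
 * On the wire a zero page is just the page header with RAM_SAVE_FLAG_COMPRESS
 * set, followed by a single fill byte (illustrative layout):
 *
 *     be64   offset | RAM_SAVE_FLAG_COMPRESS [| RAM_SAVE_FLAG_CONTINUE]
 *     ...    idstr fields, if this is the first page from the block
 *     u8     0x00                     <- byte used to fill the whole page
 *
 * The destination expands this in ram_handle_compressed().
 */
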
/**
 * ram_save_page: Send the given page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;

    p = block->host + offset;

    /* When in doubt, send the page as a normal page */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(current_addr);
        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int do_compress_ram_page(CompressParam *param)
{
    int bytes_sent, blen;
    uint8_t *p;
    RAMBlock *block = param->block;
    ram_addr_t offset = param->offset;

    p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(param->file, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    bytes_sent += blen;

    return bytes_sent;
}

static inline void start_compression(CompressParam *param)
{
    param->done = false;
    qemu_mutex_lock(&param->mutex);
    param->start = true;
    qemu_cond_signal(&param->cond);
    qemu_mutex_unlock(&param->mutex);
}

static inline void start_decompression(DecompressParam *param)
{
    qemu_mutex_lock(&param->mutex);
    param->start = true;
    qemu_cond_signal(&param->cond);
    qemu_mutex_unlock(&param->mutex);
}

static uint64_t bytes_transferred;

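/*
 * Rough picture of the handshake between the migration thread and one
 * compression thread (sketch only; the real code is in
 * compress_page_with_multi_thread(), do_data_compress() and
 * flush_compressed_data()):
 *
 *     migration thread                      compression thread
 *     ----------------                      ------------------
 *     find a param with done == true
 *     drain param->file into the stream
 *     set_compress_params(param, ...)
 *     start_compression(param)  ---------->  woken via param->cond
 *                                            do_compress_ram_page(param)
 *                                            param->done = true
 *     woken via comp_done_cond  <----------  signal comp_done_cond
 */
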
static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();
    for (idx = 0; idx < thread_count; idx++) {
        if (!comp_param[idx].done) {
            qemu_mutex_lock(comp_done_lock);
            while (!comp_param[idx].done && !quit_comp_thread) {
                qemu_cond_wait(comp_done_cond, comp_done_lock);
            }
            qemu_mutex_unlock(comp_done_lock);
        }
        if (!quit_comp_thread) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                set_compress_params(&comp_param[idx], block, offset);
                start_compression(&comp_param[idx]);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(comp_done_cond, comp_done_lock);
        }
    }
    qemu_mutex_unlock(comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
                                    ram_addr_t offset, bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    uint8_t *p;
    int ret;

    p = block->host + offset;

    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in the last block should have been
         * sent out.  Keeping this order is important, because the 'cont'
         * flag is used to avoid resending the block name.
         */
        if (block != last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                set_compress_params(&comp_param[0], block, offset);
                /* Use the qemu thread to compress the data to make sure the
                 * first page is sent out before other pages
                 */
                bytes_xmit = do_compress_ram_page(&comp_param[0]);
                acct_info.norm_pages++;
                qemu_put_qemu_file(f, comp_param[0].file);
                *bytes_transferred += bytes_xmit;
                pages = 1;
            }
        } else {
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            }
        }
    }

    return pages;
}

/*
 * Find the next dirty page and update any state associated with
 * the search process.
 *
 * Returns: True if a page is found
 *
 * @f: Current migration stream.
 * @pss: Data about the state of the current dirty page scan.
 * @*again: Set to false if the search has scanned the whole of RAM
 */
static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
                             bool *again)
{
    pss->offset = migration_bitmap_find_and_reset_dirty(pss->block,
                                                        pss->offset);
    if (pss->complete_round && pss->block == last_seen_block &&
        pss->offset >= last_offset) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (pss->offset >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->offset = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(f);
                compression_switch = false;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/**
 * ram_find_and_save_block: Finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns:  The number of pages written
 *           0 means no dirty pages
 *
 * @f: QEMUFile where to send the data
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */

static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    PageSearchStatus pss;
    int pages = 0;
    bool again, found;

    pss.block = last_seen_block;
    pss.offset = last_offset;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        found = find_dirty_block(f, &pss, &again);

        if (found) {
            if (compression_switch && migrate_use_compression()) {
                pages = ram_save_compressed_page(f, pss.block, pss.offset,
                                                 last_stage,
                                                 bytes_transferred);
            } else {
                pages = ram_save_page(f, pss.block, pss.offset, last_stage,
                                      bytes_transferred);
            }

            /* if page is unmodified, continue to the next */
            if (pages > 0) {
                last_sent_block = pss.block;
            }
        }
    } while (!pages && again);

    last_seen_block = pss.block;
    last_offset = pss.offset;

    return pages;
}

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
        total += block->used_length;
    rcu_read_unlock();
    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_bitmap_free(struct BitmapRcu *bmap)
{
    g_free(bmap->bmap);
    g_free(bmap);
}

static void ram_migration_cleanup(void *opaque)
{
    /* The caller holds the iothread lock or is in a bottom half, so there
     * is no writing race against this migration_bitmap.
     */
    struct BitmapRcu *bitmap = migration_bitmap_rcu;
    atomic_rcu_set(&migration_bitmap_rcu, NULL);
    if (bitmap) {
        memory_global_dirty_log_stop();
        call_rcu(bitmap, migration_bitmap_free, rcu);
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
{
    /* called in qemu main thread, so there is
     * no writing race against this migration_bitmap
     */
    if (migration_bitmap_rcu) {
        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
        bitmap = g_new(struct BitmapRcu, 1);
        bitmap->bmap = bitmap_new(new);

        /* Prevent migration_bitmap from having bits set by
         * migration_bitmap_sync_range() while we copy it; it is safe for
         * migration if bits are only cleared concurrently.
         */
        qemu_mutex_lock(&migration_bitmap_mutex);
        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
        bitmap_set(bitmap->bmap, old, new - old);
        atomic_rcu_set(&migration_bitmap_rcu, bitmap);
        qemu_mutex_unlock(&migration_bitmap_mutex);
        migration_dirty_pages += new - old;
        call_rcu(old_bitmap, migration_bitmap_free, rcu);
    }
}

/*
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * If 'todump' is null the migration bitmap is dumped.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
{
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    if (!todump) {
        todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    }

    for (cur = 0; cur < ram_pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > ram_pages) {
            linelen = ram_pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}

/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */

static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */

    dirty_rate_high_cnt = 0;
    bitmap_sync_count = 0;
    migration_bitmap_sync_init();
    qemu_mutex_init(&migration_bitmap_mutex);

    if (migrate_use_xbzrle()) {
        XBZRLE_cache_lock();
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            XBZRLE_cache_unlock();
            error_report("Error creating cache");
            return -1;
        }
        XBZRLE_cache_unlock();

        /* We prefer not to abort if there is no memory */
        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
        if (!XBZRLE.encoded_buf) {
            error_report("Error allocating encoded_buf");
            return -1;
        }

        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!XBZRLE.current_buf) {
            error_report("Error allocating current_buf");
            g_free(XBZRLE.encoded_buf);
            XBZRLE.encoded_buf = NULL;
            return -1;
        }

        acct_clear();
    }

    /* iothread lock needed for ram_list.dirty_memory[] */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();
    bytes_transferred = 0;
    reset_ram_globals();

    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
    migration_bitmap_rcu = g_new(struct BitmapRcu, 1);
    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);

    /*
     * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
     */
    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
    }

    rcu_read_unlock();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

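/*
 * Stream produced by the setup stage (illustrative summary of the code
 * above, not an independent format specification):
 *
 *     be64   total ram size | RAM_SAVE_FLAG_MEM_SIZE
 *     for each RAMBlock:
 *         u8     strlen(idstr)
 *         bytes  idstr
 *         be64   used_length
 *     be64   RAM_SAVE_FLAG_EOS
 *
 * ram_load() consumes this in its RAM_SAVE_FLAG_MEM_SIZE case to check that
 * source and destination agree on the RAM block layout.
 */
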
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int pages_sent = 0;

    rcu_read_lock();
    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    /* Read version before ram_list.blocks */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(f, false, &bytes_transferred);
        /* no more pages to send */
        if (pages == 0) {
            break;
        }
        pages_sent += pages;
        acct_info.iterations++;

        /* we want to check in the 1st loop, just in case it was the 1st
           time and we had to sync the dirty bitmap.
           qemu_clock_get_ns() is a bit expensive, so we only check once
           every few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }
    flush_compressed_data(f);
    rcu_read_unlock();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    bytes_transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return pages_sent;
}

/* Called with iothread lock */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    rcu_read_lock();

    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(f, true, &bytes_transferred);
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(f);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync();
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}

/* Must be called from within a rcu critical section.
 * Returns a pointer from within the RCU-protected ram_list.
 */
static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block || block->max_length <= offset) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return block->host + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (block && block->max_length > offset) {
        return block->host + offset;
    }

    error_report("Can't find block %s", id);
    return NULL;
}

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}

static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;

    while (!quit_decomp_thread) {
        qemu_mutex_lock(&param->mutex);
        while (!param->start && !quit_decomp_thread) {
            qemu_cond_wait(&param->cond, &param->mutex);
            pagesize = TARGET_PAGE_SIZE;
            if (!quit_decomp_thread) {
                /* uncompress() can fail in some cases, especially when the
                 * page was dirtied while being compressed; that is not a
                 * problem because the dirty page will be retransferred and
                 * uncompress() won't break the data in other pages.
                 */
                uncompress((Bytef *)param->des, &pagesize,
                           (const Bytef *)param->compbuf, param->len);
            }
            param->start = false;
        }
        qemu_mutex_unlock(&param->mutex);
    }

    return NULL;
}

void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
    quit_decomp_thread = false;
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    quit_decomp_thread = true;
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    g_free(compressed_data_buf);
    decompress_threads = NULL;
    decomp_param = NULL;
    compressed_data_buf = NULL;
}

static void decompress_data_with_multi_threads(uint8_t *compbuf,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (!decomp_param[idx].start) {
                memcpy(decomp_param[idx].compbuf, compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                start_decompression(&decomp_param[idx]);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        }
    }
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();
    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block->offset, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            qemu_get_buffer(f, compressed_data_buf, len);
            decompress_data_with_multi_threads(compressed_data_buf, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    rcu_read_unlock();
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}

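/*
 * Summary of the record types ram_load() accepts (derived from the switch
 * above; listed only as a reading aid):
 *
 *     RAM_SAVE_FLAG_MEM_SIZE       RAM block list sent by ram_save_setup()
 *     RAM_SAVE_FLAG_COMPRESS       one fill byte; the page is memset() on load
 *     RAM_SAVE_FLAG_PAGE           raw TARGET_PAGE_SIZE bytes
 *     RAM_SAVE_FLAG_COMPRESS_PAGE  be32 length + zlib data, handed to the
 *                                  decompression threads
 *     RAM_SAVE_FLAG_XBZRLE         flag byte + be16 length + encoded data
 *     RAM_SAVE_FLAG_EOS            end of section
 *     RAM_SAVE_FLAG_HOOK           forwarded to ram_control_load_hook()
 */
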
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
}