/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <zlib.h>
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"

#ifdef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_find_nonzero_offset(p, size) == size;
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/*
 * called from qmp_migrate_set_cache_size in main thread, possibly while
 * a migration is in progress.
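 *
 * Returns @new_size rounded down to a power of two (see pow2floor()
 * below), or -1 on error.
 *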
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock().
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

struct CompressParam {
    bool start;
    bool done;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool start;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
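 * Each worker marks its CompressParam as done while holding comp_done_lock
 * and signals comp_done_cond; flush_compressed_data() and
 * compress_page_with_multi_thread() below wait on the same condition.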
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex *comp_done_lock;
static QemuCond *comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static bool quit_comp_thread;
static bool quit_decomp_thread;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static uint8_t *compressed_data_buf;

static int do_compress_ram_page(CompressParam *param);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;

    while (!quit_comp_thread) {
        qemu_mutex_lock(&param->mutex);
        /* Re-check quit_comp_thread in case
         * terminate_compression_threads() is called just before
         * qemu_mutex_lock(&param->mutex) and after
         * while (!quit_comp_thread); re-checking it here makes
         * sure the compression thread terminates as expected.
         */
        while (!param->start && !quit_comp_thread) {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
        if (!quit_comp_thread) {
            do_compress_ram_page(param);
        }
        param->start = false;
        qemu_mutex_unlock(&param->mutex);

        qemu_mutex_lock(comp_done_lock);
        param->done = true;
        qemu_cond_signal(comp_done_cond);
        qemu_mutex_unlock(comp_done_lock);
    }

    return NULL;
}

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();
    quit_comp_thread = true;
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(comp_done_lock);
    qemu_cond_destroy(comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    g_free(comp_done_cond);
    g_free(comp_done_lock);
    compress_threads = NULL;
    comp_param = NULL;
    comp_done_cond = NULL;
    comp_done_lock = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    quit_comp_thread = false;
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    comp_done_cond = g_new0(QemuCond, 1);
    comp_done_lock = g_new0(QemuMutex, 1);
    qemu_cond_init(comp_done_cond);
    qemu_mutex_init(comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
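         * Each worker compresses a page into its own comp_param[i].file;
         * the buffered bytes are later copied into the real migration
         * stream with qemu_put_qemu_file() (see flush_compressed_data()
         * and compress_page_with_multi_thread() below).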
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: Write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns: Number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr,
                        strlen(block->idstr));
        size += 1 + strlen(block->idstr);
    }
    return size;
}

/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @f: QEMUFile where to send the data
 * @current_data: contents of the page; may be updated to point at the
 *                copy kept in the XBZRLE cache
 * @current_addr: address of the page, used as the cache key
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}

static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
{
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);

    unsigned long next;

    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(migration_bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, migration_bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}

static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
}

/* Fix me: there are too many global variables used in migration process. */
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;

static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}

/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
    }
    rcu_read_unlock();

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes are 50% more than the approx.
               amount of bytes that just got transferred since the last time
               we were in this routine. If that happens >N times (for now N==4)
               we turn on the throttle down logic */
            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev)/2) &&
                (dirty_rate_high_cnt++ > 4)) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }
            bytes_xfer_prev = bytes_xfer_now;
        } else {
            mig_throttle_on = false;
        }
        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                    (double)(acct_info.xbzrle_cache_miss -
                             xbzrle_cache_miss_prev) /
                    (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = bitmap_sync_count;
}

/**
 * save_zero_page: Send the zero page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

/**
 * ram_save_page: Send the given page to the stream
 *
 * Returns: Number of pages written.
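 *          (0 means that the page was unmodified according to the XBZRLE
 *          cache and nothing had to be put on the wire)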
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    MemoryRegion *mr = block->mr;
    uint8_t *p;
    int ret;
    bool send_async = true;

    p = memory_region_get_ram_ptr(mr) + offset;

    /* When in doubt, send the page as a normal page */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(current_addr);
        } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

static int do_compress_ram_page(CompressParam *param)
{
    int bytes_sent, blen;
    uint8_t *p;
    RAMBlock *block = param->block;
    ram_addr_t offset = param->offset;

    p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(param->file, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    bytes_sent += blen;

    return bytes_sent;
}

static inline void start_compression(CompressParam *param)
{
    param->done = false;
    qemu_mutex_lock(&param->mutex);
    param->start = true;
    qemu_cond_signal(&param->cond);
    qemu_mutex_unlock(&param->mutex);
}

static inline void start_decompression(DecompressParam *param)
{
    qemu_mutex_lock(&param->mutex);
    param->start = true;
    qemu_cond_signal(&param->cond);
    qemu_mutex_unlock(&param->mutex);
}

static uint64_t bytes_transferred;

static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();
    for (idx = 0; idx < thread_count; idx++) {
        if (!comp_param[idx].done) {
            qemu_mutex_lock(comp_done_lock);
            while (!comp_param[idx].done && !quit_comp_thread) {
                qemu_cond_wait(comp_done_cond, comp_done_lock);
            }
            qemu_mutex_unlock(comp_done_lock);
        }
        if (!quit_comp_thread) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                set_compress_params(&comp_param[idx], block, offset);
                start_compression(&comp_param[idx]);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(comp_done_cond, comp_done_lock);
        }
    }
    qemu_mutex_unlock(comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
                                    ram_addr_t offset, bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    MemoryRegion *mr = block->mr;
    uint8_t *p;
    int ret;

    p = memory_region_get_ram_ptr(mr) + offset;

    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in the last block should have been
         * sent out. Keeping this order is important, because the 'cont'
         * flag is used to avoid resending the block name.
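         * To guarantee that, flush_compressed_data() below first drains
         * every worker's buffer, and the first page of the new block is
         * then compressed synchronously by the migration thread itself
         * via do_compress_ram_page().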
         */
        if (block != last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                set_compress_params(&comp_param[0], block, offset);
                /* Use the qemu thread to compress the data to make sure the
                 * first page is sent out before other pages
                 */
                bytes_xmit = do_compress_ram_page(&comp_param[0]);
                acct_info.norm_pages++;
                qemu_put_qemu_file(f, comp_param[0].file);
                *bytes_transferred += bytes_xmit;
                pages = 1;
            }
        } else {
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            }
        }
    }

    return pages;
}

/**
 * ram_find_and_save_block: Finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns: The number of pages written
 *          0 means no dirty pages
 *
 * @f: QEMUFile where to send the data
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */

static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    RAMBlock *block = last_seen_block;
    ram_addr_t offset = last_offset;
    bool complete_round = false;
    int pages = 0;
    MemoryRegion *mr;

    if (!block)
        block = QLIST_FIRST_RCU(&ram_list.blocks);

    while (true) {
        mr = block->mr;
        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
        if (complete_round && block == last_seen_block &&
            offset >= last_offset) {
            break;
        }
        if (offset >= block->used_length) {
            offset = 0;
            block = QLIST_NEXT_RCU(block, next);
            if (!block) {
                block = QLIST_FIRST_RCU(&ram_list.blocks);
                complete_round = true;
                ram_bulk_stage = false;
                if (migrate_use_xbzrle()) {
                    /* If xbzrle is on, stop using the data compression at this
                     * point. In theory, xbzrle can do better than compression.
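                     * This happens exactly once, at the end of the first
                     * (bulk) pass over RAM: ram_bulk_stage has just been
                     * cleared above, and compression_switch stays false for
                     * the rest of the migration.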
                     */
                    flush_compressed_data(f);
                    compression_switch = false;
                }
            }
        } else {
            if (compression_switch && migrate_use_compression()) {
                pages = ram_save_compressed_page(f, block, offset, last_stage,
                                                 bytes_transferred);
            } else {
                pages = ram_save_page(f, block, offset, last_stage,
                                      bytes_transferred);
            }

            /* if page is unmodified, continue to the next */
            if (pages > 0) {
                last_sent_block = block;
                break;
            }
        }
    }

    last_seen_block = block;
    last_offset = offset;

    return pages;
}

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
        total += block->used_length;
    rcu_read_unlock();
    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_end(void)
{
    if (migration_bitmap) {
        memory_global_dirty_log_stop();
        g_free(migration_bitmap);
        migration_bitmap = NULL;
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}

static void ram_migration_cancel(void *opaque)
{
    migration_end();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */


/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section. When RCU reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
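 * (ram_load below has the same property; see the comment at the top of
 * its RCU critical section.)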
 */

static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */

    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;
    bitmap_sync_count = 0;
    migration_bitmap_sync_init();

    if (migrate_use_xbzrle()) {
        XBZRLE_cache_lock();
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            XBZRLE_cache_unlock();
            error_report("Error creating cache");
            return -1;
        }
        XBZRLE_cache_unlock();

        /* We prefer not to abort if there is no memory */
        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
        if (!XBZRLE.encoded_buf) {
            error_report("Error allocating encoded_buf");
            return -1;
        }

        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!XBZRLE.current_buf) {
            error_report("Error allocating current_buf");
            g_free(XBZRLE.encoded_buf);
            XBZRLE.encoded_buf = NULL;
            return -1;
        }

        acct_clear();
    }

    /* iothread lock needed for ram_list.dirty_memory[] */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();
    bytes_transferred = 0;
    reset_ram_globals();

    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
    migration_bitmap = bitmap_new(ram_bitmap_pages);
    bitmap_set(migration_bitmap, 0, ram_bitmap_pages);

    /*
     * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
     */
    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
    }

    rcu_read_unlock();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int pages_sent = 0;

    rcu_read_lock();
    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    /* Read version before ram_list.blocks */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(f, false, &bytes_transferred);
        /* no more pages to send */
        if (pages == 0) {
            break;
        }
        pages_sent += pages;
        acct_info.iterations++;
        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
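           If more than MAX_WAIT ms have passed since t0 we also break out
           of the loop, so that a single call stays bounded.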
           qemu_get_clock_ns() is a bit expensive, so we only check once
           every few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }
    flush_compressed_data(f);
    rcu_read_unlock();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    bytes_transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return pages_sent;
}

/* Called with iothread lock held */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    rcu_read_lock();

    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(f, true, &bytes_transferred);
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(f);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    rcu_read_unlock();
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync();
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}

/* Must be called from within a rcu critical section.
 * Returns a pointer from within the RCU-protected ram_list.
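 * When RAM_SAVE_FLAG_CONTINUE is set in @flags the page belongs to the
 * same block as the last one looked up, so the cached block pointer is
 * reused instead of reading a block name from the stream.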
 */
static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block || block->max_length <= offset) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return memory_region_get_ram_ptr(block->mr) + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strncmp(id, block->idstr, sizeof(id)) &&
            block->max_length > offset) {
            return memory_region_get_ram_ptr(block->mr) + offset;
        }
    }

    error_report("Can't find block %s!", id);
    return NULL;
}

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}

static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;

    while (!quit_decomp_thread) {
        qemu_mutex_lock(&param->mutex);
        while (!param->start && !quit_decomp_thread) {
            qemu_cond_wait(&param->cond, &param->mutex);
            pagesize = TARGET_PAGE_SIZE;
            if (!quit_decomp_thread) {
                /* uncompress() can fail in some cases, especially when the
                 * page was dirtied while it was being compressed; that is
                 * not a problem because the dirty page will be retransferred
                 * and uncompress() won't break the data in other pages.
                 */
                uncompress((Bytef *)param->des, &pagesize,
                           (const Bytef *)param->compbuf, param->len);
            }
            param->start = false;
        }
        qemu_mutex_unlock(&param->mutex);
    }

    return NULL;
}

void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
    quit_decomp_thread = false;
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    quit_decomp_thread = true;
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    g_free(compressed_data_buf);
    decompress_threads = NULL;
    decomp_param = NULL;
    compressed_data_buf = NULL;
}

static void decompress_data_with_multi_threads(uint8_t *compbuf,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (!decomp_param[idx].start) {
                memcpy(decomp_param[idx].compbuf, compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                start_decompression(&decomp_param[idx]);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        }
    }
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();
    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
                    if (!strncmp(id, block->idstr, sizeof(id))) {
                        if (length != block->used_length) {
                            Error *local_err = NULL;

                            ret = qemu_ram_resize(block->offset, length, &local_err);
                            if (local_err) {
                                error_report_err(local_err);
                            }
                        }
                        break;
                    }
                }

                if (!block) {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;
        case RAM_SAVE_FLAG_COMPRESS:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;
        case RAM_SAVE_FLAG_PAGE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;
        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }

            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            qemu_get_buffer(f, compressed_data_buf, len);
            decompress_data_with_multi_threads(compressed_data_buf, host, len);
            break;
        case RAM_SAVE_FLAG_XBZRLE:
            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, flags);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    rcu_read_unlock();
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cancel = ram_migration_cancel,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
}

/* Stub function that gets run on the vcpu when it is brought out of the
   VM to run inside qemu via async_run_on_cpu() */

static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(30*1000);
    qemu_mutex_lock_iothread();
}

/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
   much time in the VM. The migration thread will try to catch up.
   Workload will experience a performance drop.
*/
static void mig_throttle_guest_down(void)
{
    CPUState *cpu;

    qemu_mutex_lock_iothread();
    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
    }
    qemu_mutex_unlock_iothread();
}

static void check_guest_throttling(void)
{
    static int64_t t0;
    int64_t t1;

    if (!mig_throttle_on) {
        return;
    }

    if (!t0) {
        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        return;
    }

    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    /* If it has been more than 40 ms since the last time the guest
     * was throttled then do it again.
     */
    if (40 < (t1-t0)/1000000) {
        mig_throttle_guest_down();
        t0 = t1;
    }
}