xref: /openbmc/qemu/migration/ram.c (revision 9bed84c19138bd161e9a6157a93ae0b25b5f7a71)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2011-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <quintela@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "migration/migration.h"
39 #include "migration/postcopy-ram.h"
40 #include "exec/address-spaces.h"
41 #include "migration/page_cache.h"
42 #include "qemu/error-report.h"
43 #include "trace.h"
44 #include "exec/ram_addr.h"
45 #include "qemu/rcu_queue.h"
46 #include "migration/colo.h"
47 
48 /***********************************************************/
49 /* ram save/restore */
50 
51 #define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
52 #define RAM_SAVE_FLAG_COMPRESS 0x02
53 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
54 #define RAM_SAVE_FLAG_PAGE     0x08
55 #define RAM_SAVE_FLAG_EOS      0x10
56 #define RAM_SAVE_FLAG_CONTINUE 0x20
57 #define RAM_SAVE_FLAG_XBZRLE   0x40
58 /* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
59 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
60 
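/*
 * Illustrative sketch (hedged, not part of the migration code itself): the
 * flags above are ORed into the low bits of the page offset before the
 * combined word goes on the wire, and the load side splits them apart
 * again.  Offsets are target-page aligned, so the bits below
 * TARGET_PAGE_BITS are free to carry flags.  The helper name below is
 * hypothetical.
 */
static inline void example_split_header_word(uint64_t wire_word,
                                             uint64_t *offset, int *flags)
{
    *flags  = wire_word & ~TARGET_PAGE_MASK;  /* RAM_SAVE_FLAG_* bits       */
    *offset = wire_word & TARGET_PAGE_MASK;   /* target-page aligned offset */
}
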
61 static uint8_t *ZERO_TARGET_PAGE;
62 
63 static inline bool is_zero_range(uint8_t *p, uint64_t size)
64 {
65     return buffer_is_zero(p, size);
66 }
67 
68 /* This struct contains the XBZRLE cache and a static page
69    used by the compression */
70 static struct {
71     /* buffer used for XBZRLE encoding */
72     uint8_t *encoded_buf;
73     /* buffer for storing page content */
74     uint8_t *current_buf;
75     /* Cache for XBZRLE, Protected by lock. */
76     PageCache *cache;
77     QemuMutex lock;
78 } XBZRLE;
79 
80 /* buffer used for XBZRLE decoding */
81 static uint8_t *xbzrle_decoded_buf;
82 
83 static void XBZRLE_cache_lock(void)
84 {
85     if (migrate_use_xbzrle())
86         qemu_mutex_lock(&XBZRLE.lock);
87 }
88 
89 static void XBZRLE_cache_unlock(void)
90 {
91     if (migrate_use_xbzrle())
92         qemu_mutex_unlock(&XBZRLE.lock);
93 }
94 
95 /**
96  * xbzrle_cache_resize: resize the xbzrle cache
97  *
98  * This function is called from qmp_migrate_set_cache_size in the main
99  * thread, possibly while a migration is in progress.  A running
100  * migration may be using the cache and might finish during this call,
101  * hence changes to the cache are protected by XBZRLE.lock.
102  *
103  * Returns the new_size or negative in case of error.
104  *
105  * @new_size: new cache size
106  */
107 int64_t xbzrle_cache_resize(int64_t new_size)
108 {
109     PageCache *new_cache;
110     int64_t ret;
111 
112     if (new_size < TARGET_PAGE_SIZE) {
113         return -1;
114     }
115 
116     XBZRLE_cache_lock();
117 
118     if (XBZRLE.cache != NULL) {
119         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
120             goto out_new_size;
121         }
122         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
123                                         TARGET_PAGE_SIZE);
124         if (!new_cache) {
125             error_report("Error creating cache");
126             ret = -1;
127             goto out;
128         }
129 
130         cache_fini(XBZRLE.cache);
131         XBZRLE.cache = new_cache;
132     }
133 
134 out_new_size:
135     ret = pow2floor(new_size);
136 out:
137     XBZRLE_cache_unlock();
138     return ret;
139 }
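
/*
 * Usage sketch (illustrative only; the caller below is hypothetical, not
 * real QEMU code): a requested size that is not a power of two is rounded
 * down, so asking for 5 MiB yields a 4 MiB cache.
 */
static void example_resize_xbzrle_cache(void)
{
    int64_t granted = xbzrle_cache_resize(5 * 1024 * 1024);

    /* granted is 4 * 1024 * 1024 here, or negative on error */
    (void)granted;
}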
140 
141 struct RAMBitmap {
142     struct rcu_head rcu;
143     /* Main migration bitmap */
144     unsigned long *bmap;
145     /* bitmap of pages that haven't been sent even once;
146      * only maintained and used in postcopy at the moment,
147      * where it's used to send the dirtymap at the start
148      * of the postcopy phase
149      */
150     unsigned long *unsentmap;
151 };
152 typedef struct RAMBitmap RAMBitmap;
153 
154 /*
155  * An outstanding page request, on the source, having been received
156  * and queued
157  */
158 struct RAMSrcPageRequest {
159     RAMBlock *rb;
160     hwaddr    offset;
161     hwaddr    len;
162 
163     QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
164 };
165 
166 /* State of RAM for migration */
167 struct RAMState {
168     /* QEMUFile used for this migration */
169     QEMUFile *f;
170     /* Last block that we have visited searching for dirty pages */
171     RAMBlock *last_seen_block;
172     /* Last block from where we have sent data */
173     RAMBlock *last_sent_block;
174     /* Last dirty target page we have sent */
175     ram_addr_t last_page;
176     /* last ram version we have seen */
177     uint32_t last_version;
178     /* We are in the first round */
179     bool ram_bulk_stage;
180     /* How many times we have found the dirty page rate too high */
181     int dirty_rate_high_cnt;
182     /* How many times we have synchronized the bitmap */
183     uint64_t bitmap_sync_count;
184     /* these variables are used for bitmap sync */
185     /* last time we did a full bitmap_sync */
186     int64_t time_last_bitmap_sync;
187     /* bytes transferred at start_time */
188     uint64_t bytes_xfer_prev;
189     /* number of dirty pages since start_time */
190     uint64_t num_dirty_pages_period;
191     /* xbzrle misses since the beginning of the period */
192     uint64_t xbzrle_cache_miss_prev;
193     /* number of iterations at the beginning of period */
194     uint64_t iterations_prev;
195     /* Accounting fields */
196     /* number of zero pages.  It used to be pages filled by the same char. */
197     uint64_t zero_pages;
198     /* number of normal transferred pages */
199     uint64_t norm_pages;
200     /* Iterations since start */
201     uint64_t iterations;
202     /* xbzrle transmitted bytes.  Note that these are the compressed
203      * sizes, so they can't be derived from the page count */
204     uint64_t xbzrle_bytes;
205     /* xbzrle transmitted pages */
206     uint64_t xbzrle_pages;
207     /* xbzrle number of cache miss */
208     uint64_t xbzrle_cache_miss;
209     /* xbzrle miss rate */
210     double xbzrle_cache_miss_rate;
211     /* xbzrle number of overflows */
212     uint64_t xbzrle_overflows;
213     /* number of dirty bits in the bitmap */
214     uint64_t migration_dirty_pages;
215     /* total number of bytes transferred */
216     uint64_t bytes_transferred;
217     /* number of dirtied pages in the last second */
218     uint64_t dirty_pages_rate;
219     /* Count of requests incoming from destination */
220     uint64_t postcopy_requests;
221     /* protects modification of the bitmap */
222     QemuMutex bitmap_mutex;
223     /* Ram Bitmap protected by RCU */
224     RAMBitmap *ram_bitmap;
225     /* The RAMBlock used in the last src_page_requests */
226     RAMBlock *last_req_rb;
227     /* Queue of outstanding page requests from the destination */
228     QemuMutex src_page_req_mutex;
229     QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
230 };
231 typedef struct RAMState RAMState;
232 
233 static RAMState ram_state;
234 
235 uint64_t dup_mig_pages_transferred(void)
236 {
237     return ram_state.zero_pages;
238 }
239 
240 uint64_t norm_mig_pages_transferred(void)
241 {
242     return ram_state.norm_pages;
243 }
244 
245 uint64_t xbzrle_mig_bytes_transferred(void)
246 {
247     return ram_state.xbzrle_bytes;
248 }
249 
250 uint64_t xbzrle_mig_pages_transferred(void)
251 {
252     return ram_state.xbzrle_pages;
253 }
254 
255 uint64_t xbzrle_mig_pages_cache_miss(void)
256 {
257     return ram_state.xbzrle_cache_miss;
258 }
259 
260 double xbzrle_mig_cache_miss_rate(void)
261 {
262     return ram_state.xbzrle_cache_miss_rate;
263 }
264 
265 uint64_t xbzrle_mig_pages_overflow(void)
266 {
267     return ram_state.xbzrle_overflows;
268 }
269 
270 uint64_t ram_bytes_transferred(void)
271 {
272     return ram_state.bytes_transferred;
273 }
274 
275 uint64_t ram_bytes_remaining(void)
276 {
277     return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
278 }
279 
280 uint64_t ram_dirty_sync_count(void)
281 {
282     return ram_state.bitmap_sync_count;
283 }
284 
285 uint64_t ram_dirty_pages_rate(void)
286 {
287     return ram_state.dirty_pages_rate;
288 }
289 
290 uint64_t ram_postcopy_requests(void)
291 {
292     return ram_state.postcopy_requests;
293 }
294 
295 /* used by the search for pages to send */
296 struct PageSearchStatus {
297     /* Current block being searched */
298     RAMBlock    *block;
299     /* Current page to search from */
300     unsigned long page;
301     /* Set once we wrap around */
302     bool         complete_round;
303 };
304 typedef struct PageSearchStatus PageSearchStatus;
305 
306 struct CompressParam {
307     bool done;
308     bool quit;
309     QEMUFile *file;
310     QemuMutex mutex;
311     QemuCond cond;
312     RAMBlock *block;
313     ram_addr_t offset;
314 };
315 typedef struct CompressParam CompressParam;
316 
317 struct DecompressParam {
318     bool done;
319     bool quit;
320     QemuMutex mutex;
321     QemuCond cond;
322     void *des;
323     uint8_t *compbuf;
324     int len;
325 };
326 typedef struct DecompressParam DecompressParam;
327 
328 static CompressParam *comp_param;
329 static QemuThread *compress_threads;
330 /* comp_done_cond is used to wake up the migration thread when
331  * one of the compression threads has finished the compression.
332  * comp_done_lock is used together with comp_done_cond.
333  */
334 static QemuMutex comp_done_lock;
335 static QemuCond comp_done_cond;
336 /* The empty QEMUFileOps will be used by file in CompressParam */
337 static const QEMUFileOps empty_ops = { };
338 
339 static DecompressParam *decomp_param;
340 static QemuThread *decompress_threads;
341 static QemuMutex decomp_done_lock;
342 static QemuCond decomp_done_cond;
343 
344 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
345                                 ram_addr_t offset);
346 
347 static void *do_data_compress(void *opaque)
348 {
349     CompressParam *param = opaque;
350     RAMBlock *block;
351     ram_addr_t offset;
352 
353     qemu_mutex_lock(&param->mutex);
354     while (!param->quit) {
355         if (param->block) {
356             block = param->block;
357             offset = param->offset;
358             param->block = NULL;
359             qemu_mutex_unlock(&param->mutex);
360 
361             do_compress_ram_page(param->file, block, offset);
362 
363             qemu_mutex_lock(&comp_done_lock);
364             param->done = true;
365             qemu_cond_signal(&comp_done_cond);
366             qemu_mutex_unlock(&comp_done_lock);
367 
368             qemu_mutex_lock(&param->mutex);
369         } else {
370             qemu_cond_wait(&param->cond, &param->mutex);
371         }
372     }
373     qemu_mutex_unlock(&param->mutex);
374 
375     return NULL;
376 }
377 
378 static inline void terminate_compression_threads(void)
379 {
380     int idx, thread_count;
381 
382     thread_count = migrate_compress_threads();
383 
384     for (idx = 0; idx < thread_count; idx++) {
385         qemu_mutex_lock(&comp_param[idx].mutex);
386         comp_param[idx].quit = true;
387         qemu_cond_signal(&comp_param[idx].cond);
388         qemu_mutex_unlock(&comp_param[idx].mutex);
389     }
390 }
391 
392 void migrate_compress_threads_join(void)
393 {
394     int i, thread_count;
395 
396     if (!migrate_use_compression()) {
397         return;
398     }
399     terminate_compression_threads();
400     thread_count = migrate_compress_threads();
401     for (i = 0; i < thread_count; i++) {
402         qemu_thread_join(compress_threads + i);
403         qemu_fclose(comp_param[i].file);
404         qemu_mutex_destroy(&comp_param[i].mutex);
405         qemu_cond_destroy(&comp_param[i].cond);
406     }
407     qemu_mutex_destroy(&comp_done_lock);
408     qemu_cond_destroy(&comp_done_cond);
409     g_free(compress_threads);
410     g_free(comp_param);
411     compress_threads = NULL;
412     comp_param = NULL;
413 }
414 
415 void migrate_compress_threads_create(void)
416 {
417     int i, thread_count;
418 
419     if (!migrate_use_compression()) {
420         return;
421     }
422     thread_count = migrate_compress_threads();
423     compress_threads = g_new0(QemuThread, thread_count);
424     comp_param = g_new0(CompressParam, thread_count);
425     qemu_cond_init(&comp_done_cond);
426     qemu_mutex_init(&comp_done_lock);
427     for (i = 0; i < thread_count; i++) {
428         /* comp_param[i].file is just used as a dummy buffer to save data,
429          * set its ops to empty.
430          */
431         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
432         comp_param[i].done = true;
433         comp_param[i].quit = false;
434         qemu_mutex_init(&comp_param[i].mutex);
435         qemu_cond_init(&comp_param[i].cond);
436         qemu_thread_create(compress_threads + i, "compress",
437                            do_data_compress, comp_param + i,
438                            QEMU_THREAD_JOINABLE);
439     }
440 }
441 
442 /**
443  * save_page_header: write page header to wire
444  *
445  * If this is the 1st block, it also writes the block identification
446  *
447  * Returns the number of bytes written
448  *
449  * @rs: current RAM state
450  * @block: block that contains the page we want to send
451  * @offset: offset inside the block for the page
452  *          the lower bits contain flags
453  */
454 static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset)
455 {
456     size_t size, len;
457 
458     if (block == rs->last_sent_block) {
459         offset |= RAM_SAVE_FLAG_CONTINUE;
460     }
461     qemu_put_be64(rs->f, offset);
462     size = 8;
463 
464     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
465         len = strlen(block->idstr);
466         qemu_put_byte(rs->f, len);
467         qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len);
468         size += 1 + len;
469         rs->last_sent_block = block;
470     }
471     return size;
472 }
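
/*
 * Hedged sketch of what the matching load side has to undo (the real reader
 * lives in ram_load(), outside this excerpt; the function below is
 * illustrative only): read the big-endian offset word, split off the flags,
 * and read the block idstr only when RAM_SAVE_FLAG_CONTINUE is clear.
 */
static void example_read_page_header(QEMUFile *f)
{
    char id[256];
    uint64_t addr = qemu_get_be64(f);          /* offset ORed with flags   */
    int flags = addr & ~TARGET_PAGE_MASK;      /* RAM_SAVE_FLAG_* bits     */

    addr &= TARGET_PAGE_MASK;                  /* aligned page offset      */
    (void)addr;
    if (!(flags & RAM_SAVE_FLAG_CONTINUE)) {
        int len = qemu_get_byte(f);            /* 1-byte idstr length      */

        qemu_get_buffer(f, (uint8_t *)id, len);
        id[len] = '\0';                        /* page belongs to block id */
    }
}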
473 
474 /**
475  * mig_throttle_guest_down: throttle down the guest
476  *
477  * Reduce amount of guest cpu execution to hopefully slow down memory
478  * writes. If guest dirty memory rate is reduced below the rate at
479  * which we can transfer pages to the destination then we should be
480  * able to complete migration. Some workloads dirty memory way too
481  * fast and will not effectively converge, even with auto-converge.
482  */
483 static void mig_throttle_guest_down(void)
484 {
485     MigrationState *s = migrate_get_current();
486     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
487     uint64_t pct_increment = s->parameters.cpu_throttle_increment;
488 
489     /* We have not started throttling yet. Let's start it. */
490     if (!cpu_throttle_active()) {
491         cpu_throttle_set(pct_initial);
492     } else {
493         /* Throttling already on, just increase the rate */
494         cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
495     }
496 }
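
/*
 * Arithmetic sketch (illustrative only, hypothetical helper): how the
 * throttle grows across consecutive periods in which migration_bitmap_sync()
 * decides the guest is dirtying memory too fast.  The 20%/10% figures are
 * just example values of the tunable cpu_throttle_initial and
 * cpu_throttle_increment parameters; cpu_throttle_set() clamps the final
 * percentage internally.
 */
static inline uint64_t example_throttle_after(int periods,
                                              uint64_t pct_initial,
                                              uint64_t pct_increment)
{
    /* 1st period: pct_initial; each further period adds pct_increment,
     * e.g. with 20/10: 20%, 30%, 40%, ... */
    return periods <= 0 ? 0 : pct_initial + (periods - 1) * pct_increment;
}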
497 
498 /**
499  * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
500  *
501  * @rs: current RAM state
502  * @current_addr: address for the zero page
503  *
504  * Update the xbzrle cache to reflect a page that's been sent as all 0.
505  * The important thing is that a stale (not-yet-0'd) page be replaced
506  * by the new data.
507  * As a bonus, if the page wasn't in the cache it gets added so that
508  * when a small write is made into the 0'd page it gets XBZRLE sent.
509  */
510 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
511 {
512     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
513         return;
514     }
515 
516     /* We don't care if this fails to allocate a new cache page
517      * as long as it updated an old one */
518     cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
519                  rs->bitmap_sync_count);
520 }
521 
522 #define ENCODING_FLAG_XBZRLE 0x1
523 
524 /**
525  * save_xbzrle_page: compress and send current page
526  *
527  * Returns: 1 means that we wrote the page
528  *          0 means that the page is identical to the one already sent
529  *          -1 means that the xbzrle encoding would be longer than a normal page send
530  *
531  * @rs: current RAM state
532  * @current_data: pointer to the address of the page contents
533  * @current_addr: addr of the page
534  * @block: block that contains the page we want to send
535  * @offset: offset inside the block for the page
536  * @last_stage: if we are at the completion stage
537  */
538 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
539                             ram_addr_t current_addr, RAMBlock *block,
540                             ram_addr_t offset, bool last_stage)
541 {
542     int encoded_len = 0, bytes_xbzrle;
543     uint8_t *prev_cached_page;
544 
545     if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
546         rs->xbzrle_cache_miss++;
547         if (!last_stage) {
548             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
549                              rs->bitmap_sync_count) == -1) {
550                 return -1;
551             } else {
552                 /* update *current_data when the page has been
553                    inserted into cache */
554                 *current_data = get_cached_data(XBZRLE.cache, current_addr);
555             }
556         }
557         return -1;
558     }
559 
560     prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
561 
562     /* save current buffer into memory */
563     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
564 
565     /* XBZRLE encoding (if there is no overflow) */
566     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
567                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
568                                        TARGET_PAGE_SIZE);
569     if (encoded_len == 0) {
570         trace_save_xbzrle_page_skipping();
571         return 0;
572     } else if (encoded_len == -1) {
573         trace_save_xbzrle_page_overflow();
574         rs->xbzrle_overflows++;
575         /* update data in the cache */
576         if (!last_stage) {
577             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
578             *current_data = prev_cached_page;
579         }
580         return -1;
581     }
582 
583     /* we need to update the data in the cache, in order to get the same data */
584     if (!last_stage) {
585         memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
586     }
587 
588     /* Send XBZRLE based compressed page */
589     bytes_xbzrle = save_page_header(rs, block,
590                                     offset | RAM_SAVE_FLAG_XBZRLE);
591     qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
592     qemu_put_be16(rs->f, encoded_len);
593     qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
594     bytes_xbzrle += encoded_len + 1 + 2;
595     rs->xbzrle_pages++;
596     rs->xbzrle_bytes += bytes_xbzrle;
597     rs->bytes_transferred += bytes_xbzrle;
598 
599     return 1;
600 }
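
/*
 * Accounting sketch (illustrative only, hypothetical helper): the wire cost
 * charged above for one XBZRLE page is the page header plus a 1-byte
 * encoding flag, a 2-byte big-endian length and the encoded payload.
 */
static inline size_t example_xbzrle_wire_bytes(size_t header_len,
                                               int encoded_len)
{
    return header_len          /* save_page_header(): 8 bytes, plus the
                                * block idstr the first time a block is seen */
           + 1                 /* ENCODING_FLAG_XBZRLE */
           + 2                 /* be16 encoded_len */
           + encoded_len;      /* the XBZRLE-encoded data itself */
}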
601 
602 /**
603  * migration_bitmap_find_dirty: find the next dirty page from start
604  *
605  * Called with rcu_read_lock() to protect migration_bitmap
606  *
607  * Returns the byte offset within memory region of the start of a dirty page
608  *
609  * @rs: current RAM state
610  * @rb: RAMBlock where to search for dirty pages
611  * @start: page where we start the search
612  */
613 static inline
614 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
615                                           unsigned long start)
616 {
617     unsigned long base = rb->offset >> TARGET_PAGE_BITS;
618     unsigned long nr = base + start;
619     uint64_t rb_size = rb->used_length;
620     unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
621     unsigned long *bitmap;
622 
623     unsigned long next;
624 
625     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
626     if (rs->ram_bulk_stage && nr > base) {
627         next = nr + 1;
628     } else {
629         next = find_next_bit(bitmap, size, nr);
630     }
631 
632     return next - base;
633 }
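
/*
 * Index arithmetic sketch (illustrative only, hypothetical helpers): the
 * migration bitmap is indexed by global target-page number, so lookups
 * translate between a page index relative to a RAMBlock and the global bit
 * number, exactly as the 'base + start' arithmetic above does.
 */
static inline unsigned long example_global_bit(RAMBlock *rb,
                                               unsigned long block_page)
{
    return (rb->offset >> TARGET_PAGE_BITS) + block_page;
}

static inline unsigned long example_block_page(RAMBlock *rb,
                                               unsigned long global_bit)
{
    return global_bit - (rb->offset >> TARGET_PAGE_BITS);
}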
634 
635 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
636                                                 RAMBlock *rb,
637                                                 unsigned long page)
638 {
639     bool ret;
640     unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
641     unsigned long nr = (rb->offset >> TARGET_PAGE_BITS) + page;
642 
643     ret = test_and_clear_bit(nr, bitmap);
644 
645     if (ret) {
646         rs->migration_dirty_pages--;
647     }
648     return ret;
649 }
650 
651 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
652                                         ram_addr_t start, ram_addr_t length)
653 {
654     unsigned long *bitmap;
655     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
656     rs->migration_dirty_pages +=
657         cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length,
658                                               &rs->num_dirty_pages_period);
659 }
660 
661 /**
662  * ram_pagesize_summary: calculate all the pagesizes of a VM
663  *
664  * Returns a summary bitmap of the page sizes of all RAMBlocks
665  *
666  * For VMs with just normal pages this is equivalent to the host page
667  * size. If it's got some huge pages then it's the OR of all the
668  * different page sizes.
669  */
670 uint64_t ram_pagesize_summary(void)
671 {
672     RAMBlock *block;
673     uint64_t summary = 0;
674 
675     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
676         summary |= block->page_size;
677     }
678 
679     return summary;
680 }
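
/*
 * Example (illustrative only): a guest whose RAM is backed by ordinary
 * 4 KiB pages plus one 2 MiB hugetlbfs block gets both sizes ORed into the
 * summary, 0x1000 | 0x200000 == 0x201000, so the destination can tell that
 * huge pages are involved.
 */
static inline uint64_t example_pagesize_summary(void)
{
    return (4 * 1024) | (2 * 1024 * 1024);      /* == 0x201000 */
}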
681 
682 static void migration_bitmap_sync(RAMState *rs)
683 {
684     RAMBlock *block;
685     int64_t end_time;
686     uint64_t bytes_xfer_now;
687 
688     rs->bitmap_sync_count++;
689 
690     if (!rs->bytes_xfer_prev) {
691         rs->bytes_xfer_prev = ram_bytes_transferred();
692     }
693 
694     if (!rs->time_last_bitmap_sync) {
695         rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
696     }
697 
698     trace_migration_bitmap_sync_start();
699     memory_global_dirty_log_sync();
700 
701     qemu_mutex_lock(&rs->bitmap_mutex);
702     rcu_read_lock();
703     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
704         migration_bitmap_sync_range(rs, block, 0, block->used_length);
705     }
706     rcu_read_unlock();
707     qemu_mutex_unlock(&rs->bitmap_mutex);
708 
709     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
710 
711     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
712 
713     /* more than 1 second = 1000 milliseconds */
714     if (end_time > rs->time_last_bitmap_sync + 1000) {
715         if (migrate_auto_converge()) {
716             /* The following detection logic can be refined later. For now:
717                Check to see if the dirtied bytes are 50% more than the approx.
718                amount of bytes that just got transferred since the last time we
719                were in this routine. If that happens twice, start or increase
720                throttling */
721             bytes_xfer_now = ram_bytes_transferred();
722 
723             if (rs->dirty_pages_rate &&
724                (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
725                    (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
726                (rs->dirty_rate_high_cnt++ >= 2)) {
727                     trace_migration_throttle();
728                     rs->dirty_rate_high_cnt = 0;
729                     mig_throttle_guest_down();
730              }
731              rs->bytes_xfer_prev = bytes_xfer_now;
732         }
733 
734         if (migrate_use_xbzrle()) {
735             if (rs->iterations_prev != rs->iterations) {
736                 rs->xbzrle_cache_miss_rate =
737                    (double)(rs->xbzrle_cache_miss -
738                             rs->xbzrle_cache_miss_prev) /
739                    (rs->iterations - rs->iterations_prev);
740             }
741             rs->iterations_prev = rs->iterations;
742             rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
743         }
744         rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
745             / (end_time - rs->time_last_bitmap_sync);
746         rs->time_last_bitmap_sync = end_time;
747         rs->num_dirty_pages_period = 0;
748     }
749     if (migrate_use_events()) {
750         qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
751     }
752 }
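
/*
 * Arithmetic sketch (illustrative only, hypothetical helpers): the two
 * numbers computed above that drive auto-converge are the dirty page rate
 * over the last period and whether the bytes dirtied in that period exceed
 * half of the bytes actually transferred.  The real code also requires a
 * non-zero previous rate and two consecutive hits before throttling.
 */
static inline uint64_t example_dirty_pages_rate(uint64_t dirty_pages_period,
                                                int64_t period_ms)
{
    return dirty_pages_period * 1000 / period_ms;
}

static inline bool example_dirty_rate_too_high(uint64_t dirty_pages_period,
                                               uint64_t bytes_xfer_now,
                                               uint64_t bytes_xfer_prev)
{
    return dirty_pages_period * TARGET_PAGE_SIZE >
           (bytes_xfer_now - bytes_xfer_prev) / 2;
}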
753 
754 /**
755  * save_zero_page: send the zero page to the stream
756  *
757  * Returns the number of pages written.
758  *
759  * @rs: current RAM state
760  * @block: block that contains the page we want to send
761  * @offset: offset inside the block for the page
762  * @p: pointer to the page
763  */
764 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
765                           uint8_t *p)
766 {
767     int pages = -1;
768 
769     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
770         rs->zero_pages++;
771         rs->bytes_transferred +=
772             save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS);
773         qemu_put_byte(rs->f, 0);
774         rs->bytes_transferred += 1;
775         pages = 1;
776     }
777 
778     return pages;
779 }
780 
781 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
782 {
783     if (!migrate_release_ram() || !migration_in_postcopy()) {
784         return;
785     }
786 
787     ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
788 }
789 
790 /**
791  * ram_save_page: send the given page to the stream
792  *
793  * Returns the number of pages written.
794  *          < 0 - error
795  *          >=0 - Number of pages written - this might legally be 0
796  *                if xbzrle noticed the page was the same.
797  *
798  * @rs: current RAM state
799  * @block: block that contains the page we want to send
800  * @offset: offset inside the block for the page
801  * @last_stage: if we are at the completion stage
802  */
803 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
804 {
805     int pages = -1;
806     uint64_t bytes_xmit;
807     ram_addr_t current_addr;
808     uint8_t *p;
809     int ret;
810     bool send_async = true;
811     RAMBlock *block = pss->block;
812     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
813 
814     p = block->host + offset;
815 
816     /* When in doubt, send the page as normal */
817     bytes_xmit = 0;
818     ret = ram_control_save_page(rs->f, block->offset,
819                            offset, TARGET_PAGE_SIZE, &bytes_xmit);
820     if (bytes_xmit) {
821         rs->bytes_transferred += bytes_xmit;
822         pages = 1;
823     }
824 
825     XBZRLE_cache_lock();
826 
827     current_addr = block->offset + offset;
828 
829     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
830         if (ret != RAM_SAVE_CONTROL_DELAYED) {
831             if (bytes_xmit > 0) {
832                 rs->norm_pages++;
833             } else if (bytes_xmit == 0) {
834                 rs->zero_pages++;
835             }
836         }
837     } else {
838         pages = save_zero_page(rs, block, offset, p);
839         if (pages > 0) {
840             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
841              * page would be stale
842              */
843             xbzrle_cache_zero_page(rs, current_addr);
844             ram_release_pages(block->idstr, offset, pages);
845         } else if (!rs->ram_bulk_stage &&
846                    !migration_in_postcopy() && migrate_use_xbzrle()) {
847             pages = save_xbzrle_page(rs, &p, current_addr, block,
848                                      offset, last_stage);
849             if (!last_stage) {
850                 /* Can't send this cached data async, since the cache page
851                  * might get updated before it gets to the wire
852                  */
853                 send_async = false;
854             }
855         }
856     }
857 
858     /* XBZRLE overflow or normal page */
859     if (pages == -1) {
860         rs->bytes_transferred += save_page_header(rs, block,
861                                                   offset | RAM_SAVE_FLAG_PAGE);
862         if (send_async) {
863             qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
864                                   migrate_release_ram() &&
865                                   migration_in_postcopy());
866         } else {
867             qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
868         }
869         rs->bytes_transferred += TARGET_PAGE_SIZE;
870         pages = 1;
871         rs->norm_pages++;
872     }
873 
874     XBZRLE_cache_unlock();
875 
876     return pages;
877 }
878 
879 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
880                                 ram_addr_t offset)
881 {
882     RAMState *rs = &ram_state;
883     int bytes_sent, blen;
884     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
885 
886     bytes_sent = save_page_header(rs, block, offset |
887                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
888     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
889                                      migrate_compress_level());
890     if (blen < 0) {
891         bytes_sent = 0;
892         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
893         error_report("compressed data failed!");
894     } else {
895         bytes_sent += blen;
896         ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
897     }
898 
899     return bytes_sent;
900 }
901 
902 static void flush_compressed_data(RAMState *rs)
903 {
904     int idx, len, thread_count;
905 
906     if (!migrate_use_compression()) {
907         return;
908     }
909     thread_count = migrate_compress_threads();
910 
911     qemu_mutex_lock(&comp_done_lock);
912     for (idx = 0; idx < thread_count; idx++) {
913         while (!comp_param[idx].done) {
914             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
915         }
916     }
917     qemu_mutex_unlock(&comp_done_lock);
918 
919     for (idx = 0; idx < thread_count; idx++) {
920         qemu_mutex_lock(&comp_param[idx].mutex);
921         if (!comp_param[idx].quit) {
922             len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
923             rs->bytes_transferred += len;
924         }
925         qemu_mutex_unlock(&comp_param[idx].mutex);
926     }
927 }
928 
929 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
930                                        ram_addr_t offset)
931 {
932     param->block = block;
933     param->offset = offset;
934 }
935 
936 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
937                                            ram_addr_t offset)
938 {
939     int idx, thread_count, bytes_xmit = -1, pages = -1;
940 
941     thread_count = migrate_compress_threads();
942     qemu_mutex_lock(&comp_done_lock);
943     while (true) {
944         for (idx = 0; idx < thread_count; idx++) {
945             if (comp_param[idx].done) {
946                 comp_param[idx].done = false;
947                 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
948                 qemu_mutex_lock(&comp_param[idx].mutex);
949                 set_compress_params(&comp_param[idx], block, offset);
950                 qemu_cond_signal(&comp_param[idx].cond);
951                 qemu_mutex_unlock(&comp_param[idx].mutex);
952                 pages = 1;
953                 rs->norm_pages++;
954                 rs->bytes_transferred += bytes_xmit;
955                 break;
956             }
957         }
958         if (pages > 0) {
959             break;
960         } else {
961             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
962         }
963     }
964     qemu_mutex_unlock(&comp_done_lock);
965 
966     return pages;
967 }
968 
969 /**
970  * ram_save_compressed_page: compress the given page and send it to the stream
971  *
972  * Returns the number of pages written.
973  *
974  * @rs: current RAM state
975  * @block: block that contains the page we want to send
976  * @offset: offset inside the block for the page
977  * @last_stage: if we are at the completion stage
978  */
979 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
980                                     bool last_stage)
981 {
982     int pages = -1;
983     uint64_t bytes_xmit = 0;
984     uint8_t *p;
985     int ret, blen;
986     RAMBlock *block = pss->block;
987     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
988 
989     p = block->host + offset;
990 
991     ret = ram_control_save_page(rs->f, block->offset,
992                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
993     if (bytes_xmit) {
994         rs->bytes_transferred += bytes_xmit;
995         pages = 1;
996     }
997     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
998         if (ret != RAM_SAVE_CONTROL_DELAYED) {
999             if (bytes_xmit > 0) {
1000                 rs->norm_pages++;
1001             } else if (bytes_xmit == 0) {
1002                 rs->zero_pages++;
1003             }
1004         }
1005     } else {
1006         /* When starting the process of a new block, the first page of
1007          * the block should be sent out before other pages in the same
1008          * block, and all the pages in the last block should have been sent
1009          * out. Keeping this order is important, because the 'cont' flag
1010          * is used to avoid resending the block name.
1011          */
1012         if (block != rs->last_sent_block) {
1013             flush_compressed_data(rs);
1014             pages = save_zero_page(rs, block, offset, p);
1015             if (pages == -1) {
1016                 /* Make sure the first page is sent out before other pages */
1017                 bytes_xmit = save_page_header(rs, block, offset |
1018                                               RAM_SAVE_FLAG_COMPRESS_PAGE);
1019                 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
1020                                                  migrate_compress_level());
1021                 if (blen > 0) {
1022                     rs->bytes_transferred += bytes_xmit + blen;
1023                     rs->norm_pages++;
1024                     pages = 1;
1025                 } else {
1026                     qemu_file_set_error(rs->f, blen);
1027                     error_report("compressed data failed!");
1028                 }
1029             }
1030             if (pages > 0) {
1031                 ram_release_pages(block->idstr, offset, pages);
1032             }
1033         } else {
1034             pages = save_zero_page(rs, block, offset, p);
1035             if (pages == -1) {
1036                 pages = compress_page_with_multi_thread(rs, block, offset);
1037             } else {
1038                 ram_release_pages(block->idstr, offset, pages);
1039             }
1040         }
1041     }
1042 
1043     return pages;
1044 }
1045 
1046 /**
1047  * find_dirty_block: find the next dirty page and update any state
1048  * associated with the search process.
1049  *
1050  * Returns whether a page was found
1051  *
1052  * @rs: current RAM state
1053  * @pss: data about the state of the current dirty page scan
1054  * @again: set to false if the search has scanned the whole of RAM
1055  */
1056 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1057 {
1058     pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1059     if (pss->complete_round && pss->block == rs->last_seen_block &&
1060         pss->page >= rs->last_page) {
1061         /*
1062          * We've been once around the RAM and haven't found anything.
1063          * Give up.
1064          */
1065         *again = false;
1066         return false;
1067     }
1068     if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1069         /* Didn't find anything in this RAM Block */
1070         pss->page = 0;
1071         pss->block = QLIST_NEXT_RCU(pss->block, next);
1072         if (!pss->block) {
1073             /* Hit the end of the list */
1074             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1075             /* Flag that we've looped */
1076             pss->complete_round = true;
1077             rs->ram_bulk_stage = false;
1078             if (migrate_use_xbzrle()) {
1079                 /* If xbzrle is on, stop using the data compression at this
1080                  * point. In theory, xbzrle can do better than compression.
1081                  */
1082                 flush_compressed_data(rs);
1083             }
1084         }
1085         /* Didn't find anything this time, but try again on the new block */
1086         *again = true;
1087         return false;
1088     } else {
1089         /* Can go around again, but... */
1090         *again = true;
1091         /* We've found something so probably don't need to */
1092         return true;
1093     }
1094 }
1095 
1096 /**
1097  * unqueue_page: gets a page off the queue
1098  *
1099  * Helper for 'get_queued_page' - gets a page off the queue
1100  *
1101  * Returns the block of the page (or NULL if none available)
1102  *
1103  * @rs: current RAM state
1104  * @offset: used to return the offset within the RAMBlock
1105  */
1106 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1107 {
1108     RAMBlock *block = NULL;
1109 
1110     qemu_mutex_lock(&rs->src_page_req_mutex);
1111     if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1112         struct RAMSrcPageRequest *entry =
1113                                 QSIMPLEQ_FIRST(&rs->src_page_requests);
1114         block = entry->rb;
1115         *offset = entry->offset;
1116 
1117         if (entry->len > TARGET_PAGE_SIZE) {
1118             entry->len -= TARGET_PAGE_SIZE;
1119             entry->offset += TARGET_PAGE_SIZE;
1120         } else {
1121             memory_region_unref(block->mr);
1122             QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1123             g_free(entry);
1124         }
1125     }
1126     qemu_mutex_unlock(&rs->src_page_req_mutex);
1127 
1128     return block;
1129 }
1130 
1131 /**
1132  * get_queued_page: unqueue a page from the postcopy requests
1133  *
1134  * Skips pages that are already sent (!dirty)
1135  *
1136  * Returns whether a queued page was found
1137  *
1138  * @rs: current RAM state
1139  * @pss: data about the state of the current dirty page scan
1140  */
1141 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1142 {
1143     RAMBlock  *block;
1144     ram_addr_t offset;
1145     bool dirty;
1146 
1147     do {
1148         block = unqueue_page(rs, &offset);
1149         /*
1150          * We're sending this page, and since it's postcopy nothing else
1151          * will dirty it, and we must make sure it doesn't get sent again
1152          * even if this queue request was received after the background
1153          * search already sent it.
1154          */
1155         if (block) {
1156             unsigned long *bitmap;
1157             unsigned long page;
1158 
1159             bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1160             page = (block->offset + offset) >> TARGET_PAGE_BITS;
1161             dirty = test_bit(page, bitmap);
1162             if (!dirty) {
1163                 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1164                     page,
1165                     test_bit(page,
1166                              atomic_rcu_read(&rs->ram_bitmap)->unsentmap));
1167             } else {
1168                 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1169             }
1170         }
1171 
1172     } while (block && !dirty);
1173 
1174     if (block) {
1175         /*
1176          * As soon as we start servicing pages out of order, then we have
1177          * to kill the bulk stage, since the bulk stage assumes
1178          * in migration_bitmap_find_dirty() that every page is
1179          * dirty, and that's no longer true.
1180          */
1181         rs->ram_bulk_stage = false;
1182 
1183         /*
1184          * We want the background search to continue from the queued page
1185          * since the guest is likely to want other pages near to the page
1186          * it just requested.
1187          */
1188         pss->block = block;
1189         pss->page = offset >> TARGET_PAGE_BITS;
1190     }
1191 
1192     return !!block;
1193 }
1194 
1195 /**
1196  * migration_page_queue_free: drop any remaining pages in the ram
1197  * request queue
1198  *
1199  * It should be empty at the end anyway, but in error cases there may
1200  * be some left.  In case any pages are left, we drop them.
1201  *
1202  */
1203 void migration_page_queue_free(void)
1204 {
1205     struct RAMSrcPageRequest *mspr, *next_mspr;
1206     RAMState *rs = &ram_state;
1207     /* This queue generally should be empty - but in the case of a failed
1208      * migration might have some droppings in.
1209      */
1210     rcu_read_lock();
1211     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1212         memory_region_unref(mspr->rb->mr);
1213         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1214         g_free(mspr);
1215     }
1216     rcu_read_unlock();
1217 }
1218 
1219 /**
1220  * ram_save_queue_pages: queue the page for transmission
1221  *
1222  * A request from postcopy destination for example.
1223  *
1224  * Returns zero on success or negative on error
1225  *
1226  * @rbname: Name of the RAMBlock of the request. NULL means the
1227  *          same as the last one.
1228  * @start: starting address from the start of the RAMBlock
1229  * @len: length (in bytes) to send
1230  */
1231 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1232 {
1233     RAMBlock *ramblock;
1234     RAMState *rs = &ram_state;
1235 
1236     rs->postcopy_requests++;
1237     rcu_read_lock();
1238     if (!rbname) {
1239         /* Reuse last RAMBlock */
1240         ramblock = rs->last_req_rb;
1241 
1242         if (!ramblock) {
1243             /*
1244              * Shouldn't happen, we can't reuse the last RAMBlock if
1245              * it's the 1st request.
1246              */
1247             error_report("ram_save_queue_pages no previous block");
1248             goto err;
1249         }
1250     } else {
1251         ramblock = qemu_ram_block_by_name(rbname);
1252 
1253         if (!ramblock) {
1254             /* We shouldn't be asked for a non-existent RAMBlock */
1255             error_report("ram_save_queue_pages no block '%s'", rbname);
1256             goto err;
1257         }
1258         rs->last_req_rb = ramblock;
1259     }
1260     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1261     if (start+len > ramblock->used_length) {
1262         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1263                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1264                      __func__, start, len, ramblock->used_length);
1265         goto err;
1266     }
1267 
1268     struct RAMSrcPageRequest *new_entry =
1269         g_malloc0(sizeof(struct RAMSrcPageRequest));
1270     new_entry->rb = ramblock;
1271     new_entry->offset = start;
1272     new_entry->len = len;
1273 
1274     memory_region_ref(ramblock->mr);
1275     qemu_mutex_lock(&rs->src_page_req_mutex);
1276     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1277     qemu_mutex_unlock(&rs->src_page_req_mutex);
1278     rcu_read_unlock();
1279 
1280     return 0;
1281 
1282 err:
1283     rcu_read_unlock();
1284     return -1;
1285 }
1286 
1287 /**
1288  * ram_save_target_page: save one target page
1289  *
1290  * Returns the number of pages written
1291  *
1292  * @rs: current RAM state
1294  * @pss: data about the page we want to send
1295  * @last_stage: if we are at the completion stage
1296  */
1297 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1298                                 bool last_stage)
1299 {
1300     int res = 0;
1301 
1302     /* Check whether the page is dirty and, if so, send it */
1303     if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1304         unsigned long *unsentmap;
1305         /*
1306          * If xbzrle is on, stop using the data compression after the first
1307          * round of migration even if compression is enabled. In theory,
1308          * xbzrle can do better than compression.
1309          */
1310         unsigned long page =
1311             (pss->block->offset >> TARGET_PAGE_BITS) + pss->page;
1312         if (migrate_use_compression()
1313             && (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1314             res = ram_save_compressed_page(rs, pss, last_stage);
1315         } else {
1316             res = ram_save_page(rs, pss, last_stage);
1317         }
1318 
1319         if (res < 0) {
1320             return res;
1321         }
1322         unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1323         if (unsentmap) {
1324             clear_bit(page, unsentmap);
1325         }
1326     }
1327 
1328     return res;
1329 }
1330 
1331 /**
1332  * ram_save_host_page: save a whole host page
1333  *
1334  * Starting at the page indicated by pss, send pages up to the end of the
1335  * current host page. It's valid for the initial page to point into the
1336  * middle of a host page, in which case the remainder of the host page is sent.
1337  * Only dirty target pages are sent. Note that the host page size may
1338  * be a huge page for this block.
1339  *
1340  * Returns the number of pages written or negative on error
1341  *
1342  * @rs: current RAM state
1344  * @pss: data about the page we want to send
1345  * @last_stage: if we are at the completion stage
1346  */
1347 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1348                               bool last_stage)
1349 {
1350     int tmppages, pages = 0;
1351     size_t pagesize_bits =
1352         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1353 
1354     do {
1355         tmppages = ram_save_target_page(rs, pss, last_stage);
1356         if (tmppages < 0) {
1357             return tmppages;
1358         }
1359 
1360         pages += tmppages;
1361         pss->page++;
1362     } while (pss->page & (pagesize_bits - 1));
1363 
1364     /* The offset we leave with is the last one we looked at */
1365     pss->page--;
1366     return pages;
1367 }
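
/*
 * Alignment sketch (illustrative only, hypothetical helper): host pages are
 * a power-of-two number of target pages, so with 2 MiB huge pages and 4 KiB
 * target pages pagesize_bits is 512 and the loop above stops as soon as
 * 'pss->page & (pagesize_bits - 1)' wraps back to zero, i.e. when the next
 * target page belongs to a different host page.
 */
static inline bool example_same_host_page(unsigned long page_a,
                                          unsigned long page_b,
                                          size_t pagesize_bits)
{
    return (page_a & ~(pagesize_bits - 1)) == (page_b & ~(pagesize_bits - 1));
}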
1368 
1369 /**
1370  * ram_find_and_save_block: finds a dirty page and sends it to f
1371  *
1372  * Called within an RCU critical section.
1373  *
1374  * Returns the number of pages written where zero means no dirty pages
1375  *
1376  * @rs: current RAM state
1377  * @last_stage: if we are at the completion stage
1378  *
1379  * On systems where host-page-size > target-page-size it will send all the
1380  * pages in a host page that are dirty.
1381  */
1382 
1383 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1384 {
1385     PageSearchStatus pss;
1386     int pages = 0;
1387     bool again, found;
1388 
1389     /* No dirty pages as there is zero RAM */
1390     if (!ram_bytes_total()) {
1391         return pages;
1392     }
1393 
1394     pss.block = rs->last_seen_block;
1395     pss.page = rs->last_page;
1396     pss.complete_round = false;
1397 
1398     if (!pss.block) {
1399         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1400     }
1401 
1402     do {
1403         again = true;
1404         found = get_queued_page(rs, &pss);
1405 
1406         if (!found) {
1407             /* priority queue empty, so just search for something dirty */
1408             found = find_dirty_block(rs, &pss, &again);
1409         }
1410 
1411         if (found) {
1412             pages = ram_save_host_page(rs, &pss, last_stage);
1413         }
1414     } while (!pages && again);
1415 
1416     rs->last_seen_block = pss.block;
1417     rs->last_page = pss.page;
1418 
1419     return pages;
1420 }
1421 
1422 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1423 {
1424     uint64_t pages = size / TARGET_PAGE_SIZE;
1425     RAMState *rs = &ram_state;
1426 
1427     if (zero) {
1428         rs->zero_pages += pages;
1429     } else {
1430         rs->norm_pages += pages;
1431         rs->bytes_transferred += size;
1432         qemu_update_position(f, size);
1433     }
1434 }
1435 
1436 uint64_t ram_bytes_total(void)
1437 {
1438     RAMBlock *block;
1439     uint64_t total = 0;
1440 
1441     rcu_read_lock();
1442     QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1443         total += block->used_length;
1444     rcu_read_unlock();
1445     return total;
1446 }
1447 
1448 void free_xbzrle_decoded_buf(void)
1449 {
1450     g_free(xbzrle_decoded_buf);
1451     xbzrle_decoded_buf = NULL;
1452 }
1453 
1454 static void migration_bitmap_free(RAMBitmap *bmap)
1455 {
1456     g_free(bmap->bmap);
1457     g_free(bmap->unsentmap);
1458     g_free(bmap);
1459 }
1460 
1461 static void ram_migration_cleanup(void *opaque)
1462 {
1463     RAMState *rs = opaque;
1464 
1465     /* The caller must hold the iothread lock or be in a BH, so there is
1466      * no writing race against this migration_bitmap
1467      */
1468     RAMBitmap *bitmap = rs->ram_bitmap;
1469     atomic_rcu_set(&rs->ram_bitmap, NULL);
1470     if (bitmap) {
1471         memory_global_dirty_log_stop();
1472         call_rcu(bitmap, migration_bitmap_free, rcu);
1473     }
1474 
1475     XBZRLE_cache_lock();
1476     if (XBZRLE.cache) {
1477         cache_fini(XBZRLE.cache);
1478         g_free(XBZRLE.encoded_buf);
1479         g_free(XBZRLE.current_buf);
1480         g_free(ZERO_TARGET_PAGE);
1481         XBZRLE.cache = NULL;
1482         XBZRLE.encoded_buf = NULL;
1483         XBZRLE.current_buf = NULL;
1484     }
1485     XBZRLE_cache_unlock();
1486 }
1487 
1488 static void ram_state_reset(RAMState *rs)
1489 {
1490     rs->last_seen_block = NULL;
1491     rs->last_sent_block = NULL;
1492     rs->last_page = 0;
1493     rs->last_version = ram_list.version;
1494     rs->ram_bulk_stage = true;
1495 }
1496 
1497 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1498 
1499 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1500 {
1501     RAMState *rs = &ram_state;
1502 
1503     /* called in the QEMU main thread, so there is
1504      * no writing race against this migration_bitmap
1505      */
1506     if (rs->ram_bitmap) {
1507         RAMBitmap *old_bitmap = rs->ram_bitmap, *bitmap;
1508         bitmap = g_new(RAMBitmap, 1);
1509         bitmap->bmap = bitmap_new(new);
1510 
1511         /* prevent bits in the migration_bitmap from being set
1512          * by migration_bitmap_sync_range() at the same time.
1513          * It is safe for migration if a migration_bitmap bit is cleared
1514          * at the same time.
1515          */
1516         qemu_mutex_lock(&rs->bitmap_mutex);
1517         bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1518         bitmap_set(bitmap->bmap, old, new - old);
1519 
1520         /* We don't have a way to safely extend the unsentmap
1521          * with RCU; so mark it as missing, and entry to postcopy
1522          * will fail.
1523          */
1524         bitmap->unsentmap = NULL;
1525 
1526         atomic_rcu_set(&rs->ram_bitmap, bitmap);
1527         qemu_mutex_unlock(&rs->bitmap_mutex);
1528         rs->migration_dirty_pages += new - old;
1529         call_rcu(old_bitmap, migration_bitmap_free, rcu);
1530     }
1531 }
1532 
1533 /*
1534  * 'expected' is the value you expect the bitmap mostly to be full
1535  * of; it won't bother printing lines that are all this value.
1536  * If 'todump' is null the migration bitmap is dumped.
1537  */
1538 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1539 {
1540     unsigned long ram_pages = last_ram_page();
1541     RAMState *rs = &ram_state;
1542     int64_t cur;
1543     int64_t linelen = 128;
1544     char linebuf[129];
1545 
1546     if (!todump) {
1547         todump = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1548     }
1549 
1550     for (cur = 0; cur < ram_pages; cur += linelen) {
1551         int64_t curb;
1552         bool found = false;
1553         /*
1554          * Last line; catch the case where the line length
1555          * is longer than remaining ram
1556          */
1557         if (cur + linelen > ram_pages) {
1558             linelen = ram_pages - cur;
1559         }
1560         for (curb = 0; curb < linelen; curb++) {
1561             bool thisbit = test_bit(cur + curb, todump);
1562             linebuf[curb] = thisbit ? '1' : '.';
1563             found = found || (thisbit != expected);
1564         }
1565         if (found) {
1566             linebuf[curb] = '\0';
1567             fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
1568         }
1569     }
1570 }
1571 
1572 /* **** functions for postcopy ***** */
1573 
1574 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1575 {
1576     RAMState *rs = &ram_state;
1577     struct RAMBlock *block;
1578     unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1579 
1580     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1581         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1582         unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1583         unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1584 
1585         while (run_start < range) {
1586             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1587             ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1588                               (run_end - run_start) << TARGET_PAGE_BITS);
1589             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1590         }
1591     }
1592 }
1593 
1594 /**
1595  * postcopy_send_discard_bm_ram: discard a RAMBlock
1596  *
1597  * Returns zero on success
1598  *
1599  * Callback from postcopy_each_ram_send_discard for each RAMBlock
1600  * Note: At this point the 'unsentmap' is the processed bitmap combined
1601  *       with the dirtymap; so a '1' means it's either dirty or unsent.
1602  *
1603  * @ms: current migration state
1604  * @pds: state for postcopy
1605  * @start: RAMBlock starting page
1606  * @length: RAMBlock size
1607  */
1608 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1609                                         PostcopyDiscardState *pds,
1610                                         unsigned long start,
1611                                         unsigned long length)
1612 {
1613     RAMState *rs = &ram_state;
1614     unsigned long end = start + length; /* one after the end */
1615     unsigned long current;
1616     unsigned long *unsentmap;
1617 
1618     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1619     for (current = start; current < end; ) {
1620         unsigned long one = find_next_bit(unsentmap, end, current);
1621 
1622         if (one <= end) {
1623             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1624             unsigned long discard_length;
1625 
1626             if (zero >= end) {
1627                 discard_length = end - one;
1628             } else {
1629                 discard_length = zero - one;
1630             }
1631             if (discard_length) {
1632                 postcopy_discard_send_range(ms, pds, one, discard_length);
1633             }
1634             current = one + discard_length;
1635         } else {
1636             current = one;
1637         }
1638     }
1639 
1640     return 0;
1641 }
1642 
1643 /**
1644  * postcopy_each_ram_send_discard: discard all RAMBlocks
1645  *
1646  * Returns 0 for success or negative for error
1647  *
1648  * Utility for the outgoing postcopy code.
1649  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
1650  *   passing it bitmap indexes and name.
1651  * (qemu_ram_foreach_block ends up passing unscaled lengths
1652  *  which would mean the postcopy code would have to deal with target pages)
1653  *
1654  * @ms: current migration state
1655  */
1656 static int postcopy_each_ram_send_discard(MigrationState *ms)
1657 {
1658     struct RAMBlock *block;
1659     int ret;
1660 
1661     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1662         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1663         PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1664                                                                first,
1665                                                                block->idstr);
1666 
1667         /*
1668          * Postcopy sends chunks of the bitmap over the wire, but at this
1669          * point it only needs page indexes, which avoids having any
1670          * target-page-specific code here.
1671          */
1672         ret = postcopy_send_discard_bm_ram(ms, pds, first,
1673                                     block->used_length >> TARGET_PAGE_BITS);
1674         postcopy_discard_send_finish(ms, pds);
1675         if (ret) {
1676             return ret;
1677         }
1678     }
1679 
1680     return 0;
1681 }
1682 
1683 /**
1684  * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
1685  *
1686  * Helper for postcopy_chunk_hostpages; it's called twice to
1687  * canonicalize the two bitmaps, which are similar but one is
1688  * inverted.
1689  *
1690  * Postcopy requires that all target pages in a hostpage are dirty or
1691  * clean, not a mix.  This function canonicalizes the bitmaps.
1692  *
1693  * @ms: current migration state
1694  * @unsent_pass: if true we need to canonicalize partially unsent host pages
1695  *               otherwise we need to canonicalize partially dirty host pages
1696  * @block: block that contains the page we want to canonicalize
1697  * @pds: state for postcopy
1698  */
1699 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1700                                           RAMBlock *block,
1701                                           PostcopyDiscardState *pds)
1702 {
1703     RAMState *rs = &ram_state;
1704     unsigned long *bitmap;
1705     unsigned long *unsentmap;
1706     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1707     unsigned long first = block->offset >> TARGET_PAGE_BITS;
1708     unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1709     unsigned long last = first + (len - 1);
1710     unsigned long run_start;
1711 
1712     if (block->page_size == TARGET_PAGE_SIZE) {
1713         /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1714         return;
1715     }
1716 
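         /*
          * Example: with 2MB huge pages and 4KB target pages host_ratio is
          * 512, so a run of dirty (or unsent) target pages that starts or
          * ends part-way through a host page is rounded out to host-page
          * boundaries and the whole host page is discarded/re-dirtied below.
          */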
1717     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1718     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1719 
1720     if (unsent_pass) {
1721         /* Find a sent page */
1722         run_start = find_next_zero_bit(unsentmap, last + 1, first);
1723     } else {
1724         /* Find a dirty page */
1725         run_start = find_next_bit(bitmap, last + 1, first);
1726     }
1727 
1728     while (run_start <= last) {
1729         bool do_fixup = false;
1730         unsigned long fixup_start_addr;
1731         unsigned long host_offset;
1732 
1733         /*
1734          * If the start of this run of pages is in the middle of a host
1735          * page, then we need to fixup this host page.
1736          */
1737         host_offset = run_start % host_ratio;
1738         if (host_offset) {
1739             do_fixup = true;
1740             run_start -= host_offset;
1741             fixup_start_addr = run_start;
1742             /* For the next pass */
1743             run_start = run_start + host_ratio;
1744         } else {
1745             /* Find the end of this run */
1746             unsigned long run_end;
1747             if (unsent_pass) {
1748                 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1749             } else {
1750                 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1751             }
1752             /*
1753              * If the end isn't at the start of a host page, then the
1754              * run doesn't finish at the end of a host page
1755              * and we need to discard.
1756              */
1757             host_offset = run_end % host_ratio;
1758             if (host_offset) {
1759                 do_fixup = true;
1760                 fixup_start_addr = run_end - host_offset;
1761                 /*
1762                  * This host page has gone, the next loop iteration starts
1763                  * from after the fixup
1764                  */
1765                 run_start = fixup_start_addr + host_ratio;
1766             } else {
1767                 /*
1768                  * No discards on this iteration, next loop starts from
1769                  * next sent/dirty page
1770                  */
1771                 run_start = run_end + 1;
1772             }
1773         }
1774 
1775         if (do_fixup) {
1776             unsigned long page;
1777 
1778             /* Tell the destination to discard this page */
1779             if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1780                 /* For the unsent_pass we:
1781                  *     discard partially sent pages
1782                  * For the !unsent_pass (dirty) we:
1783                  *     discard partially dirty pages that were sent
1784                  *     (any partially sent pages were already discarded
1785                  *     by the previous unsent_pass)
1786                  */
1787                 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1788                                             host_ratio);
1789             }
1790 
1791             /* Clean up the bitmap */
1792             for (page = fixup_start_addr;
1793                  page < fixup_start_addr + host_ratio; page++) {
1794                 /* All pages in this host page are now not sent */
1795                 set_bit(page, unsentmap);
1796 
1797                 /*
1798                  * Remark them as dirty, updating the count for any pages
1799                  * that weren't previously dirty.
1800                  */
1801                 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
1802             }
1803         }
1804 
1805         if (unsent_pass) {
1806             /* Find the next sent page for the next iteration */
1807             run_start = find_next_zero_bit(unsentmap, last + 1,
1808                                            run_start);
1809         } else {
1810             /* Find the next dirty page for the next iteration */
1811             run_start = find_next_bit(bitmap, last + 1, run_start);
1812         }
1813     }
1814 }
1815 
1816 /**
1817  * postcopy_chunk_hostpages: discard any partially sent host page
1818  *
1819  * Utility for the outgoing postcopy code.
1820  *
1821  * Discard any partially sent host-page size chunks, mark any partially
1822  * dirty host-page size chunks as all dirty.  In this case the host-page
1823  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1824  *
1825  * Returns zero on success
1826  *
1827  * @ms: current migration state
1828  */
1829 static int postcopy_chunk_hostpages(MigrationState *ms)
1830 {
1831     RAMState *rs = &ram_state;
1832     struct RAMBlock *block;
1833 
1834     /* Easiest way to make sure we don't resume in the middle of a host-page */
1835     rs->last_seen_block = NULL;
1836     rs->last_sent_block = NULL;
1837     rs->last_page = 0;
1838 
1839     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1840         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1841 
1842         PostcopyDiscardState *pds =
1843                          postcopy_discard_send_init(ms, first, block->idstr);
1844 
1845         /* First pass: Discard all partially sent host pages */
1846         postcopy_chunk_hostpages_pass(ms, true, block, pds);
1847         /*
1848          * Second pass: Ensure that all partially dirty host pages are made
1849          * fully dirty.
1850          */
1851         postcopy_chunk_hostpages_pass(ms, false, block, pds);
1852 
1853         postcopy_discard_send_finish(ms, pds);
1854     } /* ram_list loop */
1855 
1856     return 0;
1857 }
1858 
1859 /**
1860  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1861  *
1862  * Returns zero on success
1863  *
1864  * Transmit the set of pages to be discarded after precopy to the
1865  * target; these are pages that:
1866  *     a) Have been previously transmitted but are now dirty again
1867  *     b) Have never been transmitted; this ensures that any pages on
1868  *        the destination that have been mapped by background tasks get
1869  *        discarded (transparent huge pages are the specific concern)
1870  * Hopefully this is pretty sparse
1871  *
1872  * @ms: current migration state
1873  */
1874 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1875 {
1876     RAMState *rs = &ram_state;
1877     int ret;
1878     unsigned long *bitmap, *unsentmap;
1879 
1880     rcu_read_lock();
1881 
1882     /* This should be our last sync, the src is now paused */
1883     migration_bitmap_sync(rs);
1884 
1885     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1886     if (!unsentmap) {
1887         /* We don't have a safe way to resize the unsentmap, so
1888          * if the bitmap was resized it will be NULL at this
1889          * point.
1890          */
1891         error_report("migration ram resized during precopy phase");
1892         rcu_read_unlock();
1893         return -EINVAL;
1894     }
1895 
1896     /* Deal with TPS != HPS and huge pages */
1897     ret = postcopy_chunk_hostpages(ms);
1898     if (ret) {
1899         rcu_read_unlock();
1900         return ret;
1901     }
1902 
1903     /*
1904      * Update the unsentmap to be unsentmap = unsentmap | dirty
1905      */
1906     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1907     bitmap_or(unsentmap, unsentmap, bitmap, last_ram_page());
1908 
1909 
1910     trace_ram_postcopy_send_discard_bitmap();
1911 #ifdef DEBUG_POSTCOPY
1912     ram_debug_dump_bitmap(unsentmap, true);
1913 #endif
1914 
1915     ret = postcopy_each_ram_send_discard(ms);
1916     rcu_read_unlock();
1917 
1918     return ret;
1919 }
1920 
1921 /**
1922  * ram_discard_range: discard dirtied pages at the beginning of postcopy
1923  *
1924  * Returns zero on success
1925  *
1926  * @rbname: name of the RAMBlock of the request. NULL means the
1927  *          same as the last one.
1928  * @start: starting offset within the RAMBlock, in bytes
1929  * @length: number of bytes to discard
1930  */
1931 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
1932 {
1933     int ret = -1;
1934 
1935     trace_ram_discard_range(rbname, start, length);
1936 
1937     rcu_read_lock();
1938     RAMBlock *rb = qemu_ram_block_by_name(rbname);
1939 
1940     if (!rb) {
1941         error_report("ram_discard_range: Failed to find block '%s'", rbname);
1942         goto err;
1943     }
1944 
1945     ret = ram_block_discard_range(rb, start, length);
1946 
1947 err:
1948     rcu_read_unlock();
1949 
1950     return ret;
1951 }
1952 
1953 static int ram_state_init(RAMState *rs)
1954 {
1955     unsigned long ram_bitmap_pages;
1956 
1957     memset(rs, 0, sizeof(*rs));
1958     qemu_mutex_init(&rs->bitmap_mutex);
1959     qemu_mutex_init(&rs->src_page_req_mutex);
1960     QSIMPLEQ_INIT(&rs->src_page_requests);
1961 
1962     if (migrate_use_xbzrle()) {
1963         XBZRLE_cache_lock();
1964         ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1965         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1966                                   TARGET_PAGE_SIZE,
1967                                   TARGET_PAGE_SIZE);
1968         if (!XBZRLE.cache) {
1969             XBZRLE_cache_unlock();
1970             error_report("Error creating cache");
1971             return -1;
1972         }
1973         XBZRLE_cache_unlock();
1974 
1975         /* We prefer not to abort if there is no memory */
1976         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1977         if (!XBZRLE.encoded_buf) {
1978             error_report("Error allocating encoded_buf");
1979             return -1;
1980         }
1981 
1982         XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1983         if (!XBZRLE.current_buf) {
1984             error_report("Error allocating current_buf");
1985             g_free(XBZRLE.encoded_buf);
1986             XBZRLE.encoded_buf = NULL;
1987             return -1;
1988         }
1989     }
1990 
1991     /* For memory_global_dirty_log_start below.  */
1992     qemu_mutex_lock_iothread();
1993 
1994     qemu_mutex_lock_ramlist();
1995     rcu_read_lock();
1996     ram_state_reset(rs);
1997 
1998     rs->ram_bitmap = g_new0(RAMBitmap, 1);
1999     /* Skip setting bitmap if there is no RAM */
2000     if (ram_bytes_total()) {
2001         ram_bitmap_pages = last_ram_page();
2002         rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages);
2003         bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages);
2004 
2005         if (migrate_postcopy_ram()) {
2006             rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages);
2007             bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages);
2008         }
2009     }
2010 
2011     /*
2012      * Count the total number of pages used by ram blocks not including any
2013      * gaps due to alignment or unplugs.
2014      */
2015     rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2016 
2017     memory_global_dirty_log_start();
2018     migration_bitmap_sync(rs);
2019     qemu_mutex_unlock_ramlist();
2020     qemu_mutex_unlock_iothread();
2021     rcu_read_unlock();
2022 
2023     return 0;
2024 }
2025 
2026 /*
2027  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
2028  * long-running RCU critical section.  When RCU reclaims in the code
2029  * start to become numerous it will be necessary to reduce the
2030  * granularity of these critical sections.
2031  */
2032 
2033 /**
2034  * ram_save_setup: Setup RAM for migration
2035  *
2036  * Returns zero to indicate success and negative for error
2037  *
2038  * @f: QEMUFile where to send the data
2039  * @opaque: RAMState pointer
2040  */
2041 static int ram_save_setup(QEMUFile *f, void *opaque)
2042 {
2043     RAMState *rs = opaque;
2044     RAMBlock *block;
2045 
2046     /* migration has already set up the bitmap, reuse it. */
2047     if (!migration_in_colo_state()) {
2048         if (ram_state_init(rs) < 0) {
2049             return -1;
2050         }
2051     }
2052     rs->f = f;
2053 
2054     rcu_read_lock();
2055 
2056     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2057 
2058     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2059         qemu_put_byte(f, strlen(block->idstr));
2060         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2061         qemu_put_be64(f, block->used_length);
2062         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2063             qemu_put_be64(f, block->page_size);
2064         }
2065     }
2066 
2067     rcu_read_unlock();
2068 
2069     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2070     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2071 
2072     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2073 
2074     return 0;
2075 }
2076 
2077 /**
2078  * ram_save_iterate: iterative stage for migration
2079  *
2080  * Returns zero to indicate success and negative for error
2081  *
2082  * @f: QEMUFile where to send the data
2083  * @opaque: RAMState pointer
2084  */
2085 static int ram_save_iterate(QEMUFile *f, void *opaque)
2086 {
2087     RAMState *rs = opaque;
2088     int ret;
2089     int i;
2090     int64_t t0;
2091     int done = 0;
2092 
2093     rcu_read_lock();
2094     if (ram_list.version != rs->last_version) {
2095         ram_state_reset(rs);
2096     }
2097 
2098     /* Read version before ram_list.blocks */
2099     smp_rmb();
2100 
2101     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2102 
2103     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2104     i = 0;
2105     while ((ret = qemu_file_rate_limit(f)) == 0) {
2106         int pages;
2107 
2108         pages = ram_find_and_save_block(rs, false);
2109         /* no more pages to send */
2110         if (pages == 0) {
2111             done = 1;
2112             break;
2113         }
2114         rs->iterations++;
2115 
2116         /* we want to check in the 1st loop, just in case it was the 1st time
2117            and we had to sync the dirty bitmap.
2118            qemu_clock_get_ns() is a bit expensive, so we only check every
2119            few iterations
2120         */
2121         if ((i & 63) == 0) {
2122             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2123             if (t1 > MAX_WAIT) {
2124                 trace_ram_save_iterate_big_wait(t1, i);
2125                 break;
2126             }
2127         }
2128         i++;
2129     }
2130     flush_compressed_data(rs);
2131     rcu_read_unlock();
2132 
2133     /*
2134      * Must occur before EOS (or any QEMUFile operation)
2135      * because of RDMA protocol.
2136      */
2137     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2138 
2139     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2140     rs->bytes_transferred += 8;
2141 
2142     ret = qemu_file_get_error(f);
2143     if (ret < 0) {
2144         return ret;
2145     }
2146 
2147     return done;
2148 }
2149 
2150 /**
2151  * ram_save_complete: function called to send the remaining amount of RAM
2152  *
2153  * Returns zero to indicate success
2154  *
2155  * Called with the iothread lock held
2156  *
2157  * @f: QEMUFile where to send the data
2158  * @opaque: RAMState pointer
2159  */
2160 static int ram_save_complete(QEMUFile *f, void *opaque)
2161 {
2162     RAMState *rs = opaque;
2163 
2164     rcu_read_lock();
2165 
2166     if (!migration_in_postcopy()) {
2167         migration_bitmap_sync(rs);
2168     }
2169 
2170     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2171 
2172     /* try transferring iterative blocks of memory */
2173 
2174     /* flush all remaining blocks regardless of rate limiting */
2175     while (true) {
2176         int pages;
2177 
2178         pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2179         /* no more blocks to send */
2180         if (pages == 0) {
2181             break;
2182         }
2183     }
2184 
2185     flush_compressed_data(rs);
2186     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2187 
2188     rcu_read_unlock();
2189 
2190     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2191 
2192     return 0;
2193 }
2194 
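     /**
      * ram_save_pending: estimate how much data remains to be sent
      *
      * If we are not in postcopy and the estimate drops below max_size, the
      * dirty bitmap is re-synced (under the iothread lock) so the decision to
      * complete the migration is based on fresh data.  All remaining RAM is
      * accounted as postcopiable.
      *
      * @f: QEMUFile (unused here, kept for the SaveVMHandlers signature)
      * @opaque: RAMState pointer
      * @max_size: threshold below which the dirty bitmap is re-synced
      * @non_postcopiable_pending: amount that cannot be sent during postcopy
      * @postcopiable_pending: amount that can be sent during postcopy
      */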
2195 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2196                              uint64_t *non_postcopiable_pending,
2197                              uint64_t *postcopiable_pending)
2198 {
2199     RAMState *rs = opaque;
2200     uint64_t remaining_size;
2201 
2202     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2203 
2204     if (!migration_in_postcopy() &&
2205         remaining_size < max_size) {
2206         qemu_mutex_lock_iothread();
2207         rcu_read_lock();
2208         migration_bitmap_sync(rs);
2209         rcu_read_unlock();
2210         qemu_mutex_unlock_iothread();
2211         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2212     }
2213 
2214     /* We can do postcopy, and all the data is postcopiable */
2215     *postcopiable_pending += remaining_size;
2216 }
2217 
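     /*
      * load_xbzrle: read one XBZRLE-encoded page from the stream and decode
      * it against the current contents of 'host'.
      *
      * Returns 0 on success, -1 on a malformed header or decode failure.
      */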
2218 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2219 {
2220     unsigned int xh_len;
2221     int xh_flags;
2222     uint8_t *loaded_data;
2223 
2224     if (!xbzrle_decoded_buf) {
2225         xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2226     }
2227     loaded_data = xbzrle_decoded_buf;
2228 
2229     /* extract RLE header */
2230     xh_flags = qemu_get_byte(f);
2231     xh_len = qemu_get_be16(f);
2232 
2233     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2234         error_report("Failed to load XBZRLE page - wrong compression!");
2235         return -1;
2236     }
2237 
2238     if (xh_len > TARGET_PAGE_SIZE) {
2239         error_report("Failed to load XBZRLE page - len overflow!");
2240         return -1;
2241     }
2242     /* load data and decode */
2243     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2244 
2245     /* decode RLE */
2246     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2247                              TARGET_PAGE_SIZE) == -1) {
2248         error_report("Failed to load XBZRLE page - decode error!");
2249         return -1;
2250     }
2251 
2252     return 0;
2253 }
2254 
2255 /**
2256  * ram_block_from_stream: read a RAMBlock id from the migration stream
2257  *
2258  * Must be called from within an RCU critical section.
2259  *
2260  * Returns a pointer from within the RCU-protected ram_list.
2261  *
2262  * @f: QEMUFile where to read the data from
2263  * @flags: Page flags (mostly to see if it's a continuation of previous block)
2264  * @flags: Page flags (mostly to see if it's a continuation of the previous block)
2265 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2266 {
2267     static RAMBlock *block = NULL;
2268     char id[256];
2269     uint8_t len;
2270 
2271     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2272         if (!block) {
2273             error_report("Ack, bad migration stream!");
2274             return NULL;
2275         }
2276         return block;
2277     }
2278 
2279     len = qemu_get_byte(f);
2280     qemu_get_buffer(f, (uint8_t *)id, len);
2281     id[len] = 0;
2282 
2283     block = qemu_ram_block_by_name(id);
2284     if (!block) {
2285         error_report("Can't find block %s", id);
2286         return NULL;
2287     }
2288 
2289     return block;
2290 }
2291 
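     /*
      * Return the host address for 'offset' within 'block', or NULL if the
      * offset lies outside the block's used length.
      */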
2292 static inline void *host_from_ram_block_offset(RAMBlock *block,
2293                                                ram_addr_t offset)
2294 {
2295     if (!offset_in_ramblock(block, offset)) {
2296         return NULL;
2297     }
2298 
2299     return block->host + offset;
2300 }
2301 
2302 /**
2303  * ram_handle_compressed: handle the zero page case
2304  *
2305  * If a page (or a whole RDMA chunk) has been
2306  * determined to be zero, then zap it.
2307  *
2308  * @host: host address for the zero page
2309  * @ch: what the page is filled from.  We only support zero
2310  * @size: size of the zero page
2311  */
2312 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2313 {
2314     if (ch != 0 || !is_zero_range(host, size)) {
2315         memset(host, ch, size);
2316     }
2317 }
2318 
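     /*
      * Decompression worker thread: wait for a page handed over by
      * decompress_data_with_multi_threads(), inflate it straight into guest
      * memory, then signal decomp_done_cond so the slot can be reused.
      */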
2319 static void *do_data_decompress(void *opaque)
2320 {
2321     DecompressParam *param = opaque;
2322     unsigned long pagesize;
2323     uint8_t *des;
2324     int len;
2325 
2326     qemu_mutex_lock(&param->mutex);
2327     while (!param->quit) {
2328         if (param->des) {
2329             des = param->des;
2330             len = param->len;
2331             param->des = 0;
2332             qemu_mutex_unlock(&param->mutex);
2333 
2334             pagesize = TARGET_PAGE_SIZE;
2335             /* uncompress() may fail in some cases, especially when the
2336              * page was dirtied while being compressed; that's not a problem
2337              * because the dirty page will be retransferred and uncompress()
2338              * won't break the data in other pages.
2339              */
2340             uncompress((Bytef *)des, &pagesize,
2341                        (const Bytef *)param->compbuf, len);
2342 
2343             qemu_mutex_lock(&decomp_done_lock);
2344             param->done = true;
2345             qemu_cond_signal(&decomp_done_cond);
2346             qemu_mutex_unlock(&decomp_done_lock);
2347 
2348             qemu_mutex_lock(&param->mutex);
2349         } else {
2350             qemu_cond_wait(&param->cond, &param->mutex);
2351         }
2352     }
2353     qemu_mutex_unlock(&param->mutex);
2354 
2355     return NULL;
2356 }
2357 
2358 static void wait_for_decompress_done(void)
2359 {
2360     int idx, thread_count;
2361 
2362     if (!migrate_use_compression()) {
2363         return;
2364     }
2365 
2366     thread_count = migrate_decompress_threads();
2367     qemu_mutex_lock(&decomp_done_lock);
2368     for (idx = 0; idx < thread_count; idx++) {
2369         while (!decomp_param[idx].done) {
2370             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2371         }
2372     }
2373     qemu_mutex_unlock(&decomp_done_lock);
2374 }
2375 
2376 void migrate_decompress_threads_create(void)
2377 {
2378     int i, thread_count;
2379 
2380     thread_count = migrate_decompress_threads();
2381     decompress_threads = g_new0(QemuThread, thread_count);
2382     decomp_param = g_new0(DecompressParam, thread_count);
2383     qemu_mutex_init(&decomp_done_lock);
2384     qemu_cond_init(&decomp_done_cond);
2385     for (i = 0; i < thread_count; i++) {
2386         qemu_mutex_init(&decomp_param[i].mutex);
2387         qemu_cond_init(&decomp_param[i].cond);
2388         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2389         decomp_param[i].done = true;
2390         decomp_param[i].quit = false;
2391         qemu_thread_create(decompress_threads + i, "decompress",
2392                            do_data_decompress, decomp_param + i,
2393                            QEMU_THREAD_JOINABLE);
2394     }
2395 }
2396 
2397 void migrate_decompress_threads_join(void)
2398 {
2399     int i, thread_count;
2400 
2401     thread_count = migrate_decompress_threads();
2402     for (i = 0; i < thread_count; i++) {
2403         qemu_mutex_lock(&decomp_param[i].mutex);
2404         decomp_param[i].quit = true;
2405         qemu_cond_signal(&decomp_param[i].cond);
2406         qemu_mutex_unlock(&decomp_param[i].mutex);
2407     }
2408     for (i = 0; i < thread_count; i++) {
2409         qemu_thread_join(decompress_threads + i);
2410         qemu_mutex_destroy(&decomp_param[i].mutex);
2411         qemu_cond_destroy(&decomp_param[i].cond);
2412         g_free(decomp_param[i].compbuf);
2413     }
2414     g_free(decompress_threads);
2415     g_free(decomp_param);
2416     decompress_threads = NULL;
2417     decomp_param = NULL;
2418 }
2419 
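     /*
      * Hand one compressed page to an idle decompression thread; if every
      * thread is busy, wait on decomp_done_cond until one becomes free.
      */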
2420 static void decompress_data_with_multi_threads(QEMUFile *f,
2421                                                void *host, int len)
2422 {
2423     int idx, thread_count;
2424 
2425     thread_count = migrate_decompress_threads();
2426     qemu_mutex_lock(&decomp_done_lock);
2427     while (true) {
2428         for (idx = 0; idx < thread_count; idx++) {
2429             if (decomp_param[idx].done) {
2430                 decomp_param[idx].done = false;
2431                 qemu_mutex_lock(&decomp_param[idx].mutex);
2432                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2433                 decomp_param[idx].des = host;
2434                 decomp_param[idx].len = len;
2435                 qemu_cond_signal(&decomp_param[idx].cond);
2436                 qemu_mutex_unlock(&decomp_param[idx].mutex);
2437                 break;
2438             }
2439         }
2440         if (idx < thread_count) {
2441             break;
2442         } else {
2443             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2444         }
2445     }
2446     qemu_mutex_unlock(&decomp_done_lock);
2447 }
2448 
2449 /**
2450  * ram_postcopy_incoming_init: allocate postcopy data structures
2451  *
2452  * Returns 0 on success and negative on error
2453  *
2454  * @mis: current migration incoming state
2455  *
2456  * Allocate the data structures etc. needed by incoming migration with
2457  * postcopy-ram; postcopy-ram's similarly named
2458  * postcopy_ram_incoming_init() does the work.
2459  */
2460 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2461 {
2462     unsigned long ram_pages = last_ram_page();
2463 
2464     return postcopy_ram_incoming_init(mis, ram_pages);
2465 }
2466 
2467 /**
2468  * ram_load_postcopy: load a page in postcopy case
2469  *
2470  * Returns 0 for success or -errno in case of error
2471  *
2472  * Called in postcopy mode by ram_load().
2473  * rcu_read_lock is taken prior to this being called.
2474  *
2475  * @f: QEMUFile to receive the data from
2476  */
2477 static int ram_load_postcopy(QEMUFile *f)
2478 {
2479     int flags = 0, ret = 0;
2480     bool place_needed = false;
2481     bool matching_page_sizes = false;
2482     MigrationIncomingState *mis = migration_incoming_get_current();
2483     /* Temporary page that is later 'placed' */
2484     void *postcopy_host_page = postcopy_get_tmp_page(mis);
2485     void *last_host = NULL;
2486     bool all_zero = false;
2487 
2488     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2489         ram_addr_t addr;
2490         void *host = NULL;
2491         void *page_buffer = NULL;
2492         void *place_source = NULL;
2493         RAMBlock *block = NULL;
2494         uint8_t ch;
2495 
2496         addr = qemu_get_be64(f);
2497         flags = addr & ~TARGET_PAGE_MASK;
2498         addr &= TARGET_PAGE_MASK;
2499 
2500         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2501         place_needed = false;
2502         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2503             block = ram_block_from_stream(f, flags);
2504 
2505             host = host_from_ram_block_offset(block, addr);
2506             if (!host) {
2507                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2508                 ret = -EINVAL;
2509                 break;
2510             }
2511             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2512             /*
2513              * Postcopy requires that we place whole host pages atomically;
2514              * these may be huge pages for RAMBlocks that are backed by
2515              * hugetlbfs.
2516              * To make it atomic, the data is read into a temporary page
2517              * that's moved into place later.
2518              * The migration protocol uses, possibly smaller, target pages;
2519              * however, the source ensures it always sends all the components
2520              * of a host page in order.
2521              */
2522             page_buffer = postcopy_host_page +
2523                           ((uintptr_t)host & (block->page_size - 1));
2524             /* If all TP are zero then we can optimise the place */
2525             if (!((uintptr_t)host & (block->page_size - 1))) {
2526                 all_zero = true;
2527             } else {
2528                 /* not the 1st TP within the HP */
2529                 if (host != (last_host + TARGET_PAGE_SIZE)) {
2530                     error_report("Non-sequential target page %p/%p",
2531                                   host, last_host);
2532                     ret = -EINVAL;
2533                     break;
2534                 }
2535             }
2536 
2537 
2538             /*
2539              * If it's the last part of a host page then we place the host
2540              * page
2541              */
2542             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2543                                      (block->page_size - 1)) == 0;
2544             place_source = postcopy_host_page;
2545         }
2546         last_host = host;
2547 
2548         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2549         case RAM_SAVE_FLAG_COMPRESS:
2550             ch = qemu_get_byte(f);
2551             memset(page_buffer, ch, TARGET_PAGE_SIZE);
2552             if (ch) {
2553                 all_zero = false;
2554             }
2555             break;
2556 
2557         case RAM_SAVE_FLAG_PAGE:
2558             all_zero = false;
2559             if (!place_needed || !matching_page_sizes) {
2560                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2561             } else {
2562                 /* Avoids the qemu_file copy during postcopy, which is
2563                  * going to do a copy later; can only do it when we
2564                  * do this read in one go (matching page sizes)
2565                  */
2566                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2567                                          TARGET_PAGE_SIZE);
2568             }
2569             break;
2570         case RAM_SAVE_FLAG_EOS:
2571             /* normal exit */
2572             break;
2573         default:
2574             error_report("Unknown combination of migration flags: %#x"
2575                          " (postcopy mode)", flags);
2576             ret = -EINVAL;
2577         }
2578 
2579         if (place_needed) {
2580             /* This gets called at the last target page in the host page */
2581             void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2582 
2583             if (all_zero) {
2584                 ret = postcopy_place_page_zero(mis, place_dest,
2585                                                block->page_size);
2586             } else {
2587                 ret = postcopy_place_page(mis, place_dest,
2588                                           place_source, block->page_size);
2589             }
2590         }
2591         if (!ret) {
2592             ret = qemu_file_get_error(f);
2593         }
2594     }
2595 
2596     return ret;
2597 }
2598 
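     /*
      * ram_load: entry point on the destination for the "ram" section.
      * Once the incoming side has reached the postcopy listening state the
      * work is done by ram_load_postcopy(); otherwise the precopy stream is
      * processed flag by flag until RAM_SAVE_FLAG_EOS.
      */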
2599 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2600 {
2601     int flags = 0, ret = 0;
2602     static uint64_t seq_iter;
2603     int len = 0;
2604     /*
2605      * If the system is running in postcopy mode, page inserts to host memory must
2606      * be atomic
2607      */
2608     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2609     /* ADVISE is earlier; it shows the source has the postcopy capability on */
2610     bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2611 
2612     seq_iter++;
2613 
2614     if (version_id != 4) {
2615         ret = -EINVAL;
2616     }
2617 
2618     /* This RCU critical section can be very long running.
2619      * When RCU reclaims in the code start to become numerous,
2620      * it will be necessary to reduce the granularity of this
2621      * critical section.
2622      */
2623     rcu_read_lock();
2624 
2625     if (postcopy_running) {
2626         ret = ram_load_postcopy(f);
2627     }
2628 
2629     while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2630         ram_addr_t addr, total_ram_bytes;
2631         void *host = NULL;
2632         uint8_t ch;
2633 
2634         addr = qemu_get_be64(f);
2635         flags = addr & ~TARGET_PAGE_MASK;
2636         addr &= TARGET_PAGE_MASK;
2637 
2638         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2639                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2640             RAMBlock *block = ram_block_from_stream(f, flags);
2641 
2642             host = host_from_ram_block_offset(block, addr);
2643             if (!host) {
2644                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2645                 ret = -EINVAL;
2646                 break;
2647             }
2648         }
2649 
2650         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2651         case RAM_SAVE_FLAG_MEM_SIZE:
2652             /* Synchronize RAM block list */
2653             total_ram_bytes = addr;
2654             while (!ret && total_ram_bytes) {
2655                 RAMBlock *block;
2656                 char id[256];
2657                 ram_addr_t length;
2658 
2659                 len = qemu_get_byte(f);
2660                 qemu_get_buffer(f, (uint8_t *)id, len);
2661                 id[len] = 0;
2662                 length = qemu_get_be64(f);
2663 
2664                 block = qemu_ram_block_by_name(id);
2665                 if (block) {
2666                     if (length != block->used_length) {
2667                         Error *local_err = NULL;
2668 
2669                         ret = qemu_ram_resize(block, length,
2670                                               &local_err);
2671                         if (local_err) {
2672                             error_report_err(local_err);
2673                         }
2674                     }
2675                     /* For postcopy we need to check hugepage sizes match */
2676                     if (postcopy_advised &&
2677                         block->page_size != qemu_host_page_size) {
2678                         uint64_t remote_page_size = qemu_get_be64(f);
2679                         if (remote_page_size != block->page_size) {
2680                             error_report("Mismatched RAM page size %s "
2681                                          "(local) %zd != %" PRId64,
2682                                          id, block->page_size,
2683                                          remote_page_size);
2684                             ret = -EINVAL;
2685                         }
2686                     }
2687                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2688                                           block->idstr);
2689                 } else {
2690                     error_report("Unknown ramblock \"%s\", cannot "
2691                                  "accept migration", id);
2692                     ret = -EINVAL;
2693                 }
2694 
2695                 total_ram_bytes -= length;
2696             }
2697             break;
2698 
2699         case RAM_SAVE_FLAG_COMPRESS:
2700             ch = qemu_get_byte(f);
2701             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2702             break;
2703 
2704         case RAM_SAVE_FLAG_PAGE:
2705             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2706             break;
2707 
2708         case RAM_SAVE_FLAG_COMPRESS_PAGE:
2709             len = qemu_get_be32(f);
2710             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2711                 error_report("Invalid compressed data length: %d", len);
2712                 ret = -EINVAL;
2713                 break;
2714             }
2715             decompress_data_with_multi_threads(f, host, len);
2716             break;
2717 
2718         case RAM_SAVE_FLAG_XBZRLE:
2719             if (load_xbzrle(f, addr, host) < 0) {
2720                 error_report("Failed to decompress XBZRLE page at "
2721                              RAM_ADDR_FMT, addr);
2722                 ret = -EINVAL;
2723                 break;
2724             }
2725             break;
2726         case RAM_SAVE_FLAG_EOS:
2727             /* normal exit */
2728             break;
2729         default:
2730             if (flags & RAM_SAVE_FLAG_HOOK) {
2731                 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2732             } else {
2733                 error_report("Unknown combination of migration flags: %#x",
2734                              flags);
2735                 ret = -EINVAL;
2736             }
2737         }
2738         if (!ret) {
2739             ret = qemu_file_get_error(f);
2740         }
2741     }
2742 
2743     wait_for_decompress_done();
2744     rcu_read_unlock();
2745     trace_ram_load_complete(ret, seq_iter);
2746     return ret;
2747 }
2748 
2749 static SaveVMHandlers savevm_ram_handlers = {
2750     .save_live_setup = ram_save_setup,
2751     .save_live_iterate = ram_save_iterate,
2752     .save_live_complete_postcopy = ram_save_complete,
2753     .save_live_complete_precopy = ram_save_complete,
2754     .save_live_pending = ram_save_pending,
2755     .load_state = ram_load,
2756     .cleanup = ram_migration_cleanup,
2757 };
2758 
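     /*
      * Register the "ram" section as a live-migration handler; the section
      * version (4) must match the version_id check at the top of ram_load().
      */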
2759 void ram_mig_init(void)
2760 {
2761     qemu_mutex_init(&XBZRLE.lock);
2762     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
2763 }
2764