xref: /openbmc/qemu/migration/ram.c (revision 460b6c8e581aa06b86f59eebd9e52edfe7adf417)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2011-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <quintela@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 #include "qemu/osdep.h"
29 #include "cpu.h"
30 #include <zlib.h>
31 #include "qapi-event.h"
32 #include "qemu/cutils.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "qemu/main-loop.h"
36 #include "xbzrle.h"
37 #include "ram.h"
38 #include "migration.h"
39 #include "migration/register.h"
40 #include "migration/misc.h"
41 #include "qemu-file.h"
42 #include "postcopy-ram.h"
43 #include "migration/page_cache.h"
44 #include "qemu/error-report.h"
45 #include "trace.h"
46 #include "exec/ram_addr.h"
47 #include "qemu/rcu_queue.h"
48 #include "migration/colo.h"
49 
50 /***********************************************************/
51 /* ram save/restore */
52 
53 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
54  * worked for pages that where filled with the same char.  We switched
55  * it to only search for the zero value.  And to avoid confusion with
56  * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
57  */
58 
59 #define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
60 #define RAM_SAVE_FLAG_ZERO     0x02
61 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
62 #define RAM_SAVE_FLAG_PAGE     0x08
63 #define RAM_SAVE_FLAG_EOS      0x10
64 #define RAM_SAVE_FLAG_CONTINUE 0x20
65 #define RAM_SAVE_FLAG_XBZRLE   0x40
66 /* 0x80 is reserved in migration.h start with 0x100 next */
67 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
68 
69 static inline bool is_zero_range(uint8_t *p, uint64_t size)
70 {
71     return buffer_is_zero(p, size);
72 }
73 
74 XBZRLECacheStats xbzrle_counters;
75 
76 /* struct contains XBZRLE cache and a static page
77    used by the compression */
78 static struct {
79     /* buffer used for XBZRLE encoding */
80     uint8_t *encoded_buf;
81     /* buffer for storing page content */
82     uint8_t *current_buf;
83     /* Cache for XBZRLE, Protected by lock. */
84     PageCache *cache;
85     QemuMutex lock;
86     /* it will store a page full of zeros */
87     uint8_t *zero_target_page;
88     /* buffer used for XBZRLE decoding */
89     uint8_t *decoded_buf;
90 } XBZRLE;
91 
92 static void XBZRLE_cache_lock(void)
93 {
94     if (migrate_use_xbzrle())
95         qemu_mutex_lock(&XBZRLE.lock);
96 }
97 
98 static void XBZRLE_cache_unlock(void)
99 {
100     if (migrate_use_xbzrle())
101         qemu_mutex_unlock(&XBZRLE.lock);
102 }
103 
104 /**
105  * xbzrle_cache_resize: resize the xbzrle cache
106  *
107  * This function is called from qmp_migrate_set_cache_size in main
108  * thread, possibly while a migration is in progress.  A running
109  * migration may be using the cache and might finish during this call,
110  * hence changes to the cache are protected by XBZRLE.lock().
111  *
112  * Returns the new_size or negative in case of error.
113  *
114  * @new_size: new cache size
115  */
116 int64_t xbzrle_cache_resize(int64_t new_size)
117 {
118     PageCache *new_cache;
119     int64_t ret;
120 
121     if (new_size < TARGET_PAGE_SIZE) {
122         return -1;
123     }
124 
125     XBZRLE_cache_lock();
126 
127     if (XBZRLE.cache != NULL) {
128         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
129             goto out_new_size;
130         }
131         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
132                                         TARGET_PAGE_SIZE);
133         if (!new_cache) {
134             error_report("Error creating cache");
135             ret = -1;
136             goto out;
137         }
138 
139         cache_fini(XBZRLE.cache);
140         XBZRLE.cache = new_cache;
141     }
142 
143 out_new_size:
144     ret = pow2floor(new_size);
145 out:
146     XBZRLE_cache_unlock();
147     return ret;
148 }
149 
150 /*
151  * An outstanding page request, on the source, having been received
152  * and queued
153  */
154 struct RAMSrcPageRequest {
155     RAMBlock *rb;
156     hwaddr    offset;
157     hwaddr    len;
158 
159     QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
160 };
161 
162 /* State of RAM for migration */
163 struct RAMState {
164     /* QEMUFile used for this migration */
165     QEMUFile *f;
166     /* Last block that we have visited searching for dirty pages */
167     RAMBlock *last_seen_block;
168     /* Last block from where we have sent data */
169     RAMBlock *last_sent_block;
170     /* Last dirty target page we have sent */
171     ram_addr_t last_page;
172     /* last ram version we have seen */
173     uint32_t last_version;
174     /* We are in the first round */
175     bool ram_bulk_stage;
176     /* How many times we have dirty too many pages */
177     int dirty_rate_high_cnt;
178     /* these variables are used for bitmap sync */
179     /* last time we did a full bitmap_sync */
180     int64_t time_last_bitmap_sync;
181     /* bytes transferred at start_time */
182     uint64_t bytes_xfer_prev;
183     /* number of dirty pages since start_time */
184     uint64_t num_dirty_pages_period;
185     /* xbzrle misses since the beginning of the period */
186     uint64_t xbzrle_cache_miss_prev;
187     /* number of iterations at the beginning of period */
188     uint64_t iterations_prev;
189     /* Iterations since start */
190     uint64_t iterations;
191     /* number of dirty bits in the bitmap */
192     uint64_t migration_dirty_pages;
193     /* protects modification of the bitmap */
194     QemuMutex bitmap_mutex;
195     /* The RAMBlock used in the last src_page_requests */
196     RAMBlock *last_req_rb;
197     /* Queue of outstanding page requests from the destination */
198     QemuMutex src_page_req_mutex;
199     QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
200 };
201 typedef struct RAMState RAMState;
202 
203 static RAMState *ram_state;
204 
205 uint64_t ram_bytes_remaining(void)
206 {
207     return ram_state->migration_dirty_pages * TARGET_PAGE_SIZE;
208 }
209 
210 MigrationStats ram_counters;
211 
212 /* used by the search for pages to send */
213 struct PageSearchStatus {
214     /* Current block being searched */
215     RAMBlock    *block;
216     /* Current page to search from */
217     unsigned long page;
218     /* Set once we wrap around */
219     bool         complete_round;
220 };
221 typedef struct PageSearchStatus PageSearchStatus;
222 
223 struct CompressParam {
224     bool done;
225     bool quit;
226     QEMUFile *file;
227     QemuMutex mutex;
228     QemuCond cond;
229     RAMBlock *block;
230     ram_addr_t offset;
231 };
232 typedef struct CompressParam CompressParam;
233 
234 struct DecompressParam {
235     bool done;
236     bool quit;
237     QemuMutex mutex;
238     QemuCond cond;
239     void *des;
240     uint8_t *compbuf;
241     int len;
242 };
243 typedef struct DecompressParam DecompressParam;
244 
245 static CompressParam *comp_param;
246 static QemuThread *compress_threads;
247 /* comp_done_cond is used to wake up the migration thread when
248  * one of the compression threads has finished the compression.
249  * comp_done_lock is used to co-work with comp_done_cond.
250  */
251 static QemuMutex comp_done_lock;
252 static QemuCond comp_done_cond;
253 /* The empty QEMUFileOps will be used by file in CompressParam */
254 static const QEMUFileOps empty_ops = { };
255 
256 static DecompressParam *decomp_param;
257 static QemuThread *decompress_threads;
258 static QemuMutex decomp_done_lock;
259 static QemuCond decomp_done_cond;
260 
261 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
262                                 ram_addr_t offset);
263 
264 static void *do_data_compress(void *opaque)
265 {
266     CompressParam *param = opaque;
267     RAMBlock *block;
268     ram_addr_t offset;
269 
270     qemu_mutex_lock(&param->mutex);
271     while (!param->quit) {
272         if (param->block) {
273             block = param->block;
274             offset = param->offset;
275             param->block = NULL;
276             qemu_mutex_unlock(&param->mutex);
277 
278             do_compress_ram_page(param->file, block, offset);
279 
280             qemu_mutex_lock(&comp_done_lock);
281             param->done = true;
282             qemu_cond_signal(&comp_done_cond);
283             qemu_mutex_unlock(&comp_done_lock);
284 
285             qemu_mutex_lock(&param->mutex);
286         } else {
287             qemu_cond_wait(&param->cond, &param->mutex);
288         }
289     }
290     qemu_mutex_unlock(&param->mutex);
291 
292     return NULL;
293 }
294 
295 static inline void terminate_compression_threads(void)
296 {
297     int idx, thread_count;
298 
299     thread_count = migrate_compress_threads();
300 
301     for (idx = 0; idx < thread_count; idx++) {
302         qemu_mutex_lock(&comp_param[idx].mutex);
303         comp_param[idx].quit = true;
304         qemu_cond_signal(&comp_param[idx].cond);
305         qemu_mutex_unlock(&comp_param[idx].mutex);
306     }
307 }
308 
309 static void compress_threads_save_cleanup(void)
310 {
311     int i, thread_count;
312 
313     if (!migrate_use_compression()) {
314         return;
315     }
316     terminate_compression_threads();
317     thread_count = migrate_compress_threads();
318     for (i = 0; i < thread_count; i++) {
319         qemu_thread_join(compress_threads + i);
320         qemu_fclose(comp_param[i].file);
321         qemu_mutex_destroy(&comp_param[i].mutex);
322         qemu_cond_destroy(&comp_param[i].cond);
323     }
324     qemu_mutex_destroy(&comp_done_lock);
325     qemu_cond_destroy(&comp_done_cond);
326     g_free(compress_threads);
327     g_free(comp_param);
328     compress_threads = NULL;
329     comp_param = NULL;
330 }
331 
332 static void compress_threads_save_setup(void)
333 {
334     int i, thread_count;
335 
336     if (!migrate_use_compression()) {
337         return;
338     }
339     thread_count = migrate_compress_threads();
340     compress_threads = g_new0(QemuThread, thread_count);
341     comp_param = g_new0(CompressParam, thread_count);
342     qemu_cond_init(&comp_done_cond);
343     qemu_mutex_init(&comp_done_lock);
344     for (i = 0; i < thread_count; i++) {
345         /* comp_param[i].file is just used as a dummy buffer to save data,
346          * set its ops to empty.
347          */
348         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
349         comp_param[i].done = true;
350         comp_param[i].quit = false;
351         qemu_mutex_init(&comp_param[i].mutex);
352         qemu_cond_init(&comp_param[i].cond);
353         qemu_thread_create(compress_threads + i, "compress",
354                            do_data_compress, comp_param + i,
355                            QEMU_THREAD_JOINABLE);
356     }
357 }
358 
359 /* Multiple fd's */
360 
361 struct MultiFDSendParams {
362     uint8_t id;
363     char *name;
364     QemuThread thread;
365     QemuSemaphore sem;
366     QemuMutex mutex;
367     bool quit;
368 };
369 typedef struct MultiFDSendParams MultiFDSendParams;
370 
371 struct {
372     MultiFDSendParams *params;
373     /* number of created threads */
374     int count;
375 } *multifd_send_state;
376 
377 static void terminate_multifd_send_threads(Error *errp)
378 {
379     int i;
380 
381     for (i = 0; i < multifd_send_state->count; i++) {
382         MultiFDSendParams *p = &multifd_send_state->params[i];
383 
384         qemu_mutex_lock(&p->mutex);
385         p->quit = true;
386         qemu_sem_post(&p->sem);
387         qemu_mutex_unlock(&p->mutex);
388     }
389 }
390 
391 int multifd_save_cleanup(Error **errp)
392 {
393     int i;
394     int ret = 0;
395 
396     if (!migrate_use_multifd()) {
397         return 0;
398     }
399     terminate_multifd_send_threads(NULL);
400     for (i = 0; i < multifd_send_state->count; i++) {
401         MultiFDSendParams *p = &multifd_send_state->params[i];
402 
403         qemu_thread_join(&p->thread);
404         qemu_mutex_destroy(&p->mutex);
405         qemu_sem_destroy(&p->sem);
406         g_free(p->name);
407         p->name = NULL;
408     }
409     g_free(multifd_send_state->params);
410     multifd_send_state->params = NULL;
411     g_free(multifd_send_state);
412     multifd_send_state = NULL;
413     return ret;
414 }
415 
416 static void *multifd_send_thread(void *opaque)
417 {
418     MultiFDSendParams *p = opaque;
419 
420     while (true) {
421         qemu_mutex_lock(&p->mutex);
422         if (p->quit) {
423             qemu_mutex_unlock(&p->mutex);
424             break;
425         }
426         qemu_mutex_unlock(&p->mutex);
427         qemu_sem_wait(&p->sem);
428     }
429 
430     return NULL;
431 }
432 
433 int multifd_save_setup(void)
434 {
435     int thread_count;
436     uint8_t i;
437 
438     if (!migrate_use_multifd()) {
439         return 0;
440     }
441     thread_count = migrate_multifd_channels();
442     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
443     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
444     multifd_send_state->count = 0;
445     for (i = 0; i < thread_count; i++) {
446         MultiFDSendParams *p = &multifd_send_state->params[i];
447 
448         qemu_mutex_init(&p->mutex);
449         qemu_sem_init(&p->sem, 0);
450         p->quit = false;
451         p->id = i;
452         p->name = g_strdup_printf("multifdsend_%d", i);
453         qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
454                            QEMU_THREAD_JOINABLE);
455 
456         multifd_send_state->count++;
457     }
458     return 0;
459 }
460 
461 struct MultiFDRecvParams {
462     uint8_t id;
463     char *name;
464     QemuThread thread;
465     QemuSemaphore sem;
466     QemuMutex mutex;
467     bool quit;
468 };
469 typedef struct MultiFDRecvParams MultiFDRecvParams;
470 
471 struct {
472     MultiFDRecvParams *params;
473     /* number of created threads */
474     int count;
475 } *multifd_recv_state;
476 
477 static void terminate_multifd_recv_threads(Error *errp)
478 {
479     int i;
480 
481     for (i = 0; i < multifd_recv_state->count; i++) {
482         MultiFDRecvParams *p = &multifd_recv_state->params[i];
483 
484         qemu_mutex_lock(&p->mutex);
485         p->quit = true;
486         qemu_sem_post(&p->sem);
487         qemu_mutex_unlock(&p->mutex);
488     }
489 }
490 
491 int multifd_load_cleanup(Error **errp)
492 {
493     int i;
494     int ret = 0;
495 
496     if (!migrate_use_multifd()) {
497         return 0;
498     }
499     terminate_multifd_recv_threads(NULL);
500     for (i = 0; i < multifd_recv_state->count; i++) {
501         MultiFDRecvParams *p = &multifd_recv_state->params[i];
502 
503         qemu_thread_join(&p->thread);
504         qemu_mutex_destroy(&p->mutex);
505         qemu_sem_destroy(&p->sem);
506         g_free(p->name);
507         p->name = NULL;
508     }
509     g_free(multifd_recv_state->params);
510     multifd_recv_state->params = NULL;
511     g_free(multifd_recv_state);
512     multifd_recv_state = NULL;
513 
514     return ret;
515 }
516 
517 static void *multifd_recv_thread(void *opaque)
518 {
519     MultiFDRecvParams *p = opaque;
520 
521     while (true) {
522         qemu_mutex_lock(&p->mutex);
523         if (p->quit) {
524             qemu_mutex_unlock(&p->mutex);
525             break;
526         }
527         qemu_mutex_unlock(&p->mutex);
528         qemu_sem_wait(&p->sem);
529     }
530 
531     return NULL;
532 }
533 
534 int multifd_load_setup(void)
535 {
536     int thread_count;
537     uint8_t i;
538 
539     if (!migrate_use_multifd()) {
540         return 0;
541     }
542     thread_count = migrate_multifd_channels();
543     multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
544     multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
545     multifd_recv_state->count = 0;
546     for (i = 0; i < thread_count; i++) {
547         MultiFDRecvParams *p = &multifd_recv_state->params[i];
548 
549         qemu_mutex_init(&p->mutex);
550         qemu_sem_init(&p->sem, 0);
551         p->quit = false;
552         p->id = i;
553         p->name = g_strdup_printf("multifdrecv_%d", i);
554         qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
555                            QEMU_THREAD_JOINABLE);
556         multifd_recv_state->count++;
557     }
558     return 0;
559 }
560 
561 /**
562  * save_page_header: write page header to wire
563  *
564  * If this is the 1st block, it also writes the block identification
565  *
566  * Returns the number of bytes written
567  *
568  * @f: QEMUFile where to send the data
569  * @block: block that contains the page we want to send
570  * @offset: offset inside the block for the page
571  *          in the lower bits, it contains flags
572  */
573 static size_t save_page_header(RAMState *rs, QEMUFile *f,  RAMBlock *block,
574                                ram_addr_t offset)
575 {
576     size_t size, len;
577 
578     if (block == rs->last_sent_block) {
579         offset |= RAM_SAVE_FLAG_CONTINUE;
580     }
581     qemu_put_be64(f, offset);
582     size = 8;
583 
584     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
585         len = strlen(block->idstr);
586         qemu_put_byte(f, len);
587         qemu_put_buffer(f, (uint8_t *)block->idstr, len);
588         size += 1 + len;
589         rs->last_sent_block = block;
590     }
591     return size;
592 }
593 
594 /**
595  * mig_throttle_guest_down: throotle down the guest
596  *
597  * Reduce amount of guest cpu execution to hopefully slow down memory
598  * writes. If guest dirty memory rate is reduced below the rate at
599  * which we can transfer pages to the destination then we should be
600  * able to complete migration. Some workloads dirty memory way too
601  * fast and will not effectively converge, even with auto-converge.
602  */
603 static void mig_throttle_guest_down(void)
604 {
605     MigrationState *s = migrate_get_current();
606     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
607     uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
608 
609     /* We have not started throttling yet. Let's start it. */
610     if (!cpu_throttle_active()) {
611         cpu_throttle_set(pct_initial);
612     } else {
613         /* Throttling already on, just increase the rate */
614         cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
615     }
616 }
617 
618 /**
619  * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
620  *
621  * @rs: current RAM state
622  * @current_addr: address for the zero page
623  *
624  * Update the xbzrle cache to reflect a page that's been sent as all 0.
625  * The important thing is that a stale (not-yet-0'd) page be replaced
626  * by the new data.
627  * As a bonus, if the page wasn't in the cache it gets added so that
628  * when a small write is made into the 0'd page it gets XBZRLE sent.
629  */
630 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
631 {
632     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
633         return;
634     }
635 
636     /* We don't care if this fails to allocate a new cache page
637      * as long as it updated an old one */
638     cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
639                  ram_counters.dirty_sync_count);
640 }
641 
642 #define ENCODING_FLAG_XBZRLE 0x1
643 
644 /**
645  * save_xbzrle_page: compress and send current page
646  *
647  * Returns: 1 means that we wrote the page
648  *          0 means that page is identical to the one already sent
649  *          -1 means that xbzrle would be longer than normal
650  *
651  * @rs: current RAM state
652  * @current_data: pointer to the address of the page contents
653  * @current_addr: addr of the page
654  * @block: block that contains the page we want to send
655  * @offset: offset inside the block for the page
656  * @last_stage: if we are at the completion stage
657  */
658 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
659                             ram_addr_t current_addr, RAMBlock *block,
660                             ram_addr_t offset, bool last_stage)
661 {
662     int encoded_len = 0, bytes_xbzrle;
663     uint8_t *prev_cached_page;
664 
665     if (!cache_is_cached(XBZRLE.cache, current_addr,
666                          ram_counters.dirty_sync_count)) {
667         xbzrle_counters.cache_miss++;
668         if (!last_stage) {
669             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
670                              ram_counters.dirty_sync_count) == -1) {
671                 return -1;
672             } else {
673                 /* update *current_data when the page has been
674                    inserted into cache */
675                 *current_data = get_cached_data(XBZRLE.cache, current_addr);
676             }
677         }
678         return -1;
679     }
680 
681     prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
682 
683     /* save current buffer into memory */
684     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
685 
686     /* XBZRLE encoding (if there is no overflow) */
687     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
688                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
689                                        TARGET_PAGE_SIZE);
690     if (encoded_len == 0) {
691         trace_save_xbzrle_page_skipping();
692         return 0;
693     } else if (encoded_len == -1) {
694         trace_save_xbzrle_page_overflow();
695         xbzrle_counters.overflow++;
696         /* update data in the cache */
697         if (!last_stage) {
698             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
699             *current_data = prev_cached_page;
700         }
701         return -1;
702     }
703 
704     /* we need to update the data in the cache, in order to get the same data */
705     if (!last_stage) {
706         memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
707     }
708 
709     /* Send XBZRLE based compressed page */
710     bytes_xbzrle = save_page_header(rs, rs->f, block,
711                                     offset | RAM_SAVE_FLAG_XBZRLE);
712     qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
713     qemu_put_be16(rs->f, encoded_len);
714     qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
715     bytes_xbzrle += encoded_len + 1 + 2;
716     xbzrle_counters.pages++;
717     xbzrle_counters.bytes += bytes_xbzrle;
718     ram_counters.transferred += bytes_xbzrle;
719 
720     return 1;
721 }
722 
723 /**
724  * migration_bitmap_find_dirty: find the next dirty page from start
725  *
726  * Called with rcu_read_lock() to protect migration_bitmap
727  *
728  * Returns the byte offset within memory region of the start of a dirty page
729  *
730  * @rs: current RAM state
731  * @rb: RAMBlock where to search for dirty pages
732  * @start: page where we start the search
733  */
734 static inline
735 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
736                                           unsigned long start)
737 {
738     unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
739     unsigned long *bitmap = rb->bmap;
740     unsigned long next;
741 
742     if (rs->ram_bulk_stage && start > 0) {
743         next = start + 1;
744     } else {
745         next = find_next_bit(bitmap, size, start);
746     }
747 
748     return next;
749 }
750 
751 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
752                                                 RAMBlock *rb,
753                                                 unsigned long page)
754 {
755     bool ret;
756 
757     ret = test_and_clear_bit(page, rb->bmap);
758 
759     if (ret) {
760         rs->migration_dirty_pages--;
761     }
762     return ret;
763 }
764 
765 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
766                                         ram_addr_t start, ram_addr_t length)
767 {
768     rs->migration_dirty_pages +=
769         cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
770                                               &rs->num_dirty_pages_period);
771 }
772 
773 /**
774  * ram_pagesize_summary: calculate all the pagesizes of a VM
775  *
776  * Returns a summary bitmap of the page sizes of all RAMBlocks
777  *
778  * For VMs with just normal pages this is equivalent to the host page
779  * size. If it's got some huge pages then it's the OR of all the
780  * different page sizes.
781  */
782 uint64_t ram_pagesize_summary(void)
783 {
784     RAMBlock *block;
785     uint64_t summary = 0;
786 
787     RAMBLOCK_FOREACH(block) {
788         summary |= block->page_size;
789     }
790 
791     return summary;
792 }
793 
794 static void migration_bitmap_sync(RAMState *rs)
795 {
796     RAMBlock *block;
797     int64_t end_time;
798     uint64_t bytes_xfer_now;
799 
800     ram_counters.dirty_sync_count++;
801 
802     if (!rs->time_last_bitmap_sync) {
803         rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
804     }
805 
806     trace_migration_bitmap_sync_start();
807     memory_global_dirty_log_sync();
808 
809     qemu_mutex_lock(&rs->bitmap_mutex);
810     rcu_read_lock();
811     RAMBLOCK_FOREACH(block) {
812         migration_bitmap_sync_range(rs, block, 0, block->used_length);
813     }
814     rcu_read_unlock();
815     qemu_mutex_unlock(&rs->bitmap_mutex);
816 
817     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
818 
819     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
820 
821     /* more than 1 second = 1000 millisecons */
822     if (end_time > rs->time_last_bitmap_sync + 1000) {
823         /* calculate period counters */
824         ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
825             / (end_time - rs->time_last_bitmap_sync);
826         bytes_xfer_now = ram_counters.transferred;
827 
828         if (migrate_auto_converge()) {
829             /* The following detection logic can be refined later. For now:
830                Check to see if the dirtied bytes is 50% more than the approx.
831                amount of bytes that just got transferred since the last time we
832                were in this routine. If that happens twice, start or increase
833                throttling */
834 
835             if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
836                    (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
837                 (++rs->dirty_rate_high_cnt >= 2)) {
838                     trace_migration_throttle();
839                     rs->dirty_rate_high_cnt = 0;
840                     mig_throttle_guest_down();
841             }
842         }
843 
844         if (migrate_use_xbzrle()) {
845             if (rs->iterations_prev != rs->iterations) {
846                 xbzrle_counters.cache_miss_rate =
847                    (double)(xbzrle_counters.cache_miss -
848                             rs->xbzrle_cache_miss_prev) /
849                    (rs->iterations - rs->iterations_prev);
850             }
851             rs->iterations_prev = rs->iterations;
852             rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
853         }
854 
855         /* reset period counters */
856         rs->time_last_bitmap_sync = end_time;
857         rs->num_dirty_pages_period = 0;
858         rs->bytes_xfer_prev = bytes_xfer_now;
859     }
860     if (migrate_use_events()) {
861         qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
862     }
863 }
864 
865 /**
866  * save_zero_page: send the zero page to the stream
867  *
868  * Returns the number of pages written.
869  *
870  * @rs: current RAM state
871  * @block: block that contains the page we want to send
872  * @offset: offset inside the block for the page
873  * @p: pointer to the page
874  */
875 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
876                           uint8_t *p)
877 {
878     int pages = -1;
879 
880     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
881         ram_counters.duplicate++;
882         ram_counters.transferred +=
883             save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
884         qemu_put_byte(rs->f, 0);
885         ram_counters.transferred += 1;
886         pages = 1;
887     }
888 
889     return pages;
890 }
891 
892 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
893 {
894     if (!migrate_release_ram() || !migration_in_postcopy()) {
895         return;
896     }
897 
898     ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
899 }
900 
901 /**
902  * ram_save_page: send the given page to the stream
903  *
904  * Returns the number of pages written.
905  *          < 0 - error
906  *          >=0 - Number of pages written - this might legally be 0
907  *                if xbzrle noticed the page was the same.
908  *
909  * @rs: current RAM state
910  * @block: block that contains the page we want to send
911  * @offset: offset inside the block for the page
912  * @last_stage: if we are at the completion stage
913  */
914 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
915 {
916     int pages = -1;
917     uint64_t bytes_xmit;
918     ram_addr_t current_addr;
919     uint8_t *p;
920     int ret;
921     bool send_async = true;
922     RAMBlock *block = pss->block;
923     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
924 
925     p = block->host + offset;
926     trace_ram_save_page(block->idstr, (uint64_t)offset, p);
927 
928     /* In doubt sent page as normal */
929     bytes_xmit = 0;
930     ret = ram_control_save_page(rs->f, block->offset,
931                            offset, TARGET_PAGE_SIZE, &bytes_xmit);
932     if (bytes_xmit) {
933         ram_counters.transferred += bytes_xmit;
934         pages = 1;
935     }
936 
937     XBZRLE_cache_lock();
938 
939     current_addr = block->offset + offset;
940 
941     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
942         if (ret != RAM_SAVE_CONTROL_DELAYED) {
943             if (bytes_xmit > 0) {
944                 ram_counters.normal++;
945             } else if (bytes_xmit == 0) {
946                 ram_counters.duplicate++;
947             }
948         }
949     } else {
950         pages = save_zero_page(rs, block, offset, p);
951         if (pages > 0) {
952             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
953              * page would be stale
954              */
955             xbzrle_cache_zero_page(rs, current_addr);
956             ram_release_pages(block->idstr, offset, pages);
957         } else if (!rs->ram_bulk_stage &&
958                    !migration_in_postcopy() && migrate_use_xbzrle()) {
959             pages = save_xbzrle_page(rs, &p, current_addr, block,
960                                      offset, last_stage);
961             if (!last_stage) {
962                 /* Can't send this cached data async, since the cache page
963                  * might get updated before it gets to the wire
964                  */
965                 send_async = false;
966             }
967         }
968     }
969 
970     /* XBZRLE overflow or normal page */
971     if (pages == -1) {
972         ram_counters.transferred +=
973             save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
974         if (send_async) {
975             qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
976                                   migrate_release_ram() &
977                                   migration_in_postcopy());
978         } else {
979             qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
980         }
981         ram_counters.transferred += TARGET_PAGE_SIZE;
982         pages = 1;
983         ram_counters.normal++;
984     }
985 
986     XBZRLE_cache_unlock();
987 
988     return pages;
989 }
990 
991 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
992                                 ram_addr_t offset)
993 {
994     RAMState *rs = ram_state;
995     int bytes_sent, blen;
996     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
997 
998     bytes_sent = save_page_header(rs, f, block, offset |
999                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
1000     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
1001                                      migrate_compress_level());
1002     if (blen < 0) {
1003         bytes_sent = 0;
1004         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1005         error_report("compressed data failed!");
1006     } else {
1007         bytes_sent += blen;
1008         ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1009     }
1010 
1011     return bytes_sent;
1012 }
1013 
1014 static void flush_compressed_data(RAMState *rs)
1015 {
1016     int idx, len, thread_count;
1017 
1018     if (!migrate_use_compression()) {
1019         return;
1020     }
1021     thread_count = migrate_compress_threads();
1022 
1023     qemu_mutex_lock(&comp_done_lock);
1024     for (idx = 0; idx < thread_count; idx++) {
1025         while (!comp_param[idx].done) {
1026             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1027         }
1028     }
1029     qemu_mutex_unlock(&comp_done_lock);
1030 
1031     for (idx = 0; idx < thread_count; idx++) {
1032         qemu_mutex_lock(&comp_param[idx].mutex);
1033         if (!comp_param[idx].quit) {
1034             len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1035             ram_counters.transferred += len;
1036         }
1037         qemu_mutex_unlock(&comp_param[idx].mutex);
1038     }
1039 }
1040 
1041 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1042                                        ram_addr_t offset)
1043 {
1044     param->block = block;
1045     param->offset = offset;
1046 }
1047 
1048 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1049                                            ram_addr_t offset)
1050 {
1051     int idx, thread_count, bytes_xmit = -1, pages = -1;
1052 
1053     thread_count = migrate_compress_threads();
1054     qemu_mutex_lock(&comp_done_lock);
1055     while (true) {
1056         for (idx = 0; idx < thread_count; idx++) {
1057             if (comp_param[idx].done) {
1058                 comp_param[idx].done = false;
1059                 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1060                 qemu_mutex_lock(&comp_param[idx].mutex);
1061                 set_compress_params(&comp_param[idx], block, offset);
1062                 qemu_cond_signal(&comp_param[idx].cond);
1063                 qemu_mutex_unlock(&comp_param[idx].mutex);
1064                 pages = 1;
1065                 ram_counters.normal++;
1066                 ram_counters.transferred += bytes_xmit;
1067                 break;
1068             }
1069         }
1070         if (pages > 0) {
1071             break;
1072         } else {
1073             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1074         }
1075     }
1076     qemu_mutex_unlock(&comp_done_lock);
1077 
1078     return pages;
1079 }
1080 
1081 /**
1082  * ram_save_compressed_page: compress the given page and send it to the stream
1083  *
1084  * Returns the number of pages written.
1085  *
1086  * @rs: current RAM state
1087  * @block: block that contains the page we want to send
1088  * @offset: offset inside the block for the page
1089  * @last_stage: if we are at the completion stage
1090  */
1091 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
1092                                     bool last_stage)
1093 {
1094     int pages = -1;
1095     uint64_t bytes_xmit = 0;
1096     uint8_t *p;
1097     int ret, blen;
1098     RAMBlock *block = pss->block;
1099     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1100 
1101     p = block->host + offset;
1102 
1103     ret = ram_control_save_page(rs->f, block->offset,
1104                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
1105     if (bytes_xmit) {
1106         ram_counters.transferred += bytes_xmit;
1107         pages = 1;
1108     }
1109     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
1110         if (ret != RAM_SAVE_CONTROL_DELAYED) {
1111             if (bytes_xmit > 0) {
1112                 ram_counters.normal++;
1113             } else if (bytes_xmit == 0) {
1114                 ram_counters.duplicate++;
1115             }
1116         }
1117     } else {
1118         /* When starting the process of a new block, the first page of
1119          * the block should be sent out before other pages in the same
1120          * block, and all the pages in last block should have been sent
1121          * out, keeping this order is important, because the 'cont' flag
1122          * is used to avoid resending the block name.
1123          */
1124         if (block != rs->last_sent_block) {
1125             flush_compressed_data(rs);
1126             pages = save_zero_page(rs, block, offset, p);
1127             if (pages == -1) {
1128                 /* Make sure the first page is sent out before other pages */
1129                 bytes_xmit = save_page_header(rs, rs->f, block, offset |
1130                                               RAM_SAVE_FLAG_COMPRESS_PAGE);
1131                 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
1132                                                  migrate_compress_level());
1133                 if (blen > 0) {
1134                     ram_counters.transferred += bytes_xmit + blen;
1135                     ram_counters.normal++;
1136                     pages = 1;
1137                 } else {
1138                     qemu_file_set_error(rs->f, blen);
1139                     error_report("compressed data failed!");
1140                 }
1141             }
1142             if (pages > 0) {
1143                 ram_release_pages(block->idstr, offset, pages);
1144             }
1145         } else {
1146             pages = save_zero_page(rs, block, offset, p);
1147             if (pages == -1) {
1148                 pages = compress_page_with_multi_thread(rs, block, offset);
1149             } else {
1150                 ram_release_pages(block->idstr, offset, pages);
1151             }
1152         }
1153     }
1154 
1155     return pages;
1156 }
1157 
1158 /**
1159  * find_dirty_block: find the next dirty page and update any state
1160  * associated with the search process.
1161  *
1162  * Returns if a page is found
1163  *
1164  * @rs: current RAM state
1165  * @pss: data about the state of the current dirty page scan
1166  * @again: set to false if the search has scanned the whole of RAM
1167  */
1168 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1169 {
1170     pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1171     if (pss->complete_round && pss->block == rs->last_seen_block &&
1172         pss->page >= rs->last_page) {
1173         /*
1174          * We've been once around the RAM and haven't found anything.
1175          * Give up.
1176          */
1177         *again = false;
1178         return false;
1179     }
1180     if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1181         /* Didn't find anything in this RAM Block */
1182         pss->page = 0;
1183         pss->block = QLIST_NEXT_RCU(pss->block, next);
1184         if (!pss->block) {
1185             /* Hit the end of the list */
1186             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1187             /* Flag that we've looped */
1188             pss->complete_round = true;
1189             rs->ram_bulk_stage = false;
1190             if (migrate_use_xbzrle()) {
1191                 /* If xbzrle is on, stop using the data compression at this
1192                  * point. In theory, xbzrle can do better than compression.
1193                  */
1194                 flush_compressed_data(rs);
1195             }
1196         }
1197         /* Didn't find anything this time, but try again on the new block */
1198         *again = true;
1199         return false;
1200     } else {
1201         /* Can go around again, but... */
1202         *again = true;
1203         /* We've found something so probably don't need to */
1204         return true;
1205     }
1206 }
1207 
1208 /**
1209  * unqueue_page: gets a page of the queue
1210  *
1211  * Helper for 'get_queued_page' - gets a page off the queue
1212  *
1213  * Returns the block of the page (or NULL if none available)
1214  *
1215  * @rs: current RAM state
1216  * @offset: used to return the offset within the RAMBlock
1217  */
1218 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1219 {
1220     RAMBlock *block = NULL;
1221 
1222     qemu_mutex_lock(&rs->src_page_req_mutex);
1223     if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1224         struct RAMSrcPageRequest *entry =
1225                                 QSIMPLEQ_FIRST(&rs->src_page_requests);
1226         block = entry->rb;
1227         *offset = entry->offset;
1228 
1229         if (entry->len > TARGET_PAGE_SIZE) {
1230             entry->len -= TARGET_PAGE_SIZE;
1231             entry->offset += TARGET_PAGE_SIZE;
1232         } else {
1233             memory_region_unref(block->mr);
1234             QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1235             g_free(entry);
1236         }
1237     }
1238     qemu_mutex_unlock(&rs->src_page_req_mutex);
1239 
1240     return block;
1241 }
1242 
1243 /**
1244  * get_queued_page: unqueue a page from the postocpy requests
1245  *
1246  * Skips pages that are already sent (!dirty)
1247  *
1248  * Returns if a queued page is found
1249  *
1250  * @rs: current RAM state
1251  * @pss: data about the state of the current dirty page scan
1252  */
1253 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1254 {
1255     RAMBlock  *block;
1256     ram_addr_t offset;
1257     bool dirty;
1258 
1259     do {
1260         block = unqueue_page(rs, &offset);
1261         /*
1262          * We're sending this page, and since it's postcopy nothing else
1263          * will dirty it, and we must make sure it doesn't get sent again
1264          * even if this queue request was received after the background
1265          * search already sent it.
1266          */
1267         if (block) {
1268             unsigned long page;
1269 
1270             page = offset >> TARGET_PAGE_BITS;
1271             dirty = test_bit(page, block->bmap);
1272             if (!dirty) {
1273                 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1274                        page, test_bit(page, block->unsentmap));
1275             } else {
1276                 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1277             }
1278         }
1279 
1280     } while (block && !dirty);
1281 
1282     if (block) {
1283         /*
1284          * As soon as we start servicing pages out of order, then we have
1285          * to kill the bulk stage, since the bulk stage assumes
1286          * in (migration_bitmap_find_and_reset_dirty) that every page is
1287          * dirty, that's no longer true.
1288          */
1289         rs->ram_bulk_stage = false;
1290 
1291         /*
1292          * We want the background search to continue from the queued page
1293          * since the guest is likely to want other pages near to the page
1294          * it just requested.
1295          */
1296         pss->block = block;
1297         pss->page = offset >> TARGET_PAGE_BITS;
1298     }
1299 
1300     return !!block;
1301 }
1302 
1303 /**
1304  * migration_page_queue_free: drop any remaining pages in the ram
1305  * request queue
1306  *
1307  * It should be empty at the end anyway, but in error cases there may
1308  * be some left.  in case that there is any page left, we drop it.
1309  *
1310  */
1311 static void migration_page_queue_free(RAMState *rs)
1312 {
1313     struct RAMSrcPageRequest *mspr, *next_mspr;
1314     /* This queue generally should be empty - but in the case of a failed
1315      * migration might have some droppings in.
1316      */
1317     rcu_read_lock();
1318     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1319         memory_region_unref(mspr->rb->mr);
1320         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1321         g_free(mspr);
1322     }
1323     rcu_read_unlock();
1324 }
1325 
1326 /**
1327  * ram_save_queue_pages: queue the page for transmission
1328  *
1329  * A request from postcopy destination for example.
1330  *
1331  * Returns zero on success or negative on error
1332  *
1333  * @rbname: Name of the RAMBLock of the request. NULL means the
1334  *          same that last one.
1335  * @start: starting address from the start of the RAMBlock
1336  * @len: length (in bytes) to send
1337  */
1338 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1339 {
1340     RAMBlock *ramblock;
1341     RAMState *rs = ram_state;
1342 
1343     ram_counters.postcopy_requests++;
1344     rcu_read_lock();
1345     if (!rbname) {
1346         /* Reuse last RAMBlock */
1347         ramblock = rs->last_req_rb;
1348 
1349         if (!ramblock) {
1350             /*
1351              * Shouldn't happen, we can't reuse the last RAMBlock if
1352              * it's the 1st request.
1353              */
1354             error_report("ram_save_queue_pages no previous block");
1355             goto err;
1356         }
1357     } else {
1358         ramblock = qemu_ram_block_by_name(rbname);
1359 
1360         if (!ramblock) {
1361             /* We shouldn't be asked for a non-existent RAMBlock */
1362             error_report("ram_save_queue_pages no block '%s'", rbname);
1363             goto err;
1364         }
1365         rs->last_req_rb = ramblock;
1366     }
1367     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1368     if (start+len > ramblock->used_length) {
1369         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1370                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1371                      __func__, start, len, ramblock->used_length);
1372         goto err;
1373     }
1374 
1375     struct RAMSrcPageRequest *new_entry =
1376         g_malloc0(sizeof(struct RAMSrcPageRequest));
1377     new_entry->rb = ramblock;
1378     new_entry->offset = start;
1379     new_entry->len = len;
1380 
1381     memory_region_ref(ramblock->mr);
1382     qemu_mutex_lock(&rs->src_page_req_mutex);
1383     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1384     qemu_mutex_unlock(&rs->src_page_req_mutex);
1385     rcu_read_unlock();
1386 
1387     return 0;
1388 
1389 err:
1390     rcu_read_unlock();
1391     return -1;
1392 }
1393 
1394 /**
1395  * ram_save_target_page: save one target page
1396  *
1397  * Returns the number of pages written
1398  *
1399  * @rs: current RAM state
1400  * @ms: current migration state
1401  * @pss: data about the page we want to send
1402  * @last_stage: if we are at the completion stage
1403  */
1404 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1405                                 bool last_stage)
1406 {
1407     int res = 0;
1408 
1409     /* Check the pages is dirty and if it is send it */
1410     if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1411         /*
1412          * If xbzrle is on, stop using the data compression after first
1413          * round of migration even if compression is enabled. In theory,
1414          * xbzrle can do better than compression.
1415          */
1416         if (migrate_use_compression() &&
1417             (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1418             res = ram_save_compressed_page(rs, pss, last_stage);
1419         } else {
1420             res = ram_save_page(rs, pss, last_stage);
1421         }
1422 
1423         if (res < 0) {
1424             return res;
1425         }
1426         if (pss->block->unsentmap) {
1427             clear_bit(pss->page, pss->block->unsentmap);
1428         }
1429     }
1430 
1431     return res;
1432 }
1433 
1434 /**
1435  * ram_save_host_page: save a whole host page
1436  *
1437  * Starting at *offset send pages up to the end of the current host
1438  * page. It's valid for the initial offset to point into the middle of
1439  * a host page in which case the remainder of the hostpage is sent.
1440  * Only dirty target pages are sent. Note that the host page size may
1441  * be a huge page for this block.
1442  * The saving stops at the boundary of the used_length of the block
1443  * if the RAMBlock isn't a multiple of the host page size.
1444  *
1445  * Returns the number of pages written or negative on error
1446  *
1447  * @rs: current RAM state
1448  * @ms: current migration state
1449  * @pss: data about the page we want to send
1450  * @last_stage: if we are at the completion stage
1451  */
1452 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1453                               bool last_stage)
1454 {
1455     int tmppages, pages = 0;
1456     size_t pagesize_bits =
1457         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1458 
1459     do {
1460         tmppages = ram_save_target_page(rs, pss, last_stage);
1461         if (tmppages < 0) {
1462             return tmppages;
1463         }
1464 
1465         pages += tmppages;
1466         pss->page++;
1467     } while ((pss->page & (pagesize_bits - 1)) &&
1468              offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
1469 
1470     /* The offset we leave with is the last one we looked at */
1471     pss->page--;
1472     return pages;
1473 }
1474 
1475 /**
1476  * ram_find_and_save_block: finds a dirty page and sends it to f
1477  *
1478  * Called within an RCU critical section.
1479  *
1480  * Returns the number of pages written where zero means no dirty pages
1481  *
1482  * @rs: current RAM state
1483  * @last_stage: if we are at the completion stage
1484  *
1485  * On systems where host-page-size > target-page-size it will send all the
1486  * pages in a host page that are dirty.
1487  */
1488 
1489 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1490 {
1491     PageSearchStatus pss;
1492     int pages = 0;
1493     bool again, found;
1494 
1495     /* No dirty page as there is zero RAM */
1496     if (!ram_bytes_total()) {
1497         return pages;
1498     }
1499 
1500     pss.block = rs->last_seen_block;
1501     pss.page = rs->last_page;
1502     pss.complete_round = false;
1503 
1504     if (!pss.block) {
1505         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1506     }
1507 
1508     do {
1509         again = true;
1510         found = get_queued_page(rs, &pss);
1511 
1512         if (!found) {
1513             /* priority queue empty, so just search for something dirty */
1514             found = find_dirty_block(rs, &pss, &again);
1515         }
1516 
1517         if (found) {
1518             pages = ram_save_host_page(rs, &pss, last_stage);
1519         }
1520     } while (!pages && again);
1521 
1522     rs->last_seen_block = pss.block;
1523     rs->last_page = pss.page;
1524 
1525     return pages;
1526 }
1527 
1528 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1529 {
1530     uint64_t pages = size / TARGET_PAGE_SIZE;
1531 
1532     if (zero) {
1533         ram_counters.duplicate += pages;
1534     } else {
1535         ram_counters.normal += pages;
1536         ram_counters.transferred += size;
1537         qemu_update_position(f, size);
1538     }
1539 }
1540 
1541 uint64_t ram_bytes_total(void)
1542 {
1543     RAMBlock *block;
1544     uint64_t total = 0;
1545 
1546     rcu_read_lock();
1547     RAMBLOCK_FOREACH(block) {
1548         total += block->used_length;
1549     }
1550     rcu_read_unlock();
1551     return total;
1552 }
1553 
1554 static void xbzrle_load_setup(void)
1555 {
1556     XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
1557 }
1558 
1559 static void xbzrle_load_cleanup(void)
1560 {
1561     g_free(XBZRLE.decoded_buf);
1562     XBZRLE.decoded_buf = NULL;
1563 }
1564 
1565 static void ram_save_cleanup(void *opaque)
1566 {
1567     RAMState **rsp = opaque;
1568     RAMBlock *block;
1569 
1570     /* caller have hold iothread lock or is in a bh, so there is
1571      * no writing race against this migration_bitmap
1572      */
1573     memory_global_dirty_log_stop();
1574 
1575     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1576         g_free(block->bmap);
1577         block->bmap = NULL;
1578         g_free(block->unsentmap);
1579         block->unsentmap = NULL;
1580     }
1581 
1582     XBZRLE_cache_lock();
1583     if (XBZRLE.cache) {
1584         cache_fini(XBZRLE.cache);
1585         g_free(XBZRLE.encoded_buf);
1586         g_free(XBZRLE.current_buf);
1587         g_free(XBZRLE.zero_target_page);
1588         XBZRLE.cache = NULL;
1589         XBZRLE.encoded_buf = NULL;
1590         XBZRLE.current_buf = NULL;
1591         XBZRLE.zero_target_page = NULL;
1592     }
1593     XBZRLE_cache_unlock();
1594     migration_page_queue_free(*rsp);
1595     compress_threads_save_cleanup();
1596     g_free(*rsp);
1597     *rsp = NULL;
1598 }
1599 
1600 static void ram_state_reset(RAMState *rs)
1601 {
1602     rs->last_seen_block = NULL;
1603     rs->last_sent_block = NULL;
1604     rs->last_page = 0;
1605     rs->last_version = ram_list.version;
1606     rs->ram_bulk_stage = true;
1607 }
1608 
1609 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1610 
1611 /*
1612  * 'expected' is the value you expect the bitmap mostly to be full
1613  * of; it won't bother printing lines that are all this value.
1614  * If 'todump' is null the migration bitmap is dumped.
1615  */
1616 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1617                            unsigned long pages)
1618 {
1619     int64_t cur;
1620     int64_t linelen = 128;
1621     char linebuf[129];
1622 
1623     for (cur = 0; cur < pages; cur += linelen) {
1624         int64_t curb;
1625         bool found = false;
1626         /*
1627          * Last line; catch the case where the line length
1628          * is longer than remaining ram
1629          */
1630         if (cur + linelen > pages) {
1631             linelen = pages - cur;
1632         }
1633         for (curb = 0; curb < linelen; curb++) {
1634             bool thisbit = test_bit(cur + curb, todump);
1635             linebuf[curb] = thisbit ? '1' : '.';
1636             found = found || (thisbit != expected);
1637         }
1638         if (found) {
1639             linebuf[curb] = '\0';
1640             fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
1641         }
1642     }
1643 }
1644 
1645 /* **** functions for postcopy ***** */
1646 
1647 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1648 {
1649     struct RAMBlock *block;
1650 
1651     RAMBLOCK_FOREACH(block) {
1652         unsigned long *bitmap = block->bmap;
1653         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1654         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
1655 
1656         while (run_start < range) {
1657             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1658             ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1659                               (run_end - run_start) << TARGET_PAGE_BITS);
1660             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1661         }
1662     }
1663 }
1664 
1665 /**
1666  * postcopy_send_discard_bm_ram: discard a RAMBlock
1667  *
1668  * Returns zero on success
1669  *
1670  * Callback from postcopy_each_ram_send_discard for each RAMBlock
1671  * Note: At this point the 'unsentmap' is the processed bitmap combined
1672  *       with the dirtymap; so a '1' means it's either dirty or unsent.
1673  *
1674  * @ms: current migration state
1675  * @pds: state for postcopy
1676  * @start: RAMBlock starting page
1677  * @length: RAMBlock size
1678  */
1679 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1680                                         PostcopyDiscardState *pds,
1681                                         RAMBlock *block)
1682 {
1683     unsigned long end = block->used_length >> TARGET_PAGE_BITS;
1684     unsigned long current;
1685     unsigned long *unsentmap = block->unsentmap;
1686 
1687     for (current = 0; current < end; ) {
1688         unsigned long one = find_next_bit(unsentmap, end, current);
1689 
1690         if (one <= end) {
1691             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1692             unsigned long discard_length;
1693 
1694             if (zero >= end) {
1695                 discard_length = end - one;
1696             } else {
1697                 discard_length = zero - one;
1698             }
1699             if (discard_length) {
1700                 postcopy_discard_send_range(ms, pds, one, discard_length);
1701             }
1702             current = one + discard_length;
1703         } else {
1704             current = one;
1705         }
1706     }
1707 
1708     return 0;
1709 }
1710 
1711 /**
1712  * postcopy_each_ram_send_discard: discard all RAMBlocks
1713  *
1714  * Returns 0 for success or negative for error
1715  *
1716  * Utility for the outgoing postcopy code.
1717  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
1718  *   passing it bitmap indexes and name.
1719  * (qemu_ram_foreach_block ends up passing unscaled lengths
1720  *  which would mean postcopy code would have to deal with target page)
1721  *
1722  * @ms: current migration state
1723  */
1724 static int postcopy_each_ram_send_discard(MigrationState *ms)
1725 {
1726     struct RAMBlock *block;
1727     int ret;
1728 
1729     RAMBLOCK_FOREACH(block) {
1730         PostcopyDiscardState *pds =
1731             postcopy_discard_send_init(ms, block->idstr);
1732 
1733         /*
1734          * Postcopy sends chunks of bitmap over the wire, but it
1735          * just needs indexes at this point, avoids it having
1736          * target page specific code.
1737          */
1738         ret = postcopy_send_discard_bm_ram(ms, pds, block);
1739         postcopy_discard_send_finish(ms, pds);
1740         if (ret) {
1741             return ret;
1742         }
1743     }
1744 
1745     return 0;
1746 }
1747 
1748 /**
1749  * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
1750  *
1751  * Helper for postcopy_chunk_hostpages; it's called twice to
1752  * canonicalize the two bitmaps, that are similar, but one is
1753  * inverted.
1754  *
1755  * Postcopy requires that all target pages in a hostpage are dirty or
1756  * clean, not a mix.  This function canonicalizes the bitmaps.
1757  *
1758  * @ms: current migration state
1759  * @unsent_pass: if true we need to canonicalize partially unsent host pages
1760  *               otherwise we need to canonicalize partially dirty host pages
1761  * @block: block that contains the page we want to canonicalize
1762  * @pds: state for postcopy
1763  */
1764 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1765                                           RAMBlock *block,
1766                                           PostcopyDiscardState *pds)
1767 {
1768     RAMState *rs = ram_state;
1769     unsigned long *bitmap = block->bmap;
1770     unsigned long *unsentmap = block->unsentmap;
1771     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1772     unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1773     unsigned long run_start;
1774 
1775     if (block->page_size == TARGET_PAGE_SIZE) {
1776         /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1777         return;
1778     }
1779 
1780     if (unsent_pass) {
1781         /* Find a sent page */
1782         run_start = find_next_zero_bit(unsentmap, pages, 0);
1783     } else {
1784         /* Find a dirty page */
1785         run_start = find_next_bit(bitmap, pages, 0);
1786     }
1787 
1788     while (run_start < pages) {
1789         bool do_fixup = false;
1790         unsigned long fixup_start_addr;
1791         unsigned long host_offset;
1792 
1793         /*
1794          * If the start of this run of pages is in the middle of a host
1795          * page, then we need to fixup this host page.
1796          */
1797         host_offset = run_start % host_ratio;
1798         if (host_offset) {
1799             do_fixup = true;
1800             run_start -= host_offset;
1801             fixup_start_addr = run_start;
1802             /* For the next pass */
1803             run_start = run_start + host_ratio;
1804         } else {
1805             /* Find the end of this run */
1806             unsigned long run_end;
1807             if (unsent_pass) {
1808                 run_end = find_next_bit(unsentmap, pages, run_start + 1);
1809             } else {
1810                 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
1811             }
1812             /*
1813              * If the end isn't at the start of a host page, then the
1814              * run doesn't finish at the end of a host page
1815              * and we need to discard.
1816              */
1817             host_offset = run_end % host_ratio;
1818             if (host_offset) {
1819                 do_fixup = true;
1820                 fixup_start_addr = run_end - host_offset;
1821                 /*
1822                  * This host page has gone, the next loop iteration starts
1823                  * from after the fixup
1824                  */
1825                 run_start = fixup_start_addr + host_ratio;
1826             } else {
1827                 /*
1828                  * No discards on this iteration, next loop starts from
1829                  * next sent/dirty page
1830                  */
1831                 run_start = run_end + 1;
1832             }
1833         }
1834 
1835         if (do_fixup) {
1836             unsigned long page;
1837 
1838             /* Tell the destination to discard this page */
1839             if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1840                 /* For the unsent_pass we:
1841                  *     discard partially sent pages
1842                  * For the !unsent_pass (dirty) we:
1843                  *     discard partially dirty pages that were sent
1844                  *     (any partially sent pages were already discarded
1845                  *     by the previous unsent_pass)
1846                  */
1847                 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1848                                             host_ratio);
1849             }
1850 
1851             /* Clean up the bitmap */
1852             for (page = fixup_start_addr;
1853                  page < fixup_start_addr + host_ratio; page++) {
1854                 /* All pages in this host page are now not sent */
1855                 set_bit(page, unsentmap);
1856 
1857                 /*
1858                  * Remark them as dirty, updating the count for any pages
1859                  * that weren't previously dirty.
1860                  */
1861                 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
1862             }
1863         }
1864 
1865         if (unsent_pass) {
1866             /* Find the next sent page for the next iteration */
1867             run_start = find_next_zero_bit(unsentmap, pages, run_start);
1868         } else {
1869             /* Find the next dirty page for the next iteration */
1870             run_start = find_next_bit(bitmap, pages, run_start);
1871         }
1872     }
1873 }
1874 
1875 /**
1876  * postcopy_chuck_hostpages: discrad any partially sent host page
1877  *
1878  * Utility for the outgoing postcopy code.
1879  *
1880  * Discard any partially sent host-page size chunks, mark any partially
1881  * dirty host-page size chunks as all dirty.  In this case the host-page
1882  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1883  *
1884  * Returns zero on success
1885  *
1886  * @ms: current migration state
1887  * @block: block we want to work with
1888  */
1889 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
1890 {
1891     PostcopyDiscardState *pds =
1892         postcopy_discard_send_init(ms, block->idstr);
1893 
1894     /* First pass: Discard all partially sent host pages */
1895     postcopy_chunk_hostpages_pass(ms, true, block, pds);
1896     /*
1897      * Second pass: Ensure that all partially dirty host pages are made
1898      * fully dirty.
1899      */
1900     postcopy_chunk_hostpages_pass(ms, false, block, pds);
1901 
1902     postcopy_discard_send_finish(ms, pds);
1903     return 0;
1904 }
1905 
1906 /**
1907  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1908  *
1909  * Returns zero on success
1910  *
1911  * Transmit the set of pages to be discarded after precopy to the target
1912  * these are pages that:
1913  *     a) Have been previously transmitted but are now dirty again
1914  *     b) Pages that have never been transmitted, this ensures that
1915  *        any pages on the destination that have been mapped by background
1916  *        tasks get discarded (transparent huge pages is the specific concern)
1917  * Hopefully this is pretty sparse
1918  *
1919  * @ms: current migration state
1920  */
1921 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1922 {
1923     RAMState *rs = ram_state;
1924     RAMBlock *block;
1925     int ret;
1926 
1927     rcu_read_lock();
1928 
1929     /* This should be our last sync, the src is now paused */
1930     migration_bitmap_sync(rs);
1931 
1932     /* Easiest way to make sure we don't resume in the middle of a host-page */
1933     rs->last_seen_block = NULL;
1934     rs->last_sent_block = NULL;
1935     rs->last_page = 0;
1936 
1937     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1938         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1939         unsigned long *bitmap = block->bmap;
1940         unsigned long *unsentmap = block->unsentmap;
1941 
1942         if (!unsentmap) {
1943             /* We don't have a safe way to resize the sentmap, so
1944              * if the bitmap was resized it will be NULL at this
1945              * point.
1946              */
1947             error_report("migration ram resized during precopy phase");
1948             rcu_read_unlock();
1949             return -EINVAL;
1950         }
1951         /* Deal with TPS != HPS and huge pages */
1952         ret = postcopy_chunk_hostpages(ms, block);
1953         if (ret) {
1954             rcu_read_unlock();
1955             return ret;
1956         }
1957 
1958         /*
1959          * Update the unsentmap to be unsentmap = unsentmap | dirty
1960          */
1961         bitmap_or(unsentmap, unsentmap, bitmap, pages);
1962 #ifdef DEBUG_POSTCOPY
1963         ram_debug_dump_bitmap(unsentmap, true, pages);
1964 #endif
1965     }
1966     trace_ram_postcopy_send_discard_bitmap();
1967 
1968     ret = postcopy_each_ram_send_discard(ms);
1969     rcu_read_unlock();
1970 
1971     return ret;
1972 }
1973 
1974 /**
1975  * ram_discard_range: discard dirtied pages at the beginning of postcopy
1976  *
1977  * Returns zero on success
1978  *
1979  * @rbname: name of the RAMBlock of the request. NULL means the
1980  *          same that last one.
1981  * @start: RAMBlock starting page
1982  * @length: RAMBlock size
1983  */
1984 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
1985 {
1986     int ret = -1;
1987 
1988     trace_ram_discard_range(rbname, start, length);
1989 
1990     rcu_read_lock();
1991     RAMBlock *rb = qemu_ram_block_by_name(rbname);
1992 
1993     if (!rb) {
1994         error_report("ram_discard_range: Failed to find block '%s'", rbname);
1995         goto err;
1996     }
1997 
1998     ret = ram_block_discard_range(rb, start, length);
1999 
2000 err:
2001     rcu_read_unlock();
2002 
2003     return ret;
2004 }
2005 
2006 static int ram_state_init(RAMState **rsp)
2007 {
2008     *rsp = g_new0(RAMState, 1);
2009 
2010     qemu_mutex_init(&(*rsp)->bitmap_mutex);
2011     qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2012     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2013 
2014     if (migrate_use_xbzrle()) {
2015         XBZRLE_cache_lock();
2016         XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE);
2017         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
2018                                   TARGET_PAGE_SIZE,
2019                                   TARGET_PAGE_SIZE);
2020         if (!XBZRLE.cache) {
2021             XBZRLE_cache_unlock();
2022             error_report("Error creating cache");
2023             g_free(*rsp);
2024             *rsp = NULL;
2025             return -1;
2026         }
2027         XBZRLE_cache_unlock();
2028 
2029         /* We prefer not to abort if there is no memory */
2030         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2031         if (!XBZRLE.encoded_buf) {
2032             error_report("Error allocating encoded_buf");
2033             g_free(*rsp);
2034             *rsp = NULL;
2035             return -1;
2036         }
2037 
2038         XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2039         if (!XBZRLE.current_buf) {
2040             error_report("Error allocating current_buf");
2041             g_free(XBZRLE.encoded_buf);
2042             XBZRLE.encoded_buf = NULL;
2043             g_free(*rsp);
2044             *rsp = NULL;
2045             return -1;
2046         }
2047     }
2048 
2049     /* For memory_global_dirty_log_start below.  */
2050     qemu_mutex_lock_iothread();
2051 
2052     qemu_mutex_lock_ramlist();
2053     rcu_read_lock();
2054     ram_state_reset(*rsp);
2055 
2056     /* Skip setting bitmap if there is no RAM */
2057     if (ram_bytes_total()) {
2058         RAMBlock *block;
2059 
2060         QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2061             unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
2062 
2063             block->bmap = bitmap_new(pages);
2064             bitmap_set(block->bmap, 0, pages);
2065             if (migrate_postcopy_ram()) {
2066                 block->unsentmap = bitmap_new(pages);
2067                 bitmap_set(block->unsentmap, 0, pages);
2068             }
2069         }
2070     }
2071 
2072     /*
2073      * Count the total number of pages used by ram blocks not including any
2074      * gaps due to alignment or unplugs.
2075      */
2076     (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2077 
2078     memory_global_dirty_log_start();
2079     migration_bitmap_sync(*rsp);
2080     qemu_mutex_unlock_ramlist();
2081     qemu_mutex_unlock_iothread();
2082     rcu_read_unlock();
2083 
2084     return 0;
2085 }
2086 
2087 /*
2088  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2089  * long-running RCU critical section.  When rcu-reclaims in the code
2090  * start to become numerous it will be necessary to reduce the
2091  * granularity of these critical sections.
2092  */
2093 
2094 /**
2095  * ram_save_setup: Setup RAM for migration
2096  *
2097  * Returns zero to indicate success and negative for error
2098  *
2099  * @f: QEMUFile where to send the data
2100  * @opaque: RAMState pointer
2101  */
2102 static int ram_save_setup(QEMUFile *f, void *opaque)
2103 {
2104     RAMState **rsp = opaque;
2105     RAMBlock *block;
2106 
2107     /* migration has already setup the bitmap, reuse it. */
2108     if (!migration_in_colo_state()) {
2109         if (ram_state_init(rsp) != 0) {
2110             return -1;
2111         }
2112     }
2113     (*rsp)->f = f;
2114 
2115     rcu_read_lock();
2116 
2117     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2118 
2119     RAMBLOCK_FOREACH(block) {
2120         qemu_put_byte(f, strlen(block->idstr));
2121         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2122         qemu_put_be64(f, block->used_length);
2123         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2124             qemu_put_be64(f, block->page_size);
2125         }
2126     }
2127 
2128     rcu_read_unlock();
2129     compress_threads_save_setup();
2130 
2131     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2132     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2133 
2134     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2135 
2136     return 0;
2137 }
2138 
2139 /**
2140  * ram_save_iterate: iterative stage for migration
2141  *
2142  * Returns zero to indicate success and negative for error
2143  *
2144  * @f: QEMUFile where to send the data
2145  * @opaque: RAMState pointer
2146  */
2147 static int ram_save_iterate(QEMUFile *f, void *opaque)
2148 {
2149     RAMState **temp = opaque;
2150     RAMState *rs = *temp;
2151     int ret;
2152     int i;
2153     int64_t t0;
2154     int done = 0;
2155 
2156     rcu_read_lock();
2157     if (ram_list.version != rs->last_version) {
2158         ram_state_reset(rs);
2159     }
2160 
2161     /* Read version before ram_list.blocks */
2162     smp_rmb();
2163 
2164     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2165 
2166     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2167     i = 0;
2168     while ((ret = qemu_file_rate_limit(f)) == 0) {
2169         int pages;
2170 
2171         pages = ram_find_and_save_block(rs, false);
2172         /* no more pages to sent */
2173         if (pages == 0) {
2174             done = 1;
2175             break;
2176         }
2177         rs->iterations++;
2178 
2179         /* we want to check in the 1st loop, just in case it was the 1st time
2180            and we had to sync the dirty bitmap.
2181            qemu_get_clock_ns() is a bit expensive, so we only check each some
2182            iterations
2183         */
2184         if ((i & 63) == 0) {
2185             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2186             if (t1 > MAX_WAIT) {
2187                 trace_ram_save_iterate_big_wait(t1, i);
2188                 break;
2189             }
2190         }
2191         i++;
2192     }
2193     flush_compressed_data(rs);
2194     rcu_read_unlock();
2195 
2196     /*
2197      * Must occur before EOS (or any QEMUFile operation)
2198      * because of RDMA protocol.
2199      */
2200     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2201 
2202     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2203     ram_counters.transferred += 8;
2204 
2205     ret = qemu_file_get_error(f);
2206     if (ret < 0) {
2207         return ret;
2208     }
2209 
2210     return done;
2211 }
2212 
2213 /**
2214  * ram_save_complete: function called to send the remaining amount of ram
2215  *
2216  * Returns zero to indicate success
2217  *
2218  * Called with iothread lock
2219  *
2220  * @f: QEMUFile where to send the data
2221  * @opaque: RAMState pointer
2222  */
2223 static int ram_save_complete(QEMUFile *f, void *opaque)
2224 {
2225     RAMState **temp = opaque;
2226     RAMState *rs = *temp;
2227 
2228     rcu_read_lock();
2229 
2230     if (!migration_in_postcopy()) {
2231         migration_bitmap_sync(rs);
2232     }
2233 
2234     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2235 
2236     /* try transferring iterative blocks of memory */
2237 
2238     /* flush all remaining blocks regardless of rate limiting */
2239     while (true) {
2240         int pages;
2241 
2242         pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2243         /* no more blocks to sent */
2244         if (pages == 0) {
2245             break;
2246         }
2247     }
2248 
2249     flush_compressed_data(rs);
2250     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2251 
2252     rcu_read_unlock();
2253 
2254     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2255 
2256     return 0;
2257 }
2258 
2259 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2260                              uint64_t *non_postcopiable_pending,
2261                              uint64_t *postcopiable_pending)
2262 {
2263     RAMState **temp = opaque;
2264     RAMState *rs = *temp;
2265     uint64_t remaining_size;
2266 
2267     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2268 
2269     if (!migration_in_postcopy() &&
2270         remaining_size < max_size) {
2271         qemu_mutex_lock_iothread();
2272         rcu_read_lock();
2273         migration_bitmap_sync(rs);
2274         rcu_read_unlock();
2275         qemu_mutex_unlock_iothread();
2276         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2277     }
2278 
2279     if (migrate_postcopy_ram()) {
2280         /* We can do postcopy, and all the data is postcopiable */
2281         *postcopiable_pending += remaining_size;
2282     } else {
2283         *non_postcopiable_pending += remaining_size;
2284     }
2285 }
2286 
2287 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2288 {
2289     unsigned int xh_len;
2290     int xh_flags;
2291     uint8_t *loaded_data;
2292 
2293     /* extract RLE header */
2294     xh_flags = qemu_get_byte(f);
2295     xh_len = qemu_get_be16(f);
2296 
2297     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2298         error_report("Failed to load XBZRLE page - wrong compression!");
2299         return -1;
2300     }
2301 
2302     if (xh_len > TARGET_PAGE_SIZE) {
2303         error_report("Failed to load XBZRLE page - len overflow!");
2304         return -1;
2305     }
2306     loaded_data = XBZRLE.decoded_buf;
2307     /* load data and decode */
2308     /* it can change loaded_data to point to an internal buffer */
2309     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2310 
2311     /* decode RLE */
2312     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2313                              TARGET_PAGE_SIZE) == -1) {
2314         error_report("Failed to load XBZRLE page - decode error!");
2315         return -1;
2316     }
2317 
2318     return 0;
2319 }
2320 
2321 /**
2322  * ram_block_from_stream: read a RAMBlock id from the migration stream
2323  *
2324  * Must be called from within a rcu critical section.
2325  *
2326  * Returns a pointer from within the RCU-protected ram_list.
2327  *
2328  * @f: QEMUFile where to read the data from
2329  * @flags: Page flags (mostly to see if it's a continuation of previous block)
2330  */
2331 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2332 {
2333     static RAMBlock *block = NULL;
2334     char id[256];
2335     uint8_t len;
2336 
2337     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2338         if (!block) {
2339             error_report("Ack, bad migration stream!");
2340             return NULL;
2341         }
2342         return block;
2343     }
2344 
2345     len = qemu_get_byte(f);
2346     qemu_get_buffer(f, (uint8_t *)id, len);
2347     id[len] = 0;
2348 
2349     block = qemu_ram_block_by_name(id);
2350     if (!block) {
2351         error_report("Can't find block %s", id);
2352         return NULL;
2353     }
2354 
2355     return block;
2356 }
2357 
2358 static inline void *host_from_ram_block_offset(RAMBlock *block,
2359                                                ram_addr_t offset)
2360 {
2361     if (!offset_in_ramblock(block, offset)) {
2362         return NULL;
2363     }
2364 
2365     return block->host + offset;
2366 }
2367 
2368 /**
2369  * ram_handle_compressed: handle the zero page case
2370  *
2371  * If a page (or a whole RDMA chunk) has been
2372  * determined to be zero, then zap it.
2373  *
2374  * @host: host address for the zero page
2375  * @ch: what the page is filled from.  We only support zero
2376  * @size: size of the zero page
2377  */
2378 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2379 {
2380     if (ch != 0 || !is_zero_range(host, size)) {
2381         memset(host, ch, size);
2382     }
2383 }
2384 
2385 static void *do_data_decompress(void *opaque)
2386 {
2387     DecompressParam *param = opaque;
2388     unsigned long pagesize;
2389     uint8_t *des;
2390     int len;
2391 
2392     qemu_mutex_lock(&param->mutex);
2393     while (!param->quit) {
2394         if (param->des) {
2395             des = param->des;
2396             len = param->len;
2397             param->des = 0;
2398             qemu_mutex_unlock(&param->mutex);
2399 
2400             pagesize = TARGET_PAGE_SIZE;
2401             /* uncompress() will return failed in some case, especially
2402              * when the page is dirted when doing the compression, it's
2403              * not a problem because the dirty page will be retransferred
2404              * and uncompress() won't break the data in other pages.
2405              */
2406             uncompress((Bytef *)des, &pagesize,
2407                        (const Bytef *)param->compbuf, len);
2408 
2409             qemu_mutex_lock(&decomp_done_lock);
2410             param->done = true;
2411             qemu_cond_signal(&decomp_done_cond);
2412             qemu_mutex_unlock(&decomp_done_lock);
2413 
2414             qemu_mutex_lock(&param->mutex);
2415         } else {
2416             qemu_cond_wait(&param->cond, &param->mutex);
2417         }
2418     }
2419     qemu_mutex_unlock(&param->mutex);
2420 
2421     return NULL;
2422 }
2423 
2424 static void wait_for_decompress_done(void)
2425 {
2426     int idx, thread_count;
2427 
2428     if (!migrate_use_compression()) {
2429         return;
2430     }
2431 
2432     thread_count = migrate_decompress_threads();
2433     qemu_mutex_lock(&decomp_done_lock);
2434     for (idx = 0; idx < thread_count; idx++) {
2435         while (!decomp_param[idx].done) {
2436             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2437         }
2438     }
2439     qemu_mutex_unlock(&decomp_done_lock);
2440 }
2441 
2442 static void compress_threads_load_setup(void)
2443 {
2444     int i, thread_count;
2445 
2446     if (!migrate_use_compression()) {
2447         return;
2448     }
2449     thread_count = migrate_decompress_threads();
2450     decompress_threads = g_new0(QemuThread, thread_count);
2451     decomp_param = g_new0(DecompressParam, thread_count);
2452     qemu_mutex_init(&decomp_done_lock);
2453     qemu_cond_init(&decomp_done_cond);
2454     for (i = 0; i < thread_count; i++) {
2455         qemu_mutex_init(&decomp_param[i].mutex);
2456         qemu_cond_init(&decomp_param[i].cond);
2457         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2458         decomp_param[i].done = true;
2459         decomp_param[i].quit = false;
2460         qemu_thread_create(decompress_threads + i, "decompress",
2461                            do_data_decompress, decomp_param + i,
2462                            QEMU_THREAD_JOINABLE);
2463     }
2464 }
2465 
2466 static void compress_threads_load_cleanup(void)
2467 {
2468     int i, thread_count;
2469 
2470     if (!migrate_use_compression()) {
2471         return;
2472     }
2473     thread_count = migrate_decompress_threads();
2474     for (i = 0; i < thread_count; i++) {
2475         qemu_mutex_lock(&decomp_param[i].mutex);
2476         decomp_param[i].quit = true;
2477         qemu_cond_signal(&decomp_param[i].cond);
2478         qemu_mutex_unlock(&decomp_param[i].mutex);
2479     }
2480     for (i = 0; i < thread_count; i++) {
2481         qemu_thread_join(decompress_threads + i);
2482         qemu_mutex_destroy(&decomp_param[i].mutex);
2483         qemu_cond_destroy(&decomp_param[i].cond);
2484         g_free(decomp_param[i].compbuf);
2485     }
2486     g_free(decompress_threads);
2487     g_free(decomp_param);
2488     decompress_threads = NULL;
2489     decomp_param = NULL;
2490 }
2491 
2492 static void decompress_data_with_multi_threads(QEMUFile *f,
2493                                                void *host, int len)
2494 {
2495     int idx, thread_count;
2496 
2497     thread_count = migrate_decompress_threads();
2498     qemu_mutex_lock(&decomp_done_lock);
2499     while (true) {
2500         for (idx = 0; idx < thread_count; idx++) {
2501             if (decomp_param[idx].done) {
2502                 decomp_param[idx].done = false;
2503                 qemu_mutex_lock(&decomp_param[idx].mutex);
2504                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2505                 decomp_param[idx].des = host;
2506                 decomp_param[idx].len = len;
2507                 qemu_cond_signal(&decomp_param[idx].cond);
2508                 qemu_mutex_unlock(&decomp_param[idx].mutex);
2509                 break;
2510             }
2511         }
2512         if (idx < thread_count) {
2513             break;
2514         } else {
2515             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2516         }
2517     }
2518     qemu_mutex_unlock(&decomp_done_lock);
2519 }
2520 
2521 /**
2522  * ram_load_setup: Setup RAM for migration incoming side
2523  *
2524  * Returns zero to indicate success and negative for error
2525  *
2526  * @f: QEMUFile where to receive the data
2527  * @opaque: RAMState pointer
2528  */
2529 static int ram_load_setup(QEMUFile *f, void *opaque)
2530 {
2531     xbzrle_load_setup();
2532     compress_threads_load_setup();
2533     return 0;
2534 }
2535 
2536 static int ram_load_cleanup(void *opaque)
2537 {
2538     xbzrle_load_cleanup();
2539     compress_threads_load_cleanup();
2540     return 0;
2541 }
2542 
2543 /**
2544  * ram_postcopy_incoming_init: allocate postcopy data structures
2545  *
2546  * Returns 0 for success and negative if there was one error
2547  *
2548  * @mis: current migration incoming state
2549  *
2550  * Allocate data structures etc needed by incoming migration with
2551  * postcopy-ram. postcopy-ram's similarly names
2552  * postcopy_ram_incoming_init does the work.
2553  */
2554 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2555 {
2556     unsigned long ram_pages = last_ram_page();
2557 
2558     return postcopy_ram_incoming_init(mis, ram_pages);
2559 }
2560 
2561 /**
2562  * ram_load_postcopy: load a page in postcopy case
2563  *
2564  * Returns 0 for success or -errno in case of error
2565  *
2566  * Called in postcopy mode by ram_load().
2567  * rcu_read_lock is taken prior to this being called.
2568  *
2569  * @f: QEMUFile where to send the data
2570  */
2571 static int ram_load_postcopy(QEMUFile *f)
2572 {
2573     int flags = 0, ret = 0;
2574     bool place_needed = false;
2575     bool matching_page_sizes = false;
2576     MigrationIncomingState *mis = migration_incoming_get_current();
2577     /* Temporary page that is later 'placed' */
2578     void *postcopy_host_page = postcopy_get_tmp_page(mis);
2579     void *last_host = NULL;
2580     bool all_zero = false;
2581 
2582     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2583         ram_addr_t addr;
2584         void *host = NULL;
2585         void *page_buffer = NULL;
2586         void *place_source = NULL;
2587         RAMBlock *block = NULL;
2588         uint8_t ch;
2589 
2590         addr = qemu_get_be64(f);
2591         flags = addr & ~TARGET_PAGE_MASK;
2592         addr &= TARGET_PAGE_MASK;
2593 
2594         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2595         place_needed = false;
2596         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
2597             block = ram_block_from_stream(f, flags);
2598 
2599             host = host_from_ram_block_offset(block, addr);
2600             if (!host) {
2601                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2602                 ret = -EINVAL;
2603                 break;
2604             }
2605             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2606             /*
2607              * Postcopy requires that we place whole host pages atomically;
2608              * these may be huge pages for RAMBlocks that are backed by
2609              * hugetlbfs.
2610              * To make it atomic, the data is read into a temporary page
2611              * that's moved into place later.
2612              * The migration protocol uses,  possibly smaller, target-pages
2613              * however the source ensures it always sends all the components
2614              * of a host page in order.
2615              */
2616             page_buffer = postcopy_host_page +
2617                           ((uintptr_t)host & (block->page_size - 1));
2618             /* If all TP are zero then we can optimise the place */
2619             if (!((uintptr_t)host & (block->page_size - 1))) {
2620                 all_zero = true;
2621             } else {
2622                 /* not the 1st TP within the HP */
2623                 if (host != (last_host + TARGET_PAGE_SIZE)) {
2624                     error_report("Non-sequential target page %p/%p",
2625                                   host, last_host);
2626                     ret = -EINVAL;
2627                     break;
2628                 }
2629             }
2630 
2631 
2632             /*
2633              * If it's the last part of a host page then we place the host
2634              * page
2635              */
2636             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2637                                      (block->page_size - 1)) == 0;
2638             place_source = postcopy_host_page;
2639         }
2640         last_host = host;
2641 
2642         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2643         case RAM_SAVE_FLAG_ZERO:
2644             ch = qemu_get_byte(f);
2645             memset(page_buffer, ch, TARGET_PAGE_SIZE);
2646             if (ch) {
2647                 all_zero = false;
2648             }
2649             break;
2650 
2651         case RAM_SAVE_FLAG_PAGE:
2652             all_zero = false;
2653             if (!place_needed || !matching_page_sizes) {
2654                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2655             } else {
2656                 /* Avoids the qemu_file copy during postcopy, which is
2657                  * going to do a copy later; can only do it when we
2658                  * do this read in one go (matching page sizes)
2659                  */
2660                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2661                                          TARGET_PAGE_SIZE);
2662             }
2663             break;
2664         case RAM_SAVE_FLAG_EOS:
2665             /* normal exit */
2666             break;
2667         default:
2668             error_report("Unknown combination of migration flags: %#x"
2669                          " (postcopy mode)", flags);
2670             ret = -EINVAL;
2671         }
2672 
2673         if (place_needed) {
2674             /* This gets called at the last target page in the host page */
2675             void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2676 
2677             if (all_zero) {
2678                 ret = postcopy_place_page_zero(mis, place_dest,
2679                                                block->page_size);
2680             } else {
2681                 ret = postcopy_place_page(mis, place_dest,
2682                                           place_source, block->page_size);
2683             }
2684         }
2685         if (!ret) {
2686             ret = qemu_file_get_error(f);
2687         }
2688     }
2689 
2690     return ret;
2691 }
2692 
2693 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2694 {
2695     int flags = 0, ret = 0, invalid_flags = 0;
2696     static uint64_t seq_iter;
2697     int len = 0;
2698     /*
2699      * If system is running in postcopy mode, page inserts to host memory must
2700      * be atomic
2701      */
2702     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2703     /* ADVISE is earlier, it shows the source has the postcopy capability on */
2704     bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2705 
2706     seq_iter++;
2707 
2708     if (version_id != 4) {
2709         ret = -EINVAL;
2710     }
2711 
2712     if (!migrate_use_compression()) {
2713         invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
2714     }
2715     /* This RCU critical section can be very long running.
2716      * When RCU reclaims in the code start to become numerous,
2717      * it will be necessary to reduce the granularity of this
2718      * critical section.
2719      */
2720     rcu_read_lock();
2721 
2722     if (postcopy_running) {
2723         ret = ram_load_postcopy(f);
2724     }
2725 
2726     while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2727         ram_addr_t addr, total_ram_bytes;
2728         void *host = NULL;
2729         uint8_t ch;
2730 
2731         addr = qemu_get_be64(f);
2732         flags = addr & ~TARGET_PAGE_MASK;
2733         addr &= TARGET_PAGE_MASK;
2734 
2735         if (flags & invalid_flags) {
2736             if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
2737                 error_report("Received an unexpected compressed page");
2738             }
2739 
2740             ret = -EINVAL;
2741             break;
2742         }
2743 
2744         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
2745                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2746             RAMBlock *block = ram_block_from_stream(f, flags);
2747 
2748             host = host_from_ram_block_offset(block, addr);
2749             if (!host) {
2750                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2751                 ret = -EINVAL;
2752                 break;
2753             }
2754             trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
2755         }
2756 
2757         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2758         case RAM_SAVE_FLAG_MEM_SIZE:
2759             /* Synchronize RAM block list */
2760             total_ram_bytes = addr;
2761             while (!ret && total_ram_bytes) {
2762                 RAMBlock *block;
2763                 char id[256];
2764                 ram_addr_t length;
2765 
2766                 len = qemu_get_byte(f);
2767                 qemu_get_buffer(f, (uint8_t *)id, len);
2768                 id[len] = 0;
2769                 length = qemu_get_be64(f);
2770 
2771                 block = qemu_ram_block_by_name(id);
2772                 if (block) {
2773                     if (length != block->used_length) {
2774                         Error *local_err = NULL;
2775 
2776                         ret = qemu_ram_resize(block, length,
2777                                               &local_err);
2778                         if (local_err) {
2779                             error_report_err(local_err);
2780                         }
2781                     }
2782                     /* For postcopy we need to check hugepage sizes match */
2783                     if (postcopy_advised &&
2784                         block->page_size != qemu_host_page_size) {
2785                         uint64_t remote_page_size = qemu_get_be64(f);
2786                         if (remote_page_size != block->page_size) {
2787                             error_report("Mismatched RAM page size %s "
2788                                          "(local) %zd != %" PRId64,
2789                                          id, block->page_size,
2790                                          remote_page_size);
2791                             ret = -EINVAL;
2792                         }
2793                     }
2794                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2795                                           block->idstr);
2796                 } else {
2797                     error_report("Unknown ramblock \"%s\", cannot "
2798                                  "accept migration", id);
2799                     ret = -EINVAL;
2800                 }
2801 
2802                 total_ram_bytes -= length;
2803             }
2804             break;
2805 
2806         case RAM_SAVE_FLAG_ZERO:
2807             ch = qemu_get_byte(f);
2808             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2809             break;
2810 
2811         case RAM_SAVE_FLAG_PAGE:
2812             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2813             break;
2814 
2815         case RAM_SAVE_FLAG_COMPRESS_PAGE:
2816             len = qemu_get_be32(f);
2817             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2818                 error_report("Invalid compressed data length: %d", len);
2819                 ret = -EINVAL;
2820                 break;
2821             }
2822             decompress_data_with_multi_threads(f, host, len);
2823             break;
2824 
2825         case RAM_SAVE_FLAG_XBZRLE:
2826             if (load_xbzrle(f, addr, host) < 0) {
2827                 error_report("Failed to decompress XBZRLE page at "
2828                              RAM_ADDR_FMT, addr);
2829                 ret = -EINVAL;
2830                 break;
2831             }
2832             break;
2833         case RAM_SAVE_FLAG_EOS:
2834             /* normal exit */
2835             break;
2836         default:
2837             if (flags & RAM_SAVE_FLAG_HOOK) {
2838                 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2839             } else {
2840                 error_report("Unknown combination of migration flags: %#x",
2841                              flags);
2842                 ret = -EINVAL;
2843             }
2844         }
2845         if (!ret) {
2846             ret = qemu_file_get_error(f);
2847         }
2848     }
2849 
2850     wait_for_decompress_done();
2851     rcu_read_unlock();
2852     trace_ram_load_complete(ret, seq_iter);
2853     return ret;
2854 }
2855 
2856 static bool ram_has_postcopy(void *opaque)
2857 {
2858     return migrate_postcopy_ram();
2859 }
2860 
2861 static SaveVMHandlers savevm_ram_handlers = {
2862     .save_setup = ram_save_setup,
2863     .save_live_iterate = ram_save_iterate,
2864     .save_live_complete_postcopy = ram_save_complete,
2865     .save_live_complete_precopy = ram_save_complete,
2866     .has_postcopy = ram_has_postcopy,
2867     .save_live_pending = ram_save_pending,
2868     .load_state = ram_load,
2869     .save_cleanup = ram_save_cleanup,
2870     .load_setup = ram_load_setup,
2871     .load_cleanup = ram_load_cleanup,
2872 };
2873 
2874 void ram_mig_init(void)
2875 {
2876     qemu_mutex_init(&XBZRLE.lock);
2877     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
2878 }
2879