xref: /openbmc/qemu/migration/ram.c (revision 25b1d45a)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2011-2015 Red Hat Inc
6  *
7  * Authors:
8  *  Juan Quintela <quintela@redhat.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qemu/cutils.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "qemu/main-loop.h"
36 #include "xbzrle.h"
37 #include "ram.h"
38 #include "migration.h"
39 #include "socket.h"
40 #include "migration/register.h"
41 #include "migration/misc.h"
42 #include "qemu-file.h"
43 #include "postcopy-ram.h"
44 #include "page_cache.h"
45 #include "qemu/error-report.h"
46 #include "qapi/error.h"
47 #include "qapi/qapi-events-migration.h"
48 #include "qapi/qmp/qerror.h"
49 #include "trace.h"
50 #include "exec/ram_addr.h"
51 #include "exec/target_page.h"
52 #include "qemu/rcu_queue.h"
53 #include "migration/colo.h"
54 #include "block.h"
55 #include "sysemu/sysemu.h"
56 #include "qemu/uuid.h"
57 #include "savevm.h"
58 
59 /***********************************************************/
60 /* ram save/restore */
61 
/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  To avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE it was simply renamed.
 */

/*
 * Flag bits for the RAM stream.  save_page_header() ORs
 * RAM_SAVE_FLAG_CONTINUE into the page offset it writes, so these bits
 * travel in the low bits of that 64-bit offset word.
 */
#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
77 
/*
 * Thin wrapper: hands the @size bytes at @p to buffer_is_zero() and
 * returns its verdict unchanged.
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    bool all_zero = buffer_is_zero(p, size);

    return all_zero;
}
82 
/* XBZRLE statistics; save_xbzrle_page() bumps cache_miss and overflow */
XBZRLECacheStats xbzrle_counters;

/*
 * Struct containing the XBZRLE cache and the static buffers used by
 * the XBZRLE compression.
 */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    /* taken via XBZRLE_cache_lock()/XBZRLE_cache_unlock() */
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;
100 
101 static void XBZRLE_cache_lock(void)
102 {
103     if (migrate_use_xbzrle())
104         qemu_mutex_lock(&XBZRLE.lock);
105 }
106 
107 static void XBZRLE_cache_unlock(void)
108 {
109     if (migrate_use_xbzrle())
110         qemu_mutex_unlock(&XBZRLE.lock);
111 }
112 
113 /**
114  * xbzrle_cache_resize: resize the xbzrle cache
115  *
116  * This function is called from qmp_migrate_set_cache_size in main
117  * thread, possibly while a migration is in progress.  A running
118  * migration may be using the cache and might finish during this call,
119  * hence changes to the cache are protected by XBZRLE.lock().
120  *
121  * Returns 0 for success or -1 for error
122  *
123  * @new_size: new cache size
124  * @errp: set *errp if the check failed, with reason
125  */
126 int xbzrle_cache_resize(int64_t new_size, Error **errp)
127 {
128     PageCache *new_cache;
129     int64_t ret = 0;
130 
131     /* Check for truncation */
132     if (new_size != (size_t)new_size) {
133         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
134                    "exceeding address space");
135         return -1;
136     }
137 
138     if (new_size == migrate_xbzrle_cache_size()) {
139         /* nothing to do */
140         return 0;
141     }
142 
143     XBZRLE_cache_lock();
144 
145     if (XBZRLE.cache != NULL) {
146         new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
147         if (!new_cache) {
148             ret = -1;
149             goto out;
150         }
151 
152         cache_fini(XBZRLE.cache);
153         XBZRLE.cache = new_cache;
154     }
155 out:
156     XBZRLE_cache_unlock();
157     return ret;
158 }
159 
160 static void ramblock_recv_map_init(void)
161 {
162     RAMBlock *rb;
163 
164     RAMBLOCK_FOREACH(rb) {
165         assert(!rb->receivedmap);
166         rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
167     }
168 }
169 
170 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
171 {
172     return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
173                     rb->receivedmap);
174 }
175 
176 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
177 {
178     return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
179 }
180 
181 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
182 {
183     set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
184 }
185 
186 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
187                                     size_t nr)
188 {
189     bitmap_set_atomic(rb->receivedmap,
190                       ramblock_recv_bitmap_offset(host_addr, rb),
191                       nr);
192 }
193 
194 #define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)
195 
196 /*
197  * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
198  *
199  * Returns >0 if success with sent bytes, or <0 if error.
200  */
201 int64_t ramblock_recv_bitmap_send(QEMUFile *file,
202                                   const char *block_name)
203 {
204     RAMBlock *block = qemu_ram_block_by_name(block_name);
205     unsigned long *le_bitmap, nbits;
206     uint64_t size;
207 
208     if (!block) {
209         error_report("%s: invalid block name: %s", __func__, block_name);
210         return -1;
211     }
212 
213     nbits = block->used_length >> TARGET_PAGE_BITS;
214 
215     /*
216      * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
217      * machines we may need 4 more bytes for padding (see below
218      * comment). So extend it a bit before hand.
219      */
220     le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
221 
222     /*
223      * Always use little endian when sending the bitmap. This is
224      * required that when source and destination VMs are not using the
225      * same endianess. (Note: big endian won't work.)
226      */
227     bitmap_to_le(le_bitmap, block->receivedmap, nbits);
228 
229     /* Size of the bitmap, in bytes */
230     size = nbits / 8;
231 
232     /*
233      * size is always aligned to 8 bytes for 64bit machines, but it
234      * may not be true for 32bit machines. We need this padding to
235      * make sure the migration can survive even between 32bit and
236      * 64bit machines.
237      */
238     size = ROUND_UP(size, 8);
239 
240     qemu_put_be64(file, size);
241     qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
242     /*
243      * Mark as an end, in case the middle part is screwed up due to
244      * some "misterious" reason.
245      */
246     qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
247     qemu_fflush(file);
248 
249     free(le_bitmap);
250 
251     if (qemu_file_get_error(file)) {
252         return qemu_file_get_error(file);
253     }
254 
255     return size + sizeof(size);
256 }
257 
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    /* block the request refers to */
    RAMBlock *rb;
    /* NOTE(review): presumably start offset within @rb and length in
     * bytes -- confirm against the code that queues requests */
    hwaddr    offset;
    hwaddr    len;

    /* linkage in RAMState.src_page_requests */
    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
269 
/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

/* Current RAM migration state; ram_bytes_remaining() treats NULL as "none" */
static RAMState *ram_state;
312 
313 uint64_t ram_bytes_remaining(void)
314 {
315     return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
316                        0;
317 }
318 
/* RAM migration statistics; dirty_sync_count is used as the XBZRLE cache age */
MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
331 
/* Per-thread state of one compression worker (see do_data_compress()) */
struct CompressParam {
    /* set by the worker, under comp_done_lock, after compressing a page */
    bool done;
    /* set under @mutex to ask the worker to exit */
    bool quit;
    /* dummy QEMUFile the worker compresses into; also used by the cleanup
     * code as the "this slot was fully initialised" indicator */
    QEMUFile *file;
    /* protects @quit, @block and @offset; paired with @cond */
    QemuMutex mutex;
    QemuCond cond;
    /* pending job: block and offset of the page to compress; @block is
     * reset to NULL by the worker when it picks the job up */
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;
346 
/*
 * Per-thread state of one decompression worker.
 * NOTE(review): the code using these fields is outside this part of the
 * file; the layout mirrors CompressParam (done/quit flags, mutex + cond).
 */
struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    /* presumably destination address for the decompressed page -- confirm */
    void *des;
    /* presumably the compressed input and its length -- confirm */
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;
358 
/* Arrays with one entry per compression thread; allocated in
 * compress_threads_save_setup(), freed in compress_threads_save_cleanup() */
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

/* Decompression-side counterparts of the state above */
static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

/* Defined later in the file; called by the compression worker threads */
static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf);
378 
379 static void *do_data_compress(void *opaque)
380 {
381     CompressParam *param = opaque;
382     RAMBlock *block;
383     ram_addr_t offset;
384 
385     qemu_mutex_lock(&param->mutex);
386     while (!param->quit) {
387         if (param->block) {
388             block = param->block;
389             offset = param->offset;
390             param->block = NULL;
391             qemu_mutex_unlock(&param->mutex);
392 
393             do_compress_ram_page(param->file, &param->stream, block, offset,
394                                  param->originbuf);
395 
396             qemu_mutex_lock(&comp_done_lock);
397             param->done = true;
398             qemu_cond_signal(&comp_done_cond);
399             qemu_mutex_unlock(&comp_done_lock);
400 
401             qemu_mutex_lock(&param->mutex);
402         } else {
403             qemu_cond_wait(&param->cond, &param->mutex);
404         }
405     }
406     qemu_mutex_unlock(&param->mutex);
407 
408     return NULL;
409 }
410 
/*
 * Ask every initialised compression thread to quit.
 *
 * compress_threads_save_setup() fills the slots in order and sets
 * comp_param[i].file only after a slot's buffer and stream are ready;
 * the slot's mutex and cond are initialised right after that.  On a
 * partial setup failure the remaining slots are still zeroed, so stop
 * at the first slot without a file instead of locking a mutex that was
 * never initialised.
 */
static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    if (!comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        if (!comp_param[idx].file) {
            break;
        }
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

/*
 * Stop and join the compression threads and release everything that
 * compress_threads_save_setup() allocated.
 *
 * Does nothing when compression is disabled or when there is no state
 * to tear down (setup never ran, or cleanup already ran).
 */
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as a indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }
        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
457 
/*
 * Allocate the per-thread compression state and start one worker
 * thread per configured compression thread.
 *
 * Returns 0 on success (or when compression is disabled), -1 if a page
 * buffer or a zlib stream could not be set up; in that case everything
 * created so far is torn down via compress_threads_save_cleanup().
 */
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        CompressParam *cp = &comp_param[i];

        cp->originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!cp->originbuf) {
            goto exit;
        }

        if (deflateInit(&cp->stream, migrate_compress_level()) != Z_OK) {
            g_free(cp->originbuf);
            goto exit;
        }

        /* cp->file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        cp->file = qemu_fopen_ops(NULL, &empty_ops);
        cp->done = true;
        cp->quit = false;
        qemu_mutex_init(&cp->mutex);
        qemu_cond_init(&cp->cond);
        qemu_thread_create(&compress_threads[i], "compress",
                           do_data_compress, cp,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
500 
/* Multiple fd's */

/* Values carried (big endian) in the first packet of every channel */
#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

/*
 * Initial packet written by multifd_send_initial_packet() and checked
 * by multifd_recv_initial_packet().  Packed because the struct is
 * written to and read from the channel as raw bytes.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    /* channel number */
    uint8_t id;
} __attribute__((packed)) MultiFDInit_t;
512 
/* Per-channel state of a multifd send thread */
typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
}  MultiFDSendParams;
532 
/* Per-channel state of a multifd receive thread */
typedef struct {
    /* these fields are not changed once the thread is created */
    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* communication channel */
    QIOChannel *c;
    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* is this channel thread running */
    bool running;
    /* should this thread finish */
    bool quit;
} MultiFDRecvParams;
552 
553 static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
554 {
555     MultiFDInit_t msg;
556     int ret;
557 
558     msg.magic = cpu_to_be32(MULTIFD_MAGIC);
559     msg.version = cpu_to_be32(MULTIFD_VERSION);
560     msg.id = p->id;
561     memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
562 
563     ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
564     if (ret != 0) {
565         return -1;
566     }
567     return 0;
568 }
569 
570 static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
571 {
572     MultiFDInit_t msg;
573     int ret;
574 
575     ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
576     if (ret != 0) {
577         return -1;
578     }
579 
580     be32_to_cpus(&msg.magic);
581     be32_to_cpus(&msg.version);
582 
583     if (msg.magic != MULTIFD_MAGIC) {
584         error_setg(errp, "multifd: received packet magic %x "
585                    "expected %x", msg.magic, MULTIFD_MAGIC);
586         return -1;
587     }
588 
589     if (msg.version != MULTIFD_VERSION) {
590         error_setg(errp, "multifd: received packet version %d "
591                    "expected %d", msg.version, MULTIFD_VERSION);
592         return -1;
593     }
594 
595     if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
596         char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
597         char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
598 
599         error_setg(errp, "multifd: received uuid '%s' and expected "
600                    "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
601         g_free(uuid);
602         g_free(msg_uuid);
603         return -1;
604     }
605 
606     if (msg.id > migrate_multifd_channels()) {
607         error_setg(errp, "multifd: received channel version %d "
608                    "expected %d", msg.version, MULTIFD_VERSION);
609         return -1;
610     }
611 
612     return msg.id;
613 }
614 
/* Send-side multifd state; allocated in multifd_save_setup() and freed
 * (and reset to NULL) in multifd_save_cleanup() */
struct {
    /* one entry per channel */
    MultiFDSendParams *params;
    /* number of created threads */
    int count;
} *multifd_send_state;
620 
621 static void multifd_send_terminate_threads(Error *err)
622 {
623     int i;
624 
625     if (err) {
626         MigrationState *s = migrate_get_current();
627         migrate_set_error(s, err);
628         if (s->state == MIGRATION_STATUS_SETUP ||
629             s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
630             s->state == MIGRATION_STATUS_DEVICE ||
631             s->state == MIGRATION_STATUS_ACTIVE) {
632             migrate_set_state(&s->state, s->state,
633                               MIGRATION_STATUS_FAILED);
634         }
635     }
636 
637     for (i = 0; i < migrate_multifd_channels(); i++) {
638         MultiFDSendParams *p = &multifd_send_state->params[i];
639 
640         qemu_mutex_lock(&p->mutex);
641         p->quit = true;
642         qemu_sem_post(&p->sem);
643         qemu_mutex_unlock(&p->mutex);
644     }
645 }
646 
647 int multifd_save_cleanup(Error **errp)
648 {
649     int i;
650     int ret = 0;
651 
652     if (!migrate_use_multifd()) {
653         return 0;
654     }
655     multifd_send_terminate_threads(NULL);
656     for (i = 0; i < migrate_multifd_channels(); i++) {
657         MultiFDSendParams *p = &multifd_send_state->params[i];
658 
659         if (p->running) {
660             qemu_thread_join(&p->thread);
661         }
662         socket_send_channel_destroy(p->c);
663         p->c = NULL;
664         qemu_mutex_destroy(&p->mutex);
665         qemu_sem_destroy(&p->sem);
666         g_free(p->name);
667         p->name = NULL;
668     }
669     g_free(multifd_send_state->params);
670     multifd_send_state->params = NULL;
671     g_free(multifd_send_state);
672     multifd_send_state = NULL;
673     return ret;
674 }
675 
676 static void *multifd_send_thread(void *opaque)
677 {
678     MultiFDSendParams *p = opaque;
679     Error *local_err = NULL;
680 
681     if (multifd_send_initial_packet(p, &local_err) < 0) {
682         goto out;
683     }
684 
685     while (true) {
686         qemu_mutex_lock(&p->mutex);
687         if (p->quit) {
688             qemu_mutex_unlock(&p->mutex);
689             break;
690         }
691         qemu_mutex_unlock(&p->mutex);
692         qemu_sem_wait(&p->sem);
693     }
694 
695 out:
696     if (local_err) {
697         multifd_send_terminate_threads(local_err);
698     }
699 
700     qemu_mutex_lock(&p->mutex);
701     p->running = false;
702     qemu_mutex_unlock(&p->mutex);
703 
704     return NULL;
705 }
706 
707 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
708 {
709     MultiFDSendParams *p = opaque;
710     QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
711     Error *local_err = NULL;
712 
713     if (qio_task_propagate_error(task, &local_err)) {
714         if (multifd_save_cleanup(&local_err) != 0) {
715             migrate_set_error(migrate_get_current(), local_err);
716         }
717     } else {
718         p->c = QIO_CHANNEL(sioc);
719         qio_channel_set_delay(p->c, false);
720         p->running = true;
721         qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
722                            QEMU_THREAD_JOINABLE);
723 
724         atomic_inc(&multifd_send_state->count);
725     }
726 }
727 
728 int multifd_save_setup(void)
729 {
730     int thread_count;
731     uint8_t i;
732 
733     if (!migrate_use_multifd()) {
734         return 0;
735     }
736     thread_count = migrate_multifd_channels();
737     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
738     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
739     atomic_set(&multifd_send_state->count, 0);
740     for (i = 0; i < thread_count; i++) {
741         MultiFDSendParams *p = &multifd_send_state->params[i];
742 
743         qemu_mutex_init(&p->mutex);
744         qemu_sem_init(&p->sem, 0);
745         p->quit = false;
746         p->id = i;
747         p->name = g_strdup_printf("multifdsend_%d", i);
748         socket_send_channel_create(multifd_new_send_channel_async, p);
749     }
750     return 0;
751 }
752 
/* Receive-side multifd state; allocated in multifd_load_setup() and
 * freed (and reset to NULL) in multifd_load_cleanup() */
struct {
    /* one entry per channel */
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
} *multifd_recv_state;
758 
759 static void multifd_recv_terminate_threads(Error *err)
760 {
761     int i;
762 
763     if (err) {
764         MigrationState *s = migrate_get_current();
765         migrate_set_error(s, err);
766         if (s->state == MIGRATION_STATUS_SETUP ||
767             s->state == MIGRATION_STATUS_ACTIVE) {
768             migrate_set_state(&s->state, s->state,
769                               MIGRATION_STATUS_FAILED);
770         }
771     }
772 
773     for (i = 0; i < migrate_multifd_channels(); i++) {
774         MultiFDRecvParams *p = &multifd_recv_state->params[i];
775 
776         qemu_mutex_lock(&p->mutex);
777         p->quit = true;
778         qemu_sem_post(&p->sem);
779         qemu_mutex_unlock(&p->mutex);
780     }
781 }
782 
783 int multifd_load_cleanup(Error **errp)
784 {
785     int i;
786     int ret = 0;
787 
788     if (!migrate_use_multifd()) {
789         return 0;
790     }
791     multifd_recv_terminate_threads(NULL);
792     for (i = 0; i < migrate_multifd_channels(); i++) {
793         MultiFDRecvParams *p = &multifd_recv_state->params[i];
794 
795         if (p->running) {
796             qemu_thread_join(&p->thread);
797         }
798         object_unref(OBJECT(p->c));
799         p->c = NULL;
800         qemu_mutex_destroy(&p->mutex);
801         qemu_sem_destroy(&p->sem);
802         g_free(p->name);
803         p->name = NULL;
804     }
805     g_free(multifd_recv_state->params);
806     multifd_recv_state->params = NULL;
807     g_free(multifd_recv_state);
808     multifd_recv_state = NULL;
809 
810     return ret;
811 }
812 
813 static void *multifd_recv_thread(void *opaque)
814 {
815     MultiFDRecvParams *p = opaque;
816 
817     while (true) {
818         qemu_mutex_lock(&p->mutex);
819         if (p->quit) {
820             qemu_mutex_unlock(&p->mutex);
821             break;
822         }
823         qemu_mutex_unlock(&p->mutex);
824         qemu_sem_wait(&p->sem);
825     }
826 
827     qemu_mutex_lock(&p->mutex);
828     p->running = false;
829     qemu_mutex_unlock(&p->mutex);
830 
831     return NULL;
832 }
833 
834 int multifd_load_setup(void)
835 {
836     int thread_count;
837     uint8_t i;
838 
839     if (!migrate_use_multifd()) {
840         return 0;
841     }
842     thread_count = migrate_multifd_channels();
843     multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
844     multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
845     atomic_set(&multifd_recv_state->count, 0);
846     for (i = 0; i < thread_count; i++) {
847         MultiFDRecvParams *p = &multifd_recv_state->params[i];
848 
849         qemu_mutex_init(&p->mutex);
850         qemu_sem_init(&p->sem, 0);
851         p->quit = false;
852         p->id = i;
853         p->name = g_strdup_printf("multifdrecv_%d", i);
854     }
855     return 0;
856 }
857 
858 bool multifd_recv_all_channels_created(void)
859 {
860     int thread_count = migrate_multifd_channels();
861 
862     if (!migrate_use_multifd()) {
863         return true;
864     }
865 
866     return thread_count == atomic_read(&multifd_recv_state->count);
867 }
868 
869 void multifd_recv_new_channel(QIOChannel *ioc)
870 {
871     MultiFDRecvParams *p;
872     Error *local_err = NULL;
873     int id;
874 
875     id = multifd_recv_initial_packet(ioc, &local_err);
876     if (id < 0) {
877         multifd_recv_terminate_threads(local_err);
878         return;
879     }
880 
881     p = &multifd_recv_state->params[id];
882     if (p->c != NULL) {
883         error_setg(&local_err, "multifd: received id '%d' already setup'",
884                    id);
885         multifd_recv_terminate_threads(local_err);
886         return;
887     }
888     p->c = ioc;
889     object_ref(OBJECT(ioc));
890 
891     p->running = true;
892     qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
893                        QEMU_THREAD_JOINABLE);
894     atomic_inc(&multifd_recv_state->count);
895     if (multifd_recv_state->count == migrate_multifd_channels()) {
896         migration_incoming_process();
897     }
898 }
899 
900 /**
901  * save_page_header: write page header to wire
902  *
903  * If this is the 1st block, it also writes the block identification
904  *
905  * Returns the number of bytes written
906  *
907  * @f: QEMUFile where to send the data
908  * @block: block that contains the page we want to send
909  * @offset: offset inside the block for the page
910  *          in the lower bits, it contains flags
911  */
912 static size_t save_page_header(RAMState *rs, QEMUFile *f,  RAMBlock *block,
913                                ram_addr_t offset)
914 {
915     size_t size, len;
916 
917     if (block == rs->last_sent_block) {
918         offset |= RAM_SAVE_FLAG_CONTINUE;
919     }
920     qemu_put_be64(f, offset);
921     size = 8;
922 
923     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
924         len = strlen(block->idstr);
925         qemu_put_byte(f, len);
926         qemu_put_buffer(f, (uint8_t *)block->idstr, len);
927         size += 1 + len;
928         rs->last_sent_block = block;
929     }
930     return size;
931 }
932 
933 /**
934  * mig_throttle_guest_down: throotle down the guest
935  *
936  * Reduce amount of guest cpu execution to hopefully slow down memory
937  * writes. If guest dirty memory rate is reduced below the rate at
938  * which we can transfer pages to the destination then we should be
939  * able to complete migration. Some workloads dirty memory way too
940  * fast and will not effectively converge, even with auto-converge.
941  */
942 static void mig_throttle_guest_down(void)
943 {
944     MigrationState *s = migrate_get_current();
945     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
946     uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
947 
948     /* We have not started throttling yet. Let's start it. */
949     if (!cpu_throttle_active()) {
950         cpu_throttle_set(pct_initial);
951     } else {
952         /* Throttling already on, just increase the rate */
953         cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
954     }
955 }
956 
957 /**
958  * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
959  *
960  * @rs: current RAM state
961  * @current_addr: address for the zero page
962  *
963  * Update the xbzrle cache to reflect a page that's been sent as all 0.
964  * The important thing is that a stale (not-yet-0'd) page be replaced
965  * by the new data.
966  * As a bonus, if the page wasn't in the cache it gets added so that
967  * when a small write is made into the 0'd page it gets XBZRLE sent.
968  */
969 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
970 {
971     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
972         return;
973     }
974 
975     /* We don't care if this fails to allocate a new cache page
976      * as long as it updated an old one */
977     cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
978                  ram_counters.dirty_sync_count);
979 }
980 
981 #define ENCODING_FLAG_XBZRLE 0x1
982 
983 /**
984  * save_xbzrle_page: compress and send current page
985  *
986  * Returns: 1 means that we wrote the page
987  *          0 means that page is identical to the one already sent
988  *          -1 means that xbzrle would be longer than normal
989  *
990  * @rs: current RAM state
991  * @current_data: pointer to the address of the page contents
992  * @current_addr: addr of the page
993  * @block: block that contains the page we want to send
994  * @offset: offset inside the block for the page
995  * @last_stage: if we are at the completion stage
996  */
997 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
998                             ram_addr_t current_addr, RAMBlock *block,
999                             ram_addr_t offset, bool last_stage)
1000 {
1001     int encoded_len = 0, bytes_xbzrle;
1002     uint8_t *prev_cached_page;
1003 
1004     if (!cache_is_cached(XBZRLE.cache, current_addr,
1005                          ram_counters.dirty_sync_count)) {
1006         xbzrle_counters.cache_miss++;
1007         if (!last_stage) {
1008             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
1009                              ram_counters.dirty_sync_count) == -1) {
1010                 return -1;
1011             } else {
1012                 /* update *current_data when the page has been
1013                    inserted into cache */
1014                 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1015             }
1016         }
1017         return -1;
1018     }
1019 
1020     prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1021 
1022     /* save current buffer into memory */
1023     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1024 
1025     /* XBZRLE encoding (if there is no overflow) */
1026     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1027                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1028                                        TARGET_PAGE_SIZE);
1029     if (encoded_len == 0) {
1030         trace_save_xbzrle_page_skipping();
1031         return 0;
1032     } else if (encoded_len == -1) {
1033         trace_save_xbzrle_page_overflow();
1034         xbzrle_counters.overflow++;
1035         /* update data in the cache */
1036         if (!last_stage) {
1037             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1038             *current_data = prev_cached_page;
1039         }
1040         return -1;
1041     }
1042 
1043     /* we need to update the data in the cache, in order to get the same data */
1044     if (!last_stage) {
1045         memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1046     }
1047 
1048     /* Send XBZRLE based compressed page */
1049     bytes_xbzrle = save_page_header(rs, rs->f, block,
1050                                     offset | RAM_SAVE_FLAG_XBZRLE);
1051     qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1052     qemu_put_be16(rs->f, encoded_len);
1053     qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
1054     bytes_xbzrle += encoded_len + 1 + 2;
1055     xbzrle_counters.pages++;
1056     xbzrle_counters.bytes += bytes_xbzrle;
1057     ram_counters.transferred += bytes_xbzrle;
1058 
1059     return 1;
1060 }
1061 
1062 /**
1063  * migration_bitmap_find_dirty: find the next dirty page from start
1064  *
1065  * Called with rcu_read_lock() to protect migration_bitmap
1066  *
1067  * Returns the byte offset within memory region of the start of a dirty page
1068  *
1069  * @rs: current RAM state
1070  * @rb: RAMBlock where to search for dirty pages
1071  * @start: page where we start the search
1072  */
1073 static inline
1074 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
1075                                           unsigned long start)
1076 {
1077     unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1078     unsigned long *bitmap = rb->bmap;
1079     unsigned long next;
1080 
1081     if (rs->ram_bulk_stage && start > 0) {
1082         next = start + 1;
1083     } else {
1084         next = find_next_bit(bitmap, size, start);
1085     }
1086 
1087     return next;
1088 }
1089 
1090 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
1091                                                 RAMBlock *rb,
1092                                                 unsigned long page)
1093 {
1094     bool ret;
1095 
1096     ret = test_and_clear_bit(page, rb->bmap);
1097 
1098     if (ret) {
1099         rs->migration_dirty_pages--;
1100     }
1101     return ret;
1102 }
1103 
1104 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
1105                                         ram_addr_t start, ram_addr_t length)
1106 {
1107     rs->migration_dirty_pages +=
1108         cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
1109                                               &rs->num_dirty_pages_period);
1110 }
1111 
1112 /**
1113  * ram_pagesize_summary: calculate all the pagesizes of a VM
1114  *
1115  * Returns a summary bitmap of the page sizes of all RAMBlocks
1116  *
1117  * For VMs with just normal pages this is equivalent to the host page
1118  * size. If it's got some huge pages then it's the OR of all the
1119  * different page sizes.
1120  */
1121 uint64_t ram_pagesize_summary(void)
1122 {
1123     RAMBlock *block;
1124     uint64_t summary = 0;
1125 
1126     RAMBLOCK_FOREACH(block) {
1127         summary |= block->page_size;
1128     }
1129 
1130     return summary;
1131 }
1132 
1133 static void migration_bitmap_sync(RAMState *rs)
1134 {
1135     RAMBlock *block;
1136     int64_t end_time;
1137     uint64_t bytes_xfer_now;
1138 
1139     ram_counters.dirty_sync_count++;
1140 
1141     if (!rs->time_last_bitmap_sync) {
1142         rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1143     }
1144 
1145     trace_migration_bitmap_sync_start();
1146     memory_global_dirty_log_sync();
1147 
1148     qemu_mutex_lock(&rs->bitmap_mutex);
1149     rcu_read_lock();
1150     RAMBLOCK_FOREACH(block) {
1151         migration_bitmap_sync_range(rs, block, 0, block->used_length);
1152     }
1153     rcu_read_unlock();
1154     qemu_mutex_unlock(&rs->bitmap_mutex);
1155 
1156     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1157 
1158     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1159 
1160     /* more than 1 second = 1000 millisecons */
1161     if (end_time > rs->time_last_bitmap_sync + 1000) {
1162         /* calculate period counters */
1163         ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1164             / (end_time - rs->time_last_bitmap_sync);
1165         bytes_xfer_now = ram_counters.transferred;
1166 
1167         /* During block migration the auto-converge logic incorrectly detects
1168          * that ram migration makes no progress. Avoid this by disabling the
1169          * throttling logic during the bulk phase of block migration. */
1170         if (migrate_auto_converge() && !blk_mig_bulk_active()) {
1171             /* The following detection logic can be refined later. For now:
1172                Check to see if the dirtied bytes is 50% more than the approx.
1173                amount of bytes that just got transferred since the last time we
1174                were in this routine. If that happens twice, start or increase
1175                throttling */
1176 
1177             if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
1178                    (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
1179                 (++rs->dirty_rate_high_cnt >= 2)) {
1180                     trace_migration_throttle();
1181                     rs->dirty_rate_high_cnt = 0;
1182                     mig_throttle_guest_down();
1183             }
1184         }
1185 
1186         if (migrate_use_xbzrle()) {
1187             if (rs->iterations_prev != rs->iterations) {
1188                 xbzrle_counters.cache_miss_rate =
1189                    (double)(xbzrle_counters.cache_miss -
1190                             rs->xbzrle_cache_miss_prev) /
1191                    (rs->iterations - rs->iterations_prev);
1192             }
1193             rs->iterations_prev = rs->iterations;
1194             rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1195         }
1196 
1197         /* reset period counters */
1198         rs->time_last_bitmap_sync = end_time;
1199         rs->num_dirty_pages_period = 0;
1200         rs->bytes_xfer_prev = bytes_xfer_now;
1201     }
1202     if (migrate_use_events()) {
1203         qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
1204     }
1205 }
1206 
1207 /**
1208  * save_zero_page: send the zero page to the stream
1209  *
1210  * Returns the number of pages written.
1211  *
1212  * @rs: current RAM state
1213  * @block: block that contains the page we want to send
1214  * @offset: offset inside the block for the page
1215  */
1216 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1217 {
1218     uint8_t *p = block->host + offset;
1219     int pages = -1;
1220 
1221     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1222         ram_counters.duplicate++;
1223         ram_counters.transferred +=
1224             save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
1225         qemu_put_byte(rs->f, 0);
1226         ram_counters.transferred += 1;
1227         pages = 1;
1228     }
1229 
1230     return pages;
1231 }
1232 
1233 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
1234 {
1235     if (!migrate_release_ram() || !migration_in_postcopy()) {
1236         return;
1237     }
1238 
1239     ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
1240 }
1241 
1242 /*
1243  * @pages: the number of pages written by the control path,
1244  *        < 0 - error
1245  *        > 0 - number of pages written
1246  *
1247  * Return true if the pages has been saved, otherwise false is returned.
1248  */
1249 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1250                               int *pages)
1251 {
1252     uint64_t bytes_xmit = 0;
1253     int ret;
1254 
1255     *pages = -1;
1256     ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1257                                 &bytes_xmit);
1258     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1259         return false;
1260     }
1261 
1262     if (bytes_xmit) {
1263         ram_counters.transferred += bytes_xmit;
1264         *pages = 1;
1265     }
1266 
1267     if (ret == RAM_SAVE_CONTROL_DELAYED) {
1268         return true;
1269     }
1270 
1271     if (bytes_xmit > 0) {
1272         ram_counters.normal++;
1273     } else if (bytes_xmit == 0) {
1274         ram_counters.duplicate++;
1275     }
1276 
1277     return true;
1278 }
1279 
1280 /*
1281  * directly send the page to the stream
1282  *
1283  * Returns the number of pages written.
1284  *
1285  * @rs: current RAM state
1286  * @block: block that contains the page we want to send
1287  * @offset: offset inside the block for the page
1288  * @buf: the page to be sent
1289  * @async: send to page asyncly
1290  */
1291 static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1292                             uint8_t *buf, bool async)
1293 {
1294     ram_counters.transferred += save_page_header(rs, rs->f, block,
1295                                                  offset | RAM_SAVE_FLAG_PAGE);
1296     if (async) {
1297         qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1298                               migrate_release_ram() &
1299                               migration_in_postcopy());
1300     } else {
1301         qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1302     }
1303     ram_counters.transferred += TARGET_PAGE_SIZE;
1304     ram_counters.normal++;
1305     return 1;
1306 }
1307 
1308 /**
1309  * ram_save_page: send the given page to the stream
1310  *
1311  * Returns the number of pages written.
1312  *          < 0 - error
1313  *          >=0 - Number of pages written - this might legally be 0
1314  *                if xbzrle noticed the page was the same.
1315  *
1316  * @rs: current RAM state
1317  * @block: block that contains the page we want to send
1318  * @offset: offset inside the block for the page
1319  * @last_stage: if we are at the completion stage
1320  */
1321 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1322 {
1323     int pages = -1;
1324     uint8_t *p;
1325     bool send_async = true;
1326     RAMBlock *block = pss->block;
1327     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1328     ram_addr_t current_addr = block->offset + offset;
1329 
1330     p = block->host + offset;
1331     trace_ram_save_page(block->idstr, (uint64_t)offset, p);
1332 
1333     XBZRLE_cache_lock();
1334     if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1335         migrate_use_xbzrle()) {
1336         pages = save_xbzrle_page(rs, &p, current_addr, block,
1337                                  offset, last_stage);
1338         if (!last_stage) {
1339             /* Can't send this cached data async, since the cache page
1340              * might get updated before it gets to the wire
1341              */
1342             send_async = false;
1343         }
1344     }
1345 
1346     /* XBZRLE overflow or normal page */
1347     if (pages == -1) {
1348         pages = save_normal_page(rs, block, offset, p, send_async);
1349     }
1350 
1351     XBZRLE_cache_unlock();
1352 
1353     return pages;
1354 }
1355 
1356 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1357                                 ram_addr_t offset, uint8_t *source_buf)
1358 {
1359     RAMState *rs = ram_state;
1360     int bytes_sent, blen;
1361     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
1362 
1363     bytes_sent = save_page_header(rs, f, block, offset |
1364                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
1365 
1366     /*
1367      * copy it to a internal buffer to avoid it being modified by VM
1368      * so that we can catch up the error during compression and
1369      * decompression
1370      */
1371     memcpy(source_buf, p, TARGET_PAGE_SIZE);
1372     blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1373     if (blen < 0) {
1374         bytes_sent = 0;
1375         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1376         error_report("compressed data failed!");
1377     } else {
1378         bytes_sent += blen;
1379         ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1380     }
1381 
1382     return bytes_sent;
1383 }
1384 
1385 static void flush_compressed_data(RAMState *rs)
1386 {
1387     int idx, len, thread_count;
1388 
1389     if (!migrate_use_compression()) {
1390         return;
1391     }
1392     thread_count = migrate_compress_threads();
1393 
1394     qemu_mutex_lock(&comp_done_lock);
1395     for (idx = 0; idx < thread_count; idx++) {
1396         while (!comp_param[idx].done) {
1397             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1398         }
1399     }
1400     qemu_mutex_unlock(&comp_done_lock);
1401 
1402     for (idx = 0; idx < thread_count; idx++) {
1403         qemu_mutex_lock(&comp_param[idx].mutex);
1404         if (!comp_param[idx].quit) {
1405             len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1406             ram_counters.transferred += len;
1407         }
1408         qemu_mutex_unlock(&comp_param[idx].mutex);
1409     }
1410 }
1411 
1412 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1413                                        ram_addr_t offset)
1414 {
1415     param->block = block;
1416     param->offset = offset;
1417 }
1418 
1419 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1420                                            ram_addr_t offset)
1421 {
1422     int idx, thread_count, bytes_xmit = -1, pages = -1;
1423 
1424     thread_count = migrate_compress_threads();
1425     qemu_mutex_lock(&comp_done_lock);
1426     while (true) {
1427         for (idx = 0; idx < thread_count; idx++) {
1428             if (comp_param[idx].done) {
1429                 comp_param[idx].done = false;
1430                 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1431                 qemu_mutex_lock(&comp_param[idx].mutex);
1432                 set_compress_params(&comp_param[idx], block, offset);
1433                 qemu_cond_signal(&comp_param[idx].cond);
1434                 qemu_mutex_unlock(&comp_param[idx].mutex);
1435                 pages = 1;
1436                 ram_counters.normal++;
1437                 ram_counters.transferred += bytes_xmit;
1438                 break;
1439             }
1440         }
1441         if (pages > 0) {
1442             break;
1443         } else {
1444             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1445         }
1446     }
1447     qemu_mutex_unlock(&comp_done_lock);
1448 
1449     return pages;
1450 }
1451 
1452 /**
1453  * find_dirty_block: find the next dirty page and update any state
1454  * associated with the search process.
1455  *
1456  * Returns if a page is found
1457  *
1458  * @rs: current RAM state
1459  * @pss: data about the state of the current dirty page scan
1460  * @again: set to false if the search has scanned the whole of RAM
1461  */
1462 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1463 {
1464     pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1465     if (pss->complete_round && pss->block == rs->last_seen_block &&
1466         pss->page >= rs->last_page) {
1467         /*
1468          * We've been once around the RAM and haven't found anything.
1469          * Give up.
1470          */
1471         *again = false;
1472         return false;
1473     }
1474     if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1475         /* Didn't find anything in this RAM Block */
1476         pss->page = 0;
1477         pss->block = QLIST_NEXT_RCU(pss->block, next);
1478         if (!pss->block) {
1479             /* Hit the end of the list */
1480             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1481             /* Flag that we've looped */
1482             pss->complete_round = true;
1483             rs->ram_bulk_stage = false;
1484             if (migrate_use_xbzrle()) {
1485                 /* If xbzrle is on, stop using the data compression at this
1486                  * point. In theory, xbzrle can do better than compression.
1487                  */
1488                 flush_compressed_data(rs);
1489             }
1490         }
1491         /* Didn't find anything this time, but try again on the new block */
1492         *again = true;
1493         return false;
1494     } else {
1495         /* Can go around again, but... */
1496         *again = true;
1497         /* We've found something so probably don't need to */
1498         return true;
1499     }
1500 }
1501 
1502 /**
1503  * unqueue_page: gets a page of the queue
1504  *
1505  * Helper for 'get_queued_page' - gets a page off the queue
1506  *
1507  * Returns the block of the page (or NULL if none available)
1508  *
1509  * @rs: current RAM state
1510  * @offset: used to return the offset within the RAMBlock
1511  */
1512 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1513 {
1514     RAMBlock *block = NULL;
1515 
1516     qemu_mutex_lock(&rs->src_page_req_mutex);
1517     if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1518         struct RAMSrcPageRequest *entry =
1519                                 QSIMPLEQ_FIRST(&rs->src_page_requests);
1520         block = entry->rb;
1521         *offset = entry->offset;
1522 
1523         if (entry->len > TARGET_PAGE_SIZE) {
1524             entry->len -= TARGET_PAGE_SIZE;
1525             entry->offset += TARGET_PAGE_SIZE;
1526         } else {
1527             memory_region_unref(block->mr);
1528             QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1529             g_free(entry);
1530         }
1531     }
1532     qemu_mutex_unlock(&rs->src_page_req_mutex);
1533 
1534     return block;
1535 }
1536 
1537 /**
1538  * get_queued_page: unqueue a page from the postocpy requests
1539  *
1540  * Skips pages that are already sent (!dirty)
1541  *
1542  * Returns if a queued page is found
1543  *
1544  * @rs: current RAM state
1545  * @pss: data about the state of the current dirty page scan
1546  */
1547 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1548 {
1549     RAMBlock  *block;
1550     ram_addr_t offset;
1551     bool dirty;
1552 
1553     do {
1554         block = unqueue_page(rs, &offset);
1555         /*
1556          * We're sending this page, and since it's postcopy nothing else
1557          * will dirty it, and we must make sure it doesn't get sent again
1558          * even if this queue request was received after the background
1559          * search already sent it.
1560          */
1561         if (block) {
1562             unsigned long page;
1563 
1564             page = offset >> TARGET_PAGE_BITS;
1565             dirty = test_bit(page, block->bmap);
1566             if (!dirty) {
1567                 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1568                        page, test_bit(page, block->unsentmap));
1569             } else {
1570                 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1571             }
1572         }
1573 
1574     } while (block && !dirty);
1575 
1576     if (block) {
1577         /*
1578          * As soon as we start servicing pages out of order, then we have
1579          * to kill the bulk stage, since the bulk stage assumes
1580          * in (migration_bitmap_find_and_reset_dirty) that every page is
1581          * dirty, that's no longer true.
1582          */
1583         rs->ram_bulk_stage = false;
1584 
1585         /*
1586          * We want the background search to continue from the queued page
1587          * since the guest is likely to want other pages near to the page
1588          * it just requested.
1589          */
1590         pss->block = block;
1591         pss->page = offset >> TARGET_PAGE_BITS;
1592     }
1593 
1594     return !!block;
1595 }
1596 
1597 /**
1598  * migration_page_queue_free: drop any remaining pages in the ram
1599  * request queue
1600  *
1601  * It should be empty at the end anyway, but in error cases there may
1602  * be some left.  in case that there is any page left, we drop it.
1603  *
1604  */
1605 static void migration_page_queue_free(RAMState *rs)
1606 {
1607     struct RAMSrcPageRequest *mspr, *next_mspr;
1608     /* This queue generally should be empty - but in the case of a failed
1609      * migration might have some droppings in.
1610      */
1611     rcu_read_lock();
1612     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1613         memory_region_unref(mspr->rb->mr);
1614         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1615         g_free(mspr);
1616     }
1617     rcu_read_unlock();
1618 }
1619 
1620 /**
1621  * ram_save_queue_pages: queue the page for transmission
1622  *
1623  * A request from postcopy destination for example.
1624  *
1625  * Returns zero on success or negative on error
1626  *
1627  * @rbname: Name of the RAMBLock of the request. NULL means the
1628  *          same that last one.
1629  * @start: starting address from the start of the RAMBlock
1630  * @len: length (in bytes) to send
1631  */
1632 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1633 {
1634     RAMBlock *ramblock;
1635     RAMState *rs = ram_state;
1636 
1637     ram_counters.postcopy_requests++;
1638     rcu_read_lock();
1639     if (!rbname) {
1640         /* Reuse last RAMBlock */
1641         ramblock = rs->last_req_rb;
1642 
1643         if (!ramblock) {
1644             /*
1645              * Shouldn't happen, we can't reuse the last RAMBlock if
1646              * it's the 1st request.
1647              */
1648             error_report("ram_save_queue_pages no previous block");
1649             goto err;
1650         }
1651     } else {
1652         ramblock = qemu_ram_block_by_name(rbname);
1653 
1654         if (!ramblock) {
1655             /* We shouldn't be asked for a non-existent RAMBlock */
1656             error_report("ram_save_queue_pages no block '%s'", rbname);
1657             goto err;
1658         }
1659         rs->last_req_rb = ramblock;
1660     }
1661     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1662     if (start+len > ramblock->used_length) {
1663         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1664                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1665                      __func__, start, len, ramblock->used_length);
1666         goto err;
1667     }
1668 
1669     struct RAMSrcPageRequest *new_entry =
1670         g_malloc0(sizeof(struct RAMSrcPageRequest));
1671     new_entry->rb = ramblock;
1672     new_entry->offset = start;
1673     new_entry->len = len;
1674 
1675     memory_region_ref(ramblock->mr);
1676     qemu_mutex_lock(&rs->src_page_req_mutex);
1677     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1678     qemu_mutex_unlock(&rs->src_page_req_mutex);
1679     rcu_read_unlock();
1680 
1681     return 0;
1682 
1683 err:
1684     rcu_read_unlock();
1685     return -1;
1686 }
1687 
1688 static bool save_page_use_compression(RAMState *rs)
1689 {
1690     if (!migrate_use_compression()) {
1691         return false;
1692     }
1693 
1694     /*
1695      * If xbzrle is on, stop using the data compression after first
1696      * round of migration even if compression is enabled. In theory,
1697      * xbzrle can do better than compression.
1698      */
1699     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1700         return true;
1701     }
1702 
1703     return false;
1704 }
1705 
1706 /**
1707  * ram_save_target_page: save one target page
1708  *
1709  * Returns the number of pages written
1710  *
1711  * @rs: current RAM state
1712  * @pss: data about the page we want to send
1713  * @last_stage: if we are at the completion stage
1714  */
1715 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1716                                 bool last_stage)
1717 {
1718     RAMBlock *block = pss->block;
1719     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1720     int res;
1721 
1722     if (control_save_page(rs, block, offset, &res)) {
1723         return res;
1724     }
1725 
1726     /*
1727      * When starting the process of a new block, the first page of
1728      * the block should be sent out before other pages in the same
1729      * block, and all the pages in last block should have been sent
1730      * out, keeping this order is important, because the 'cont' flag
1731      * is used to avoid resending the block name.
1732      */
1733     if (block != rs->last_sent_block && save_page_use_compression(rs)) {
1734             flush_compressed_data(rs);
1735     }
1736 
1737     res = save_zero_page(rs, block, offset);
1738     if (res > 0) {
1739         /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1740          * page would be stale
1741          */
1742         if (!save_page_use_compression(rs)) {
1743             XBZRLE_cache_lock();
1744             xbzrle_cache_zero_page(rs, block->offset + offset);
1745             XBZRLE_cache_unlock();
1746         }
1747         ram_release_pages(block->idstr, offset, res);
1748         return res;
1749     }
1750 
1751     /*
1752      * Make sure the first page is sent out before other pages.
1753      *
1754      * we post it as normal page as compression will take much
1755      * CPU resource.
1756      */
1757     if (block == rs->last_sent_block && save_page_use_compression(rs)) {
1758         return compress_page_with_multi_thread(rs, block, offset);
1759     }
1760 
1761     return ram_save_page(rs, pss, last_stage);
1762 }
1763 
1764 /**
1765  * ram_save_host_page: save a whole host page
1766  *
1767  * Starting at *offset send pages up to the end of the current host
1768  * page. It's valid for the initial offset to point into the middle of
1769  * a host page in which case the remainder of the hostpage is sent.
1770  * Only dirty target pages are sent. Note that the host page size may
1771  * be a huge page for this block.
1772  * The saving stops at the boundary of the used_length of the block
1773  * if the RAMBlock isn't a multiple of the host page size.
1774  *
1775  * Returns the number of pages written or negative on error
1776  *
1777  * @rs: current RAM state
1778  * @ms: current migration state
1779  * @pss: data about the page we want to send
1780  * @last_stage: if we are at the completion stage
1781  */
1782 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1783                               bool last_stage)
1784 {
1785     int tmppages, pages = 0;
1786     size_t pagesize_bits =
1787         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1788 
1789     do {
1790         /* Check the pages is dirty and if it is send it */
1791         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1792             pss->page++;
1793             continue;
1794         }
1795 
1796         tmppages = ram_save_target_page(rs, pss, last_stage);
1797         if (tmppages < 0) {
1798             return tmppages;
1799         }
1800 
1801         pages += tmppages;
1802         if (pss->block->unsentmap) {
1803             clear_bit(pss->page, pss->block->unsentmap);
1804         }
1805 
1806         pss->page++;
1807     } while ((pss->page & (pagesize_bits - 1)) &&
1808              offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
1809 
1810     /* The offset we leave with is the last one we looked at */
1811     pss->page--;
1812     return pages;
1813 }
1814 
1815 /**
1816  * ram_find_and_save_block: finds a dirty page and sends it to f
1817  *
1818  * Called within an RCU critical section.
1819  *
1820  * Returns the number of pages written where zero means no dirty pages
1821  *
1822  * @rs: current RAM state
1823  * @last_stage: if we are at the completion stage
1824  *
1825  * On systems where host-page-size > target-page-size it will send all the
1826  * pages in a host page that are dirty.
1827  */
1828 
1829 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1830 {
1831     PageSearchStatus pss;
1832     int pages = 0;
1833     bool again, found;
1834 
1835     /* No dirty page as there is zero RAM */
1836     if (!ram_bytes_total()) {
1837         return pages;
1838     }
1839 
1840     pss.block = rs->last_seen_block;
1841     pss.page = rs->last_page;
1842     pss.complete_round = false;
1843 
1844     if (!pss.block) {
1845         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1846     }
1847 
1848     do {
1849         again = true;
1850         found = get_queued_page(rs, &pss);
1851 
1852         if (!found) {
1853             /* priority queue empty, so just search for something dirty */
1854             found = find_dirty_block(rs, &pss, &again);
1855         }
1856 
1857         if (found) {
1858             pages = ram_save_host_page(rs, &pss, last_stage);
1859         }
1860     } while (!pages && again);
1861 
1862     rs->last_seen_block = pss.block;
1863     rs->last_page = pss.page;
1864 
1865     return pages;
1866 }
1867 
1868 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1869 {
1870     uint64_t pages = size / TARGET_PAGE_SIZE;
1871 
1872     if (zero) {
1873         ram_counters.duplicate += pages;
1874     } else {
1875         ram_counters.normal += pages;
1876         ram_counters.transferred += size;
1877         qemu_update_position(f, size);
1878     }
1879 }
1880 
1881 uint64_t ram_bytes_total(void)
1882 {
1883     RAMBlock *block;
1884     uint64_t total = 0;
1885 
1886     rcu_read_lock();
1887     RAMBLOCK_FOREACH(block) {
1888         total += block->used_length;
1889     }
1890     rcu_read_unlock();
1891     return total;
1892 }
1893 
1894 static void xbzrle_load_setup(void)
1895 {
1896     XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
1897 }
1898 
1899 static void xbzrle_load_cleanup(void)
1900 {
1901     g_free(XBZRLE.decoded_buf);
1902     XBZRLE.decoded_buf = NULL;
1903 }
1904 
1905 static void ram_state_cleanup(RAMState **rsp)
1906 {
1907     if (*rsp) {
1908         migration_page_queue_free(*rsp);
1909         qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1910         qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1911         g_free(*rsp);
1912         *rsp = NULL;
1913     }
1914 }
1915 
1916 static void xbzrle_cleanup(void)
1917 {
1918     XBZRLE_cache_lock();
1919     if (XBZRLE.cache) {
1920         cache_fini(XBZRLE.cache);
1921         g_free(XBZRLE.encoded_buf);
1922         g_free(XBZRLE.current_buf);
1923         g_free(XBZRLE.zero_target_page);
1924         XBZRLE.cache = NULL;
1925         XBZRLE.encoded_buf = NULL;
1926         XBZRLE.current_buf = NULL;
1927         XBZRLE.zero_target_page = NULL;
1928     }
1929     XBZRLE_cache_unlock();
1930 }
1931 
1932 static void ram_save_cleanup(void *opaque)
1933 {
1934     RAMState **rsp = opaque;
1935     RAMBlock *block;
1936 
1937     /* caller have hold iothread lock or is in a bh, so there is
1938      * no writing race against this migration_bitmap
1939      */
1940     memory_global_dirty_log_stop();
1941 
1942     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1943         g_free(block->bmap);
1944         block->bmap = NULL;
1945         g_free(block->unsentmap);
1946         block->unsentmap = NULL;
1947     }
1948 
1949     xbzrle_cleanup();
1950     compress_threads_save_cleanup();
1951     ram_state_cleanup(rsp);
1952 }
1953 
1954 static void ram_state_reset(RAMState *rs)
1955 {
1956     rs->last_seen_block = NULL;
1957     rs->last_sent_block = NULL;
1958     rs->last_page = 0;
1959     rs->last_version = ram_list.version;
1960     rs->ram_bulk_stage = true;
1961 }
1962 
/*
 * Longest time, in milliseconds, that ram_save_iterate() keeps sending
 * pages before it breaks out of its loop.
 */
#define MAX_WAIT 50 /* ms, half buffered_file limit */
1964 
/*
 * Dump a bitmap to stderr for debugging.
 *
 * Each output line covers up to 128 bits, printed as '1' (set) or '.'
 * (clear) and prefixed with the index of the first bit on that line.
 *
 * 'todump' is the bitmap to print; a NULL bitmap is silently ignored.
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * 'pages' is the number of bits in the bitmap.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    /* There is no fallback bitmap to dump, so a NULL one is a no-op */
    if (!todump) {
        return;
    }

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        /* Only print lines holding at least one unexpected bit */
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
1998 
1999 /* **** functions for postcopy ***** */
2000 
2001 void ram_postcopy_migrated_memory_release(MigrationState *ms)
2002 {
2003     struct RAMBlock *block;
2004 
2005     RAMBLOCK_FOREACH(block) {
2006         unsigned long *bitmap = block->bmap;
2007         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2008         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2009 
2010         while (run_start < range) {
2011             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
2012             ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
2013                               (run_end - run_start) << TARGET_PAGE_BITS);
2014             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2015         }
2016     }
2017 }
2018 
2019 /**
2020  * postcopy_send_discard_bm_ram: discard a RAMBlock
2021  *
2022  * Returns zero on success
2023  *
2024  * Callback from postcopy_each_ram_send_discard for each RAMBlock
2025  * Note: At this point the 'unsentmap' is the processed bitmap combined
2026  *       with the dirtymap; so a '1' means it's either dirty or unsent.
2027  *
2028  * @ms: current migration state
2029  * @pds: state for postcopy
2030  * @start: RAMBlock starting page
2031  * @length: RAMBlock size
2032  */
2033 static int postcopy_send_discard_bm_ram(MigrationState *ms,
2034                                         PostcopyDiscardState *pds,
2035                                         RAMBlock *block)
2036 {
2037     unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2038     unsigned long current;
2039     unsigned long *unsentmap = block->unsentmap;
2040 
2041     for (current = 0; current < end; ) {
2042         unsigned long one = find_next_bit(unsentmap, end, current);
2043 
2044         if (one <= end) {
2045             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2046             unsigned long discard_length;
2047 
2048             if (zero >= end) {
2049                 discard_length = end - one;
2050             } else {
2051                 discard_length = zero - one;
2052             }
2053             if (discard_length) {
2054                 postcopy_discard_send_range(ms, pds, one, discard_length);
2055             }
2056             current = one + discard_length;
2057         } else {
2058             current = one;
2059         }
2060     }
2061 
2062     return 0;
2063 }
2064 
2065 /**
2066  * postcopy_each_ram_send_discard: discard all RAMBlocks
2067  *
2068  * Returns 0 for success or negative for error
2069  *
2070  * Utility for the outgoing postcopy code.
2071  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
2072  *   passing it bitmap indexes and name.
2073  * (qemu_ram_foreach_block ends up passing unscaled lengths
2074  *  which would mean postcopy code would have to deal with target page)
2075  *
2076  * @ms: current migration state
2077  */
2078 static int postcopy_each_ram_send_discard(MigrationState *ms)
2079 {
2080     struct RAMBlock *block;
2081     int ret;
2082 
2083     RAMBLOCK_FOREACH(block) {
2084         PostcopyDiscardState *pds =
2085             postcopy_discard_send_init(ms, block->idstr);
2086 
2087         /*
2088          * Postcopy sends chunks of bitmap over the wire, but it
2089          * just needs indexes at this point, avoids it having
2090          * target page specific code.
2091          */
2092         ret = postcopy_send_discard_bm_ram(ms, pds, block);
2093         postcopy_discard_send_finish(ms, pds);
2094         if (ret) {
2095             return ret;
2096         }
2097     }
2098 
2099     return 0;
2100 }
2101 
2102 /**
2103  * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
2104  *
2105  * Helper for postcopy_chunk_hostpages; it's called twice to
2106  * canonicalize the two bitmaps, that are similar, but one is
2107  * inverted.
2108  *
2109  * Postcopy requires that all target pages in a hostpage are dirty or
2110  * clean, not a mix.  This function canonicalizes the bitmaps.
2111  *
2112  * @ms: current migration state
2113  * @unsent_pass: if true we need to canonicalize partially unsent host pages
2114  *               otherwise we need to canonicalize partially dirty host pages
2115  * @block: block that contains the page we want to canonicalize
2116  * @pds: state for postcopy
2117  */
2118 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2119                                           RAMBlock *block,
2120                                           PostcopyDiscardState *pds)
2121 {
2122     RAMState *rs = ram_state;
2123     unsigned long *bitmap = block->bmap;
2124     unsigned long *unsentmap = block->unsentmap;
2125     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
2126     unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2127     unsigned long run_start;
2128 
2129     if (block->page_size == TARGET_PAGE_SIZE) {
2130         /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2131         return;
2132     }
2133 
2134     if (unsent_pass) {
2135         /* Find a sent page */
2136         run_start = find_next_zero_bit(unsentmap, pages, 0);
2137     } else {
2138         /* Find a dirty page */
2139         run_start = find_next_bit(bitmap, pages, 0);
2140     }
2141 
2142     while (run_start < pages) {
2143         bool do_fixup = false;
2144         unsigned long fixup_start_addr;
2145         unsigned long host_offset;
2146 
2147         /*
2148          * If the start of this run of pages is in the middle of a host
2149          * page, then we need to fixup this host page.
2150          */
2151         host_offset = run_start % host_ratio;
2152         if (host_offset) {
2153             do_fixup = true;
2154             run_start -= host_offset;
2155             fixup_start_addr = run_start;
2156             /* For the next pass */
2157             run_start = run_start + host_ratio;
2158         } else {
2159             /* Find the end of this run */
2160             unsigned long run_end;
2161             if (unsent_pass) {
2162                 run_end = find_next_bit(unsentmap, pages, run_start + 1);
2163             } else {
2164                 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
2165             }
2166             /*
2167              * If the end isn't at the start of a host page, then the
2168              * run doesn't finish at the end of a host page
2169              * and we need to discard.
2170              */
2171             host_offset = run_end % host_ratio;
2172             if (host_offset) {
2173                 do_fixup = true;
2174                 fixup_start_addr = run_end - host_offset;
2175                 /*
2176                  * This host page has gone, the next loop iteration starts
2177                  * from after the fixup
2178                  */
2179                 run_start = fixup_start_addr + host_ratio;
2180             } else {
2181                 /*
2182                  * No discards on this iteration, next loop starts from
2183                  * next sent/dirty page
2184                  */
2185                 run_start = run_end + 1;
2186             }
2187         }
2188 
2189         if (do_fixup) {
2190             unsigned long page;
2191 
2192             /* Tell the destination to discard this page */
2193             if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2194                 /* For the unsent_pass we:
2195                  *     discard partially sent pages
2196                  * For the !unsent_pass (dirty) we:
2197                  *     discard partially dirty pages that were sent
2198                  *     (any partially sent pages were already discarded
2199                  *     by the previous unsent_pass)
2200                  */
2201                 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2202                                             host_ratio);
2203             }
2204 
2205             /* Clean up the bitmap */
2206             for (page = fixup_start_addr;
2207                  page < fixup_start_addr + host_ratio; page++) {
2208                 /* All pages in this host page are now not sent */
2209                 set_bit(page, unsentmap);
2210 
2211                 /*
2212                  * Remark them as dirty, updating the count for any pages
2213                  * that weren't previously dirty.
2214                  */
2215                 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2216             }
2217         }
2218 
2219         if (unsent_pass) {
2220             /* Find the next sent page for the next iteration */
2221             run_start = find_next_zero_bit(unsentmap, pages, run_start);
2222         } else {
2223             /* Find the next dirty page for the next iteration */
2224             run_start = find_next_bit(bitmap, pages, run_start);
2225         }
2226     }
2227 }
2228 
2229 /**
2230  * postcopy_chuck_hostpages: discrad any partially sent host page
2231  *
2232  * Utility for the outgoing postcopy code.
2233  *
2234  * Discard any partially sent host-page size chunks, mark any partially
2235  * dirty host-page size chunks as all dirty.  In this case the host-page
2236  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
2237  *
2238  * Returns zero on success
2239  *
2240  * @ms: current migration state
2241  * @block: block we want to work with
2242  */
2243 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2244 {
2245     PostcopyDiscardState *pds =
2246         postcopy_discard_send_init(ms, block->idstr);
2247 
2248     /* First pass: Discard all partially sent host pages */
2249     postcopy_chunk_hostpages_pass(ms, true, block, pds);
2250     /*
2251      * Second pass: Ensure that all partially dirty host pages are made
2252      * fully dirty.
2253      */
2254     postcopy_chunk_hostpages_pass(ms, false, block, pds);
2255 
2256     postcopy_discard_send_finish(ms, pds);
2257     return 0;
2258 }
2259 
2260 /**
2261  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2262  *
2263  * Returns zero on success
2264  *
2265  * Transmit the set of pages to be discarded after precopy to the target
2266  * these are pages that:
2267  *     a) Have been previously transmitted but are now dirty again
2268  *     b) Pages that have never been transmitted, this ensures that
2269  *        any pages on the destination that have been mapped by background
2270  *        tasks get discarded (transparent huge pages is the specific concern)
2271  * Hopefully this is pretty sparse
2272  *
2273  * @ms: current migration state
2274  */
2275 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2276 {
2277     RAMState *rs = ram_state;
2278     RAMBlock *block;
2279     int ret;
2280 
2281     rcu_read_lock();
2282 
2283     /* This should be our last sync, the src is now paused */
2284     migration_bitmap_sync(rs);
2285 
2286     /* Easiest way to make sure we don't resume in the middle of a host-page */
2287     rs->last_seen_block = NULL;
2288     rs->last_sent_block = NULL;
2289     rs->last_page = 0;
2290 
2291     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2292         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2293         unsigned long *bitmap = block->bmap;
2294         unsigned long *unsentmap = block->unsentmap;
2295 
2296         if (!unsentmap) {
2297             /* We don't have a safe way to resize the sentmap, so
2298              * if the bitmap was resized it will be NULL at this
2299              * point.
2300              */
2301             error_report("migration ram resized during precopy phase");
2302             rcu_read_unlock();
2303             return -EINVAL;
2304         }
2305         /* Deal with TPS != HPS and huge pages */
2306         ret = postcopy_chunk_hostpages(ms, block);
2307         if (ret) {
2308             rcu_read_unlock();
2309             return ret;
2310         }
2311 
2312         /*
2313          * Update the unsentmap to be unsentmap = unsentmap | dirty
2314          */
2315         bitmap_or(unsentmap, unsentmap, bitmap, pages);
2316 #ifdef DEBUG_POSTCOPY
2317         ram_debug_dump_bitmap(unsentmap, true, pages);
2318 #endif
2319     }
2320     trace_ram_postcopy_send_discard_bitmap();
2321 
2322     ret = postcopy_each_ram_send_discard(ms);
2323     rcu_read_unlock();
2324 
2325     return ret;
2326 }
2327 
2328 /**
2329  * ram_discard_range: discard dirtied pages at the beginning of postcopy
2330  *
2331  * Returns zero on success
2332  *
2333  * @rbname: name of the RAMBlock of the request. NULL means the
2334  *          same that last one.
2335  * @start: RAMBlock starting page
2336  * @length: RAMBlock size
2337  */
2338 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2339 {
2340     int ret = -1;
2341 
2342     trace_ram_discard_range(rbname, start, length);
2343 
2344     rcu_read_lock();
2345     RAMBlock *rb = qemu_ram_block_by_name(rbname);
2346 
2347     if (!rb) {
2348         error_report("ram_discard_range: Failed to find block '%s'", rbname);
2349         goto err;
2350     }
2351 
2352     bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2353                  length >> qemu_target_page_bits());
2354     ret = ram_block_discard_range(rb, start, length);
2355 
2356 err:
2357     rcu_read_unlock();
2358 
2359     return ret;
2360 }
2361 
2362 /*
2363  * For every allocation, we will try not to crash the VM if the
2364  * allocation failed.
2365  */
2366 static int xbzrle_init(void)
2367 {
2368     Error *local_err = NULL;
2369 
2370     if (!migrate_use_xbzrle()) {
2371         return 0;
2372     }
2373 
2374     XBZRLE_cache_lock();
2375 
2376     XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2377     if (!XBZRLE.zero_target_page) {
2378         error_report("%s: Error allocating zero page", __func__);
2379         goto err_out;
2380     }
2381 
2382     XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2383                               TARGET_PAGE_SIZE, &local_err);
2384     if (!XBZRLE.cache) {
2385         error_report_err(local_err);
2386         goto free_zero_page;
2387     }
2388 
2389     XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2390     if (!XBZRLE.encoded_buf) {
2391         error_report("%s: Error allocating encoded_buf", __func__);
2392         goto free_cache;
2393     }
2394 
2395     XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2396     if (!XBZRLE.current_buf) {
2397         error_report("%s: Error allocating current_buf", __func__);
2398         goto free_encoded_buf;
2399     }
2400 
2401     /* We are all good */
2402     XBZRLE_cache_unlock();
2403     return 0;
2404 
2405 free_encoded_buf:
2406     g_free(XBZRLE.encoded_buf);
2407     XBZRLE.encoded_buf = NULL;
2408 free_cache:
2409     cache_fini(XBZRLE.cache);
2410     XBZRLE.cache = NULL;
2411 free_zero_page:
2412     g_free(XBZRLE.zero_target_page);
2413     XBZRLE.zero_target_page = NULL;
2414 err_out:
2415     XBZRLE_cache_unlock();
2416     return -ENOMEM;
2417 }
2418 
2419 static int ram_state_init(RAMState **rsp)
2420 {
2421     *rsp = g_try_new0(RAMState, 1);
2422 
2423     if (!*rsp) {
2424         error_report("%s: Init ramstate fail", __func__);
2425         return -1;
2426     }
2427 
2428     qemu_mutex_init(&(*rsp)->bitmap_mutex);
2429     qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2430     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2431 
2432     /*
2433      * Count the total number of pages used by ram blocks not including any
2434      * gaps due to alignment or unplugs.
2435      */
2436     (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2437 
2438     ram_state_reset(*rsp);
2439 
2440     return 0;
2441 }
2442 
2443 static void ram_list_init_bitmaps(void)
2444 {
2445     RAMBlock *block;
2446     unsigned long pages;
2447 
2448     /* Skip setting bitmap if there is no RAM */
2449     if (ram_bytes_total()) {
2450         QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2451             pages = block->max_length >> TARGET_PAGE_BITS;
2452             block->bmap = bitmap_new(pages);
2453             bitmap_set(block->bmap, 0, pages);
2454             if (migrate_postcopy_ram()) {
2455                 block->unsentmap = bitmap_new(pages);
2456                 bitmap_set(block->unsentmap, 0, pages);
2457             }
2458         }
2459     }
2460 }
2461 
2462 static void ram_init_bitmaps(RAMState *rs)
2463 {
2464     /* For memory_global_dirty_log_start below.  */
2465     qemu_mutex_lock_iothread();
2466     qemu_mutex_lock_ramlist();
2467     rcu_read_lock();
2468 
2469     ram_list_init_bitmaps();
2470     memory_global_dirty_log_start();
2471     migration_bitmap_sync(rs);
2472 
2473     rcu_read_unlock();
2474     qemu_mutex_unlock_ramlist();
2475     qemu_mutex_unlock_iothread();
2476 }
2477 
2478 static int ram_init_all(RAMState **rsp)
2479 {
2480     if (ram_state_init(rsp)) {
2481         return -1;
2482     }
2483 
2484     if (xbzrle_init()) {
2485         ram_state_cleanup(rsp);
2486         return -1;
2487     }
2488 
2489     ram_init_bitmaps(*rsp);
2490 
2491     return 0;
2492 }
2493 
2494 static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2495 {
2496     RAMBlock *block;
2497     uint64_t pages = 0;
2498 
2499     /*
2500      * Postcopy is not using xbzrle/compression, so no need for that.
2501      * Also, since source are already halted, we don't need to care
2502      * about dirty page logging as well.
2503      */
2504 
2505     RAMBLOCK_FOREACH(block) {
2506         pages += bitmap_count_one(block->bmap,
2507                                   block->used_length >> TARGET_PAGE_BITS);
2508     }
2509 
2510     /* This may not be aligned with current bitmaps. Recalculate. */
2511     rs->migration_dirty_pages = pages;
2512 
2513     rs->last_seen_block = NULL;
2514     rs->last_sent_block = NULL;
2515     rs->last_page = 0;
2516     rs->last_version = ram_list.version;
2517     /*
2518      * Disable the bulk stage, otherwise we'll resend the whole RAM no
2519      * matter what we have sent.
2520      */
2521     rs->ram_bulk_stage = false;
2522 
2523     /* Update RAMState cache of output QEMUFile */
2524     rs->f = out;
2525 
2526     trace_ram_state_resume_prepare(pages);
2527 }
2528 
/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When RCU reclaims in the code
 * become numerous, it will be necessary to reduce the granularity of
 * these critical sections.
 */
2535 
2536 /**
2537  * ram_save_setup: Setup RAM for migration
2538  *
2539  * Returns zero to indicate success and negative for error
2540  *
2541  * @f: QEMUFile where to send the data
2542  * @opaque: RAMState pointer
2543  */
2544 static int ram_save_setup(QEMUFile *f, void *opaque)
2545 {
2546     RAMState **rsp = opaque;
2547     RAMBlock *block;
2548 
2549     if (compress_threads_save_setup()) {
2550         return -1;
2551     }
2552 
2553     /* migration has already setup the bitmap, reuse it. */
2554     if (!migration_in_colo_state()) {
2555         if (ram_init_all(rsp) != 0) {
2556             compress_threads_save_cleanup();
2557             return -1;
2558         }
2559     }
2560     (*rsp)->f = f;
2561 
2562     rcu_read_lock();
2563 
2564     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2565 
2566     RAMBLOCK_FOREACH(block) {
2567         qemu_put_byte(f, strlen(block->idstr));
2568         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2569         qemu_put_be64(f, block->used_length);
2570         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2571             qemu_put_be64(f, block->page_size);
2572         }
2573     }
2574 
2575     rcu_read_unlock();
2576 
2577     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2578     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2579 
2580     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2581 
2582     return 0;
2583 }
2584 
2585 /**
2586  * ram_save_iterate: iterative stage for migration
2587  *
2588  * Returns zero to indicate success and negative for error
2589  *
2590  * @f: QEMUFile where to send the data
2591  * @opaque: RAMState pointer
2592  */
2593 static int ram_save_iterate(QEMUFile *f, void *opaque)
2594 {
2595     RAMState **temp = opaque;
2596     RAMState *rs = *temp;
2597     int ret;
2598     int i;
2599     int64_t t0;
2600     int done = 0;
2601 
2602     if (blk_mig_bulk_active()) {
2603         /* Avoid transferring ram during bulk phase of block migration as
2604          * the bulk phase will usually take a long time and transferring
2605          * ram updates during that time is pointless. */
2606         goto out;
2607     }
2608 
2609     rcu_read_lock();
2610     if (ram_list.version != rs->last_version) {
2611         ram_state_reset(rs);
2612     }
2613 
2614     /* Read version before ram_list.blocks */
2615     smp_rmb();
2616 
2617     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2618 
2619     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2620     i = 0;
2621     while ((ret = qemu_file_rate_limit(f)) == 0) {
2622         int pages;
2623 
2624         pages = ram_find_and_save_block(rs, false);
2625         /* no more pages to sent */
2626         if (pages == 0) {
2627             done = 1;
2628             break;
2629         }
2630         rs->iterations++;
2631 
2632         /* we want to check in the 1st loop, just in case it was the 1st time
2633            and we had to sync the dirty bitmap.
2634            qemu_get_clock_ns() is a bit expensive, so we only check each some
2635            iterations
2636         */
2637         if ((i & 63) == 0) {
2638             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2639             if (t1 > MAX_WAIT) {
2640                 trace_ram_save_iterate_big_wait(t1, i);
2641                 break;
2642             }
2643         }
2644         i++;
2645     }
2646     flush_compressed_data(rs);
2647     rcu_read_unlock();
2648 
2649     /*
2650      * Must occur before EOS (or any QEMUFile operation)
2651      * because of RDMA protocol.
2652      */
2653     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2654 
2655 out:
2656     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2657     ram_counters.transferred += 8;
2658 
2659     ret = qemu_file_get_error(f);
2660     if (ret < 0) {
2661         return ret;
2662     }
2663 
2664     return done;
2665 }
2666 
2667 /**
2668  * ram_save_complete: function called to send the remaining amount of ram
2669  *
2670  * Returns zero to indicate success
2671  *
2672  * Called with iothread lock
2673  *
2674  * @f: QEMUFile where to send the data
2675  * @opaque: RAMState pointer
2676  */
2677 static int ram_save_complete(QEMUFile *f, void *opaque)
2678 {
2679     RAMState **temp = opaque;
2680     RAMState *rs = *temp;
2681 
2682     rcu_read_lock();
2683 
2684     if (!migration_in_postcopy()) {
2685         migration_bitmap_sync(rs);
2686     }
2687 
2688     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2689 
2690     /* try transferring iterative blocks of memory */
2691 
2692     /* flush all remaining blocks regardless of rate limiting */
2693     while (true) {
2694         int pages;
2695 
2696         pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2697         /* no more blocks to sent */
2698         if (pages == 0) {
2699             break;
2700         }
2701     }
2702 
2703     flush_compressed_data(rs);
2704     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2705 
2706     rcu_read_unlock();
2707 
2708     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2709 
2710     return 0;
2711 }
2712 
2713 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2714                              uint64_t *res_precopy_only,
2715                              uint64_t *res_compatible,
2716                              uint64_t *res_postcopy_only)
2717 {
2718     RAMState **temp = opaque;
2719     RAMState *rs = *temp;
2720     uint64_t remaining_size;
2721 
2722     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2723 
2724     if (!migration_in_postcopy() &&
2725         remaining_size < max_size) {
2726         qemu_mutex_lock_iothread();
2727         rcu_read_lock();
2728         migration_bitmap_sync(rs);
2729         rcu_read_unlock();
2730         qemu_mutex_unlock_iothread();
2731         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2732     }
2733 
2734     if (migrate_postcopy_ram()) {
2735         /* We can do postcopy, and all the data is postcopiable */
2736         *res_compatible += remaining_size;
2737     } else {
2738         *res_precopy_only += remaining_size;
2739     }
2740 }
2741 
2742 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2743 {
2744     unsigned int xh_len;
2745     int xh_flags;
2746     uint8_t *loaded_data;
2747 
2748     /* extract RLE header */
2749     xh_flags = qemu_get_byte(f);
2750     xh_len = qemu_get_be16(f);
2751 
2752     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2753         error_report("Failed to load XBZRLE page - wrong compression!");
2754         return -1;
2755     }
2756 
2757     if (xh_len > TARGET_PAGE_SIZE) {
2758         error_report("Failed to load XBZRLE page - len overflow!");
2759         return -1;
2760     }
2761     loaded_data = XBZRLE.decoded_buf;
2762     /* load data and decode */
2763     /* it can change loaded_data to point to an internal buffer */
2764     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2765 
2766     /* decode RLE */
2767     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2768                              TARGET_PAGE_SIZE) == -1) {
2769         error_report("Failed to load XBZRLE page - decode error!");
2770         return -1;
2771     }
2772 
2773     return 0;
2774 }
2775 
2776 /**
2777  * ram_block_from_stream: read a RAMBlock id from the migration stream
2778  *
2779  * Must be called from within a rcu critical section.
2780  *
2781  * Returns a pointer from within the RCU-protected ram_list.
2782  *
2783  * @f: QEMUFile where to read the data from
2784  * @flags: Page flags (mostly to see if it's a continuation of previous block)
2785  */
2786 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2787 {
2788     static RAMBlock *block = NULL;
2789     char id[256];
2790     uint8_t len;
2791 
2792     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2793         if (!block) {
2794             error_report("Ack, bad migration stream!");
2795             return NULL;
2796         }
2797         return block;
2798     }
2799 
2800     len = qemu_get_byte(f);
2801     qemu_get_buffer(f, (uint8_t *)id, len);
2802     id[len] = 0;
2803 
2804     block = qemu_ram_block_by_name(id);
2805     if (!block) {
2806         error_report("Can't find block %s", id);
2807         return NULL;
2808     }
2809 
2810     return block;
2811 }
2812 
2813 static inline void *host_from_ram_block_offset(RAMBlock *block,
2814                                                ram_addr_t offset)
2815 {
2816     if (!offset_in_ramblock(block, offset)) {
2817         return NULL;
2818     }
2819 
2820     return block->host + offset;
2821 }
2822 
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * The memory is left untouched when @ch is zero and the range already
 * reads as all zeroes.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Already zero: skip the write */
        return;
    }

    memset(host, ch, size);
}
2839 
2840 /* return the size after decompression, or negative value on error */
2841 static int
2842 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
2843                      const uint8_t *source, size_t source_len)
2844 {
2845     int err;
2846 
2847     err = inflateReset(stream);
2848     if (err != Z_OK) {
2849         return -1;
2850     }
2851 
2852     stream->avail_in = source_len;
2853     stream->next_in = (uint8_t *)source;
2854     stream->avail_out = dest_len;
2855     stream->next_out = dest;
2856 
2857     err = inflate(stream, Z_NO_FLUSH);
2858     if (err != Z_STREAM_END) {
2859         return -1;
2860     }
2861 
2862     return stream->total_out;
2863 }
2864 
2865 static void *do_data_decompress(void *opaque)
2866 {
2867     DecompressParam *param = opaque;
2868     unsigned long pagesize;
2869     uint8_t *des;
2870     int len, ret;
2871 
2872     qemu_mutex_lock(&param->mutex);
2873     while (!param->quit) {
2874         if (param->des) {
2875             des = param->des;
2876             len = param->len;
2877             param->des = 0;
2878             qemu_mutex_unlock(&param->mutex);
2879 
2880             pagesize = TARGET_PAGE_SIZE;
2881 
2882             ret = qemu_uncompress_data(&param->stream, des, pagesize,
2883                                        param->compbuf, len);
2884             if (ret < 0) {
2885                 error_report("decompress data failed");
2886                 qemu_file_set_error(decomp_file, ret);
2887             }
2888 
2889             qemu_mutex_lock(&decomp_done_lock);
2890             param->done = true;
2891             qemu_cond_signal(&decomp_done_cond);
2892             qemu_mutex_unlock(&decomp_done_lock);
2893 
2894             qemu_mutex_lock(&param->mutex);
2895         } else {
2896             qemu_cond_wait(&param->cond, &param->mutex);
2897         }
2898     }
2899     qemu_mutex_unlock(&param->mutex);
2900 
2901     return NULL;
2902 }
2903 
2904 static int wait_for_decompress_done(void)
2905 {
2906     int idx, thread_count;
2907 
2908     if (!migrate_use_compression()) {
2909         return 0;
2910     }
2911 
2912     thread_count = migrate_decompress_threads();
2913     qemu_mutex_lock(&decomp_done_lock);
2914     for (idx = 0; idx < thread_count; idx++) {
2915         while (!decomp_param[idx].done) {
2916             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2917         }
2918     }
2919     qemu_mutex_unlock(&decomp_done_lock);
2920     return qemu_file_get_error(decomp_file);
2921 }
2922 
2923 static void compress_threads_load_cleanup(void)
2924 {
2925     int i, thread_count;
2926 
2927     if (!migrate_use_compression()) {
2928         return;
2929     }
2930     thread_count = migrate_decompress_threads();
2931     for (i = 0; i < thread_count; i++) {
2932         /*
2933          * we use it as a indicator which shows if the thread is
2934          * properly init'd or not
2935          */
2936         if (!decomp_param[i].compbuf) {
2937             break;
2938         }
2939 
2940         qemu_mutex_lock(&decomp_param[i].mutex);
2941         decomp_param[i].quit = true;
2942         qemu_cond_signal(&decomp_param[i].cond);
2943         qemu_mutex_unlock(&decomp_param[i].mutex);
2944     }
2945     for (i = 0; i < thread_count; i++) {
2946         if (!decomp_param[i].compbuf) {
2947             break;
2948         }
2949 
2950         qemu_thread_join(decompress_threads + i);
2951         qemu_mutex_destroy(&decomp_param[i].mutex);
2952         qemu_cond_destroy(&decomp_param[i].cond);
2953         inflateEnd(&decomp_param[i].stream);
2954         g_free(decomp_param[i].compbuf);
2955         decomp_param[i].compbuf = NULL;
2956     }
2957     g_free(decompress_threads);
2958     g_free(decomp_param);
2959     decompress_threads = NULL;
2960     decomp_param = NULL;
2961     decomp_file = NULL;
2962 }
2963 
2964 static int compress_threads_load_setup(QEMUFile *f)
2965 {
2966     int i, thread_count;
2967 
2968     if (!migrate_use_compression()) {
2969         return 0;
2970     }
2971 
2972     thread_count = migrate_decompress_threads();
2973     decompress_threads = g_new0(QemuThread, thread_count);
2974     decomp_param = g_new0(DecompressParam, thread_count);
2975     qemu_mutex_init(&decomp_done_lock);
2976     qemu_cond_init(&decomp_done_cond);
2977     decomp_file = f;
2978     for (i = 0; i < thread_count; i++) {
2979         if (inflateInit(&decomp_param[i].stream) != Z_OK) {
2980             goto exit;
2981         }
2982 
2983         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2984         qemu_mutex_init(&decomp_param[i].mutex);
2985         qemu_cond_init(&decomp_param[i].cond);
2986         decomp_param[i].done = true;
2987         decomp_param[i].quit = false;
2988         qemu_thread_create(decompress_threads + i, "decompress",
2989                            do_data_decompress, decomp_param + i,
2990                            QEMU_THREAD_JOINABLE);
2991     }
2992     return 0;
2993 exit:
2994     compress_threads_load_cleanup();
2995     return -1;
2996 }
2997 
2998 static void decompress_data_with_multi_threads(QEMUFile *f,
2999                                                void *host, int len)
3000 {
3001     int idx, thread_count;
3002 
3003     thread_count = migrate_decompress_threads();
3004     qemu_mutex_lock(&decomp_done_lock);
3005     while (true) {
3006         for (idx = 0; idx < thread_count; idx++) {
3007             if (decomp_param[idx].done) {
3008                 decomp_param[idx].done = false;
3009                 qemu_mutex_lock(&decomp_param[idx].mutex);
3010                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
3011                 decomp_param[idx].des = host;
3012                 decomp_param[idx].len = len;
3013                 qemu_cond_signal(&decomp_param[idx].cond);
3014                 qemu_mutex_unlock(&decomp_param[idx].mutex);
3015                 break;
3016             }
3017         }
3018         if (idx < thread_count) {
3019             break;
3020         } else {
3021             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3022         }
3023     }
3024     qemu_mutex_unlock(&decomp_done_lock);
3025 }
3026 
3027 /**
3028  * ram_load_setup: Setup RAM for migration incoming side
3029  *
3030  * Returns zero to indicate success and negative for error
3031  *
3032  * @f: QEMUFile where to receive the data
3033  * @opaque: RAMState pointer
3034  */
3035 static int ram_load_setup(QEMUFile *f, void *opaque)
3036 {
3037     if (compress_threads_load_setup(f)) {
3038         return -1;
3039     }
3040 
3041     xbzrle_load_setup();
3042     ramblock_recv_map_init();
3043     return 0;
3044 }
3045 
3046 static int ram_load_cleanup(void *opaque)
3047 {
3048     RAMBlock *rb;
3049     xbzrle_load_cleanup();
3050     compress_threads_load_cleanup();
3051 
3052     RAMBLOCK_FOREACH(rb) {
3053         g_free(rb->receivedmap);
3054         rb->receivedmap = NULL;
3055     }
3056     return 0;
3057 }
3058 
3059 /**
3060  * ram_postcopy_incoming_init: allocate postcopy data structures
3061  *
3062  * Returns 0 for success and negative if there was one error
3063  *
3064  * @mis: current migration incoming state
3065  *
3066  * Allocate data structures etc needed by incoming migration with
3067  * postcopy-ram. postcopy-ram's similarly names
3068  * postcopy_ram_incoming_init does the work.
3069  */
3070 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3071 {
3072     unsigned long ram_pages = last_ram_page();
3073 
3074     return postcopy_ram_incoming_init(mis, ram_pages);
3075 }
3076 
3077 /**
3078  * ram_load_postcopy: load a page in postcopy case
3079  *
3080  * Returns 0 for success or -errno in case of error
3081  *
3082  * Called in postcopy mode by ram_load().
3083  * rcu_read_lock is taken prior to this being called.
3084  *
3085  * @f: QEMUFile where to send the data
3086  */
3087 static int ram_load_postcopy(QEMUFile *f)
3088 {
3089     int flags = 0, ret = 0;
3090     bool place_needed = false;
3091     bool matching_page_sizes = false;
3092     MigrationIncomingState *mis = migration_incoming_get_current();
3093     /* Temporary page that is later 'placed' */
3094     void *postcopy_host_page = postcopy_get_tmp_page(mis);
3095     void *last_host = NULL;
3096     bool all_zero = false;
3097 
3098     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3099         ram_addr_t addr;
3100         void *host = NULL;
3101         void *page_buffer = NULL;
3102         void *place_source = NULL;
3103         RAMBlock *block = NULL;
3104         uint8_t ch;
3105 
3106         addr = qemu_get_be64(f);
3107 
3108         /*
3109          * If qemu file error, we should stop here, and then "addr"
3110          * may be invalid
3111          */
3112         ret = qemu_file_get_error(f);
3113         if (ret) {
3114             break;
3115         }
3116 
3117         flags = addr & ~TARGET_PAGE_MASK;
3118         addr &= TARGET_PAGE_MASK;
3119 
3120         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3121         place_needed = false;
3122         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
3123             block = ram_block_from_stream(f, flags);
3124 
3125             host = host_from_ram_block_offset(block, addr);
3126             if (!host) {
3127                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3128                 ret = -EINVAL;
3129                 break;
3130             }
3131             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
3132             /*
3133              * Postcopy requires that we place whole host pages atomically;
3134              * these may be huge pages for RAMBlocks that are backed by
3135              * hugetlbfs.
3136              * To make it atomic, the data is read into a temporary page
3137              * that's moved into place later.
3138              * The migration protocol uses,  possibly smaller, target-pages
3139              * however the source ensures it always sends all the components
3140              * of a host page in order.
3141              */
3142             page_buffer = postcopy_host_page +
3143                           ((uintptr_t)host & (block->page_size - 1));
3144             /* If all TP are zero then we can optimise the place */
3145             if (!((uintptr_t)host & (block->page_size - 1))) {
3146                 all_zero = true;
3147             } else {
3148                 /* not the 1st TP within the HP */
3149                 if (host != (last_host + TARGET_PAGE_SIZE)) {
3150                     error_report("Non-sequential target page %p/%p",
3151                                   host, last_host);
3152                     ret = -EINVAL;
3153                     break;
3154                 }
3155             }
3156 
3157 
3158             /*
3159              * If it's the last part of a host page then we place the host
3160              * page
3161              */
3162             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
3163                                      (block->page_size - 1)) == 0;
3164             place_source = postcopy_host_page;
3165         }
3166         last_host = host;
3167 
3168         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3169         case RAM_SAVE_FLAG_ZERO:
3170             ch = qemu_get_byte(f);
3171             memset(page_buffer, ch, TARGET_PAGE_SIZE);
3172             if (ch) {
3173                 all_zero = false;
3174             }
3175             break;
3176 
3177         case RAM_SAVE_FLAG_PAGE:
3178             all_zero = false;
3179             if (!place_needed || !matching_page_sizes) {
3180                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3181             } else {
3182                 /* Avoids the qemu_file copy during postcopy, which is
3183                  * going to do a copy later; can only do it when we
3184                  * do this read in one go (matching page sizes)
3185                  */
3186                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3187                                          TARGET_PAGE_SIZE);
3188             }
3189             break;
3190         case RAM_SAVE_FLAG_EOS:
3191             /* normal exit */
3192             break;
3193         default:
3194             error_report("Unknown combination of migration flags: %#x"
3195                          " (postcopy mode)", flags);
3196             ret = -EINVAL;
3197             break;
3198         }
3199 
3200         /* Detect for any possible file errors */
3201         if (!ret && qemu_file_get_error(f)) {
3202             ret = qemu_file_get_error(f);
3203         }
3204 
3205         if (!ret && place_needed) {
3206             /* This gets called at the last target page in the host page */
3207             void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
3208 
3209             if (all_zero) {
3210                 ret = postcopy_place_page_zero(mis, place_dest,
3211                                                block);
3212             } else {
3213                 ret = postcopy_place_page(mis, place_dest,
3214                                           place_source, block);
3215             }
3216         }
3217     }
3218 
3219     return ret;
3220 }
3221 
3222 static bool postcopy_is_advised(void)
3223 {
3224     PostcopyState ps = postcopy_state_get();
3225     return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3226 }
3227 
3228 static bool postcopy_is_running(void)
3229 {
3230     PostcopyState ps = postcopy_state_get();
3231     return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3232 }
3233 
3234 static int ram_load(QEMUFile *f, void *opaque, int version_id)
3235 {
3236     int flags = 0, ret = 0, invalid_flags = 0;
3237     static uint64_t seq_iter;
3238     int len = 0;
3239     /*
3240      * If system is running in postcopy mode, page inserts to host memory must
3241      * be atomic
3242      */
3243     bool postcopy_running = postcopy_is_running();
3244     /* ADVISE is earlier, it shows the source has the postcopy capability on */
3245     bool postcopy_advised = postcopy_is_advised();
3246 
3247     seq_iter++;
3248 
3249     if (version_id != 4) {
3250         ret = -EINVAL;
3251     }
3252 
3253     if (!migrate_use_compression()) {
3254         invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3255     }
3256     /* This RCU critical section can be very long running.
3257      * When RCU reclaims in the code start to become numerous,
3258      * it will be necessary to reduce the granularity of this
3259      * critical section.
3260      */
3261     rcu_read_lock();
3262 
3263     if (postcopy_running) {
3264         ret = ram_load_postcopy(f);
3265     }
3266 
3267     while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3268         ram_addr_t addr, total_ram_bytes;
3269         void *host = NULL;
3270         uint8_t ch;
3271 
3272         addr = qemu_get_be64(f);
3273         flags = addr & ~TARGET_PAGE_MASK;
3274         addr &= TARGET_PAGE_MASK;
3275 
3276         if (flags & invalid_flags) {
3277             if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3278                 error_report("Received an unexpected compressed page");
3279             }
3280 
3281             ret = -EINVAL;
3282             break;
3283         }
3284 
3285         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3286                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
3287             RAMBlock *block = ram_block_from_stream(f, flags);
3288 
3289             host = host_from_ram_block_offset(block, addr);
3290             if (!host) {
3291                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3292                 ret = -EINVAL;
3293                 break;
3294             }
3295             ramblock_recv_bitmap_set(block, host);
3296             trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
3297         }
3298 
3299         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3300         case RAM_SAVE_FLAG_MEM_SIZE:
3301             /* Synchronize RAM block list */
3302             total_ram_bytes = addr;
3303             while (!ret && total_ram_bytes) {
3304                 RAMBlock *block;
3305                 char id[256];
3306                 ram_addr_t length;
3307 
3308                 len = qemu_get_byte(f);
3309                 qemu_get_buffer(f, (uint8_t *)id, len);
3310                 id[len] = 0;
3311                 length = qemu_get_be64(f);
3312 
3313                 block = qemu_ram_block_by_name(id);
3314                 if (block) {
3315                     if (length != block->used_length) {
3316                         Error *local_err = NULL;
3317 
3318                         ret = qemu_ram_resize(block, length,
3319                                               &local_err);
3320                         if (local_err) {
3321                             error_report_err(local_err);
3322                         }
3323                     }
3324                     /* For postcopy we need to check hugepage sizes match */
3325                     if (postcopy_advised &&
3326                         block->page_size != qemu_host_page_size) {
3327                         uint64_t remote_page_size = qemu_get_be64(f);
3328                         if (remote_page_size != block->page_size) {
3329                             error_report("Mismatched RAM page size %s "
3330                                          "(local) %zd != %" PRId64,
3331                                          id, block->page_size,
3332                                          remote_page_size);
3333                             ret = -EINVAL;
3334                         }
3335                     }
3336                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3337                                           block->idstr);
3338                 } else {
3339                     error_report("Unknown ramblock \"%s\", cannot "
3340                                  "accept migration", id);
3341                     ret = -EINVAL;
3342                 }
3343 
3344                 total_ram_bytes -= length;
3345             }
3346             break;
3347 
3348         case RAM_SAVE_FLAG_ZERO:
3349             ch = qemu_get_byte(f);
3350             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3351             break;
3352 
3353         case RAM_SAVE_FLAG_PAGE:
3354             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3355             break;
3356 
3357         case RAM_SAVE_FLAG_COMPRESS_PAGE:
3358             len = qemu_get_be32(f);
3359             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3360                 error_report("Invalid compressed data length: %d", len);
3361                 ret = -EINVAL;
3362                 break;
3363             }
3364             decompress_data_with_multi_threads(f, host, len);
3365             break;
3366 
3367         case RAM_SAVE_FLAG_XBZRLE:
3368             if (load_xbzrle(f, addr, host) < 0) {
3369                 error_report("Failed to decompress XBZRLE page at "
3370                              RAM_ADDR_FMT, addr);
3371                 ret = -EINVAL;
3372                 break;
3373             }
3374             break;
3375         case RAM_SAVE_FLAG_EOS:
3376             /* normal exit */
3377             break;
3378         default:
3379             if (flags & RAM_SAVE_FLAG_HOOK) {
3380                 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
3381             } else {
3382                 error_report("Unknown combination of migration flags: %#x",
3383                              flags);
3384                 ret = -EINVAL;
3385             }
3386         }
3387         if (!ret) {
3388             ret = qemu_file_get_error(f);
3389         }
3390     }
3391 
3392     ret |= wait_for_decompress_done();
3393     rcu_read_unlock();
3394     trace_ram_load_complete(ret, seq_iter);
3395     return ret;
3396 }
3397 
/*
 * Handler callback: reports the value of migrate_postcopy_ram().
 * @opaque is not used.
 */
static bool ram_has_postcopy(void *opaque)
{
    bool postcopy_ram = migrate_postcopy_ram();

    return postcopy_ram;
}
3402 
3403 /* Sync all the dirty bitmap with destination VM.  */
3404 static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3405 {
3406     RAMBlock *block;
3407     QEMUFile *file = s->to_dst_file;
3408     int ramblock_count = 0;
3409 
3410     trace_ram_dirty_bitmap_sync_start();
3411 
3412     RAMBLOCK_FOREACH(block) {
3413         qemu_savevm_send_recv_bitmap(file, block->idstr);
3414         trace_ram_dirty_bitmap_request(block->idstr);
3415         ramblock_count++;
3416     }
3417 
3418     trace_ram_dirty_bitmap_sync_wait();
3419 
3420     /* Wait until all the ramblocks' dirty bitmap synced */
3421     while (ramblock_count--) {
3422         qemu_sem_wait(&s->rp_state.rp_sem);
3423     }
3424 
3425     trace_ram_dirty_bitmap_sync_complete();
3426 
3427     return 0;
3428 }
3429 
3430 static void ram_dirty_bitmap_reload_notify(MigrationState *s)
3431 {
3432     qemu_sem_post(&s->rp_state.rp_sem);
3433 }
3434 
3435 /*
3436  * Read the received bitmap, revert it as the initial dirty bitmap.
3437  * This is only used when the postcopy migration is paused but wants
3438  * to resume from a middle point.
3439  */
3440 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
3441 {
3442     int ret = -EINVAL;
3443     QEMUFile *file = s->rp_state.from_dst_file;
3444     unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
3445     uint64_t local_size = nbits / 8;
3446     uint64_t size, end_mark;
3447 
3448     trace_ram_dirty_bitmap_reload_begin(block->idstr);
3449 
3450     if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
3451         error_report("%s: incorrect state %s", __func__,
3452                      MigrationStatus_str(s->state));
3453         return -EINVAL;
3454     }
3455 
3456     /*
3457      * Note: see comments in ramblock_recv_bitmap_send() on why we
3458      * need the endianess convertion, and the paddings.
3459      */
3460     local_size = ROUND_UP(local_size, 8);
3461 
3462     /* Add paddings */
3463     le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
3464 
3465     size = qemu_get_be64(file);
3466 
3467     /* The size of the bitmap should match with our ramblock */
3468     if (size != local_size) {
3469         error_report("%s: ramblock '%s' bitmap size mismatch "
3470                      "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
3471                      block->idstr, size, local_size);
3472         ret = -EINVAL;
3473         goto out;
3474     }
3475 
3476     size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
3477     end_mark = qemu_get_be64(file);
3478 
3479     ret = qemu_file_get_error(file);
3480     if (ret || size != local_size) {
3481         error_report("%s: read bitmap failed for ramblock '%s': %d"
3482                      " (size 0x%"PRIx64", got: 0x%"PRIx64")",
3483                      __func__, block->idstr, ret, local_size, size);
3484         ret = -EIO;
3485         goto out;
3486     }
3487 
3488     if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
3489         error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
3490                      __func__, block->idstr, end_mark);
3491         ret = -EINVAL;
3492         goto out;
3493     }
3494 
3495     /*
3496      * Endianess convertion. We are during postcopy (though paused).
3497      * The dirty bitmap won't change. We can directly modify it.
3498      */
3499     bitmap_from_le(block->bmap, le_bitmap, nbits);
3500 
3501     /*
3502      * What we received is "received bitmap". Revert it as the initial
3503      * dirty bitmap for this ramblock.
3504      */
3505     bitmap_complement(block->bmap, block->bmap, nbits);
3506 
3507     trace_ram_dirty_bitmap_reload_complete(block->idstr);
3508 
3509     /*
3510      * We succeeded to sync bitmap for current ramblock. If this is
3511      * the last one to sync, we need to notify the main send thread.
3512      */
3513     ram_dirty_bitmap_reload_notify(s);
3514 
3515     ret = 0;
3516 out:
3517     free(le_bitmap);
3518     return ret;
3519 }
3520 
3521 static int ram_resume_prepare(MigrationState *s, void *opaque)
3522 {
3523     RAMState *rs = *(RAMState **)opaque;
3524     int ret;
3525 
3526     ret = ram_dirty_bitmap_sync_all(s, rs);
3527     if (ret) {
3528         return ret;
3529     }
3530 
3531     ram_state_resume_prepare(rs, s->to_dst_file);
3532 
3533     return 0;
3534 }
3535 
3536 static SaveVMHandlers savevm_ram_handlers = {
3537     .save_setup = ram_save_setup,
3538     .save_live_iterate = ram_save_iterate,
3539     .save_live_complete_postcopy = ram_save_complete,
3540     .save_live_complete_precopy = ram_save_complete,
3541     .has_postcopy = ram_has_postcopy,
3542     .save_live_pending = ram_save_pending,
3543     .load_state = ram_load,
3544     .save_cleanup = ram_save_cleanup,
3545     .load_setup = ram_load_setup,
3546     .load_cleanup = ram_load_cleanup,
3547     .resume_prepare = ram_resume_prepare,
3548 };
3549 
3550 void ram_mig_init(void)
3551 {
3552     qemu_mutex_init(&XBZRLE.lock);
3553     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
3554 }
3555