/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/madvise.h"
#include "qemu/main-loop.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
#include "migration-stats.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "sysemu/cpu-throttle.h"
#include "savevm.h"
#include "qemu/iov.h"
#include "multifd.h"
#include "sysemu/runstate.h"
#include "rdma.h"
#include "options.h"
#include "sysemu/dirtylimit.h"
#include "sysemu/kvm.h"

#include "hw/boards.h" /* for machine_dump_guest_core() */

#if defined(__linux__)
#include "qemu/userfaultfd.h"
#endif /* defined(__linux__) */

/***********************************************************/
/* ram save/restore */

/*
 * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value, and renamed it to avoid
 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
 *
 * RAM_SAVE_FLAG_FULL was obsoleted in 2009.
 *
 * RAM_SAVE_FLAG_COMPRESS_PAGE (0x100) was removed in QEMU 9.1.
 */
#define RAM_SAVE_FLAG_FULL     0x01
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */
#define RAM_SAVE_FLAG_MULTIFD_FLUSH    0x200
/* We can't use any flag that is bigger than 0x200 */

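/*
 * Illustrative note (assuming 4 KiB target pages, not part of the original
 * comments): these flags share the low bits of the page offset that
 * save_page_header() writes to the wire.  Because offsets are always
 * target-page aligned, the low bits are free, so a single
 *
 *     qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE)
 *
 * encodes both the page address and how its contents follow in the stream.
 */
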
/*
 * mapped-ram migration supports O_DIRECT, so we need to make sure the
 * userspace buffer, the IO operation size and the file offset are
 * aligned according to the underlying device's block size. The first
 * two are already aligned to page size, but we need to add padding to
 * the file to align the offset.  We cannot read the block size
 * dynamically because the migration file can be moved between
 * different systems, so use 1M to cover most block sizes and to keep
 * the file offset aligned at page size as well.
 */
#define MAPPED_RAM_FILE_OFFSET_ALIGNMENT 0x100000

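/*
 * Worked example (illustrative, not from the original source): if the pages
 * region would otherwise start at file offset 0x1234, it gets padded up to
 * the next multiple of MAPPED_RAM_FILE_OFFSET_ALIGNMENT:
 *
 *     ROUND_UP(0x1234, 0x100000) == 0x100000
 *
 * which keeps O_DIRECT reads and writes aligned for block sizes up to 1 MiB.
 */
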
/*
 * When doing mapped-ram migration, this is the amount we read from
 * the pages region in the migration file at a time.
 */
#define MAPPED_RAM_LOAD_BUF_SIZE 0x100000

XBZRLECacheStats xbzrle_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* The migration channel used for a specific host page */
    QEMUFile    *pss_channel;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
    /* Whether we're sending a host page */
    bool          host_page_sending;
    /* The start/end of current host page.  Invalid if host_page_sending==false */
    unsigned long host_page_start;
    unsigned long host_page_end;
};
typedef struct PageSearchStatus PageSearchStatus;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_xbzrle()) {
        qemu_mutex_lock(&XBZRLE.lock);
    }
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_xbzrle()) {
        qemu_mutex_unlock(&XBZRLE.lock);
    }
}

/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from migrate_params_apply in the main
 * thread, possibly while a migration is in progress.  A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by the XBZRLE.lock mutex.
 *
 * Returns 0 on success or -1 on error
 *
 * @new_size: new cache size
 * @errp: set to the failure reason if the check failed
 */
int xbzrle_cache_resize(uint64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}

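/*
 * Illustrative caller sketch (assumed, not from this file): the migration
 * parameter code ends up doing something equivalent to
 *
 *     Error *err = NULL;
 *     if (xbzrle_cache_resize(64 * 1024 * 1024, &err) < 0) {
 *         error_report_err(err);
 *     }
 *
 * where a size that doesn't fit in size_t, or a failing cache_init(),
 * reports through *errp and returns -1.
 */
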
static bool postcopy_preempt_active(void)
{
    return migrate_postcopy_preempt() && migration_in_postcopy();
}

bool migrate_ram_is_ignored(RAMBlock *block)
{
    return !qemu_ram_is_migratable(block) ||
           (migrate_ignore_shared() && qemu_ram_is_shared(block)
                                    && qemu_ram_is_named_file(block));
}

#undef RAMBLOCK_FOREACH

int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    RCU_READ_LOCK_GUARD();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        ret = func(block, opaque);
        if (ret) {
            break;
        }
    }
    return ret;
}

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}

int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset)
{
    set_bit_atomic(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}
#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)

/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 (the number of bytes sent) on success, or <0 on error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->postcopy_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see the comment
     * below).  So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap.  This is
     * required when source and destination VMs are not using the
     * same endianness.  (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    g_free(le_bitmap);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    int ret = qemu_fflush(file);
    if (ret) {
        return ret;
    }

    return size + sizeof(size);
}

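/*
 * Worked example of the layout above (illustrative numbers): for a 1 GiB
 * block with 4 KiB target pages, nbits = 262144, so
 *
 *     size = ROUND_UP(DIV_ROUND_UP(262144, 8), 8) = 32768 bytes
 *
 * and the stream carries be64(size), 32768 bitmap bytes in little endian,
 * then be64(RAMBLOCK_RECV_BITMAP_ENDING); the function returns 32776.
 */
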
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /*
     * PageSearchStatus structures for the channels when sending pages.
     * Protected by the bitmap_mutex.
     */
    PageSearchStatus pss[RAM_CHANNEL_MAX];
    /* UFFD file descriptor, used in 'write-tracking' migration */
    int uffdio_fd;
    /* total ram size in bytes */
    uint64_t ram_bytes_total;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* How many times we have dirtied too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;
    /* Are we really using XBZRLE (e.g., after the first round). */
    bool xbzrle_started;
    /* Are we on the last stage of migration */
    bool last_stage;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /*
     * Protects:
     * - dirty/clear bitmap
     * - migration_dirty_pages
     * - pss structures
     */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;

    /*
     * This is only used when postcopy is in recovery phase, to communicate
     * between the migration thread and the return path thread on dirty
     * bitmap synchronizations.  This field is unused in other stages of
     * RAM migration.
     */
    unsigned int postcopy_bmap_sync_requested;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

static NotifierWithReturnList precopy_notifier_list;

/* Whether postcopy has queued requests? */
static bool postcopy_has_request(RAMState *rs)
{
    return !QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests);
}

void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}

void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}

void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}

int precopy_notify(PrecopyNotifyReason reason, Error **errp)
{
    PrecopyNotifyData pnd;
    pnd.reason = reason;

    return notifier_with_return_list_notify(&precopy_notifier_list, &pnd, errp);
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

void ram_transferred_add(uint64_t bytes)
{
    if (runstate_is_running()) {
        stat64_add(&mig_stats.precopy_bytes, bytes);
    } else if (migration_in_postcopy()) {
        stat64_add(&mig_stats.postcopy_bytes, bytes);
    } else {
        stat64_add(&mig_stats.downtime_bytes, bytes);
    }
}

struct MigrationOps {
    int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss);
};
typedef struct MigrationOps MigrationOps;

MigrationOps *migration_ops;

static int ram_save_host_page_urgent(PageSearchStatus *pss);

/* NOTE: page is the PFN not real ram_addr_t. */
static void pss_init(PageSearchStatus *pss, RAMBlock *rb, ram_addr_t page)
{
    pss->block = rb;
    pss->page = page;
    pss->complete_round = false;
}

/*
 * Check whether two PSSs are actively sending the same page.  Return true
 * if it is, false otherwise.
 */
static bool pss_overlap(PageSearchStatus *pss1, PageSearchStatus *pss2)
{
    return pss1->host_page_sending && pss2->host_page_sending &&
        (pss1->host_page_start == pss2->host_page_start);
}

/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @pss: current PSS channel status
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f,
                               RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;
    bool same_block = (block == pss->last_sent_block);

    if (same_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!same_block) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        pss->last_sent_block = block;
    }
    return size;
}

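/*
 * Illustrative wire layout for the header above (block name assumed to be
 * "pc.ram"):
 *
 *     first page from a block:  be64(offset | flags) u8(6) "pc.ram"
 *     following pages:          be64(offset | flags | RAM_SAVE_FLAG_CONTINUE)
 *
 * matching the 8 + 1 + len and plain 8 byte sizes returned above.
 */
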
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce the amount of guest CPU execution to hopefully slow down memory
 * writes.  If the guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination, then we should be
 * able to complete migration.  Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
                                    uint64_t bytes_dirty_threshold)
{
    uint64_t pct_initial = migrate_cpu_throttle_initial();
    uint64_t pct_increment = migrate_cpu_throttle_increment();
    bool pct_tailslow = migrate_cpu_throttle_tailslow();
    int pct_max = migrate_max_cpu_throttle();

    uint64_t throttle_now = cpu_throttle_get_percentage();
    uint64_t cpu_now, cpu_ideal, throttle_inc;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        if (!pct_tailslow) {
            throttle_inc = pct_increment;
        } else {
            /* Compute the ideal CPU percentage used by Guest, which may
             * make the dirty rate match the dirty rate threshold. */
            cpu_now = 100 - throttle_now;
            cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
                        bytes_dirty_period);
            throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
        }
        cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
    }
}

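/*
 * Worked example for the tailslow branch above (numbers are illustrative):
 * with throttle_now = 40, pct_increment = 10, bytes_dirty_period = 200 MB
 * and bytes_dirty_threshold = 150 MB:
 *
 *     cpu_now      = 100 - 40            = 60
 *     cpu_ideal    = 60 * (150 / 200)    = 45
 *     throttle_inc = MIN(60 - 45, 10)    = 10
 *
 * so the throttle still rises by the configured increment unless the ideal
 * CPU share is closer than that.
 */
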
void mig_throttle_counter_reset(void)
{
    RAMState *rs = ram_state;

    rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    rs->num_dirty_pages_period = 0;
    rs->bytes_xfer_prev = migration_transferred_bytes();
}

/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 stat64_get(&mig_stats.dirty_sync_count));
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @pss: current PSS channel
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 */
static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
                            uint8_t **current_data, ram_addr_t current_addr,
                            RAMBlock *block, ram_addr_t offset)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;
    QEMUFile *file = pss->pss_channel;
    uint64_t generation = stat64_get(&mig_stats.dirty_sync_count);

    if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) {
        xbzrle_counters.cache_miss++;
        if (!rs->last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             generation) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!rs->last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(pss, pss->pss_channel, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(file, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(file, encoded_len);
    qemu_put_buffer(file, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    /*
     * The xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_transferred_add(bytes_xbzrle);

    return 1;
}

/**
 * pss_find_next_dirty: find the next dirty page of the current ramblock
 *
 * This function updates pss->page to point to the next dirty page index
 * within the ramblock to migrate, or the end of the ramblock when nothing
 * is found.  Note that when pss->host_page_sending==true it means we're
 * in the middle of sending a host page, so we won't look for dirty pages
 * outside the host page boundary.
 *
 * @pss: the current page search status
 */
static void pss_find_next_dirty(PageSearchStatus *pss)
{
    RAMBlock *rb = pss->block;
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (migrate_ram_is_ignored(rb)) {
        /* Points directly to the end, so we know no dirty page */
        pss->page = size;
        return;
    }

    /*
     * If we are sending a host page, only look for dirty pages within
     * the current host page being sent.
     */
    if (pss->host_page_sending) {
        assert(pss->host_page_end);
        size = MIN(size, pss->host_page_end);
    }

    pss->page = find_next_bit(bitmap, size, pss->page);
}

static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
                                                       unsigned long page)
{
    uint8_t shift;
    hwaddr size, start;

    if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
        return;
    }

    shift = rb->clear_bmap_shift;
    /*
     * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
     * can make things easier sometimes since then start address
     * of the small chunk will always be 64 pages aligned so the
     * bitmap will always be aligned to unsigned long. We should
     * even be able to remove this restriction but I'm simply
     * keeping it.
     */
    assert(shift >= 6);

    size = 1ULL << (TARGET_PAGE_BITS + shift);
    start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
    trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
    memory_region_clear_dirty_bitmap(rb->mr, start, size);
}

static void
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
                                                 unsigned long start,
                                                 unsigned long npages)
{
    unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift;
    unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages);
    unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages);

    /*
     * Clear pages from start to start + npages - 1, so the end boundary is
     * exclusive.
     */
    for (i = chunk_start; i < chunk_end; i += chunk_pages) {
        migration_clear_memory_region_dirty_bitmap(rb, i);
    }
}

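/*
 * Illustration (values assumed, not taken from this file): with a
 * clear_bmap_shift of 18 and 4 KiB target pages, one clear-bitmap chunk
 * covers 1 << (12 + 18) = 1 GiB (262144 pages), so clearing the range
 * [start, start + npages) touches every chunk that overlaps it.
 */
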
/*
 * colo_bitmap_find_dirty: find contiguous dirty pages from start
 *
 * Returns the page offset within the memory region of the start of the
 * contiguous dirty pages
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
 * @num: the number of contiguous dirty pages
 */
static inline
unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                     unsigned long start, unsigned long *num)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;
    unsigned long first, next;

    *num = 0;

    if (migrate_ram_is_ignored(rb)) {
        return size;
    }

    first = find_next_bit(bitmap, size, start);
    if (first >= size) {
        return first;
    }
    next = find_next_zero_bit(bitmap, size, first + 1);
    assert(next >= first);
    *num = next - first;
    return first;
}

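/*
 * Example of the scan above (illustrative bitmap): for bmap = 0b00111100
 * and start = 1, find_next_bit() returns first = 2 and
 * find_next_zero_bit() returns 6, so *num = 4 contiguous dirty pages
 * starting at page 2.
 */
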
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    /*
     * Clear dirty bitmap if needed.  This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time.  So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    migration_clear_memory_region_dirty_bitmap(rb, page);

    ret = test_and_clear_bit(page, rb->bmap);
    if (ret) {
        rs->migration_dirty_pages--;
    }

    return ret;
}

static void dirty_bitmap_clear_section(MemoryRegionSection *section,
                                       void *opaque)
{
    const hwaddr offset = section->offset_within_region;
    const hwaddr size = int128_get64(section->size);
    const unsigned long start = offset >> TARGET_PAGE_BITS;
    const unsigned long npages = size >> TARGET_PAGE_BITS;
    RAMBlock *rb = section->mr->ram_block;
    uint64_t *cleared_bits = opaque;

    /*
     * We don't grab ram_state->bitmap_mutex because we expect to run
     * only when starting migration or during postcopy recovery where
     * we don't have concurrent access.
     */
    if (!migration_in_postcopy() && !migrate_background_snapshot()) {
        migration_clear_memory_region_dirty_bitmap_range(rb, start, npages);
    }
    *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
    bitmap_clear(rb->bmap, start, npages);
}

/*
 * Exclude all dirty pages from migration that fall into a discarded range as
 * managed by a RamDiscardManager responsible for the mapped memory region of
 * the RAMBlock. Clear the corresponding bits in the dirty bitmaps.
 *
 * Discarded pages ("logically unplugged") have undefined content and must
 * not get migrated, because even reading these pages for migration might
 * result in undesired behavior.
 *
 * Returns the number of cleared bits in the RAMBlock dirty bitmap.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
{
    uint64_t cleared_bits = 0;

    if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = int128_make64(qemu_ram_get_used_length(rb)),
        };

        ram_discard_manager_replay_discarded(rdm, &section,
                                             dirty_bitmap_clear_section,
                                             &cleared_bits);
    }
    return cleared_bits;
}

/*
 * Check if a host-page aligned page falls into a discarded range as managed by
 * a RamDiscardManager responsible for the mapped memory region of the RAMBlock.
 *
 * Note: The result is only stable while migrating (precopy/postcopy).
 */
bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
{
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = start,
            .size = int128_make64(qemu_ram_pagesize(rb)),
        };

        return !ram_discard_manager_is_populated(rdm, &section);
    }
    return false;
}

/* Called with RCU critical section */
static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
{
    uint64_t new_dirty_pages =
        cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);

    rs->migration_dirty_pages += new_dirty_pages;
    rs->num_dirty_pages_period += new_dirty_pages;
}

/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        summary |= block->page_size;
    }

    return summary;
}

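/*
 * Example (illustrative): a guest with normal 4 KiB RAM plus 2 MiB
 * hugepage-backed memory yields
 *
 *     summary = 0x1000 | 0x200000 = 0x201000
 *
 * while a guest with only normal pages reports just the host page size.
 */
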
uint64_t ram_get_total_transferred_pages(void)
{
    return stat64_get(&mig_stats.normal_pages) +
        stat64_get(&mig_stats.zero_pages) +
        xbzrle_counters.pages;
}

migration_update_rates(RAMState * rs,int64_t end_time)953b734035bSXiao Guangrong static void migration_update_rates(RAMState *rs, int64_t end_time)
954b734035bSXiao Guangrong {
955be8b02edSXiao Guangrong     uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
956b734035bSXiao Guangrong 
957b734035bSXiao Guangrong     /* calculate period counters */
958aff3f660SJuan Quintela     stat64_set(&mig_stats.dirty_pages_rate,
95972f8e587SJuan Quintela                rs->num_dirty_pages_period * 1000 /
96072f8e587SJuan Quintela                (end_time - rs->time_last_bitmap_sync));
961b734035bSXiao Guangrong 
962be8b02edSXiao Guangrong     if (!page_count) {
963b734035bSXiao Guangrong         return;
964b734035bSXiao Guangrong     }
965b734035bSXiao Guangrong 
96687dca0c9SJuan Quintela     if (migrate_xbzrle()) {
967e460a4b1SWei Wang         double encoded_size, unencoded_size;
968e460a4b1SWei Wang 
969b734035bSXiao Guangrong         xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
970be8b02edSXiao Guangrong             rs->xbzrle_cache_miss_prev) / page_count;
971b734035bSXiao Guangrong         rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
972e460a4b1SWei Wang         unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
973e460a4b1SWei Wang                          TARGET_PAGE_SIZE;
974e460a4b1SWei Wang         encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
97592271402SWei Wang         if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
976e460a4b1SWei Wang             xbzrle_counters.encoding_rate = 0;
977e460a4b1SWei Wang         } else {
978e460a4b1SWei Wang             xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
979e460a4b1SWei Wang         }
980e460a4b1SWei Wang         rs->xbzrle_pages_prev = xbzrle_counters.pages;
981e460a4b1SWei Wang         rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
982b734035bSXiao Guangrong     }
983b734035bSXiao Guangrong }
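
/*
 * Worked example with illustrative numbers only: if 50000 pages were dirtied
 * since the last sync and 2000 ms elapsed, dirty_pages_rate becomes
 * 50000 * 1000 / 2000 = 25000 pages/s.  For XBZRLE, 1000 newly encoded 4 KiB
 * pages (4096000 bytes unencoded) that compressed to 1024000 bytes give an
 * encoding_rate of 4096000 / 1024000 = 4.0.
 */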
984b734035bSXiao Guangrong 
985acac51baSHyman Huang(黄勇) /*
986acac51baSHyman Huang(黄勇)  * Enable dirty-limit to throttle down the guest
987acac51baSHyman Huang(黄勇)  */
988acac51baSHyman Huang(黄勇) static void migration_dirty_limit_guest(void)
989acac51baSHyman Huang(黄勇) {
990acac51baSHyman Huang(黄勇)     /*
991acac51baSHyman Huang(黄勇)      * dirty page rate quota for all vCPUs fetched from
992acac51baSHyman Huang(黄勇)      * migration parameter 'vcpu_dirty_limit'
993acac51baSHyman Huang(黄勇)      */
994acac51baSHyman Huang(黄勇)     static int64_t quota_dirtyrate;
995acac51baSHyman Huang(黄勇)     MigrationState *s = migrate_get_current();
996acac51baSHyman Huang(黄勇) 
997acac51baSHyman Huang(黄勇)     /*
998acac51baSHyman Huang(黄勇)      * Nothing to do if the dirty limit is already enabled and the
999acac51baSHyman Huang(黄勇)      * migration parameter vcpu-dirty-limit has not been changed.
1000acac51baSHyman Huang(黄勇)      */
1001acac51baSHyman Huang(黄勇)     if (dirtylimit_in_service() &&
1002acac51baSHyman Huang(黄勇)         quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
1003acac51baSHyman Huang(黄勇)         return;
1004acac51baSHyman Huang(黄勇)     }
1005acac51baSHyman Huang(黄勇) 
1006acac51baSHyman Huang(黄勇)     quota_dirtyrate = s->parameters.vcpu_dirty_limit;
1007acac51baSHyman Huang(黄勇) 
1008acac51baSHyman Huang(黄勇)     /*
1009acac51baSHyman Huang(黄勇)      * Set a quota dirty rate for every vCPU; note that the second
1010acac51baSHyman Huang(黄勇)      * parameter is ignored when the limit is applied to all vCPUs of the VM.
1011acac51baSHyman Huang(黄勇)      */
1012acac51baSHyman Huang(黄勇)     qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
1013acac51baSHyman Huang(黄勇)     trace_migration_dirty_limit_guest(quota_dirtyrate);
1014acac51baSHyman Huang(黄勇) }
1015acac51baSHyman Huang(黄勇) 
1016dc14a470SKeqian Zhu static void migration_trigger_throttle(RAMState *rs)
1017dc14a470SKeqian Zhu {
10186499efdbSJuan Quintela     uint64_t threshold = migrate_throttle_trigger_threshold();
101923b7576dSPeter Xu     uint64_t bytes_xfer_period =
1020897fd8bdSJuan Quintela         migration_transferred_bytes() - rs->bytes_xfer_prev;
1021dc14a470SKeqian Zhu     uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
1022dc14a470SKeqian Zhu     uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
1023dc14a470SKeqian Zhu 
1024310ad562SHyman Huang(黄勇)     /*
1025310ad562SHyman Huang(黄勇)      * The following detection logic can be refined later. For now:
1026310ad562SHyman Huang(黄勇)      * Check to see if the ratio between dirtied bytes and the approx.
1027310ad562SHyman Huang(黄勇)      * amount of bytes that just got transferred since the last time
1028310ad562SHyman Huang(黄勇)      * we were in this routine reaches the threshold. If that happens
1029310ad562SHyman Huang(黄勇)      * twice, start or increase throttling.
1030310ad562SHyman Huang(黄勇)      */
1031dc14a470SKeqian Zhu     if ((bytes_dirty_period > bytes_dirty_threshold) &&
1032dc14a470SKeqian Zhu         (++rs->dirty_rate_high_cnt >= 2)) {
1033dc14a470SKeqian Zhu         rs->dirty_rate_high_cnt = 0;
1034310ad562SHyman Huang(黄勇)         if (migrate_auto_converge()) {
1035310ad562SHyman Huang(黄勇)             trace_migration_throttle();
1036cbbf8182SKeqian Zhu             mig_throttle_guest_down(bytes_dirty_period,
1037cbbf8182SKeqian Zhu                                     bytes_dirty_threshold);
1038acac51baSHyman Huang(黄勇)         } else if (migrate_dirty_limit()) {
1039acac51baSHyman Huang(黄勇)             migration_dirty_limit_guest();
1040dc14a470SKeqian Zhu         }
1041dc14a470SKeqian Zhu     }
1042dc14a470SKeqian Zhu }
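
/*
 * Worked example with illustrative numbers only: with a 50% trigger threshold
 * and 1 GiB transferred in the last period, bytes_dirty_threshold is
 * 1 GiB * 50 / 100 = 512 MiB.  Dirtying more than 512 MiB in two consecutive
 * periods starts auto-converge throttling (or the dirty limit, if enabled).
 */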
1043dc14a470SKeqian Zhu 
10441e493be5SGavin Shan static void migration_bitmap_sync(RAMState *rs, bool last_stage)
104556e93d26SJuan Quintela {
104656e93d26SJuan Quintela     RAMBlock *block;
104756e93d26SJuan Quintela     int64_t end_time;
104856e93d26SJuan Quintela 
1049aff3f660SJuan Quintela     stat64_add(&mig_stats.dirty_sync_count, 1);
105056e93d26SJuan Quintela 
1051f664da80SJuan Quintela     if (!rs->time_last_bitmap_sync) {
1052f664da80SJuan Quintela         rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
105356e93d26SJuan Quintela     }
105456e93d26SJuan Quintela 
105556e93d26SJuan Quintela     trace_migration_bitmap_sync_start();
10561e493be5SGavin Shan     memory_global_dirty_log_sync(last_stage);
105756e93d26SJuan Quintela 
105862663f08SWill Gyda     WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) {
105989ac5a1dSDr. David Alan Gilbert         WITH_RCU_READ_LOCK_GUARD() {
1060fbd162e6SYury Kotov             RAMBLOCK_FOREACH_NOT_IGNORED(block) {
10617a3e9571SWei Yang                 ramblock_sync_dirty_bitmap(rs, block);
106256e93d26SJuan Quintela             }
1063aff3f660SJuan Quintela             stat64_set(&mig_stats.dirty_bytes_last_sync, ram_bytes_remaining());
106489ac5a1dSDr. David Alan Gilbert         }
106562663f08SWill Gyda     }
106656e93d26SJuan Quintela 
10679458a9a1SPaolo Bonzini     memory_global_after_dirty_log_sync();
1068a66cd90cSJuan Quintela     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
10691ffb5dfdSChao Fan 
107056e93d26SJuan Quintela     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
107156e93d26SJuan Quintela 
107256e93d26SJuan Quintela     /* more than 1 second = 1000 milliseconds */
1073f664da80SJuan Quintela     if (end_time > rs->time_last_bitmap_sync + 1000) {
1074dc14a470SKeqian Zhu         migration_trigger_throttle(rs);
1075070afca2SJason J. Herne 
1076b734035bSXiao Guangrong         migration_update_rates(rs, end_time);
1077b734035bSXiao Guangrong 
1078be8b02edSXiao Guangrong         rs->target_page_count_prev = rs->target_page_count;
1079d693c6f1SFelipe Franciosi 
1080d693c6f1SFelipe Franciosi         /* reset period counters */
1081f664da80SJuan Quintela         rs->time_last_bitmap_sync = end_time;
1082a66cd90cSJuan Quintela         rs->num_dirty_pages_period = 0;
1083897fd8bdSJuan Quintela         rs->bytes_xfer_prev = migration_transferred_bytes();
108456e93d26SJuan Quintela     }
1085b890902cSJuan Quintela     if (migrate_events()) {
1086aff3f660SJuan Quintela         uint64_t generation = stat64_get(&mig_stats.dirty_sync_count);
1087536b5a4eSJuan Quintela         qapi_event_send_migration_pass(generation);
10884addcd4fSDr. David Alan Gilbert     }
108956e93d26SJuan Quintela }
109056e93d26SJuan Quintela 
109152ac968aSHyman Huang void migration_bitmap_sync_precopy(bool last_stage)
1092bd227060SWei Wang {
1093bd227060SWei Wang     Error *local_err = NULL;
10946a39ba7cSHyman Huang     assert(ram_state);
1095bd227060SWei Wang 
1096bd227060SWei Wang     /*
1097bd227060SWei Wang      * The current notifier usage is just an optimization to migration, so we
1098bd227060SWei Wang      * don't stop the normal migration process in the error case.
1099bd227060SWei Wang      */
1100bd227060SWei Wang     if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
1101bd227060SWei Wang         error_report_err(local_err);
1102b4a1733cSVladimir Sementsov-Ogievskiy         local_err = NULL;
1103bd227060SWei Wang     }
1104bd227060SWei Wang 
11056a39ba7cSHyman Huang     migration_bitmap_sync(ram_state, last_stage);
1106bd227060SWei Wang 
1107bd227060SWei Wang     if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
1108bd227060SWei Wang         error_report_err(local_err);
1109bd227060SWei Wang     }
1110bd227060SWei Wang }
1111bd227060SWei Wang 
1112a4dbaf8eSJuan Quintela void ram_release_page(const char *rbname, uint64_t offset)
111347fe16ffSJuan Quintela {
111447fe16ffSJuan Quintela     if (!migrate_release_ram() || !migration_in_postcopy()) {
111547fe16ffSJuan Quintela         return;
111647fe16ffSJuan Quintela     }
111747fe16ffSJuan Quintela 
111847fe16ffSJuan Quintela     ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
111947fe16ffSJuan Quintela }
112047fe16ffSJuan Quintela 
112156e93d26SJuan Quintela /**
11223d0684b2SJuan Quintela  * save_zero_page: send the zero page to the stream
112356e93d26SJuan Quintela  *
11243d0684b2SJuan Quintela  * Returns > 0 if the page was all zeroes and has been handled, 0 otherwise.
112556e93d26SJuan Quintela  *
1126ccc09db8SFabiano Rosas  * @rs: current RAM state
1127ec6f3ab9SPeter Xu  * @pss: current PSS channel
112856e93d26SJuan Quintela  * @offset: offset inside the block for the page
112956e93d26SJuan Quintela  */
1130e8e4e7acSJuan Quintela static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
113161717ea9SPeter Xu                           ram_addr_t offset)
113256e93d26SJuan Quintela {
1133e8e4e7acSJuan Quintela     uint8_t *p = pss->block->host + offset;
11348697eb85SFabiano Rosas     QEMUFile *file = pss->pss_channel;
11358697eb85SFabiano Rosas     int len = 0;
113656e93d26SJuan Quintela 
11375fdbb1dfSHao Xiang     if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) {
11385fdbb1dfSHao Xiang         return 0;
11395fdbb1dfSHao Xiang     }
11405fdbb1dfSHao Xiang 
11418697eb85SFabiano Rosas     if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) {
11428697eb85SFabiano Rosas         return 0;
1143ccc09db8SFabiano Rosas     }
1144ccc09db8SFabiano Rosas 
1145c2d5c4a7SFabiano Rosas     stat64_add(&mig_stats.zero_pages, 1);
1146c2d5c4a7SFabiano Rosas 
1147c2d5c4a7SFabiano Rosas     if (migrate_mapped_ram()) {
1148c2d5c4a7SFabiano Rosas         /* zero pages are not transferred with mapped-ram */
1149f427d90bSFabiano Rosas         clear_bit_atomic(offset >> TARGET_PAGE_BITS, pss->block->file_bmap);
1150c2d5c4a7SFabiano Rosas         return 1;
1151c2d5c4a7SFabiano Rosas     }
1152c2d5c4a7SFabiano Rosas 
1153e8e4e7acSJuan Quintela     len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO);
11548697eb85SFabiano Rosas     qemu_put_byte(file, 0);
11558697eb85SFabiano Rosas     len += 1;
1156e8e4e7acSJuan Quintela     ram_release_page(pss->block->idstr, offset);
11574c2d0f6dSDavid Edmondson     ram_transferred_add(len);
1158ccc09db8SFabiano Rosas 
1159ccc09db8SFabiano Rosas     /*
1160ccc09db8SFabiano Rosas      * Must let xbzrle know, otherwise a previous (now 0'd) cached
1161ccc09db8SFabiano Rosas      * page would be stale.
1162ccc09db8SFabiano Rosas      */
1163ccc09db8SFabiano Rosas     if (rs->xbzrle_started) {
1164ccc09db8SFabiano Rosas         XBZRLE_cache_lock();
1165e8e4e7acSJuan Quintela         xbzrle_cache_zero_page(pss->block->offset + offset);
1166ccc09db8SFabiano Rosas         XBZRLE_cache_unlock();
116756e93d26SJuan Quintela     }
1168ccc09db8SFabiano Rosas 
11698697eb85SFabiano Rosas     return len;
117056e93d26SJuan Quintela }
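
/*
 * Illustrative sketch, not part of the upstream file: the same buffer_is_zero()
 * test used above, applied to count how many target pages of a host range are
 * entirely zero.  The helper name is hypothetical.
 */
static size_t sketch_count_zero_pages(const uint8_t *host, size_t npages)
{
    size_t zeros = 0;

    for (size_t i = 0; i < npages; i++) {
        if (buffer_is_zero(host + i * TARGET_PAGE_SIZE, TARGET_PAGE_SIZE)) {
            zeros++;
        }
    }
    return zeros;
}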
117156e93d26SJuan Quintela 
1172059ff0fbSXiao Guangrong /*
1173059ff0fbSXiao Guangrong  * @pages: the number of pages written by the control path,
1174059ff0fbSXiao Guangrong  *        < 0 - error
1175059ff0fbSXiao Guangrong  *        > 0 - number of pages written
1176059ff0fbSXiao Guangrong  *
1177059ff0fbSXiao Guangrong  * Returns true if the page has been saved, otherwise false.
1178059ff0fbSXiao Guangrong  */
1179944853c2SJuan Quintela static bool control_save_page(PageSearchStatus *pss,
118061717ea9SPeter Xu                               ram_addr_t offset, int *pages)
1181059ff0fbSXiao Guangrong {
1182059ff0fbSXiao Guangrong     int ret;
1183059ff0fbSXiao Guangrong 
1184944853c2SJuan Quintela     ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
11859c53d369SJuan Quintela                                  TARGET_PAGE_SIZE);
1186059ff0fbSXiao Guangrong     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1187059ff0fbSXiao Guangrong         return false;
1188059ff0fbSXiao Guangrong     }
1189059ff0fbSXiao Guangrong 
1190059ff0fbSXiao Guangrong     if (ret == RAM_SAVE_CONTROL_DELAYED) {
11919c53d369SJuan Quintela         *pages = 1;
1192059ff0fbSXiao Guangrong         return true;
1193059ff0fbSXiao Guangrong     }
11949c53d369SJuan Quintela     *pages = ret;
1195059ff0fbSXiao Guangrong     return true;
1196059ff0fbSXiao Guangrong }
1197059ff0fbSXiao Guangrong 
119865dacaa0SXiao Guangrong /*
119965dacaa0SXiao Guangrong  * directly send the page to the stream
120065dacaa0SXiao Guangrong  *
120165dacaa0SXiao Guangrong  * Returns the number of pages written.
120265dacaa0SXiao Guangrong  *
1203ec6f3ab9SPeter Xu  * @pss: current PSS channel
120465dacaa0SXiao Guangrong  * @block: block that contains the page we want to send
120565dacaa0SXiao Guangrong  * @offset: offset inside the block for the page
120665dacaa0SXiao Guangrong  * @buf: the page to be sent
120765dacaa0SXiao Guangrong  * @async: send the page asynchronously
120865dacaa0SXiao Guangrong  */
1209ec6f3ab9SPeter Xu static int save_normal_page(PageSearchStatus *pss, RAMBlock *block,
121061717ea9SPeter Xu                             ram_addr_t offset, uint8_t *buf, bool async)
121165dacaa0SXiao Guangrong {
1212ec6f3ab9SPeter Xu     QEMUFile *file = pss->pss_channel;
1213ec6f3ab9SPeter Xu 
1214c2d5c4a7SFabiano Rosas     if (migrate_mapped_ram()) {
1215c2d5c4a7SFabiano Rosas         qemu_put_buffer_at(file, buf, TARGET_PAGE_SIZE,
1216c2d5c4a7SFabiano Rosas                            block->pages_offset + offset);
1217c2d5c4a7SFabiano Rosas         set_bit(offset >> TARGET_PAGE_BITS, block->file_bmap);
1218c2d5c4a7SFabiano Rosas     } else {
121937502df3SLukas Straub         ram_transferred_add(save_page_header(pss, pss->pss_channel, block,
12204c2d0f6dSDavid Edmondson                                              offset | RAM_SAVE_FLAG_PAGE));
122165dacaa0SXiao Guangrong         if (async) {
122261717ea9SPeter Xu             qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE,
1223f912ec5bSDr. David Alan Gilbert                                   migrate_release_ram() &&
122465dacaa0SXiao Guangrong                                   migration_in_postcopy());
122565dacaa0SXiao Guangrong         } else {
122661717ea9SPeter Xu             qemu_put_buffer(file, buf, TARGET_PAGE_SIZE);
122765dacaa0SXiao Guangrong         }
1228c2d5c4a7SFabiano Rosas     }
12294c2d0f6dSDavid Edmondson     ram_transferred_add(TARGET_PAGE_SIZE);
1230aff3f660SJuan Quintela     stat64_add(&mig_stats.normal_pages, 1);
123165dacaa0SXiao Guangrong     return 1;
123265dacaa0SXiao Guangrong }
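
/*
 * Worked example with illustrative offsets only: with mapped-ram, the page at
 * block offset 0x2000 is written at the fixed file position
 * block->pages_offset + 0x2000, and for a 4 KiB target page bit
 * 0x2000 >> TARGET_PAGE_BITS = 2 is set in block->file_bmap; the stream path
 * instead emits a RAM_SAVE_FLAG_PAGE header followed by the page contents.
 */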
123365dacaa0SXiao Guangrong 
123456e93d26SJuan Quintela /**
12353d0684b2SJuan Quintela  * ram_save_page: send the given page to the stream
123656e93d26SJuan Quintela  *
12373d0684b2SJuan Quintela  * Returns the number of pages written.
12383fd3c4b3SDr. David Alan Gilbert  *          < 0 - error
12393fd3c4b3SDr. David Alan Gilbert  *          >=0 - Number of pages written - this might legally be 0
12403fd3c4b3SDr. David Alan Gilbert  *                if xbzrle noticed the page was the same.
124156e93d26SJuan Quintela  *
12426f37bb8bSJuan Quintela  * @rs: current RAM state
124356e93d26SJuan Quintela  * @pss: data about the page we want to send
124556e93d26SJuan Quintela  */
124605931ec5SJuan Quintela static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
124756e93d26SJuan Quintela {
124856e93d26SJuan Quintela     int pages = -1;
124956e93d26SJuan Quintela     uint8_t *p;
125056e93d26SJuan Quintela     bool send_async = true;
1251a08f6890Szhanghailiang     RAMBlock *block = pss->block;
12528bba004cSAlexey Romko     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
1253059ff0fbSXiao Guangrong     ram_addr_t current_addr = block->offset + offset;
125456e93d26SJuan Quintela 
12552f68e399SDr. David Alan Gilbert     p = block->host + offset;
12561db9d8e5SDr. David Alan Gilbert     trace_ram_save_page(block->idstr, (uint64_t)offset, p);
125756e93d26SJuan Quintela 
125856e93d26SJuan Quintela     XBZRLE_cache_lock();
1259f3095cc8SJuan Quintela     if (rs->xbzrle_started && !migration_in_postcopy()) {
1260ec6f3ab9SPeter Xu         pages = save_xbzrle_page(rs, pss, &p, current_addr,
126161717ea9SPeter Xu                                  block, offset);
126205931ec5SJuan Quintela         if (!rs->last_stage) {
126356e93d26SJuan Quintela             /* Can't send this cached data async, since the cache page
126456e93d26SJuan Quintela              * might get updated before it gets to the wire
126556e93d26SJuan Quintela              */
126656e93d26SJuan Quintela             send_async = false;
126756e93d26SJuan Quintela         }
126856e93d26SJuan Quintela     }
126956e93d26SJuan Quintela 
127056e93d26SJuan Quintela     /* XBZRLE overflow or normal page */
127156e93d26SJuan Quintela     if (pages == -1) {
1272ec6f3ab9SPeter Xu         pages = save_normal_page(pss, block, offset, p, send_async);
127356e93d26SJuan Quintela     }
127456e93d26SJuan Quintela 
127556e93d26SJuan Quintela     XBZRLE_cache_unlock();
127656e93d26SJuan Quintela 
127756e93d26SJuan Quintela     return pages;
127856e93d26SJuan Quintela }
127956e93d26SJuan Quintela 
12809346fa18SFabiano Rosas static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
1281b9ee2f7dSJuan Quintela {
1282d6556d17SPeter Xu     if (!multifd_queue_page(block, offset)) {
1283713f762aSIvan Ren         return -1;
1284713f762aSIvan Ren     }
1285b9ee2f7dSJuan Quintela 
1286b9ee2f7dSJuan Quintela     return 1;
1287b9ee2f7dSJuan Quintela }
1288b9ee2f7dSJuan Quintela 
12893e81763eSLukas Straub 
129031e2ac74SJuan Quintela #define PAGE_ALL_CLEAN 0
129131e2ac74SJuan Quintela #define PAGE_TRY_AGAIN 1
129231e2ac74SJuan Quintela #define PAGE_DIRTY_FOUND 2
129356e93d26SJuan Quintela /**
12943d0684b2SJuan Quintela  * find_dirty_block: find the next dirty page and update any state
12953d0684b2SJuan Quintela  * associated with the search process.
1296b9e60928SDr. David Alan Gilbert  *
129731e2ac74SJuan Quintela  * Returns:
1298294e5a40SJuan Quintela  *         <0: An error happened
129931e2ac74SJuan Quintela  *         PAGE_ALL_CLEAN: no dirty page found, give up
130031e2ac74SJuan Quintela  *         PAGE_TRY_AGAIN: no dirty page found, retry for next block
130131e2ac74SJuan Quintela  *         PAGE_DIRTY_FOUND: dirty page found
1302b9e60928SDr. David Alan Gilbert  *
13036f37bb8bSJuan Quintela  * @rs: current RAM state
13043d0684b2SJuan Quintela  * @pss: data about the state of the current dirty page scan
1306b9e60928SDr. David Alan Gilbert  */
130731e2ac74SJuan Quintela static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
1308b9e60928SDr. David Alan Gilbert {
1309d9e474eaSPeter Xu     /* Update pss->page for the next dirty bit in ramblock */
1310d9e474eaSPeter Xu     pss_find_next_dirty(pss);
1311d9e474eaSPeter Xu 
13126f37bb8bSJuan Quintela     if (pss->complete_round && pss->block == rs->last_seen_block &&
1313a935e30fSJuan Quintela         pss->page >= rs->last_page) {
1314b9e60928SDr. David Alan Gilbert         /*
1315b9e60928SDr. David Alan Gilbert          * We've been once around the RAM and haven't found anything.
1316b9e60928SDr. David Alan Gilbert          * Give up.
1317b9e60928SDr. David Alan Gilbert          */
131831e2ac74SJuan Quintela         return PAGE_ALL_CLEAN;
1319b9e60928SDr. David Alan Gilbert     }
1320542147f4SDavid Hildenbrand     if (!offset_in_ramblock(pss->block,
1321542147f4SDavid Hildenbrand                             ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
1322b9e60928SDr. David Alan Gilbert         /* Didn't find anything in this RAM Block */
1323a935e30fSJuan Quintela         pss->page = 0;
1324b9e60928SDr. David Alan Gilbert         pss->block = QLIST_NEXT_RCU(pss->block, next);
1325b9e60928SDr. David Alan Gilbert         if (!pss->block) {
1326d4f34485SJuan Quintela             if (migrate_multifd() &&
13279d01778aSFabiano Rosas                 (!migrate_multifd_flush_after_each_section() ||
13289d01778aSFabiano Rosas                  migrate_mapped_ram())) {
1329294e5a40SJuan Quintela                 QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
1330a0c78d81SFabiano Rosas                 int ret = multifd_ram_flush_and_sync();
1331294e5a40SJuan Quintela                 if (ret < 0) {
1332294e5a40SJuan Quintela                     return ret;
1333294e5a40SJuan Quintela                 }
13349d01778aSFabiano Rosas 
13359d01778aSFabiano Rosas                 if (!migrate_mapped_ram()) {
1336294e5a40SJuan Quintela                     qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
1337294e5a40SJuan Quintela                     qemu_fflush(f);
1338294e5a40SJuan Quintela                 }
13399d01778aSFabiano Rosas             }
134048df9d80SXiao Guangrong 
1341b9e60928SDr. David Alan Gilbert             /* Hit the end of the list */
1342b9e60928SDr. David Alan Gilbert             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1343b9e60928SDr. David Alan Gilbert             /* Flag that we've looped */
1344b9e60928SDr. David Alan Gilbert             pss->complete_round = true;
13451a373522SDavid Hildenbrand             /* After the first round, enable XBZRLE. */
134687dca0c9SJuan Quintela             if (migrate_xbzrle()) {
1347f3095cc8SJuan Quintela                 rs->xbzrle_started = true;
13481a373522SDavid Hildenbrand             }
1349b9e60928SDr. David Alan Gilbert         }
1350b9e60928SDr. David Alan Gilbert         /* Didn't find anything this time, but try again on the new block */
135131e2ac74SJuan Quintela         return PAGE_TRY_AGAIN;
1352b9e60928SDr. David Alan Gilbert     } else {
135331e2ac74SJuan Quintela         /* We've found something */
135431e2ac74SJuan Quintela         return PAGE_DIRTY_FOUND;
1355b9e60928SDr. David Alan Gilbert     }
1356b9e60928SDr. David Alan Gilbert }
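
/*
 * Illustrative sketch, not part of the upstream file: how a scan loop might
 * dispatch on the find_dirty_block() return values.  The real callers in this
 * file are more involved; the helper name is hypothetical.
 */
static int sketch_scan_one_dirty_page(RAMState *rs, PageSearchStatus *pss)
{
    while (true) {
        int res = find_dirty_block(rs, pss);

        switch (res) {
        case PAGE_DIRTY_FOUND:
            return 1;       /* pss->block / pss->page point at a dirty page */
        case PAGE_TRY_AGAIN:
            continue;       /* moved on to the next RAMBlock, keep scanning */
        case PAGE_ALL_CLEAN:
            return 0;       /* a whole round found nothing dirty */
        default:
            return res;     /* < 0: propagate the flush/sync error */
        }
    }
}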
1357b9e60928SDr. David Alan Gilbert 
13583d0684b2SJuan Quintela /**
13593d0684b2SJuan Quintela  * unqueue_page: gets a page of the queue
1360a82d593bSDr. David Alan Gilbert  *
13613d0684b2SJuan Quintela  * Helper for 'get_queued_page' - gets a page off the queue
13623d0684b2SJuan Quintela  *
13633d0684b2SJuan Quintela  * Returns the block of the page (or NULL if none available)
13643d0684b2SJuan Quintela  *
1365ec481c6cSJuan Quintela  * @rs: current RAM state
13663d0684b2SJuan Quintela  * @offset: used to return the offset within the RAMBlock
1367a82d593bSDr. David Alan Gilbert  */
1368f20e2865SJuan Quintela static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1369a82d593bSDr. David Alan Gilbert {
1370a1fe28dfSPeter Xu     struct RAMSrcPageRequest *entry;
1371a82d593bSDr. David Alan Gilbert     RAMBlock *block = NULL;
1372a82d593bSDr. David Alan Gilbert 
1373a1fe28dfSPeter Xu     if (!postcopy_has_request(rs)) {
1374ae526e32SXiao Guangrong         return NULL;
1375ae526e32SXiao Guangrong     }
1376ae526e32SXiao Guangrong 
13776e8a355dSDaniel Brodsky     QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
1378a1fe28dfSPeter Xu 
1379a1fe28dfSPeter Xu     /*
1380a1fe28dfSPeter Xu      * This should _never_ change even after we take the lock, because no one
1381a1fe28dfSPeter Xu      * should be taking anything off the request list other than us.
1382a1fe28dfSPeter Xu      */
1383a1fe28dfSPeter Xu     assert(postcopy_has_request(rs));
1384a1fe28dfSPeter Xu 
1385a1fe28dfSPeter Xu     entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
1386a82d593bSDr. David Alan Gilbert     block = entry->rb;
1387a82d593bSDr. David Alan Gilbert     *offset = entry->offset;
1388a82d593bSDr. David Alan Gilbert 
1389777f53c7SThomas Huth     if (entry->len > TARGET_PAGE_SIZE) {
1390777f53c7SThomas Huth         entry->len -= TARGET_PAGE_SIZE;
1391777f53c7SThomas Huth         entry->offset += TARGET_PAGE_SIZE;
1392a82d593bSDr. David Alan Gilbert     } else {
1393a82d593bSDr. David Alan Gilbert         memory_region_unref(block->mr);
1394ec481c6cSJuan Quintela         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1395a82d593bSDr. David Alan Gilbert         g_free(entry);
1396e03a34f8SDr. David Alan Gilbert         migration_consume_urgent_request();
1397a82d593bSDr. David Alan Gilbert     }
1398a82d593bSDr. David Alan Gilbert 
1399a82d593bSDr. David Alan Gilbert     return block;
1400a82d593bSDr. David Alan Gilbert }
1401a82d593bSDr. David Alan Gilbert 
1402278e2f55SAndrey Gruzdev #if defined(__linux__)
1403278e2f55SAndrey Gruzdev /**
1404278e2f55SAndrey Gruzdev  * poll_fault_page: try to get the next UFFD write fault page and, if a pending
1405278e2f55SAndrey Gruzdev  *   fault is found, return the RAM block pointer and page offset
1406278e2f55SAndrey Gruzdev  *
1407278e2f55SAndrey Gruzdev  * Returns pointer to the RAMBlock containing faulting page,
1408278e2f55SAndrey Gruzdev  *   NULL if no write faults are pending
1409278e2f55SAndrey Gruzdev  *
1410278e2f55SAndrey Gruzdev  * @rs: current RAM state
1411278e2f55SAndrey Gruzdev  * @offset: page offset from the beginning of the block
1412278e2f55SAndrey Gruzdev  */
1413278e2f55SAndrey Gruzdev static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1414278e2f55SAndrey Gruzdev {
1415278e2f55SAndrey Gruzdev     struct uffd_msg uffd_msg;
1416278e2f55SAndrey Gruzdev     void *page_address;
141782ea3e3bSAndrey Gruzdev     RAMBlock *block;
1418278e2f55SAndrey Gruzdev     int res;
1419278e2f55SAndrey Gruzdev 
1420278e2f55SAndrey Gruzdev     if (!migrate_background_snapshot()) {
1421278e2f55SAndrey Gruzdev         return NULL;
1422278e2f55SAndrey Gruzdev     }
1423278e2f55SAndrey Gruzdev 
1424278e2f55SAndrey Gruzdev     res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1);
1425278e2f55SAndrey Gruzdev     if (res <= 0) {
1426278e2f55SAndrey Gruzdev         return NULL;
1427278e2f55SAndrey Gruzdev     }
1428278e2f55SAndrey Gruzdev 
1429278e2f55SAndrey Gruzdev     page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address;
143082ea3e3bSAndrey Gruzdev     block = qemu_ram_block_from_host(page_address, false, offset);
143182ea3e3bSAndrey Gruzdev     assert(block && (block->flags & RAM_UF_WRITEPROTECT) != 0);
143282ea3e3bSAndrey Gruzdev     return block;
1433278e2f55SAndrey Gruzdev }
1434278e2f55SAndrey Gruzdev 
1435278e2f55SAndrey Gruzdev /**
1436278e2f55SAndrey Gruzdev  * ram_save_release_protection: release UFFD write protection after
1437278e2f55SAndrey Gruzdev  *   a range of pages has been saved
1438278e2f55SAndrey Gruzdev  *
1439278e2f55SAndrey Gruzdev  * @rs: current RAM state
1440278e2f55SAndrey Gruzdev  * @pss: page-search-status structure
1441278e2f55SAndrey Gruzdev  * @start_page: index of the first page in the range relative to pss->block
1442278e2f55SAndrey Gruzdev  *
1443278e2f55SAndrey Gruzdev  * Returns 0 on success, negative value in case of an error
1444278e2f55SAndrey Gruzdev */
1445278e2f55SAndrey Gruzdev static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1446278e2f55SAndrey Gruzdev         unsigned long start_page)
1447278e2f55SAndrey Gruzdev {
1448278e2f55SAndrey Gruzdev     int res = 0;
1449278e2f55SAndrey Gruzdev 
1450278e2f55SAndrey Gruzdev     /* Check if page is from UFFD-managed region. */
1451278e2f55SAndrey Gruzdev     if (pss->block->flags & RAM_UF_WRITEPROTECT) {
1452278e2f55SAndrey Gruzdev         void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS);
1453258f5c98SPeter Xu         uint64_t run_length = (pss->page - start_page) << TARGET_PAGE_BITS;
1454278e2f55SAndrey Gruzdev 
1455278e2f55SAndrey Gruzdev         /* Flush async buffers before un-protect. */
145661717ea9SPeter Xu         qemu_fflush(pss->pss_channel);
1457278e2f55SAndrey Gruzdev         /* Un-protect memory range. */
1458278e2f55SAndrey Gruzdev         res = uffd_change_protection(rs->uffdio_fd, page_address, run_length,
1459278e2f55SAndrey Gruzdev                 false, false);
1460278e2f55SAndrey Gruzdev     }
1461278e2f55SAndrey Gruzdev 
1462278e2f55SAndrey Gruzdev     return res;
1463278e2f55SAndrey Gruzdev }
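
/*
 * Worked example with illustrative numbers only: if the range saved since
 * start_page spans 16 pages of 4 KiB, run_length is 16 << 12 = 65536 bytes
 * (64 KiB), and exactly that range is write-unprotected after the flush.
 */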
1464278e2f55SAndrey Gruzdev 
1465278e2f55SAndrey Gruzdev /* ram_write_tracking_available: check if kernel supports required UFFD features
1466278e2f55SAndrey Gruzdev  *
1467278e2f55SAndrey Gruzdev  * Returns true if supported, false otherwise
1468278e2f55SAndrey Gruzdev  */
1469278e2f55SAndrey Gruzdev bool ram_write_tracking_available(void)
1470278e2f55SAndrey Gruzdev {
1471278e2f55SAndrey Gruzdev     uint64_t uffd_features;
1472278e2f55SAndrey Gruzdev     int res;
1473278e2f55SAndrey Gruzdev 
1474278e2f55SAndrey Gruzdev     res = uffd_query_features(&uffd_features);
1475278e2f55SAndrey Gruzdev     return (res == 0 &&
1476278e2f55SAndrey Gruzdev             (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0);
1477278e2f55SAndrey Gruzdev }
1478278e2f55SAndrey Gruzdev 
1479278e2f55SAndrey Gruzdev /* ram_write_tracking_compatible: check if guest configuration is
1480278e2f55SAndrey Gruzdev  *   compatible with 'write-tracking'
1481278e2f55SAndrey Gruzdev  *
1482278e2f55SAndrey Gruzdev  * Returns true if compatible, false otherwise
1483278e2f55SAndrey Gruzdev  */
1484278e2f55SAndrey Gruzdev bool ram_write_tracking_compatible(void)
1485278e2f55SAndrey Gruzdev {
1486278e2f55SAndrey Gruzdev     const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT);
1487278e2f55SAndrey Gruzdev     int uffd_fd;
148882ea3e3bSAndrey Gruzdev     RAMBlock *block;
1489278e2f55SAndrey Gruzdev     bool ret = false;
1490278e2f55SAndrey Gruzdev 
1491278e2f55SAndrey Gruzdev     /* Open UFFD file descriptor */
1492278e2f55SAndrey Gruzdev     uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false);
1493278e2f55SAndrey Gruzdev     if (uffd_fd < 0) {
1494278e2f55SAndrey Gruzdev         return false;
1495278e2f55SAndrey Gruzdev     }
1496278e2f55SAndrey Gruzdev 
1497278e2f55SAndrey Gruzdev     RCU_READ_LOCK_GUARD();
1498278e2f55SAndrey Gruzdev 
149982ea3e3bSAndrey Gruzdev     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1500278e2f55SAndrey Gruzdev         uint64_t uffd_ioctls;
1501278e2f55SAndrey Gruzdev 
1502278e2f55SAndrey Gruzdev         /* Nothing to do with read-only and MMIO-writable regions */
150382ea3e3bSAndrey Gruzdev         if (block->mr->readonly || block->mr->rom_device) {
1504278e2f55SAndrey Gruzdev             continue;
1505278e2f55SAndrey Gruzdev         }
1506278e2f55SAndrey Gruzdev         /* Try to register block memory via UFFD-IO to track writes */
150782ea3e3bSAndrey Gruzdev         if (uffd_register_memory(uffd_fd, block->host, block->max_length,
1508278e2f55SAndrey Gruzdev                 UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) {
1509278e2f55SAndrey Gruzdev             goto out;
1510278e2f55SAndrey Gruzdev         }
1511278e2f55SAndrey Gruzdev         if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) {
1512278e2f55SAndrey Gruzdev             goto out;
1513278e2f55SAndrey Gruzdev         }
1514278e2f55SAndrey Gruzdev     }
1515278e2f55SAndrey Gruzdev     ret = true;
1516278e2f55SAndrey Gruzdev 
1517278e2f55SAndrey Gruzdev out:
1518278e2f55SAndrey Gruzdev     uffd_close_fd(uffd_fd);
1519278e2f55SAndrey Gruzdev     return ret;
1520278e2f55SAndrey Gruzdev }
1521278e2f55SAndrey Gruzdev 
1522f7b9dcfbSDavid Hildenbrand static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
1523f7b9dcfbSDavid Hildenbrand                                        ram_addr_t size)
1524f7b9dcfbSDavid Hildenbrand {
15255f19a449SDavid Hildenbrand     const ram_addr_t end = offset + size;
15265f19a449SDavid Hildenbrand 
1527278e2f55SAndrey Gruzdev     /*
1528f7b9dcfbSDavid Hildenbrand      * We read one byte of each page; this will preallocate page tables if
1529f7b9dcfbSDavid Hildenbrand      * required and populate the shared zeropage on MAP_PRIVATE anonymous memory
1530f7b9dcfbSDavid Hildenbrand      * where no page was populated yet. This might require adaptation when
1531f7b9dcfbSDavid Hildenbrand      * supporting other mappings, like shmem.
1532f7b9dcfbSDavid Hildenbrand      */
15335f19a449SDavid Hildenbrand     for (; offset < end; offset += block->page_size) {
1534f7b9dcfbSDavid Hildenbrand         char tmp = *((char *)block->host + offset);
1535f7b9dcfbSDavid Hildenbrand 
1536f7b9dcfbSDavid Hildenbrand         /* Don't optimize the read out */
1537f7b9dcfbSDavid Hildenbrand         asm volatile("" : "+r" (tmp));
1538f7b9dcfbSDavid Hildenbrand     }
1539f7b9dcfbSDavid Hildenbrand }
1540f7b9dcfbSDavid Hildenbrand 
15416fee3a1fSDavid Hildenbrand static inline int populate_read_section(MemoryRegionSection *section,
15426fee3a1fSDavid Hildenbrand                                         void *opaque)
15436fee3a1fSDavid Hildenbrand {
15446fee3a1fSDavid Hildenbrand     const hwaddr size = int128_get64(section->size);
15456fee3a1fSDavid Hildenbrand     hwaddr offset = section->offset_within_region;
15466fee3a1fSDavid Hildenbrand     RAMBlock *block = section->mr->ram_block;
15476fee3a1fSDavid Hildenbrand 
15486fee3a1fSDavid Hildenbrand     populate_read_range(block, offset, size);
15496fee3a1fSDavid Hildenbrand     return 0;
15506fee3a1fSDavid Hildenbrand }
15516fee3a1fSDavid Hildenbrand 
1552f7b9dcfbSDavid Hildenbrand /*
1553f7b9dcfbSDavid Hildenbrand  * ram_block_populate_read: preallocate page tables and populate pages in the
1554f7b9dcfbSDavid Hildenbrand  *   RAM block by reading a byte of each page.
1555eeccb99cSAndrey Gruzdev  *
1556eeccb99cSAndrey Gruzdev  * Since it's solely used for the userfault_fd WP feature, here we just
1557eeccb99cSAndrey Gruzdev  *   hardcode the page size to qemu_real_host_page_size.
1558eeccb99cSAndrey Gruzdev  *
155982ea3e3bSAndrey Gruzdev  * @rb: RAM block to populate
1560eeccb99cSAndrey Gruzdev  */
15616fee3a1fSDavid Hildenbrand static void ram_block_populate_read(RAMBlock *rb)
1562eeccb99cSAndrey Gruzdev {
15636fee3a1fSDavid Hildenbrand     /*
15646fee3a1fSDavid Hildenbrand      * Skip populating all pages that fall into a discarded range as managed by
15656fee3a1fSDavid Hildenbrand      * a RamDiscardManager responsible for the mapped memory region of the
15666fee3a1fSDavid Hildenbrand      * RAMBlock. Such discarded ("logically unplugged") parts of a RAMBlock
15676fee3a1fSDavid Hildenbrand      * must not get populated automatically. We don't have to track
15686fee3a1fSDavid Hildenbrand      * modifications via userfaultfd WP reliably, because these pages will
15696fee3a1fSDavid Hildenbrand      * not be part of the migration stream either way -- see
15706fee3a1fSDavid Hildenbrand      * ramblock_dirty_bitmap_exclude_discarded_pages().
15716fee3a1fSDavid Hildenbrand      *
15726fee3a1fSDavid Hildenbrand      * Note: The result is only stable while migrating (precopy/postcopy).
15736fee3a1fSDavid Hildenbrand      */
15746fee3a1fSDavid Hildenbrand     if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
15756fee3a1fSDavid Hildenbrand         RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
15766fee3a1fSDavid Hildenbrand         MemoryRegionSection section = {
15776fee3a1fSDavid Hildenbrand             .mr = rb->mr,
15786fee3a1fSDavid Hildenbrand             .offset_within_region = 0,
15796fee3a1fSDavid Hildenbrand             .size = rb->mr->size,
15806fee3a1fSDavid Hildenbrand         };
15816fee3a1fSDavid Hildenbrand 
15826fee3a1fSDavid Hildenbrand         ram_discard_manager_replay_populated(rdm, &section,
15836fee3a1fSDavid Hildenbrand                                              populate_read_section, NULL);
15846fee3a1fSDavid Hildenbrand     } else {
15856fee3a1fSDavid Hildenbrand         populate_read_range(rb, 0, rb->used_length);
15866fee3a1fSDavid Hildenbrand     }
1587eeccb99cSAndrey Gruzdev }
1588eeccb99cSAndrey Gruzdev 
1589eeccb99cSAndrey Gruzdev /*
1590eeccb99cSAndrey Gruzdev  * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
1591eeccb99cSAndrey Gruzdev  */
1592eeccb99cSAndrey Gruzdev void ram_write_tracking_prepare(void)
1593eeccb99cSAndrey Gruzdev {
159482ea3e3bSAndrey Gruzdev     RAMBlock *block;
1595eeccb99cSAndrey Gruzdev 
1596eeccb99cSAndrey Gruzdev     RCU_READ_LOCK_GUARD();
1597eeccb99cSAndrey Gruzdev 
159882ea3e3bSAndrey Gruzdev     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1599eeccb99cSAndrey Gruzdev         /* Nothing to do with read-only and MMIO-writable regions */
160082ea3e3bSAndrey Gruzdev         if (block->mr->readonly || block->mr->rom_device) {
1601eeccb99cSAndrey Gruzdev             continue;
1602eeccb99cSAndrey Gruzdev         }
1603eeccb99cSAndrey Gruzdev 
1604eeccb99cSAndrey Gruzdev         /*
1605eeccb99cSAndrey Gruzdev          * Populate pages of the RAM block before enabling userfault_fd
1606eeccb99cSAndrey Gruzdev          * write protection.
1607eeccb99cSAndrey Gruzdev          *
1608eeccb99cSAndrey Gruzdev          * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
1609eeccb99cSAndrey Gruzdev          * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
1610eeccb99cSAndrey Gruzdev          * pages with pte_none() entries in page table.
1611eeccb99cSAndrey Gruzdev          */
1612f7b9dcfbSDavid Hildenbrand         ram_block_populate_read(block);
1613eeccb99cSAndrey Gruzdev     }
1614eeccb99cSAndrey Gruzdev }
1615eeccb99cSAndrey Gruzdev 
1616e41c5770SDavid Hildenbrand static inline int uffd_protect_section(MemoryRegionSection *section,
1617e41c5770SDavid Hildenbrand                                        void *opaque)
1618e41c5770SDavid Hildenbrand {
1619e41c5770SDavid Hildenbrand     const hwaddr size = int128_get64(section->size);
1620e41c5770SDavid Hildenbrand     const hwaddr offset = section->offset_within_region;
1621e41c5770SDavid Hildenbrand     RAMBlock *rb = section->mr->ram_block;
1622e41c5770SDavid Hildenbrand     int uffd_fd = (uintptr_t)opaque;
1623e41c5770SDavid Hildenbrand 
1624e41c5770SDavid Hildenbrand     return uffd_change_protection(uffd_fd, rb->host + offset, size, true,
1625e41c5770SDavid Hildenbrand                                   false);
1626e41c5770SDavid Hildenbrand }
1627e41c5770SDavid Hildenbrand 
1628e41c5770SDavid Hildenbrand static int ram_block_uffd_protect(RAMBlock *rb, int uffd_fd)
1629e41c5770SDavid Hildenbrand {
1630e41c5770SDavid Hildenbrand     assert(rb->flags & RAM_UF_WRITEPROTECT);
1631e41c5770SDavid Hildenbrand 
1632e41c5770SDavid Hildenbrand     /* See ram_block_populate_read() */
1633e41c5770SDavid Hildenbrand     if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
1634e41c5770SDavid Hildenbrand         RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
1635e41c5770SDavid Hildenbrand         MemoryRegionSection section = {
1636e41c5770SDavid Hildenbrand             .mr = rb->mr,
1637e41c5770SDavid Hildenbrand             .offset_within_region = 0,
1638e41c5770SDavid Hildenbrand             .size = rb->mr->size,
1639e41c5770SDavid Hildenbrand         };
1640e41c5770SDavid Hildenbrand 
1641e41c5770SDavid Hildenbrand         return ram_discard_manager_replay_populated(rdm, &section,
1642e41c5770SDavid Hildenbrand                                                     uffd_protect_section,
1643e41c5770SDavid Hildenbrand                                                     (void *)(uintptr_t)uffd_fd);
1644e41c5770SDavid Hildenbrand     }
1645e41c5770SDavid Hildenbrand     return uffd_change_protection(uffd_fd, rb->host,
1646e41c5770SDavid Hildenbrand                                   rb->used_length, true, false);
1647e41c5770SDavid Hildenbrand }
1648e41c5770SDavid Hildenbrand 
1649eeccb99cSAndrey Gruzdev /*
1650278e2f55SAndrey Gruzdev  * ram_write_tracking_start: start UFFD-WP memory tracking
1651278e2f55SAndrey Gruzdev  *
1652278e2f55SAndrey Gruzdev  * Returns 0 for success or negative value in case of error
1653278e2f55SAndrey Gruzdev  */
1654278e2f55SAndrey Gruzdev int ram_write_tracking_start(void)
1655278e2f55SAndrey Gruzdev {
1656278e2f55SAndrey Gruzdev     int uffd_fd;
1657278e2f55SAndrey Gruzdev     RAMState *rs = ram_state;
165882ea3e3bSAndrey Gruzdev     RAMBlock *block;
1659278e2f55SAndrey Gruzdev 
1660278e2f55SAndrey Gruzdev     /* Open UFFD file descriptor */
1661278e2f55SAndrey Gruzdev     uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
1662278e2f55SAndrey Gruzdev     if (uffd_fd < 0) {
1663278e2f55SAndrey Gruzdev         return uffd_fd;
1664278e2f55SAndrey Gruzdev     }
1665278e2f55SAndrey Gruzdev     rs->uffdio_fd = uffd_fd;
1666278e2f55SAndrey Gruzdev 
1667278e2f55SAndrey Gruzdev     RCU_READ_LOCK_GUARD();
1668278e2f55SAndrey Gruzdev 
166982ea3e3bSAndrey Gruzdev     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1670278e2f55SAndrey Gruzdev         /* Nothing to do with read-only and MMIO-writable regions */
167182ea3e3bSAndrey Gruzdev         if (block->mr->readonly || block->mr->rom_device) {
1672278e2f55SAndrey Gruzdev             continue;
1673278e2f55SAndrey Gruzdev         }
1674278e2f55SAndrey Gruzdev 
1675278e2f55SAndrey Gruzdev         /* Register block memory with UFFD to track writes */
167682ea3e3bSAndrey Gruzdev         if (uffd_register_memory(rs->uffdio_fd, block->host,
167782ea3e3bSAndrey Gruzdev                 block->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) {
1678278e2f55SAndrey Gruzdev             goto fail;
1679278e2f55SAndrey Gruzdev         }
168072ef3a37SDavid Hildenbrand         block->flags |= RAM_UF_WRITEPROTECT;
168172ef3a37SDavid Hildenbrand         memory_region_ref(block->mr);
168272ef3a37SDavid Hildenbrand 
1683278e2f55SAndrey Gruzdev         /* Apply UFFD write protection to the block memory range */
1684e41c5770SDavid Hildenbrand         if (ram_block_uffd_protect(block, uffd_fd)) {
1685278e2f55SAndrey Gruzdev             goto fail;
1686278e2f55SAndrey Gruzdev         }
1687278e2f55SAndrey Gruzdev 
168882ea3e3bSAndrey Gruzdev         trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
168982ea3e3bSAndrey Gruzdev                 block->host, block->max_length);
1690278e2f55SAndrey Gruzdev     }
1691278e2f55SAndrey Gruzdev 
1692278e2f55SAndrey Gruzdev     return 0;
1693278e2f55SAndrey Gruzdev 
1694278e2f55SAndrey Gruzdev fail:
1695278e2f55SAndrey Gruzdev     error_report("ram_write_tracking_start() failed: restoring initial memory state");
1696278e2f55SAndrey Gruzdev 
169782ea3e3bSAndrey Gruzdev     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
169882ea3e3bSAndrey Gruzdev         if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
1699278e2f55SAndrey Gruzdev             continue;
1700278e2f55SAndrey Gruzdev         }
170182ea3e3bSAndrey Gruzdev         uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
1702278e2f55SAndrey Gruzdev         /* Cleanup flags and remove reference */
170382ea3e3bSAndrey Gruzdev         block->flags &= ~RAM_UF_WRITEPROTECT;
170482ea3e3bSAndrey Gruzdev         memory_region_unref(block->mr);
1705278e2f55SAndrey Gruzdev     }
1706278e2f55SAndrey Gruzdev 
1707278e2f55SAndrey Gruzdev     uffd_close_fd(uffd_fd);
1708278e2f55SAndrey Gruzdev     rs->uffdio_fd = -1;
1709278e2f55SAndrey Gruzdev     return -1;
1710278e2f55SAndrey Gruzdev }
1711278e2f55SAndrey Gruzdev 
1712278e2f55SAndrey Gruzdev /**
1713278e2f55SAndrey Gruzdev  * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection
1714278e2f55SAndrey Gruzdev  */
1715278e2f55SAndrey Gruzdev void ram_write_tracking_stop(void)
1716278e2f55SAndrey Gruzdev {
1717278e2f55SAndrey Gruzdev     RAMState *rs = ram_state;
171882ea3e3bSAndrey Gruzdev     RAMBlock *block;
1719278e2f55SAndrey Gruzdev 
1720278e2f55SAndrey Gruzdev     RCU_READ_LOCK_GUARD();
1721278e2f55SAndrey Gruzdev 
172282ea3e3bSAndrey Gruzdev     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
172382ea3e3bSAndrey Gruzdev         if ((block->flags & RAM_UF_WRITEPROTECT) == 0) {
1724278e2f55SAndrey Gruzdev             continue;
1725278e2f55SAndrey Gruzdev         }
172682ea3e3bSAndrey Gruzdev         uffd_unregister_memory(rs->uffdio_fd, block->host, block->max_length);
1727278e2f55SAndrey Gruzdev 
172882ea3e3bSAndrey Gruzdev         trace_ram_write_tracking_ramblock_stop(block->idstr, block->page_size,
172982ea3e3bSAndrey Gruzdev                 block->host, block->max_length);
1730278e2f55SAndrey Gruzdev 
1731278e2f55SAndrey Gruzdev         /* Cleanup flags and remove reference */
173282ea3e3bSAndrey Gruzdev         block->flags &= ~RAM_UF_WRITEPROTECT;
173382ea3e3bSAndrey Gruzdev         memory_region_unref(block->mr);
1734278e2f55SAndrey Gruzdev     }
1735278e2f55SAndrey Gruzdev 
1736278e2f55SAndrey Gruzdev     /* Finally close UFFD file descriptor */
1737278e2f55SAndrey Gruzdev     uffd_close_fd(rs->uffdio_fd);
1738278e2f55SAndrey Gruzdev     rs->uffdio_fd = -1;
1739278e2f55SAndrey Gruzdev }
1740278e2f55SAndrey Gruzdev 
1741278e2f55SAndrey Gruzdev #else
1742278e2f55SAndrey Gruzdev /* No target OS support, stubs just fail or ignore */
1743278e2f55SAndrey Gruzdev 
1744278e2f55SAndrey Gruzdev static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset)
1745278e2f55SAndrey Gruzdev {
1746278e2f55SAndrey Gruzdev     (void) rs;
1747278e2f55SAndrey Gruzdev     (void) offset;
1748278e2f55SAndrey Gruzdev 
1749278e2f55SAndrey Gruzdev     return NULL;
1750278e2f55SAndrey Gruzdev }
1751278e2f55SAndrey Gruzdev 
1752278e2f55SAndrey Gruzdev static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss,
1753278e2f55SAndrey Gruzdev         unsigned long start_page)
1754278e2f55SAndrey Gruzdev {
1755278e2f55SAndrey Gruzdev     (void) rs;
1756278e2f55SAndrey Gruzdev     (void) pss;
1757278e2f55SAndrey Gruzdev     (void) start_page;
1758278e2f55SAndrey Gruzdev 
1759278e2f55SAndrey Gruzdev     return 0;
1760278e2f55SAndrey Gruzdev }
1761278e2f55SAndrey Gruzdev 
1762278e2f55SAndrey Gruzdev bool ram_write_tracking_available(void)
1763278e2f55SAndrey Gruzdev {
1764278e2f55SAndrey Gruzdev     return false;
1765278e2f55SAndrey Gruzdev }
1766278e2f55SAndrey Gruzdev 
1767278e2f55SAndrey Gruzdev bool ram_write_tracking_compatible(void)
1768278e2f55SAndrey Gruzdev {
17690c79effdSPierrick Bouvier     g_assert_not_reached();
1770278e2f55SAndrey Gruzdev }
1771278e2f55SAndrey Gruzdev 
1772278e2f55SAndrey Gruzdev int ram_write_tracking_start(void)
1773278e2f55SAndrey Gruzdev {
17740c79effdSPierrick Bouvier     g_assert_not_reached();
1775278e2f55SAndrey Gruzdev }
1776278e2f55SAndrey Gruzdev 
1777278e2f55SAndrey Gruzdev void ram_write_tracking_stop(void)
1778278e2f55SAndrey Gruzdev {
17790c79effdSPierrick Bouvier     g_assert_not_reached();
1780278e2f55SAndrey Gruzdev }
1781278e2f55SAndrey Gruzdev #endif /* defined(__linux__) */
1782278e2f55SAndrey Gruzdev 
17833d0684b2SJuan Quintela /**
1784ff1543afSLi Qiang  * get_queued_page: unqueue a page from the postcopy requests
1785a82d593bSDr. David Alan Gilbert  *
17863d0684b2SJuan Quintela  * Skips pages that are already sent (!dirty)
1787a82d593bSDr. David Alan Gilbert  *
1788a5f7b1a6SWei Yang  * Returns true if a queued page is found
17893d0684b2SJuan Quintela  *
17906f37bb8bSJuan Quintela  * @rs: current RAM state
17913d0684b2SJuan Quintela  * @pss: data about the state of the current dirty page scan
1792a82d593bSDr. David Alan Gilbert  */
1793f20e2865SJuan Quintela static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1794a82d593bSDr. David Alan Gilbert {
1795a82d593bSDr. David Alan Gilbert     RAMBlock  *block;
1796a82d593bSDr. David Alan Gilbert     ram_addr_t offset;
179785f99eb2SMarc-André Lureau     bool dirty = false;
1798a82d593bSDr. David Alan Gilbert 
1799777f53c7SThomas Huth     do {
1800f20e2865SJuan Quintela         block = unqueue_page(rs, &offset);
1801777f53c7SThomas Huth         /*
1802777f53c7SThomas Huth          * We're sending this page, and since it's postcopy nothing else
1803777f53c7SThomas Huth          * will dirty it, and we must make sure it doesn't get sent again
1804777f53c7SThomas Huth          * even if this queue request was received after the background
1805777f53c7SThomas Huth          * search already sent it.
1806777f53c7SThomas Huth          */
1807777f53c7SThomas Huth         if (block) {
1808777f53c7SThomas Huth             unsigned long page;
1809777f53c7SThomas Huth 
1810777f53c7SThomas Huth             page = offset >> TARGET_PAGE_BITS;
1811777f53c7SThomas Huth             dirty = test_bit(page, block->bmap);
1812777f53c7SThomas Huth             if (!dirty) {
1813777f53c7SThomas Huth                 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1814777f53c7SThomas Huth                                                 page);
1815777f53c7SThomas Huth             } else {
1816777f53c7SThomas Huth                 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1817777f53c7SThomas Huth             }
1818777f53c7SThomas Huth         }
1819777f53c7SThomas Huth 
1820777f53c7SThomas Huth     } while (block && !dirty);
1821a82d593bSDr. David Alan Gilbert 
1822b062106dSPeter Xu     if (!block) {
1823278e2f55SAndrey Gruzdev         /*
1824278e2f55SAndrey Gruzdev          * Poll write faults too if background snapshot is enabled; that's
1825278e2f55SAndrey Gruzdev          * when we have vcpus got blocked by the write protected pages.
1826278e2f55SAndrey Gruzdev          */
1827278e2f55SAndrey Gruzdev         block = poll_fault_page(rs, &offset);
1828278e2f55SAndrey Gruzdev     }
1829278e2f55SAndrey Gruzdev 
1830a82d593bSDr. David Alan Gilbert     if (block) {
1831a82d593bSDr. David Alan Gilbert         /*
1832a82d593bSDr. David Alan Gilbert          * We want the background search to continue from the queued page
1833a82d593bSDr. David Alan Gilbert          * since the guest is likely to want other pages near to the page
1834a82d593bSDr. David Alan Gilbert          * it just requested.
1835a82d593bSDr. David Alan Gilbert          */
1836a82d593bSDr. David Alan Gilbert         pss->block = block;
1837a935e30fSJuan Quintela         pss->page = offset >> TARGET_PAGE_BITS;
1838422314e7SWei Yang 
1839422314e7SWei Yang         /*
1840422314e7SWei Yang          * This unqueued page would break the "one round" check, even
1841422314e7SWei Yang          * though this is really rare.
1842422314e7SWei Yang          */
1843422314e7SWei Yang         pss->complete_round = false;
1844a82d593bSDr. David Alan Gilbert     }
1845a82d593bSDr. David Alan Gilbert 
1846a82d593bSDr. David Alan Gilbert     return !!block;
1847a82d593bSDr. David Alan Gilbert }
1848a82d593bSDr. David Alan Gilbert 
184956e93d26SJuan Quintela /**
18505e58f968SJuan Quintela  * migration_page_queue_free: drop any remaining pages in the ram
18515e58f968SJuan Quintela  * request queue
18526c595cdeSDr. David Alan Gilbert  *
18533d0684b2SJuan Quintela  * It should be empty at the end anyway, but in error cases there may
18543d0684b2SJuan Quintela  * be some left.  If any pages are left, we drop them.
18553d0684b2SJuan Quintela  *
18566c595cdeSDr. David Alan Gilbert  */
185783c13382SJuan Quintela static void migration_page_queue_free(RAMState *rs)
18586c595cdeSDr. David Alan Gilbert {
1859ec481c6cSJuan Quintela     struct RAMSrcPageRequest *mspr, *next_mspr;
18606c595cdeSDr. David Alan Gilbert     /* This queue should generally be empty - but a failed
18616c595cdeSDr. David Alan Gilbert      * migration might have left some entries behind.
18626c595cdeSDr. David Alan Gilbert      */
186389ac5a1dSDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
1864ec481c6cSJuan Quintela     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
18656c595cdeSDr. David Alan Gilbert         memory_region_unref(mspr->rb->mr);
1866ec481c6cSJuan Quintela         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
18676c595cdeSDr. David Alan Gilbert         g_free(mspr);
18686c595cdeSDr. David Alan Gilbert     }
18696c595cdeSDr. David Alan Gilbert }
18706c595cdeSDr. David Alan Gilbert 
18716c595cdeSDr. David Alan Gilbert /**
18723d0684b2SJuan Quintela  * ram_save_queue_pages: queue the page for transmission
18733d0684b2SJuan Quintela  *
18743d0684b2SJuan Quintela  * A request from postcopy destination for example.
18753d0684b2SJuan Quintela  *
18763d0684b2SJuan Quintela  * Returns zero on success or negative on error
18773d0684b2SJuan Quintela  *
18783d0684b2SJuan Quintela  * @rbname: Name of the RAMBlock of the request. NULL means the
18793d0684b2SJuan Quintela  *          same as the last one.
18803d0684b2SJuan Quintela  * @start: starting address from the start of the RAMBlock
18813d0684b2SJuan Quintela  * @len: length (in bytes) to send
18826c595cdeSDr. David Alan Gilbert  */
18837aa6070dSPeter Xu int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len,
18847aa6070dSPeter Xu                          Error **errp)
18856c595cdeSDr. David Alan Gilbert {
18866c595cdeSDr. David Alan Gilbert     RAMBlock *ramblock;
188753518d94SJuan Quintela     RAMState *rs = ram_state;
18886c595cdeSDr. David Alan Gilbert 
1889aff3f660SJuan Quintela     stat64_add(&mig_stats.postcopy_requests, 1);
189089ac5a1dSDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
189189ac5a1dSDr. David Alan Gilbert 
18926c595cdeSDr. David Alan Gilbert     if (!rbname) {
18936c595cdeSDr. David Alan Gilbert         /* Reuse last RAMBlock */
189468a098f3SJuan Quintela         ramblock = rs->last_req_rb;
18956c595cdeSDr. David Alan Gilbert 
18966c595cdeSDr. David Alan Gilbert         if (!ramblock) {
18976c595cdeSDr. David Alan Gilbert             /*
18986c595cdeSDr. David Alan Gilbert              * Shouldn't happen, we can't reuse the last RAMBlock if
18996c595cdeSDr. David Alan Gilbert              * it's the 1st request.
19006c595cdeSDr. David Alan Gilbert              */
19017aa6070dSPeter Xu             error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no previous block");
190203acb4e9SDaniel Henrique Barboza             return -1;
19036c595cdeSDr. David Alan Gilbert         }
19046c595cdeSDr. David Alan Gilbert     } else {
19056c595cdeSDr. David Alan Gilbert         ramblock = qemu_ram_block_by_name(rbname);
19066c595cdeSDr. David Alan Gilbert 
19076c595cdeSDr. David Alan Gilbert         if (!ramblock) {
19086c595cdeSDr. David Alan Gilbert             /* We shouldn't be asked for a non-existent RAMBlock */
19097aa6070dSPeter Xu             error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no block '%s'", rbname);
191003acb4e9SDaniel Henrique Barboza             return -1;
19116c595cdeSDr. David Alan Gilbert         }
191268a098f3SJuan Quintela         rs->last_req_rb = ramblock;
19136c595cdeSDr. David Alan Gilbert     }
19146c595cdeSDr. David Alan Gilbert     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1915542147f4SDavid Hildenbrand     if (!offset_in_ramblock(ramblock, start + len - 1)) {
19167aa6070dSPeter Xu         error_setg(errp, "MIG_RP_MSG_REQ_PAGES request overrun, "
19177aa6070dSPeter Xu                    "start=" RAM_ADDR_FMT " len="
19189458ad6bSJuan Quintela                    RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
19197aa6070dSPeter Xu                    start, len, ramblock->used_length);
192003acb4e9SDaniel Henrique Barboza         return -1;
19216c595cdeSDr. David Alan Gilbert     }
19226c595cdeSDr. David Alan Gilbert 
192393589827SPeter Xu     /*
192493589827SPeter Xu      * When with postcopy preempt, we send back the page directly in the
192593589827SPeter Xu      * rp-return thread.
192693589827SPeter Xu      */
192793589827SPeter Xu     if (postcopy_preempt_active()) {
192893589827SPeter Xu         ram_addr_t page_start = start >> TARGET_PAGE_BITS;
192993589827SPeter Xu         size_t page_size = qemu_ram_pagesize(ramblock);
193093589827SPeter Xu         PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_POSTCOPY];
193193589827SPeter Xu         int ret = 0;
193293589827SPeter Xu 
193393589827SPeter Xu         qemu_mutex_lock(&rs->bitmap_mutex);
193493589827SPeter Xu 
193593589827SPeter Xu         pss_init(pss, ramblock, page_start);
193693589827SPeter Xu         /*
193793589827SPeter Xu          * Always use the preempt channel, and make sure it's there.  It's
193893589827SPeter Xu          * safe to access without lock, because when rp-thread is running
193993589827SPeter Xu          * safe to access without a lock, because when the rp-thread is running
194093589827SPeter Xu          * we should be the only one operating on the qemufile.
194193589827SPeter Xu         pss->pss_channel = migrate_get_current()->postcopy_qemufile_src;
194293589827SPeter Xu         assert(pss->pss_channel);
194393589827SPeter Xu 
194493589827SPeter Xu         /*
194593589827SPeter Xu          * It must be either one or a multiple of the host page size.  Just
194693589827SPeter Xu          * assert; if something is wrong we're mostly split-brain anyway.
194793589827SPeter Xu          */
194893589827SPeter Xu         assert(len % page_size == 0);
194993589827SPeter Xu         while (len) {
195093589827SPeter Xu             if (ram_save_host_page_urgent(pss)) {
19517aa6070dSPeter Xu                 error_setg(errp, "ram_save_host_page_urgent() failed: "
195293589827SPeter Xu                            "ramblock=%s, start_addr=0x"RAM_ADDR_FMT,
19537aa6070dSPeter Xu                            ramblock->idstr, start);
195493589827SPeter Xu                 ret = -1;
195593589827SPeter Xu                 break;
195693589827SPeter Xu             }
195793589827SPeter Xu             /*
195893589827SPeter Xu              * NOTE: after ram_save_host_page_urgent() succeeded, pss->page
195993589827SPeter Xu              * will automatically be moved and point to the next host page
196093589827SPeter Xu              * we're going to send, so no need to update here.
196193589827SPeter Xu              *
196293589827SPeter Xu              * Normally QEMU never sends >1 host page in requests, so
196393589827SPeter Xu              * logically we don't even need that as the loop should only
196493589827SPeter Xu              * run once, but just to be consistent.
196593589827SPeter Xu              */
196693589827SPeter Xu             len -= page_size;
196793589827SPeter Xu         };
196893589827SPeter Xu         qemu_mutex_unlock(&rs->bitmap_mutex);
196993589827SPeter Xu 
197093589827SPeter Xu         return ret;
197193589827SPeter Xu     }
197293589827SPeter Xu 
1973ec481c6cSJuan Quintela     struct RAMSrcPageRequest *new_entry =
1974b21e2380SMarkus Armbruster         g_new0(struct RAMSrcPageRequest, 1);
19756c595cdeSDr. David Alan Gilbert     new_entry->rb = ramblock;
19766c595cdeSDr. David Alan Gilbert     new_entry->offset = start;
19776c595cdeSDr. David Alan Gilbert     new_entry->len = len;
19786c595cdeSDr. David Alan Gilbert 
19796c595cdeSDr. David Alan Gilbert     memory_region_ref(ramblock->mr);
1980ec481c6cSJuan Quintela     qemu_mutex_lock(&rs->src_page_req_mutex);
1981ec481c6cSJuan Quintela     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1982e03a34f8SDr. David Alan Gilbert     migration_make_urgent_request();
1983ec481c6cSJuan Quintela     qemu_mutex_unlock(&rs->src_page_req_mutex);
19846c595cdeSDr. David Alan Gilbert 
19856c595cdeSDr. David Alan Gilbert     return 0;
19866c595cdeSDr. David Alan Gilbert }
19876c595cdeSDr. David Alan Gilbert 
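/*
 * Editor's illustration (not part of ram.c): a minimal sketch of how a
 * destination page fault could be turned into a queued request via the
 * function above.  The block name "pc.ram" and the offset are hypothetical
 * values; the real caller lives in the return-path message handling code.
 */
static int example_queue_one_faulted_page(Error **errp)
{
    /*
     * Request one target page at byte offset 0x200000 of block "pc.ram"
     * (assumes the block's host page size equals the target page size).
     */
    return ram_save_queue_pages("pc.ram", 0x200000, TARGET_PAGE_SIZE, errp);
}
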
1988a82d593bSDr. David Alan Gilbert /**
19894010ba38SJuan Quintela  * ram_save_target_page_legacy: save one target page
1990a82d593bSDr. David Alan Gilbert  *
19913d0684b2SJuan Quintela  * Returns the number of pages written
1992a82d593bSDr. David Alan Gilbert  *
19936f37bb8bSJuan Quintela  * @rs: current RAM state
19943d0684b2SJuan Quintela  * @pss: data about the page we want to send
1995a82d593bSDr. David Alan Gilbert  */
19964010ba38SJuan Quintela static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
1997a82d593bSDr. David Alan Gilbert {
19988bba004cSAlexey Romko     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
1999a8ec91f9SXiao Guangrong     int res;
2000a8ec91f9SXiao Guangrong 
2001944853c2SJuan Quintela     if (control_save_page(pss, offset, &res)) {
2002a8ec91f9SXiao Guangrong         return res;
2003a8ec91f9SXiao Guangrong     }
2004a8ec91f9SXiao Guangrong 
2005e8e4e7acSJuan Quintela     if (save_zero_page(rs, pss, offset)) {
20068697eb85SFabiano Rosas         return 1;
2007d7400a34SXiao Guangrong     }
2008d7400a34SXiao Guangrong 
20099ae90f73SHao Xiang     return ram_save_page(rs, pss);
2010a82d593bSDr. David Alan Gilbert }
2011a82d593bSDr. David Alan Gilbert 
20129ae90f73SHao Xiang /**
20139ae90f73SHao Xiang  * ram_save_target_page_multifd: send one target page to multifd workers
20149ae90f73SHao Xiang  *
20159ae90f73SHao Xiang  * Returns 1 if the page was queued, -1 otherwise.
20169ae90f73SHao Xiang  *
20179ae90f73SHao Xiang  * @rs: current RAM state
20189ae90f73SHao Xiang  * @pss: data about the page we want to send
20199ae90f73SHao Xiang  */
20209ae90f73SHao Xiang static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
20219ae90f73SHao Xiang {
20229ae90f73SHao Xiang     RAMBlock *block = pss->block;
20239ae90f73SHao Xiang     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
20249ae90f73SHao Xiang 
20259ae90f73SHao Xiang     /*
20269ae90f73SHao Xiang      * While using multifd live migration, we still need to handle zero
20279ae90f73SHao Xiang      * page checking on the migration main thread.
20289ae90f73SHao Xiang      */
20299ae90f73SHao Xiang     if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
20309ae90f73SHao Xiang         if (save_zero_page(rs, pss, offset)) {
20319ae90f73SHao Xiang             return 1;
20329ae90f73SHao Xiang         }
20339ae90f73SHao Xiang     }
20349ae90f73SHao Xiang 
20359ae90f73SHao Xiang     return ram_save_multifd_page(block, offset);
2036a82d593bSDr. David Alan Gilbert }
2037a82d593bSDr. David Alan Gilbert 
2038d9e474eaSPeter Xu /* Should be called before sending a host page */
2039d9e474eaSPeter Xu static void pss_host_page_prepare(PageSearchStatus *pss)
2040d9e474eaSPeter Xu {
2041d9e474eaSPeter Xu     /* How many guest pages are there in one host page? */
2042d9e474eaSPeter Xu     size_t guest_pfns = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2043d9e474eaSPeter Xu 
2044d9e474eaSPeter Xu     pss->host_page_sending = true;
2045301d7ffeSPeter Xu     if (guest_pfns <= 1) {
2046301d7ffeSPeter Xu         /*
2047301d7ffeSPeter Xu          * This covers both when guest psize == host psize, and when the guest
2048301d7ffeSPeter Xu          * has a larger psize than the host (guest_pfns==0).
2049301d7ffeSPeter Xu          *
2050301d7ffeSPeter Xu          * For the latter, we always send one whole guest page per
2051301d7ffeSPeter Xu          * iteration of the host page (example: an Alpha VM on x86 host
2052301d7ffeSPeter Xu          * will have guest psize 8K while host psize 4K).
2053301d7ffeSPeter Xu          */
2054301d7ffeSPeter Xu         pss->host_page_start = pss->page;
2055301d7ffeSPeter Xu         pss->host_page_end = pss->page + 1;
2056301d7ffeSPeter Xu     } else {
2057301d7ffeSPeter Xu         /*
2058301d7ffeSPeter Xu          * The host page spans over multiple guest pages, we send them
2059301d7ffeSPeter Xu          * within the same host page iteration.
2060301d7ffeSPeter Xu          */
2061d9e474eaSPeter Xu         pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns);
2062d9e474eaSPeter Xu         pss->host_page_end = ROUND_UP(pss->page + 1, guest_pfns);
2063d9e474eaSPeter Xu     }
2064301d7ffeSPeter Xu }
2065d9e474eaSPeter Xu 
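/*
 * Editor's worked example (hypothetical numbers, not part of ram.c): for a
 * RAMBlock backed by 2MB huge pages with 4KB target pages, guest_pfns is
 * 512.  If pss->page == 1000 when pss_host_page_prepare() runs, then:
 *
 *   host_page_start = ROUND_DOWN(1000, 512)   = 512
 *   host_page_end   = ROUND_UP(1000 + 1, 512) = 1024
 *
 * so the caller iterates over target pages [512, 1024), i.e. exactly the
 * huge page that contains page 1000.
 */
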
2066d9e474eaSPeter Xu /*
2067d9e474eaSPeter Xu  * Whether the page pointed by PSS is within the host page being sent.
2068d9e474eaSPeter Xu  * Must be called after a previous pss_host_page_prepare().
2069d9e474eaSPeter Xu  */
2070d9e474eaSPeter Xu static bool pss_within_range(PageSearchStatus *pss)
2071d9e474eaSPeter Xu {
2072d9e474eaSPeter Xu     ram_addr_t ram_addr;
2073d9e474eaSPeter Xu 
2074d9e474eaSPeter Xu     assert(pss->host_page_sending);
2075d9e474eaSPeter Xu 
2076d9e474eaSPeter Xu     /* Over host-page boundary? */
2077d9e474eaSPeter Xu     if (pss->page >= pss->host_page_end) {
2078d9e474eaSPeter Xu         return false;
2079d9e474eaSPeter Xu     }
2080d9e474eaSPeter Xu 
2081d9e474eaSPeter Xu     ram_addr = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
2082d9e474eaSPeter Xu 
2083d9e474eaSPeter Xu     return offset_in_ramblock(pss->block, ram_addr);
2084d9e474eaSPeter Xu }
2085d9e474eaSPeter Xu 
2086d9e474eaSPeter Xu static void pss_host_page_finish(PageSearchStatus *pss)
2087d9e474eaSPeter Xu {
2088d9e474eaSPeter Xu     pss->host_page_sending = false;
2089d9e474eaSPeter Xu     /* This is not needed, but just to reset it */
2090d9e474eaSPeter Xu     pss->host_page_start = pss->host_page_end = 0;
2091d9e474eaSPeter Xu }
2092d9e474eaSPeter Xu 
209393589827SPeter Xu /*
209493589827SPeter Xu  * Send an urgent host page specified by `pss'.  Must be called with
209593589827SPeter Xu  * bitmap_mutex held.
209693589827SPeter Xu  *
209793589827SPeter Xu  * Returns 0 if saving the host page succeeded, negative otherwise.
209893589827SPeter Xu  */
209993589827SPeter Xu static int ram_save_host_page_urgent(PageSearchStatus *pss)
210093589827SPeter Xu {
210193589827SPeter Xu     bool page_dirty, sent = false;
210293589827SPeter Xu     RAMState *rs = ram_state;
210393589827SPeter Xu     int ret = 0;
210493589827SPeter Xu 
210593589827SPeter Xu     trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page);
210693589827SPeter Xu     pss_host_page_prepare(pss);
210793589827SPeter Xu 
210893589827SPeter Xu     /*
210993589827SPeter Xu      * If precopy is sending the same page, let it be done in precopy, or
211093589827SPeter Xu      * we could send the same page in two channels and none of them will
211193589827SPeter Xu      * receive the whole page.
211293589827SPeter Xu      */
211393589827SPeter Xu     if (pss_overlap(pss, &ram_state->pss[RAM_CHANNEL_PRECOPY])) {
211493589827SPeter Xu         trace_postcopy_preempt_hit(pss->block->idstr,
211593589827SPeter Xu                                    pss->page << TARGET_PAGE_BITS);
211693589827SPeter Xu         return 0;
211793589827SPeter Xu     }
211893589827SPeter Xu 
211993589827SPeter Xu     do {
212093589827SPeter Xu         page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
212193589827SPeter Xu 
212293589827SPeter Xu         if (page_dirty) {
212393589827SPeter Xu             /* Be strict about the return code; it must be exactly 1 */
21244010ba38SJuan Quintela             if (migration_ops->ram_save_target_page(rs, pss) != 1) {
212593589827SPeter Xu                 error_report_once("%s: ram_save_target_page failed", __func__);
212693589827SPeter Xu                 ret = -1;
212793589827SPeter Xu                 goto out;
212893589827SPeter Xu             }
212993589827SPeter Xu             sent = true;
213093589827SPeter Xu         }
213193589827SPeter Xu         pss_find_next_dirty(pss);
213293589827SPeter Xu     } while (pss_within_range(pss));
213393589827SPeter Xu out:
213493589827SPeter Xu     pss_host_page_finish(pss);
213593589827SPeter Xu     /* For urgent requests, flush immediately if sent */
213693589827SPeter Xu     if (sent) {
213793589827SPeter Xu         qemu_fflush(pss->pss_channel);
213893589827SPeter Xu     }
213993589827SPeter Xu     return ret;
214093589827SPeter Xu }
214193589827SPeter Xu 
2142a82d593bSDr. David Alan Gilbert /**
21433d0684b2SJuan Quintela  * ram_save_host_page: save a whole host page
2144a82d593bSDr. David Alan Gilbert  *
21453d0684b2SJuan Quintela  * Starting at *offset send pages up to the end of the current host
21463d0684b2SJuan Quintela  * page. It's valid for the initial offset to point into the middle of
21473d0684b2SJuan Quintela  * a host page, in which case the remainder of the host page is sent.
21483d0684b2SJuan Quintela  * Only dirty target pages are sent. Note that the host page size may
21493d0684b2SJuan Quintela  * be a huge page for this block.
2150f3321554SPeter Xu  *
21511eb3fc0aSDr. David Alan Gilbert  * The saving stops at the boundary of the used_length of the block
21521eb3fc0aSDr. David Alan Gilbert  * if the RAMBlock isn't a multiple of the host page size.
2153a82d593bSDr. David Alan Gilbert  *
2154f3321554SPeter Xu  * The caller must hold ram_state.bitmap_mutex when calling this
2155f3321554SPeter Xu  * function.  Note that this function can temporarily release the lock, but
2156f3321554SPeter Xu  * it makes sure the lock is held again before it returns.
2157f3321554SPeter Xu  *
21583d0684b2SJuan Quintela  * Returns the number of pages written or negative on error
21593d0684b2SJuan Quintela  *
21606f37bb8bSJuan Quintela  * @rs: current RAM state
21613d0684b2SJuan Quintela  * @pss: data about the page we want to send
2162a82d593bSDr. David Alan Gilbert  */
216305931ec5SJuan Quintela static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
2164a82d593bSDr. David Alan Gilbert {
2165f3321554SPeter Xu     bool page_dirty, preempt_active = postcopy_preempt_active();
2166a82d593bSDr. David Alan Gilbert     int tmppages, pages = 0;
2167a935e30fSJuan Quintela     size_t pagesize_bits =
2168a935e30fSJuan Quintela         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2169278e2f55SAndrey Gruzdev     unsigned long start_page = pss->page;
2170278e2f55SAndrey Gruzdev     int res;
21714c011c37SDr. David Alan Gilbert 
2172f161c88aSDavid Hildenbrand     if (migrate_ram_is_ignored(pss->block)) {
2173b895de50SCédric Le Goater         error_report("block %s should not be migrated !", pss->block->idstr);
2174b895de50SCédric Le Goater         return 0;
2175b895de50SCédric Le Goater     }
2176b895de50SCédric Le Goater 
2177d9e474eaSPeter Xu     /* Update host page boundary information */
2178d9e474eaSPeter Xu     pss_host_page_prepare(pss);
2179d9e474eaSPeter Xu 
2180a82d593bSDr. David Alan Gilbert     do {
2181f3321554SPeter Xu         page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
2182a82d593bSDr. David Alan Gilbert 
2183f3321554SPeter Xu         /* Check whether the page is dirty and, if it is, send it */
2184f3321554SPeter Xu         if (page_dirty) {
2185f3321554SPeter Xu             /*
2186f3321554SPeter Xu              * Properly yield the lock only in postcopy preempt mode
2187f3321554SPeter Xu              * because both migration thread and rp-return thread can
2188f3321554SPeter Xu              * operate on the bitmaps.
2189f3321554SPeter Xu              */
2190f3321554SPeter Xu             if (preempt_active) {
2191f3321554SPeter Xu                 qemu_mutex_unlock(&rs->bitmap_mutex);
2192f3321554SPeter Xu             }
21934010ba38SJuan Quintela             tmppages = migration_ops->ram_save_target_page(rs, pss);
2194f3321554SPeter Xu             if (tmppages >= 0) {
2195a82d593bSDr. David Alan Gilbert                 pages += tmppages;
219623feba90SKunkun Jiang                 /*
219723feba90SKunkun Jiang                  * Allow rate limiting to happen in the middle of huge pages if
219823feba90SKunkun Jiang                  * something is sent in the current iteration.
219923feba90SKunkun Jiang                  */
220023feba90SKunkun Jiang                 if (pagesize_bits > 1 && tmppages > 0) {
220197e1e067SDr. David Alan Gilbert                     migration_rate_limit();
220223feba90SKunkun Jiang                 }
2203ba1b7c81SKunkun Jiang             }
2204f3321554SPeter Xu             if (preempt_active) {
2205f3321554SPeter Xu                 qemu_mutex_lock(&rs->bitmap_mutex);
2206f3321554SPeter Xu             }
2207f3321554SPeter Xu         } else {
2208f3321554SPeter Xu             tmppages = 0;
2209f3321554SPeter Xu         }
2210f3321554SPeter Xu 
2211f3321554SPeter Xu         if (tmppages < 0) {
2212d9e474eaSPeter Xu             pss_host_page_finish(pss);
2213f3321554SPeter Xu             return tmppages;
2214f3321554SPeter Xu         }
2215f3321554SPeter Xu 
2216d9e474eaSPeter Xu         pss_find_next_dirty(pss);
2217d9e474eaSPeter Xu     } while (pss_within_range(pss));
2218d9e474eaSPeter Xu 
2219d9e474eaSPeter Xu     pss_host_page_finish(pss);
2220278e2f55SAndrey Gruzdev 
2221278e2f55SAndrey Gruzdev     res = ram_save_release_protection(rs, pss, start_page);
2222278e2f55SAndrey Gruzdev     return (res < 0 ? res : pages);
2223a82d593bSDr. David Alan Gilbert }
22246c595cdeSDr. David Alan Gilbert 
22256c595cdeSDr. David Alan Gilbert /**
22263d0684b2SJuan Quintela  * ram_find_and_save_block: finds a dirty page and sends it to f
222756e93d26SJuan Quintela  *
222856e93d26SJuan Quintela  * Called within an RCU critical section.
222956e93d26SJuan Quintela  *
2230e8f3735fSXiao Guangrong  * Returns the number of pages written where zero means no dirty pages,
2231e8f3735fSXiao Guangrong  * or negative on error
223256e93d26SJuan Quintela  *
22336f37bb8bSJuan Quintela  * @rs: current RAM state
2234a82d593bSDr. David Alan Gilbert  *
2235a82d593bSDr. David Alan Gilbert  * On systems where host-page-size > target-page-size it will send all the
2236a82d593bSDr. David Alan Gilbert  * pages in a host page that are dirty.
223756e93d26SJuan Quintela  */
223805931ec5SJuan Quintela static int ram_find_and_save_block(RAMState *rs)
223956e93d26SJuan Quintela {
2240f1668764SPeter Xu     PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
224156e93d26SJuan Quintela     int pages = 0;
224256e93d26SJuan Quintela 
22430827b9e9SAshijeet Acharya     /* No dirty page as there is zero RAM */
22448d80e195SJuan Quintela     if (!rs->ram_bytes_total) {
22450827b9e9SAshijeet Acharya         return pages;
22460827b9e9SAshijeet Acharya     }
22470827b9e9SAshijeet Acharya 
22484934a5ddSPeter Xu     /*
22494934a5ddSPeter Xu      * Always keep last_seen_block/last_page valid during this procedure,
22504934a5ddSPeter Xu      * because find_dirty_block() relies on these values (e.g., we compare
22514934a5ddSPeter Xu      * last_seen_block with pss.block to see whether we searched all the
22524934a5ddSPeter Xu      * ramblocks) to detect the completion of migration.  Having NULL value
22534934a5ddSPeter Xu      * ramblocks) to detect the completion of migration.  Having a NULL value
22544934a5ddSPeter Xu      * of last_seen_block can cause the loop below to run forever.
22554934a5ddSPeter Xu     if (!rs->last_seen_block) {
22564934a5ddSPeter Xu         rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
22574934a5ddSPeter Xu         rs->last_page = 0;
22584934a5ddSPeter Xu     }
22594934a5ddSPeter Xu 
2260f1668764SPeter Xu     pss_init(pss, rs->last_seen_block, rs->last_page);
2261b8fb8cb7SDr. David Alan Gilbert 
226231e2ac74SJuan Quintela     while (true) {
226351efd36fSJuan Quintela         if (!get_queued_page(rs, pss)) {
2264a82d593bSDr. David Alan Gilbert             /* priority queue empty, so just search for something dirty */
226531e2ac74SJuan Quintela             int res = find_dirty_block(rs, pss);
226631e2ac74SJuan Quintela             if (res != PAGE_DIRTY_FOUND) {
226731e2ac74SJuan Quintela                 if (res == PAGE_ALL_CLEAN) {
226851efd36fSJuan Quintela                     break;
226931e2ac74SJuan Quintela                 } else if (res == PAGE_TRY_AGAIN) {
227031e2ac74SJuan Quintela                     continue;
2271294e5a40SJuan Quintela                 } else if (res < 0) {
2272294e5a40SJuan Quintela                     pages = res;
2273294e5a40SJuan Quintela                     break;
2274a82d593bSDr. David Alan Gilbert                 }
227551efd36fSJuan Quintela             }
227651efd36fSJuan Quintela         }
2277f1668764SPeter Xu         pages = ram_save_host_page(rs, pss);
227831e2ac74SJuan Quintela         if (pages) {
227931e2ac74SJuan Quintela             break;
228031e2ac74SJuan Quintela         }
228131e2ac74SJuan Quintela     }
228256e93d26SJuan Quintela 
2283f1668764SPeter Xu     rs->last_seen_block = pss->block;
2284f1668764SPeter Xu     rs->last_page = pss->page;
228556e93d26SJuan Quintela 
228656e93d26SJuan Quintela     return pages;
228756e93d26SJuan Quintela }
228856e93d26SJuan Quintela 
22898008a272SJuan Quintela static uint64_t ram_bytes_total_with_ignored(void)
229056e93d26SJuan Quintela {
229156e93d26SJuan Quintela     RAMBlock *block;
229256e93d26SJuan Quintela     uint64_t total = 0;
229356e93d26SJuan Quintela 
229489ac5a1dSDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
229589ac5a1dSDr. David Alan Gilbert 
2296b895de50SCédric Le Goater     RAMBLOCK_FOREACH_MIGRATABLE(block) {
229756e93d26SJuan Quintela         total += block->used_length;
229899e15582SPeter Xu     }
229956e93d26SJuan Quintela     return total;
230056e93d26SJuan Quintela }
230156e93d26SJuan Quintela 
2302fbd162e6SYury Kotov uint64_t ram_bytes_total(void)
2303fbd162e6SYury Kotov {
23048008a272SJuan Quintela     RAMBlock *block;
23058008a272SJuan Quintela     uint64_t total = 0;
23068008a272SJuan Quintela 
23078008a272SJuan Quintela     RCU_READ_LOCK_GUARD();
23088008a272SJuan Quintela 
23098008a272SJuan Quintela     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
23108008a272SJuan Quintela         total += block->used_length;
23118008a272SJuan Quintela     }
23128008a272SJuan Quintela     return total;
2313fbd162e6SYury Kotov }
2314fbd162e6SYury Kotov 
2315f265e0e4SJuan Quintela static void xbzrle_load_setup(void)
231656e93d26SJuan Quintela {
2317f265e0e4SJuan Quintela     XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
231856e93d26SJuan Quintela }
231956e93d26SJuan Quintela 
2320f265e0e4SJuan Quintela static void xbzrle_load_cleanup(void)
2321f265e0e4SJuan Quintela {
2322f265e0e4SJuan Quintela     g_free(XBZRLE.decoded_buf);
2323f265e0e4SJuan Quintela     XBZRLE.decoded_buf = NULL;
2324f265e0e4SJuan Quintela }
2325f265e0e4SJuan Quintela 
23267d7c96beSPeter Xu static void ram_state_cleanup(RAMState **rsp)
23277d7c96beSPeter Xu {
2328b9ccaf6dSDr. David Alan Gilbert     if (*rsp) {
23297d7c96beSPeter Xu         migration_page_queue_free(*rsp);
23307d7c96beSPeter Xu         qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
23317d7c96beSPeter Xu         qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
23327d7c96beSPeter Xu         g_free(*rsp);
23337d7c96beSPeter Xu         *rsp = NULL;
23347d7c96beSPeter Xu     }
2335b9ccaf6dSDr. David Alan Gilbert }
23367d7c96beSPeter Xu 
233784593a08SPeter Xu static void xbzrle_cleanup(void)
233884593a08SPeter Xu {
233984593a08SPeter Xu     XBZRLE_cache_lock();
234084593a08SPeter Xu     if (XBZRLE.cache) {
234184593a08SPeter Xu         cache_fini(XBZRLE.cache);
234284593a08SPeter Xu         g_free(XBZRLE.encoded_buf);
234384593a08SPeter Xu         g_free(XBZRLE.current_buf);
234484593a08SPeter Xu         g_free(XBZRLE.zero_target_page);
234584593a08SPeter Xu         XBZRLE.cache = NULL;
234684593a08SPeter Xu         XBZRLE.encoded_buf = NULL;
234784593a08SPeter Xu         XBZRLE.current_buf = NULL;
234884593a08SPeter Xu         XBZRLE.zero_target_page = NULL;
234984593a08SPeter Xu     }
235084593a08SPeter Xu     XBZRLE_cache_unlock();
235184593a08SPeter Xu }
235284593a08SPeter Xu 
235392c20b2fSCédric Le Goater static void ram_bitmaps_destroy(void)
235492c20b2fSCédric Le Goater {
235592c20b2fSCédric Le Goater     RAMBlock *block;
235692c20b2fSCédric Le Goater 
235792c20b2fSCédric Le Goater     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
235892c20b2fSCédric Le Goater         g_free(block->clear_bmap);
235992c20b2fSCédric Le Goater         block->clear_bmap = NULL;
236092c20b2fSCédric Le Goater         g_free(block->bmap);
236192c20b2fSCédric Le Goater         block->bmap = NULL;
236292c20b2fSCédric Le Goater         g_free(block->file_bmap);
236392c20b2fSCédric Le Goater         block->file_bmap = NULL;
236492c20b2fSCédric Le Goater     }
236592c20b2fSCédric Le Goater }
236692c20b2fSCédric Le Goater 
2367f265e0e4SJuan Quintela static void ram_save_cleanup(void *opaque)
236856e93d26SJuan Quintela {
236953518d94SJuan Quintela     RAMState **rsp = opaque;
2370eb859c53SJuan Quintela 
2371278e2f55SAndrey Gruzdev     /* We don't use dirty log with background snapshots */
2372278e2f55SAndrey Gruzdev     if (!migrate_background_snapshot()) {
2373a4a411fbSStefan Hajnoczi         /* The caller must hold the BQL or be in a BH, so there is
23744633456cSYi Wang          * no write race against the migration bitmap
23752ff64038SLi Zhijian          */
237663b41db4SHyman Huang(黄勇)         if (global_dirty_tracking & GLOBAL_DIRTY_MIGRATION) {
237763b41db4SHyman Huang(黄勇)             /*
237863b41db4SHyman Huang(黄勇)              * do not stop dirty log without starting it, since
237963b41db4SHyman Huang(黄勇)              * Do not stop dirty logging without having started it, since
238063b41db4SHyman Huang(黄勇)              * memory_global_dirty_log_stop will assert that
238163b41db4SHyman Huang(黄勇)              * memory_global_dirty_log_start/stop are used in pairs.
238263b41db4SHyman Huang(黄勇)             memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
238363b41db4SHyman Huang(黄勇)         }
2384278e2f55SAndrey Gruzdev     }
23856b6712efSJuan Quintela 
238692c20b2fSCédric Le Goater     ram_bitmaps_destroy();
238756e93d26SJuan Quintela 
238884593a08SPeter Xu     xbzrle_cleanup();
2389a71ef5c7SFabiano Rosas     multifd_ram_save_cleanup();
23907d7c96beSPeter Xu     ram_state_cleanup(rsp);
23914010ba38SJuan Quintela     g_free(migration_ops);
23924010ba38SJuan Quintela     migration_ops = NULL;
239356e93d26SJuan Quintela }
239456e93d26SJuan Quintela 
23956f37bb8bSJuan Quintela static void ram_state_reset(RAMState *rs)
239656e93d26SJuan Quintela {
2397ec6f3ab9SPeter Xu     int i;
2398ec6f3ab9SPeter Xu 
2399ec6f3ab9SPeter Xu     for (i = 0; i < RAM_CHANNEL_MAX; i++) {
2400ec6f3ab9SPeter Xu         rs->pss[i].last_sent_block = NULL;
2401ec6f3ab9SPeter Xu     }
2402ec6f3ab9SPeter Xu 
24036f37bb8bSJuan Quintela     rs->last_seen_block = NULL;
2404269ace29SJuan Quintela     rs->last_page = 0;
24056f37bb8bSJuan Quintela     rs->last_version = ram_list.version;
2406f3095cc8SJuan Quintela     rs->xbzrle_started = false;
240756e93d26SJuan Quintela }
240856e93d26SJuan Quintela 
240956e93d26SJuan Quintela #define MAX_WAIT 50 /* ms, half buffered_file limit */
241056e93d26SJuan Quintela 
2411e0b266f0SDr. David Alan Gilbert /* **** functions for postcopy ***** */
2412e0b266f0SDr. David Alan Gilbert 
2413ced1c616SPavel Butsykin void ram_postcopy_migrated_memory_release(MigrationState *ms)
2414ced1c616SPavel Butsykin {
2415ced1c616SPavel Butsykin     struct RAMBlock *block;
2416ced1c616SPavel Butsykin 
2417fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
24186b6712efSJuan Quintela         unsigned long *bitmap = block->bmap;
24196b6712efSJuan Quintela         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
24206b6712efSJuan Quintela         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2421ced1c616SPavel Butsykin 
2422ced1c616SPavel Butsykin         while (run_start < range) {
2423ced1c616SPavel Butsykin             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
24248bba004cSAlexey Romko             ram_discard_range(block->idstr,
24258bba004cSAlexey Romko                               ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
24268bba004cSAlexey Romko                               ((ram_addr_t)(run_end - run_start))
24278bba004cSAlexey Romko                                 << TARGET_PAGE_BITS);
2428ced1c616SPavel Butsykin             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2429ced1c616SPavel Butsykin         }
2430ced1c616SPavel Butsykin     }
2431ced1c616SPavel Butsykin }
2432ced1c616SPavel Butsykin 
24333d0684b2SJuan Quintela /**
24343d0684b2SJuan Quintela  * postcopy_send_discard_bm_ram: discard a RAMBlock
24353d0684b2SJuan Quintela  *
2436e0b266f0SDr. David Alan Gilbert  * Callback from postcopy_each_ram_send_discard for each RAMBlock
24373d0684b2SJuan Quintela  *
24383d0684b2SJuan Quintela  * @ms: current migration state
243989dab31bSWei Yang  * @block: RAMBlock to discard
2440e0b266f0SDr. David Alan Gilbert  */
24419e7d1223SPhilippe Mathieu-Daudé static void postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
2442e0b266f0SDr. David Alan Gilbert {
24436b6712efSJuan Quintela     unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2444e0b266f0SDr. David Alan Gilbert     unsigned long current;
24451e7cf8c3SWei Yang     unsigned long *bitmap = block->bmap;
2446e0b266f0SDr. David Alan Gilbert 
24476b6712efSJuan Quintela     for (current = 0; current < end; ) {
24481e7cf8c3SWei Yang         unsigned long one = find_next_bit(bitmap, end, current);
244933a5cb62SWei Yang         unsigned long zero, discard_length;
2450e0b266f0SDr. David Alan Gilbert 
245133a5cb62SWei Yang         if (one >= end) {
245233a5cb62SWei Yang             break;
245333a5cb62SWei Yang         }
245433a5cb62SWei Yang 
24551e7cf8c3SWei Yang         zero = find_next_zero_bit(bitmap, end, one + 1);
2456e0b266f0SDr. David Alan Gilbert 
2457e0b266f0SDr. David Alan Gilbert         if (zero >= end) {
2458e0b266f0SDr. David Alan Gilbert             discard_length = end - one;
2459e0b266f0SDr. David Alan Gilbert         } else {
2460e0b266f0SDr. David Alan Gilbert             discard_length = zero - one;
2461e0b266f0SDr. David Alan Gilbert         }
2462810cf2bbSWei Yang         postcopy_discard_send_range(ms, one, discard_length);
2463e0b266f0SDr. David Alan Gilbert         current = one + discard_length;
2464e0b266f0SDr. David Alan Gilbert     }
2465e0b266f0SDr. David Alan Gilbert }
2466e0b266f0SDr. David Alan Gilbert 
2467f30c2e5bSPeter Xu static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block);
2468f30c2e5bSPeter Xu 
24693d0684b2SJuan Quintela /**
24703d0684b2SJuan Quintela  * postcopy_each_ram_send_discard: discard all RAMBlocks
24713d0684b2SJuan Quintela  *
2472e0b266f0SDr. David Alan Gilbert  * Utility for the outgoing postcopy code.
2473e0b266f0SDr. David Alan Gilbert  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
2474e0b266f0SDr. David Alan Gilbert  *   passing it bitmap indexes and name.
2475e0b266f0SDr. David Alan Gilbert  * (qemu_ram_foreach_block ends up passing unscaled lengths
2476e0b266f0SDr. David Alan Gilbert  *  which would mean postcopy code would have to deal with target page)
24773d0684b2SJuan Quintela  *
24783d0684b2SJuan Quintela  * @ms: current migration state
2479e0b266f0SDr. David Alan Gilbert  */
2480739fcc1bSPeter Xu static void postcopy_each_ram_send_discard(MigrationState *ms)
2481e0b266f0SDr. David Alan Gilbert {
2482e0b266f0SDr. David Alan Gilbert     struct RAMBlock *block;
2483e0b266f0SDr. David Alan Gilbert 
2484fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
24856b6712efSJuan Quintela         postcopy_discard_send_init(ms, block->idstr);
2486e0b266f0SDr. David Alan Gilbert 
2487e0b266f0SDr. David Alan Gilbert         /*
2488f30c2e5bSPeter Xu          * Deal with TPS != HPS and huge pages.  It discards any partially sent
2489f30c2e5bSPeter Xu          * host-page size chunks and marks any partially dirty host-page size
2490f30c2e5bSPeter Xu          * chunks as all dirty.  In this case the host-page is the host-page
2491f30c2e5bSPeter Xu          * for the particular RAMBlock, i.e. it might be a huge page.
2492f30c2e5bSPeter Xu          */
2493f30c2e5bSPeter Xu         postcopy_chunk_hostpages_pass(ms, block);
2494f30c2e5bSPeter Xu 
2495f30c2e5bSPeter Xu         /*
2496e0b266f0SDr. David Alan Gilbert          * Postcopy sends chunks of bitmap over the wire, but it
2497e0b266f0SDr. David Alan Gilbert          * just needs indexes at this point, avoids it having
2498e0b266f0SDr. David Alan Gilbert          * target page specific code.
2499e0b266f0SDr. David Alan Gilbert          */
2500739fcc1bSPeter Xu         postcopy_send_discard_bm_ram(ms, block);
2501810cf2bbSWei Yang         postcopy_discard_send_finish(ms);
2502e0b266f0SDr. David Alan Gilbert     }
2503e0b266f0SDr. David Alan Gilbert }
2504e0b266f0SDr. David Alan Gilbert 
25053d0684b2SJuan Quintela /**
25068324ef86SWei Yang  * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
250799e314ebSDr. David Alan Gilbert  *
25083d0684b2SJuan Quintela  * Helper for postcopy_chunk_hostpages; it's called twice to
25093d0684b2SJuan Quintela  * canonicalize the two bitmaps, which are similar, but one is
25103d0684b2SJuan Quintela  * inverted.
251199e314ebSDr. David Alan Gilbert  *
25123d0684b2SJuan Quintela  * Postcopy requires that all target pages in a hostpage are dirty or
25133d0684b2SJuan Quintela  * clean, not a mix.  This function canonicalizes the bitmaps.
25143d0684b2SJuan Quintela  *
25153d0684b2SJuan Quintela  * @ms: current migration state
25163d0684b2SJuan Quintela  * @block: block that contains the page we want to canonicalize
251799e314ebSDr. David Alan Gilbert  */
25181e7cf8c3SWei Yang static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
251999e314ebSDr. David Alan Gilbert {
252053518d94SJuan Quintela     RAMState *rs = ram_state;
25216b6712efSJuan Quintela     unsigned long *bitmap = block->bmap;
252229c59172SDr. David Alan Gilbert     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
25236b6712efSJuan Quintela     unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
252499e314ebSDr. David Alan Gilbert     unsigned long run_start;
252599e314ebSDr. David Alan Gilbert 
252629c59172SDr. David Alan Gilbert     if (block->page_size == TARGET_PAGE_SIZE) {
252729c59172SDr. David Alan Gilbert         /* Easy case - TPS==HPS for a non-huge page RAMBlock */
252829c59172SDr. David Alan Gilbert         return;
252929c59172SDr. David Alan Gilbert     }
253029c59172SDr. David Alan Gilbert 
253199e314ebSDr. David Alan Gilbert     /* Find a dirty page */
25326b6712efSJuan Quintela     run_start = find_next_bit(bitmap, pages, 0);
253399e314ebSDr. David Alan Gilbert 
25346b6712efSJuan Quintela     while (run_start < pages) {
253599e314ebSDr. David Alan Gilbert 
253699e314ebSDr. David Alan Gilbert         /*
253799e314ebSDr. David Alan Gilbert          * If the start of this run of pages is in the middle of a host
253899e314ebSDr. David Alan Gilbert          * page, then we need to fixup this host page.
253999e314ebSDr. David Alan Gilbert          */
25409dec3cc3SWei Yang         if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
254199e314ebSDr. David Alan Gilbert             /* Find the end of this run */
2542dad45ab2SWei Yang             run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
254399e314ebSDr. David Alan Gilbert             /*
254499e314ebSDr. David Alan Gilbert              * If the end isn't at the start of a host page, then the
254599e314ebSDr. David Alan Gilbert              * run doesn't finish at the end of a host page
254699e314ebSDr. David Alan Gilbert              * and we need to discard.
254799e314ebSDr. David Alan Gilbert              */
254899e314ebSDr. David Alan Gilbert         }
254999e314ebSDr. David Alan Gilbert 
25509dec3cc3SWei Yang         if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
255199e314ebSDr. David Alan Gilbert             unsigned long page;
2552dad45ab2SWei Yang             unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2553dad45ab2SWei Yang                                                              host_ratio);
2554dad45ab2SWei Yang             run_start = QEMU_ALIGN_UP(run_start, host_ratio);
255599e314ebSDr. David Alan Gilbert 
255699e314ebSDr. David Alan Gilbert             /* Clean up the bitmap */
255799e314ebSDr. David Alan Gilbert             for (page = fixup_start_addr;
255899e314ebSDr. David Alan Gilbert                  page < fixup_start_addr + host_ratio; page++) {
255999e314ebSDr. David Alan Gilbert                 /*
256099e314ebSDr. David Alan Gilbert                  * Remark them as dirty, updating the count for any pages
256199e314ebSDr. David Alan Gilbert                  * that weren't previously dirty.
256299e314ebSDr. David Alan Gilbert                  */
25630d8ec885SJuan Quintela                 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
256499e314ebSDr. David Alan Gilbert             }
256599e314ebSDr. David Alan Gilbert         }
256699e314ebSDr. David Alan Gilbert 
256799e314ebSDr. David Alan Gilbert         /* Find the next dirty page for the next iteration */
25686b6712efSJuan Quintela         run_start = find_next_bit(bitmap, pages, run_start);
256999e314ebSDr. David Alan Gilbert     }
257099e314ebSDr. David Alan Gilbert }
257199e314ebSDr. David Alan Gilbert 
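
/*
 * Editor's worked example (hypothetical numbers, not part of ram.c): with
 * 2MB huge pages and 4KB target pages, host_ratio is 512.  If a dirty run
 * starts at target page 700 (the middle of a host page), the pass above
 * computes fixup_start_addr = 512, re-marks target pages [512, 1024) as
 * dirty so the whole huge page is sent again, and continues scanning from
 * page 1024.
 */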
25723d0684b2SJuan Quintela /**
25733d0684b2SJuan Quintela  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
25743d0684b2SJuan Quintela  *
2575e0b266f0SDr. David Alan Gilbert  * Transmit the set of pages to be discarded after precopy to the target;
2576e0b266f0SDr. David Alan Gilbert  * these are pages that:
2577e0b266f0SDr. David Alan Gilbert  *     a) Have been previously transmitted but are now dirty again
2578e0b266f0SDr. David Alan Gilbert  *     b) Pages that have never been transmitted, this ensures that
2579e0b266f0SDr. David Alan Gilbert  *        any pages on the destination that have been mapped by background
2580e0b266f0SDr. David Alan Gilbert  *        tasks get discarded (transparent huge pages are the specific concern)
2581e0b266f0SDr. David Alan Gilbert  * Hopefully this is pretty sparse
25823d0684b2SJuan Quintela  *
25833d0684b2SJuan Quintela  * @ms: current migration state
2584e0b266f0SDr. David Alan Gilbert  */
2585739fcc1bSPeter Xu void ram_postcopy_send_discard_bitmap(MigrationState *ms)
2586e0b266f0SDr. David Alan Gilbert {
258753518d94SJuan Quintela     RAMState *rs = ram_state;
2588e0b266f0SDr. David Alan Gilbert 
258989ac5a1dSDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
2590e0b266f0SDr. David Alan Gilbert 
2591e0b266f0SDr. David Alan Gilbert     /* This should be our last sync, the src is now paused */
25921e493be5SGavin Shan     migration_bitmap_sync(rs, false);
2593e0b266f0SDr. David Alan Gilbert 
25946b6712efSJuan Quintela     /* Easiest way to make sure we don't resume in the middle of a host-page */
2595ec6f3ab9SPeter Xu     rs->pss[RAM_CHANNEL_PRECOPY].last_sent_block = NULL;
25966b6712efSJuan Quintela     rs->last_seen_block = NULL;
25976b6712efSJuan Quintela     rs->last_page = 0;
25986b6712efSJuan Quintela 
2599739fcc1bSPeter Xu     postcopy_each_ram_send_discard(ms);
2600e0b266f0SDr. David Alan Gilbert 
2601739fcc1bSPeter Xu     trace_ram_postcopy_send_discard_bitmap();
2602e0b266f0SDr. David Alan Gilbert }
2603e0b266f0SDr. David Alan Gilbert 
26043d0684b2SJuan Quintela /**
26053d0684b2SJuan Quintela  * ram_discard_range: discard dirtied pages at the beginning of postcopy
2606e0b266f0SDr. David Alan Gilbert  *
26073d0684b2SJuan Quintela  * Returns zero on success
2608e0b266f0SDr. David Alan Gilbert  *
260936449157SJuan Quintela  * @rbname: name of the RAMBlock of the request. NULL means the
26103d0684b2SJuan Quintela  *          same as the last one.
26113d0684b2SJuan Quintela  * @start: RAMBlock starting page
26123d0684b2SJuan Quintela  * @length: RAMBlock size
2613e0b266f0SDr. David Alan Gilbert  */
2614aaa2064cSJuan Quintela int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2615e0b266f0SDr. David Alan Gilbert {
261636449157SJuan Quintela     trace_ram_discard_range(rbname, start, length);
2617d3a5038cSDr. David Alan Gilbert 
261889ac5a1dSDr. David Alan Gilbert     RCU_READ_LOCK_GUARD();
261936449157SJuan Quintela     RAMBlock *rb = qemu_ram_block_by_name(rbname);
2620e0b266f0SDr. David Alan Gilbert 
2621e0b266f0SDr. David Alan Gilbert     if (!rb) {
262236449157SJuan Quintela         error_report("ram_discard_range: Failed to find block '%s'", rbname);
262303acb4e9SDaniel Henrique Barboza         return -1;
2624e0b266f0SDr. David Alan Gilbert     }
2625e0b266f0SDr. David Alan Gilbert 
2626814bb08fSPeter Xu     /*
2627814bb08fSPeter Xu      * On source VM, we don't need to update the received bitmap since
2628814bb08fSPeter Xu      * we don't even have one.
2629814bb08fSPeter Xu      */
2630814bb08fSPeter Xu     if (rb->receivedmap) {
2631f9494614SAlexey Perevalov         bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2632f9494614SAlexey Perevalov                      length >> qemu_target_page_bits());
2633814bb08fSPeter Xu     }
2634814bb08fSPeter Xu 
263503acb4e9SDaniel Henrique Barboza     return ram_block_discard_range(rb, start, length);
2636e0b266f0SDr. David Alan Gilbert }
2637e0b266f0SDr. David Alan Gilbert 
263884593a08SPeter Xu /*
263984593a08SPeter Xu  * For every allocation, we will try not to crash the VM if the
264084593a08SPeter Xu  * allocation fails.
264184593a08SPeter Xu  */
26427bee8ba8SCédric Le Goater static bool xbzrle_init(Error **errp)
264384593a08SPeter Xu {
264487dca0c9SJuan Quintela     if (!migrate_xbzrle()) {
26457bee8ba8SCédric Le Goater         return true;
264684593a08SPeter Xu     }
264784593a08SPeter Xu 
264884593a08SPeter Xu     XBZRLE_cache_lock();
264984593a08SPeter Xu 
265084593a08SPeter Xu     XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
265184593a08SPeter Xu     if (!XBZRLE.zero_target_page) {
26527bee8ba8SCédric Le Goater         error_setg(errp, "%s: Error allocating zero page", __func__);
265384593a08SPeter Xu         goto err_out;
265484593a08SPeter Xu     }
265584593a08SPeter Xu 
265684593a08SPeter Xu     XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
26577bee8ba8SCédric Le Goater                               TARGET_PAGE_SIZE, errp);
265884593a08SPeter Xu     if (!XBZRLE.cache) {
265984593a08SPeter Xu         goto free_zero_page;
266084593a08SPeter Xu     }
266184593a08SPeter Xu 
266284593a08SPeter Xu     XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
266384593a08SPeter Xu     if (!XBZRLE.encoded_buf) {
26647bee8ba8SCédric Le Goater         error_setg(errp, "%s: Error allocating encoded_buf", __func__);
266584593a08SPeter Xu         goto free_cache;
266684593a08SPeter Xu     }
266784593a08SPeter Xu 
266884593a08SPeter Xu     XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
266984593a08SPeter Xu     if (!XBZRLE.current_buf) {
26707bee8ba8SCédric Le Goater         error_setg(errp, "%s: Error allocating current_buf", __func__);
267184593a08SPeter Xu         goto free_encoded_buf;
267284593a08SPeter Xu     }
267384593a08SPeter Xu 
267484593a08SPeter Xu     /* We are all good */
267584593a08SPeter Xu     XBZRLE_cache_unlock();
26767bee8ba8SCédric Le Goater     return true;
267784593a08SPeter Xu 
267884593a08SPeter Xu free_encoded_buf:
267984593a08SPeter Xu     g_free(XBZRLE.encoded_buf);
268084593a08SPeter Xu     XBZRLE.encoded_buf = NULL;
268184593a08SPeter Xu free_cache:
268284593a08SPeter Xu     cache_fini(XBZRLE.cache);
268384593a08SPeter Xu     XBZRLE.cache = NULL;
268484593a08SPeter Xu free_zero_page:
268584593a08SPeter Xu     g_free(XBZRLE.zero_target_page);
268684593a08SPeter Xu     XBZRLE.zero_target_page = NULL;
268784593a08SPeter Xu err_out:
268884593a08SPeter Xu     XBZRLE_cache_unlock();
26897bee8ba8SCédric Le Goater     return false;
269084593a08SPeter Xu }
269184593a08SPeter Xu 
269216ecd25aSCédric Le Goater static bool ram_state_init(RAMState **rsp, Error **errp)
269356e93d26SJuan Quintela {
26947d00ee6aSPeter Xu     *rsp = g_try_new0(RAMState, 1);
26957d00ee6aSPeter Xu 
26967d00ee6aSPeter Xu     if (!*rsp) {
269716ecd25aSCédric Le Goater         error_setg(errp, "%s: Init ramstate fail", __func__);
269816ecd25aSCédric Le Goater         return false;
26997d00ee6aSPeter Xu     }
270053518d94SJuan Quintela 
270153518d94SJuan Quintela     qemu_mutex_init(&(*rsp)->bitmap_mutex);
270253518d94SJuan Quintela     qemu_mutex_init(&(*rsp)->src_page_req_mutex);
270353518d94SJuan Quintela     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
27048d80e195SJuan Quintela     (*rsp)->ram_bytes_total = ram_bytes_total();
270556e93d26SJuan Quintela 
27067d00ee6aSPeter Xu     /*
270740c4d4a8SIvan Ren      * Count the total number of pages used by ram blocks not including any
270840c4d4a8SIvan Ren      * gaps due to alignment or unplugs.
270903158519SWei Yang      * This must match with the initial values of dirty bitmap.
27107d00ee6aSPeter Xu      */
27118d80e195SJuan Quintela     (*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS;
27127d00ee6aSPeter Xu     ram_state_reset(*rsp);
27137d00ee6aSPeter Xu 
271416ecd25aSCédric Le Goater     return true;
27157d00ee6aSPeter Xu }
27167d00ee6aSPeter Xu 
2717d6eff5d7SPeter Xu static void ram_list_init_bitmaps(void)
2718d6eff5d7SPeter Xu {
2719002cad6bSPeter Xu     MigrationState *ms = migrate_get_current();
2720d6eff5d7SPeter Xu     RAMBlock *block;
2721d6eff5d7SPeter Xu     unsigned long pages;
2722002cad6bSPeter Xu     uint8_t shift;
2723d6eff5d7SPeter Xu 
2724d6eff5d7SPeter Xu     /* Skip setting bitmap if there is no RAM */
2725d6eff5d7SPeter Xu     if (ram_bytes_total()) {
2726002cad6bSPeter Xu         shift = ms->clear_bitmap_shift;
2727002cad6bSPeter Xu         if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2728002cad6bSPeter Xu             error_report("clear_bitmap_shift (%u) too big, using "
2729002cad6bSPeter Xu                          "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2730002cad6bSPeter Xu             shift = CLEAR_BITMAP_SHIFT_MAX;
2731002cad6bSPeter Xu         } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2732002cad6bSPeter Xu             error_report("clear_bitmap_shift (%u) too small, using "
2733002cad6bSPeter Xu                          "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2734002cad6bSPeter Xu             shift = CLEAR_BITMAP_SHIFT_MIN;
2735002cad6bSPeter Xu         }
2736002cad6bSPeter Xu 
2737fbd162e6SYury Kotov         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2738d6eff5d7SPeter Xu             pages = block->max_length >> TARGET_PAGE_BITS;
273903158519SWei Yang             /*
274003158519SWei Yang              * The initial dirty bitmap for migration must be set with all
274103158519SWei Yang              * ones to make sure we'll migrate every guest RAM page to
274203158519SWei Yang              * destination.
274340c4d4a8SIvan Ren              * Here we set RAMBlock.bmap all to 1 because when restarting a
274440c4d4a8SIvan Ren              * new migration after a failed one, ram_list.
274540c4d4a8SIvan Ren              * dirty_memory[DIRTY_MEMORY_MIGRATION] doesn't include the whole
274640c4d4a8SIvan Ren              * guest memory.
274703158519SWei Yang              */
2748d6eff5d7SPeter Xu             block->bmap = bitmap_new(pages);
274940c4d4a8SIvan Ren             bitmap_set(block->bmap, 0, pages);
2750c2d5c4a7SFabiano Rosas             if (migrate_mapped_ram()) {
2751c2d5c4a7SFabiano Rosas                 block->file_bmap = bitmap_new(pages);
2752c2d5c4a7SFabiano Rosas             }
2753002cad6bSPeter Xu             block->clear_bmap_shift = shift;
2754002cad6bSPeter Xu             block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
2755d6eff5d7SPeter Xu         }
2756d6eff5d7SPeter Xu     }
2757d6eff5d7SPeter Xu }
2758d6eff5d7SPeter Xu 
2759be39b4cdSDavid Hildenbrand static void migration_bitmap_clear_discarded_pages(RAMState *rs)
2760be39b4cdSDavid Hildenbrand {
2761be39b4cdSDavid Hildenbrand     unsigned long pages;
2762be39b4cdSDavid Hildenbrand     RAMBlock *rb;
2763be39b4cdSDavid Hildenbrand 
2764be39b4cdSDavid Hildenbrand     RCU_READ_LOCK_GUARD();
2765be39b4cdSDavid Hildenbrand 
2766be39b4cdSDavid Hildenbrand     RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
2767be39b4cdSDavid Hildenbrand             pages = ramblock_dirty_bitmap_clear_discarded_pages(rb);
2768be39b4cdSDavid Hildenbrand             rs->migration_dirty_pages -= pages;
2769be39b4cdSDavid Hildenbrand     }
2770be39b4cdSDavid Hildenbrand }
2771be39b4cdSDavid Hildenbrand 
2772030b56b2SCédric Le Goater static bool ram_init_bitmaps(RAMState *rs, Error **errp)
2773d6eff5d7SPeter Xu {
2774639ec3fbSCédric Le Goater     bool ret = true;
2775639ec3fbSCédric Le Goater 
2776d6eff5d7SPeter Xu     qemu_mutex_lock_ramlist();
2777d6eff5d7SPeter Xu 
277889ac5a1dSDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
2779d6eff5d7SPeter Xu         ram_list_init_bitmaps();
2780278e2f55SAndrey Gruzdev         /* We don't use dirty log with background snapshots */
2781278e2f55SAndrey Gruzdev         if (!migrate_background_snapshot()) {
2782030b56b2SCédric Le Goater             ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
2783639ec3fbSCédric Le Goater             if (!ret) {
2784639ec3fbSCédric Le Goater                 goto out_unlock;
2785639ec3fbSCédric Le Goater             }
27866a39ba7cSHyman Huang             migration_bitmap_sync_precopy(false);
278789ac5a1dSDr. David Alan Gilbert         }
2788278e2f55SAndrey Gruzdev     }
2789639ec3fbSCédric Le Goater out_unlock:
2790d6eff5d7SPeter Xu     qemu_mutex_unlock_ramlist();
2791be39b4cdSDavid Hildenbrand 
2792639ec3fbSCédric Le Goater     if (!ret) {
2793639ec3fbSCédric Le Goater         ram_bitmaps_destroy();
2794030b56b2SCédric Le Goater         return false;
2795639ec3fbSCédric Le Goater     }
2796639ec3fbSCédric Le Goater 
2797be39b4cdSDavid Hildenbrand     /*
2798be39b4cdSDavid Hildenbrand      * After an eventual first bitmap sync, fixup the initial bitmap
2799be39b4cdSDavid Hildenbrand      * containing all 1s to exclude any discarded pages from migration.
2800be39b4cdSDavid Hildenbrand      */
2801be39b4cdSDavid Hildenbrand     migration_bitmap_clear_discarded_pages(rs);
2802030b56b2SCédric Le Goater     return true;
2803d6eff5d7SPeter Xu }
2804d6eff5d7SPeter Xu 
2805030b56b2SCédric Le Goater static int ram_init_all(RAMState **rsp, Error **errp)
28067d00ee6aSPeter Xu {
2807030b56b2SCédric Le Goater     if (!ram_state_init(rsp, errp)) {
28087d00ee6aSPeter Xu         return -1;
28097d00ee6aSPeter Xu     }
28107d00ee6aSPeter Xu 
2811030b56b2SCédric Le Goater     if (!xbzrle_init(errp)) {
281284593a08SPeter Xu         ram_state_cleanup(rsp);
281356e93d26SJuan Quintela         return -1;
281456e93d26SJuan Quintela     }
281556e93d26SJuan Quintela 
2816030b56b2SCédric Le Goater     if (!ram_init_bitmaps(*rsp, errp)) {
2817030b56b2SCédric Le Goater         return -1;
2818030b56b2SCédric Le Goater     }
2819a91246c9Szhanghailiang 
2820a91246c9Szhanghailiang     return 0;
2821a91246c9Szhanghailiang }
2822a91246c9Szhanghailiang 
282308614f34SPeter Xu static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
282408614f34SPeter Xu {
282508614f34SPeter Xu     RAMBlock *block;
282608614f34SPeter Xu     uint64_t pages = 0;
282708614f34SPeter Xu 
282808614f34SPeter Xu     /*
282908614f34SPeter Xu      * Postcopy is not using xbzrle/compression, so no need for that.
283008614f34SPeter Xu      * Also, since source are already halted, we don't need to care
283108614f34SPeter Xu      * Also, since the source is already halted, we don't need to care
283208614f34SPeter Xu      * about dirty page logging either.
283308614f34SPeter Xu 
2834fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
283508614f34SPeter Xu         pages += bitmap_count_one(block->bmap,
283608614f34SPeter Xu                                   block->used_length >> TARGET_PAGE_BITS);
283708614f34SPeter Xu     }
283808614f34SPeter Xu 
283908614f34SPeter Xu     /* This may not be aligned with current bitmaps. Recalculate. */
284008614f34SPeter Xu     rs->migration_dirty_pages = pages;
284108614f34SPeter Xu 
28421a373522SDavid Hildenbrand     ram_state_reset(rs);
284308614f34SPeter Xu 
284408614f34SPeter Xu     /* Update RAMState cache of output QEMUFile */
28457f401b80SPeter Xu     rs->pss[RAM_CHANNEL_PRECOPY].pss_channel = out;
284608614f34SPeter Xu 
284708614f34SPeter Xu     trace_ram_state_resume_prepare(pages);
284808614f34SPeter Xu }
284908614f34SPeter Xu 
28503d0684b2SJuan Quintela /*
28516bcb05fcSWei Wang  * This function clears bits of the free pages reported by the caller from the
28526bcb05fcSWei Wang  * migration dirty bitmap. @addr is the host address corresponding to the
28536bcb05fcSWei Wang  * start of the continuous guest free pages, and @len is the total bytes of
28546bcb05fcSWei Wang  * those pages.
28556bcb05fcSWei Wang  */
28566bcb05fcSWei Wang void qemu_guest_free_page_hint(void *addr, size_t len)
28576bcb05fcSWei Wang {
28586bcb05fcSWei Wang     RAMBlock *block;
28596bcb05fcSWei Wang     ram_addr_t offset;
28606bcb05fcSWei Wang     size_t used_len, start, npages;
28616bcb05fcSWei Wang 
28626bcb05fcSWei Wang     /* This function is currently expected to be used during live migration */
2863f018eb62SPeter Xu     if (!migration_is_running()) {
28646bcb05fcSWei Wang         return;
28656bcb05fcSWei Wang     }
28666bcb05fcSWei Wang 
28676bcb05fcSWei Wang     for (; len > 0; len -= used_len, addr += used_len) {
28686bcb05fcSWei Wang         block = qemu_ram_block_from_host(addr, false, &offset);
28696bcb05fcSWei Wang         if (unlikely(!block || offset >= block->used_length)) {
28706bcb05fcSWei Wang             /*
28716bcb05fcSWei Wang              * The implementation might not support RAMBlock resize during
28726bcb05fcSWei Wang              * live migration, but it could happen in theory with future
28736bcb05fcSWei Wang              * updates. So we add a check here to capture that case.
28746bcb05fcSWei Wang              */
28756bcb05fcSWei Wang             error_report_once("%s unexpected error", __func__);
28766bcb05fcSWei Wang             return;
28776bcb05fcSWei Wang         }
28786bcb05fcSWei Wang 
28796bcb05fcSWei Wang         if (len <= block->used_length - offset) {
28806bcb05fcSWei Wang             used_len = len;
28816bcb05fcSWei Wang         } else {
28826bcb05fcSWei Wang             used_len = block->used_length - offset;
28836bcb05fcSWei Wang         }
28846bcb05fcSWei Wang 
28856bcb05fcSWei Wang         start = offset >> TARGET_PAGE_BITS;
28866bcb05fcSWei Wang         npages = used_len >> TARGET_PAGE_BITS;
28876bcb05fcSWei Wang 
28886bcb05fcSWei Wang         qemu_mutex_lock(&ram_state->bitmap_mutex);
28893143577dSWei Wang         /*
28903143577dSWei Wang          * The skipped free pages are equivalent to having been sent from clear_bmap's
28913143577dSWei Wang          * perspective, so clear the bits from the memory region bitmap which
28923143577dSWei Wang          * are initially set. Otherwise those skipped pages will be sent in
28933143577dSWei Wang          * the next round after syncing from the memory region bitmap.
28943143577dSWei Wang          */
28951230a25fSDavid Hildenbrand         migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
28966bcb05fcSWei Wang         ram_state->migration_dirty_pages -=
28976bcb05fcSWei Wang                       bitmap_count_one_with_offset(block->bmap, start, npages);
28986bcb05fcSWei Wang         bitmap_clear(block->bmap, start, npages);
28996bcb05fcSWei Wang         qemu_mutex_unlock(&ram_state->bitmap_mutex);
29006bcb05fcSWei Wang     }
29016bcb05fcSWei Wang }
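
/*
 * A minimal caller-side sketch, assuming a free-page-hinting backend such as
 * virtio-balloon wants to forward a run of guest-freed memory to the helper
 * above; the function name and its alignment policy are illustrative
 * assumptions, only qemu_guest_free_page_hint() is real.
 */
static void example_report_free_range(void *hva, size_t bytes)
{
    /* Hint whole target pages only; any tail fragment simply stays dirty. */
    size_t aligned = QEMU_ALIGN_DOWN(bytes, TARGET_PAGE_SIZE);

    if (aligned) {
        qemu_guest_free_page_hint(hva, aligned);
    }
}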
29026bcb05fcSWei Wang 
2903c2d5c4a7SFabiano Rosas #define MAPPED_RAM_HDR_VERSION 1
2904c2d5c4a7SFabiano Rosas struct MappedRamHeader {
2905c2d5c4a7SFabiano Rosas     uint32_t version;
2906c2d5c4a7SFabiano Rosas     /*
2907c2d5c4a7SFabiano Rosas      * The target's page size, so we know how many pages are in the
2908c2d5c4a7SFabiano Rosas      * bitmap.
2909c2d5c4a7SFabiano Rosas      */
2910c2d5c4a7SFabiano Rosas     uint64_t page_size;
2911c2d5c4a7SFabiano Rosas     /*
2912c2d5c4a7SFabiano Rosas      * The offset in the migration file where the pages bitmap is
2913c2d5c4a7SFabiano Rosas      * stored.
2914c2d5c4a7SFabiano Rosas      */
2915c2d5c4a7SFabiano Rosas     uint64_t bitmap_offset;
2916c2d5c4a7SFabiano Rosas     /*
2917c2d5c4a7SFabiano Rosas      * The offset in the migration file where the actual pages (data)
2918c2d5c4a7SFabiano Rosas      * are stored.
2919c2d5c4a7SFabiano Rosas      */
2920c2d5c4a7SFabiano Rosas     uint64_t pages_offset;
2921c2d5c4a7SFabiano Rosas } QEMU_PACKED;
2922c2d5c4a7SFabiano Rosas typedef struct MappedRamHeader MappedRamHeader;
2923c2d5c4a7SFabiano Rosas 
2924c2d5c4a7SFabiano Rosas static void mapped_ram_setup_ramblock(QEMUFile *file, RAMBlock *block)
2925c2d5c4a7SFabiano Rosas {
2926c2d5c4a7SFabiano Rosas     g_autofree MappedRamHeader *header = NULL;
2927c2d5c4a7SFabiano Rosas     size_t header_size, bitmap_size;
2928c2d5c4a7SFabiano Rosas     long num_pages;
2929c2d5c4a7SFabiano Rosas 
2930c2d5c4a7SFabiano Rosas     header = g_new0(MappedRamHeader, 1);
2931c2d5c4a7SFabiano Rosas     header_size = sizeof(MappedRamHeader);
2932c2d5c4a7SFabiano Rosas 
2933c2d5c4a7SFabiano Rosas     num_pages = block->used_length >> TARGET_PAGE_BITS;
2934c2d5c4a7SFabiano Rosas     bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
2935c2d5c4a7SFabiano Rosas 
2936c2d5c4a7SFabiano Rosas     /*
2937c2d5c4a7SFabiano Rosas      * Save the file offsets of where the bitmap and the pages should
2938c2d5c4a7SFabiano Rosas      * go as they are written at the end of migration and during the
2939c2d5c4a7SFabiano Rosas      * iterative phase, respectively.
2940c2d5c4a7SFabiano Rosas      */
2941c2d5c4a7SFabiano Rosas     block->bitmap_offset = qemu_get_offset(file) + header_size;
2942c2d5c4a7SFabiano Rosas     block->pages_offset = ROUND_UP(block->bitmap_offset +
2943c2d5c4a7SFabiano Rosas                                    bitmap_size,
2944c2d5c4a7SFabiano Rosas                                    MAPPED_RAM_FILE_OFFSET_ALIGNMENT);
2945c2d5c4a7SFabiano Rosas 
2946c2d5c4a7SFabiano Rosas     header->version = cpu_to_be32(MAPPED_RAM_HDR_VERSION);
2947c2d5c4a7SFabiano Rosas     header->page_size = cpu_to_be64(TARGET_PAGE_SIZE);
2948c2d5c4a7SFabiano Rosas     header->bitmap_offset = cpu_to_be64(block->bitmap_offset);
2949c2d5c4a7SFabiano Rosas     header->pages_offset = cpu_to_be64(block->pages_offset);
2950c2d5c4a7SFabiano Rosas 
2951c2d5c4a7SFabiano Rosas     qemu_put_buffer(file, (uint8_t *) header, header_size);
2952c2d5c4a7SFabiano Rosas 
2953c2d5c4a7SFabiano Rosas     /* prepare offset for next ramblock */
2954c2d5c4a7SFabiano Rosas     qemu_set_offset(file, block->pages_offset + block->used_length, SEEK_SET);
2955c2d5c4a7SFabiano Rosas }
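
/*
 * Worked example of the offsets computed above, assuming a 4 KiB target page
 * size, a 64-bit host and a 4 GiB RAMBlock (the concrete value of
 * MAPPED_RAM_FILE_OFFSET_ALIGNMENT is left symbolic):
 *
 *   num_pages     = 4 GiB / 4 KiB                 = 1048576
 *   bitmap_size   = BITS_TO_LONGS(1048576) * 8    = 128 KiB
 *   bitmap_offset = qemu_get_offset(file) + sizeof(MappedRamHeader)
 *   pages_offset  = ROUND_UP(bitmap_offset + 128 KiB,
 *                            MAPPED_RAM_FILE_OFFSET_ALIGNMENT)
 *
 * The next RAMBlock's header then starts at pages_offset + used_length,
 * which is where qemu_set_offset() leaves the file position.
 */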
2956c2d5c4a7SFabiano Rosas 
29572f6b8826SFabiano Rosas static bool mapped_ram_read_header(QEMUFile *file, MappedRamHeader *header,
29582f6b8826SFabiano Rosas                                    Error **errp)
29592f6b8826SFabiano Rosas {
29602f6b8826SFabiano Rosas     size_t ret, header_size = sizeof(MappedRamHeader);
29612f6b8826SFabiano Rosas 
29622f6b8826SFabiano Rosas     ret = qemu_get_buffer(file, (uint8_t *)header, header_size);
29632f6b8826SFabiano Rosas     if (ret != header_size) {
29642f6b8826SFabiano Rosas         error_setg(errp, "Could not read whole mapped-ram migration header "
29652f6b8826SFabiano Rosas                    "(expected %zd, got %zd bytes)", header_size, ret);
29662f6b8826SFabiano Rosas         return false;
29672f6b8826SFabiano Rosas     }
29682f6b8826SFabiano Rosas 
29692f6b8826SFabiano Rosas     /* migration stream is big-endian */
29702f6b8826SFabiano Rosas     header->version = be32_to_cpu(header->version);
29712f6b8826SFabiano Rosas 
29722f6b8826SFabiano Rosas     if (header->version > MAPPED_RAM_HDR_VERSION) {
29732f6b8826SFabiano Rosas         error_setg(errp, "Migration mapped-ram capability version not "
29742f6b8826SFabiano Rosas                    "supported (expected <= %d, got %d)", MAPPED_RAM_HDR_VERSION,
29752f6b8826SFabiano Rosas                    header->version);
29762f6b8826SFabiano Rosas         return false;
29772f6b8826SFabiano Rosas     }
29782f6b8826SFabiano Rosas 
29792f6b8826SFabiano Rosas     header->page_size = be64_to_cpu(header->page_size);
29802f6b8826SFabiano Rosas     header->bitmap_offset = be64_to_cpu(header->bitmap_offset);
29812f6b8826SFabiano Rosas     header->pages_offset = be64_to_cpu(header->pages_offset);
29822f6b8826SFabiano Rosas 
29832f6b8826SFabiano Rosas     return true;
29842f6b8826SFabiano Rosas }
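
/*
 * A minimal load-side sketch of consuming the header, assuming the caller
 * wants to reject an incompatible page size; the function name and the
 * page-size check are illustrative assumptions, only
 * mapped_ram_read_header() is real.
 */
static bool example_check_mapped_ram_header(QEMUFile *file, Error **errp)
{
    MappedRamHeader header;

    if (!mapped_ram_read_header(file, &header, errp)) {
        return false;
    }

    /* All fields are host-endian after the be*_to_cpu() conversions above. */
    if (header.page_size != TARGET_PAGE_SIZE) {
        error_setg(errp, "Mapped-ram page size %" PRIu64 " does not match "
                   "target page size %d", header.page_size,
                   (int)TARGET_PAGE_SIZE);
        return false;
    }

    return true;
}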
29852f6b8826SFabiano Rosas 
29866bcb05fcSWei Wang /*
29873d0684b2SJuan Quintela  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
2988a91246c9Szhanghailiang  * long-running RCU critical section.  When rcu-reclaims in the code
2989a91246c9Szhanghailiang  * start to become numerous it will be necessary to reduce the
2990a91246c9Szhanghailiang  * granularity of these critical sections.
2991a91246c9Szhanghailiang  */
2992a91246c9Szhanghailiang 
29933d0684b2SJuan Quintela /**
29943d0684b2SJuan Quintela  * ram_save_setup: Setup RAM for migration
29953d0684b2SJuan Quintela  *
29963d0684b2SJuan Quintela  * Returns zero to indicate success and negative for error
29973d0684b2SJuan Quintela  *
29983d0684b2SJuan Quintela  * @f: QEMUFile where to send the data
29993d0684b2SJuan Quintela  * @opaque: RAMState pointer
300001c3ac68SCédric Le Goater  * @errp: pointer to Error*, to store an error if it happens.
30013d0684b2SJuan Quintela  */
300201c3ac68SCédric Le Goater static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
3003a91246c9Szhanghailiang {
300453518d94SJuan Quintela     RAMState **rsp = opaque;
3005a91246c9Szhanghailiang     RAMBlock *block;
30065d220369SRichard Henderson     int ret, max_hg_page_size;
3007a91246c9Szhanghailiang 
3008a91246c9Szhanghailiang     /* migration has already setup the bitmap, reuse it. */
3009a91246c9Szhanghailiang     if (!migration_in_colo_state()) {
3010030b56b2SCédric Le Goater         if (ram_init_all(rsp, errp) != 0) {
3011a91246c9Szhanghailiang             return -1;
3012a91246c9Szhanghailiang         }
3013a91246c9Szhanghailiang     }
30147f401b80SPeter Xu     (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
3015a91246c9Szhanghailiang 
30165d220369SRichard Henderson     /*
30175d220369SRichard Henderson      * ??? Mirrors the previous value of qemu_host_page_size,
30185d220369SRichard Henderson      * but is this really what was intended for the migration?
30195d220369SRichard Henderson      */
30205d220369SRichard Henderson     max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE);
30215d220369SRichard Henderson 
30220e6ebd48SDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
30238008a272SJuan Quintela         qemu_put_be64(f, ram_bytes_total_with_ignored()
30248008a272SJuan Quintela                          | RAM_SAVE_FLAG_MEM_SIZE);
302556e93d26SJuan Quintela 
3026b895de50SCédric Le Goater         RAMBLOCK_FOREACH_MIGRATABLE(block) {
302756e93d26SJuan Quintela             qemu_put_byte(f, strlen(block->idstr));
302856e93d26SJuan Quintela             qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
302956e93d26SJuan Quintela             qemu_put_be64(f, block->used_length);
30305d220369SRichard Henderson             if (migrate_postcopy_ram() &&
30315d220369SRichard Henderson                 block->page_size != max_hg_page_size) {
3032ef08fb38SDr. David Alan Gilbert                 qemu_put_be64(f, block->page_size);
3033ef08fb38SDr. David Alan Gilbert             }
3034fbd162e6SYury Kotov             if (migrate_ignore_shared()) {
3035fbd162e6SYury Kotov                 qemu_put_be64(f, block->mr->addr);
3036fbd162e6SYury Kotov             }
3037c2d5c4a7SFabiano Rosas 
3038c2d5c4a7SFabiano Rosas             if (migrate_mapped_ram()) {
3039c2d5c4a7SFabiano Rosas                 mapped_ram_setup_ramblock(f, block);
3040c2d5c4a7SFabiano Rosas             }
304156e93d26SJuan Quintela         }
30420e6ebd48SDr. David Alan Gilbert     }
304356e93d26SJuan Quintela 
3044b1b38387SJuan Quintela     ret = rdma_registration_start(f, RAM_CONTROL_SETUP);
304548408174SJuan Quintela     if (ret < 0) {
304601c3ac68SCédric Le Goater         error_setg(errp, "%s: failed to start RDMA registration", __func__);
304748408174SJuan Quintela         qemu_file_set_error(f, ret);
3048a2326705SPeter Xu         return ret;
304948408174SJuan Quintela     }
30505f5b8858SJuan Quintela 
3051b1b38387SJuan Quintela     ret = rdma_registration_stop(f, RAM_CONTROL_SETUP);
30525f5b8858SJuan Quintela     if (ret < 0) {
305301c3ac68SCédric Le Goater         error_setg(errp, "%s: failed to stop RDMA registration", __func__);
30545f5b8858SJuan Quintela         qemu_file_set_error(f, ret);
3055a2326705SPeter Xu         return ret;
30565f5b8858SJuan Quintela     }
305756e93d26SJuan Quintela 
30584010ba38SJuan Quintela     migration_ops = g_malloc0(sizeof(MigrationOps));
30599ae90f73SHao Xiang 
30609ae90f73SHao Xiang     if (migrate_multifd()) {
3061a71ef5c7SFabiano Rosas         multifd_ram_save_setup();
30629ae90f73SHao Xiang         migration_ops->ram_save_target_page = ram_save_target_page_multifd;
30639ae90f73SHao Xiang     } else {
30644010ba38SJuan Quintela         migration_ops->ram_save_target_page = ram_save_target_page_legacy;
30659ae90f73SHao Xiang     }
3066930e239dSFiona Ebner 
3067195801d7SStefan Hajnoczi     bql_unlock();
3068a0c78d81SFabiano Rosas     ret = multifd_ram_flush_and_sync();
3069195801d7SStefan Hajnoczi     bql_lock();
307033d70973SLeonardo Bras     if (ret < 0) {
307101c3ac68SCédric Le Goater         error_setg(errp, "%s: multifd synchronization failed", __func__);
307233d70973SLeonardo Bras         return ret;
307333d70973SLeonardo Bras     }
307433d70973SLeonardo Bras 
30759d01778aSFabiano Rosas     if (migrate_multifd() && !migrate_multifd_flush_after_each_section()
30769d01778aSFabiano Rosas         && !migrate_mapped_ram()) {
3077294e5a40SJuan Quintela         qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
3078294e5a40SJuan Quintela     }
3079294e5a40SJuan Quintela 
308056e93d26SJuan Quintela     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
308176936bbcSCédric Le Goater     ret = qemu_fflush(f);
308276936bbcSCédric Le Goater     if (ret < 0) {
308301c3ac68SCédric Le Goater         error_setg_errno(errp, -ret, "%s failed", __func__);
308476936bbcSCédric Le Goater     }
308576936bbcSCédric Le Goater     return ret;
308656e93d26SJuan Quintela }
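
/*
 * Sketch of what ram_save_setup() puts on the wire, derived from the calls
 * above (the flag bit values themselves are not spelled out here):
 *
 *   be64  ram_bytes_total_with_ignored() | RAM_SAVE_FLAG_MEM_SIZE
 *   for each migratable RAMBlock:
 *     u8    strlen(idstr)
 *     bytes idstr (not NUL-terminated)
 *     be64  used_length
 *     be64  page_size            (only with postcopy-ram and a non-default
 *                                 block page size)
 *     be64  mr->addr             (only with ignore-shared)
 *     MappedRamHeader + seek     (only with mapped-ram)
 *   be64  RAM_SAVE_FLAG_MULTIFD_FLUSH   (multifd, unless flushing after each
 *                                        section or using mapped-ram)
 *   be64  RAM_SAVE_FLAG_EOS
 */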
308756e93d26SJuan Quintela 
3088c2d5c4a7SFabiano Rosas static void ram_save_file_bmap(QEMUFile *f)
3089c2d5c4a7SFabiano Rosas {
3090c2d5c4a7SFabiano Rosas     RAMBlock *block;
3091c2d5c4a7SFabiano Rosas 
3092c2d5c4a7SFabiano Rosas     RAMBLOCK_FOREACH_MIGRATABLE(block) {
3093c2d5c4a7SFabiano Rosas         long num_pages = block->used_length >> TARGET_PAGE_BITS;
3094c2d5c4a7SFabiano Rosas         long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
3095c2d5c4a7SFabiano Rosas 
3096c2d5c4a7SFabiano Rosas         qemu_put_buffer_at(f, (uint8_t *)block->file_bmap, bitmap_size,
3097c2d5c4a7SFabiano Rosas                            block->bitmap_offset);
3098c2d5c4a7SFabiano Rosas         ram_transferred_add(bitmap_size);
3099f427d90bSFabiano Rosas 
3100f427d90bSFabiano Rosas         /*
3101f427d90bSFabiano Rosas          * Free the bitmap here to catch any synchronization issues
3102f427d90bSFabiano Rosas          * with multifd channels. No channels should be sending pages
3103f427d90bSFabiano Rosas          * after we've written the bitmap to file.
3104f427d90bSFabiano Rosas          */
3105f427d90bSFabiano Rosas         g_free(block->file_bmap);
3106f427d90bSFabiano Rosas         block->file_bmap = NULL;
3107c2d5c4a7SFabiano Rosas     }
3108c2d5c4a7SFabiano Rosas }
3109c2d5c4a7SFabiano Rosas 
3110c3cdf3fbSFabiano Rosas void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset, bool set)
3111f427d90bSFabiano Rosas {
3112c3cdf3fbSFabiano Rosas     if (set) {
3113f427d90bSFabiano Rosas         set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
3114c3cdf3fbSFabiano Rosas     } else {
3115c3cdf3fbSFabiano Rosas         clear_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap);
3116c3cdf3fbSFabiano Rosas     }
3117f427d90bSFabiano Rosas }
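
/*
 * A minimal sketch of how a sender might mark a page as present in the
 * mapped-ram file bitmap after writing its data; the wrapper name is an
 * illustrative assumption, only ramblock_set_file_bmap_atomic() is real.
 */
static void example_mark_page_written(RAMBlock *block, ram_addr_t offset)
{
    /* The bit update is atomic, so concurrent multifd channels can share it. */
    ramblock_set_file_bmap_atomic(block, offset, true);
}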
3118f427d90bSFabiano Rosas 
31193d0684b2SJuan Quintela /**
31203d0684b2SJuan Quintela  * ram_save_iterate: iterative stage for migration
31213d0684b2SJuan Quintela  *
31223d0684b2SJuan Quintela  * Returns zero to indicate success and negative for error
31233d0684b2SJuan Quintela  *
31243d0684b2SJuan Quintela  * @f: QEMUFile where to send the data
31253d0684b2SJuan Quintela  * @opaque: RAMState pointer
31263d0684b2SJuan Quintela  */
312756e93d26SJuan Quintela static int ram_save_iterate(QEMUFile *f, void *opaque)
312856e93d26SJuan Quintela {
312953518d94SJuan Quintela     RAMState **temp = opaque;
313053518d94SJuan Quintela     RAMState *rs = *temp;
31313d4095b2SJuan Quintela     int ret = 0;
313256e93d26SJuan Quintela     int i;
313356e93d26SJuan Quintela     int64_t t0;
31345c90308fSThomas Huth     int done = 0;
313556e93d26SJuan Quintela 
313663268c49SPeter Xu     /*
313763268c49SPeter Xu      * We'll hold this lock for a while, but that's okay for two reasons.
313863268c49SPeter Xu      * Firstly, the only other thread that may take it is the one calling
313963268c49SPeter Xu      * qemu_guest_free_page_hint(), which should be rare; secondly, see
314063268c49SPeter Xu      * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which
314163268c49SPeter Xu      * guarantees that we'll at least release it on a regular basis.
314263268c49SPeter Xu      */
31430983125bSJuan Quintela     WITH_QEMU_LOCK_GUARD(&rs->bitmap_mutex) {
314489ac5a1dSDr. David Alan Gilbert         WITH_RCU_READ_LOCK_GUARD() {
31456f37bb8bSJuan Quintela             if (ram_list.version != rs->last_version) {
31466f37bb8bSJuan Quintela                 ram_state_reset(rs);
314756e93d26SJuan Quintela             }
314856e93d26SJuan Quintela 
314956e93d26SJuan Quintela             /* Read version before ram_list.blocks */
315056e93d26SJuan Quintela             smp_rmb();
315156e93d26SJuan Quintela 
3152b1b38387SJuan Quintela             ret = rdma_registration_start(f, RAM_CONTROL_ROUND);
315348408174SJuan Quintela             if (ret < 0) {
315448408174SJuan Quintela                 qemu_file_set_error(f, ret);
3155a2326705SPeter Xu                 goto out;
315648408174SJuan Quintela             }
315756e93d26SJuan Quintela 
315856e93d26SJuan Quintela             t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
315956e93d26SJuan Quintela             i = 0;
3160e1fde0e0SJuan Quintela             while ((ret = migration_rate_exceeded(f)) == 0 ||
3161a1fe28dfSPeter Xu                    postcopy_has_request(rs)) {
316256e93d26SJuan Quintela                 int pages;
316356e93d26SJuan Quintela 
3164e03a34f8SDr. David Alan Gilbert                 if (qemu_file_get_error(f)) {
3165e03a34f8SDr. David Alan Gilbert                     break;
3166e03a34f8SDr. David Alan Gilbert                 }
3167e03a34f8SDr. David Alan Gilbert 
316805931ec5SJuan Quintela                 pages = ram_find_and_save_block(rs);
316956e93d26SJuan Quintela                 /* no more pages to send */
317056e93d26SJuan Quintela                 if (pages == 0) {
31715c90308fSThomas Huth                     done = 1;
317256e93d26SJuan Quintela                     break;
317356e93d26SJuan Quintela                 }
3174e8f3735fSXiao Guangrong 
3175e8f3735fSXiao Guangrong                 if (pages < 0) {
3176e8f3735fSXiao Guangrong                     qemu_file_set_error(f, pages);
3177e8f3735fSXiao Guangrong                     break;
3178e8f3735fSXiao Guangrong                 }
3179e8f3735fSXiao Guangrong 
3180be8b02edSXiao Guangrong                 rs->target_page_count += pages;
3181070afca2SJason J. Herne 
318289ac5a1dSDr. David Alan Gilbert                 /*
318389ac5a1dSDr. David Alan Gilbert                  * We want to check on the first iteration, just in case it
318489ac5a1dSDr. David Alan Gilbert                  * was the first time and we had to sync the dirty bitmap.
318589ac5a1dSDr. David Alan Gilbert                  * qemu_clock_get_ns() is a bit expensive, so we only check
318689ac5a1dSDr. David Alan Gilbert                  * every few iterations.
318756e93d26SJuan Quintela                  */
318856e93d26SJuan Quintela                 if ((i & 63) == 0) {
318989ac5a1dSDr. David Alan Gilbert                     uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
319089ac5a1dSDr. David Alan Gilbert                         1000000;
319156e93d26SJuan Quintela                     if (t1 > MAX_WAIT) {
319255c4446bSJuan Quintela                         trace_ram_save_iterate_big_wait(t1, i);
319356e93d26SJuan Quintela                         break;
319456e93d26SJuan Quintela                     }
319556e93d26SJuan Quintela                 }
319656e93d26SJuan Quintela                 i++;
319756e93d26SJuan Quintela             }
319889ac5a1dSDr. David Alan Gilbert         }
31990983125bSJuan Quintela     }
320056e93d26SJuan Quintela 
320156e93d26SJuan Quintela     /*
320256e93d26SJuan Quintela      * Must occur before EOS (or any QEMUFile operation)
320356e93d26SJuan Quintela      * because of RDMA protocol.
320456e93d26SJuan Quintela      */
3205b1b38387SJuan Quintela     ret = rdma_registration_stop(f, RAM_CONTROL_ROUND);
32065f5b8858SJuan Quintela     if (ret < 0) {
32075f5b8858SJuan Quintela         qemu_file_set_error(f, ret);
32085f5b8858SJuan Quintela     }
320956e93d26SJuan Quintela 
3210b2557345SPeter Lieven out:
3211f018eb62SPeter Xu     if (ret >= 0 && migration_is_running()) {
32129d01778aSFabiano Rosas         if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
32139d01778aSFabiano Rosas             !migrate_mapped_ram()) {
3214a0c78d81SFabiano Rosas             ret = multifd_ram_flush_and_sync();
321533d70973SLeonardo Bras             if (ret < 0) {
321633d70973SLeonardo Bras                 return ret;
321733d70973SLeonardo Bras             }
3218b05292c2SJuan Quintela         }
321933d70973SLeonardo Bras 
322056e93d26SJuan Quintela         qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
32214c2d0f6dSDavid Edmondson         ram_transferred_add(8);
3222be07a0edSJuan Quintela         ret = qemu_fflush(f);
32233d4095b2SJuan Quintela     }
322456e93d26SJuan Quintela     if (ret < 0) {
322556e93d26SJuan Quintela         return ret;
322656e93d26SJuan Quintela     }
322756e93d26SJuan Quintela 
32285c90308fSThomas Huth     return done;
322956e93d26SJuan Quintela }
323056e93d26SJuan Quintela 
32313d0684b2SJuan Quintela /**
32323d0684b2SJuan Quintela  * ram_save_complete: function called to send the remaining amount of ram
32333d0684b2SJuan Quintela  *
3234e8f3735fSXiao Guangrong  * Returns zero to indicate success or negative on error
32353d0684b2SJuan Quintela  *
3236a4a411fbSStefan Hajnoczi  * Called with the BQL
32373d0684b2SJuan Quintela  *
32383d0684b2SJuan Quintela  * @f: QEMUFile where to send the data
32393d0684b2SJuan Quintela  * @opaque: RAMState pointer
32403d0684b2SJuan Quintela  */
324156e93d26SJuan Quintela static int ram_save_complete(QEMUFile *f, void *opaque)
324256e93d26SJuan Quintela {
324353518d94SJuan Quintela     RAMState **temp = opaque;
324453518d94SJuan Quintela     RAMState *rs = *temp;
3245e8f3735fSXiao Guangrong     int ret = 0;
32466f37bb8bSJuan Quintela 
324705931ec5SJuan Quintela     rs->last_stage = !migration_in_colo_state();
324805931ec5SJuan Quintela 
324989ac5a1dSDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
32505727309dSJuan Quintela         if (!migration_in_postcopy()) {
32516a39ba7cSHyman Huang             migration_bitmap_sync_precopy(true);
3252663e6c1dSDr. David Alan Gilbert         }
325356e93d26SJuan Quintela 
3254b1b38387SJuan Quintela         ret = rdma_registration_start(f, RAM_CONTROL_FINISH);
325548408174SJuan Quintela         if (ret < 0) {
325648408174SJuan Quintela             qemu_file_set_error(f, ret);
3257a2326705SPeter Xu             return ret;
325848408174SJuan Quintela         }
325956e93d26SJuan Quintela 
326056e93d26SJuan Quintela         /* try transferring iterative blocks of memory */
326156e93d26SJuan Quintela 
326256e93d26SJuan Quintela         /* flush all remaining blocks regardless of rate limiting */
3263c13221b5SPeter Xu         qemu_mutex_lock(&rs->bitmap_mutex);
326456e93d26SJuan Quintela         while (true) {
326556e93d26SJuan Quintela             int pages;
326656e93d26SJuan Quintela 
326705931ec5SJuan Quintela             pages = ram_find_and_save_block(rs);
326856e93d26SJuan Quintela             /* no more blocks to send */
326956e93d26SJuan Quintela             if (pages == 0) {
327056e93d26SJuan Quintela                 break;
327156e93d26SJuan Quintela             }
3272e8f3735fSXiao Guangrong             if (pages < 0) {
3273a2326705SPeter Xu                 qemu_mutex_unlock(&rs->bitmap_mutex);
3274a2326705SPeter Xu                 return pages;
3275e8f3735fSXiao Guangrong             }
327656e93d26SJuan Quintela         }
3277c13221b5SPeter Xu         qemu_mutex_unlock(&rs->bitmap_mutex);
327856e93d26SJuan Quintela 
3279a2326705SPeter Xu         ret = rdma_registration_stop(f, RAM_CONTROL_FINISH);
328033d70973SLeonardo Bras         if (ret < 0) {
3281a2326705SPeter Xu             qemu_file_set_error(f, ret);
328233d70973SLeonardo Bras             return ret;
32833d4095b2SJuan Quintela         }
3284a2326705SPeter Xu     }
328556e93d26SJuan Quintela 
3286a0c78d81SFabiano Rosas     ret = multifd_ram_flush_and_sync();
328733d70973SLeonardo Bras     if (ret < 0) {
3288e8f3735fSXiao Guangrong         return ret;
328956e93d26SJuan Quintela     }
329056e93d26SJuan Quintela 
3291c2d5c4a7SFabiano Rosas     if (migrate_mapped_ram()) {
3292c2d5c4a7SFabiano Rosas         ram_save_file_bmap(f);
3293c2d5c4a7SFabiano Rosas 
3294c2d5c4a7SFabiano Rosas         if (qemu_file_get_error(f)) {
3295c2d5c4a7SFabiano Rosas             Error *local_err = NULL;
3296c2d5c4a7SFabiano Rosas             int err = qemu_file_get_error_obj(f, &local_err);
3297c2d5c4a7SFabiano Rosas 
3298c2d5c4a7SFabiano Rosas             error_reportf_err(local_err, "Failed to write bitmap to file: ");
3299c2d5c4a7SFabiano Rosas             return -err;
3300c2d5c4a7SFabiano Rosas         }
3301c2d5c4a7SFabiano Rosas     }
3302c2d5c4a7SFabiano Rosas 
330333d70973SLeonardo Bras     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3304be07a0edSJuan Quintela     return qemu_fflush(f);
330533d70973SLeonardo Bras }
330633d70973SLeonardo Bras 
330724beea4eSJuan Quintela static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
330824beea4eSJuan Quintela                                        uint64_t *can_postcopy)
330956e93d26SJuan Quintela {
331053518d94SJuan Quintela     RAMState **temp = opaque;
331153518d94SJuan Quintela     RAMState *rs = *temp;
331256e93d26SJuan Quintela 
3313c8df4a7aSJuan Quintela     uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
331456e93d26SJuan Quintela 
3315c8df4a7aSJuan Quintela     if (migrate_postcopy_ram()) {
3316c8df4a7aSJuan Quintela         /* We can do postcopy, and all the data is postcopiable */
331724beea4eSJuan Quintela         *can_postcopy += remaining_size;
3318c8df4a7aSJuan Quintela     } else {
331924beea4eSJuan Quintela         *must_precopy += remaining_size;
3320c8df4a7aSJuan Quintela     }
3321c8df4a7aSJuan Quintela }
3322c8df4a7aSJuan Quintela 
332324beea4eSJuan Quintela static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
332424beea4eSJuan Quintela                                     uint64_t *can_postcopy)
3325c8df4a7aSJuan Quintela {
3326c8df4a7aSJuan Quintela     RAMState **temp = opaque;
3327c8df4a7aSJuan Quintela     RAMState *rs = *temp;
3328b0504eddSPeter Xu     uint64_t remaining_size;
3329c8df4a7aSJuan Quintela 
3330b0504eddSPeter Xu     if (!migration_in_postcopy()) {
3331195801d7SStefan Hajnoczi         bql_lock();
333289ac5a1dSDr. David Alan Gilbert         WITH_RCU_READ_LOCK_GUARD() {
33336a39ba7cSHyman Huang             migration_bitmap_sync_precopy(false);
333489ac5a1dSDr. David Alan Gilbert         }
3335195801d7SStefan Hajnoczi         bql_unlock();
333656e93d26SJuan Quintela     }
3337c31b098fSDr. David Alan Gilbert 
3338b0504eddSPeter Xu     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3339b0504eddSPeter Xu 
334086e1167eSVladimir Sementsov-Ogievskiy     if (migrate_postcopy_ram()) {
3341c31b098fSDr. David Alan Gilbert         /* We can do postcopy, and all the data is postcopiable */
334224beea4eSJuan Quintela         *can_postcopy += remaining_size;
334386e1167eSVladimir Sementsov-Ogievskiy     } else {
334424beea4eSJuan Quintela         *must_precopy += remaining_size;
334586e1167eSVladimir Sementsov-Ogievskiy     }
334656e93d26SJuan Quintela }
334756e93d26SJuan Quintela 
334856e93d26SJuan Quintela static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
334956e93d26SJuan Quintela {
335056e93d26SJuan Quintela     unsigned int xh_len;
335156e93d26SJuan Quintela     int xh_flags;
3352063e760aSDr. David Alan Gilbert     uint8_t *loaded_data;
335356e93d26SJuan Quintela 
335456e93d26SJuan Quintela     /* extract RLE header */
335556e93d26SJuan Quintela     xh_flags = qemu_get_byte(f);
335656e93d26SJuan Quintela     xh_len = qemu_get_be16(f);
335756e93d26SJuan Quintela 
335856e93d26SJuan Quintela     if (xh_flags != ENCODING_FLAG_XBZRLE) {
335956e93d26SJuan Quintela         error_report("Failed to load XBZRLE page - wrong compression!");
336056e93d26SJuan Quintela         return -1;
336156e93d26SJuan Quintela     }
336256e93d26SJuan Quintela 
336356e93d26SJuan Quintela     if (xh_len > TARGET_PAGE_SIZE) {
336456e93d26SJuan Quintela         error_report("Failed to load XBZRLE page - len overflow!");
336556e93d26SJuan Quintela         return -1;
336656e93d26SJuan Quintela     }
3367f265e0e4SJuan Quintela     loaded_data = XBZRLE.decoded_buf;
336856e93d26SJuan Quintela     /* load data and decode */
3369f265e0e4SJuan Quintela     /* it can change loaded_data to point to an internal buffer */
3370063e760aSDr. David Alan Gilbert     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
337156e93d26SJuan Quintela 
337256e93d26SJuan Quintela     /* decode RLE */
3373063e760aSDr. David Alan Gilbert     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
337456e93d26SJuan Quintela                              TARGET_PAGE_SIZE) == -1) {
337556e93d26SJuan Quintela         error_report("Failed to load XBZRLE page - decode error!");
337656e93d26SJuan Quintela         return -1;
337756e93d26SJuan Quintela     }
337856e93d26SJuan Quintela 
337956e93d26SJuan Quintela     return 0;
338056e93d26SJuan Quintela }
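
/*
 * Wire format consumed by load_xbzrle() above, as implied by the reads in
 * the function; the encoded delta is applied on top of the current contents
 * of @host by xbzrle_decode_buffer():
 *
 *   u8    xh_flags   must be ENCODING_FLAG_XBZRLE
 *   be16  xh_len     encoded length, at most TARGET_PAGE_SIZE
 *   bytes xh_len bytes of XBZRLE-encoded data
 */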
338156e93d26SJuan Quintela 
33823d0684b2SJuan Quintela /**
33833d0684b2SJuan Quintela  * ram_block_from_stream: read a RAMBlock id from the migration stream
3384a7180877SDr. David Alan Gilbert  *
33853d0684b2SJuan Quintela  * Must be called from within a rcu critical section.
33863d0684b2SJuan Quintela  *
33873d0684b2SJuan Quintela  * Returns a pointer from within the RCU-protected ram_list.
33883d0684b2SJuan Quintela  *
3389755e8d7cSPeter Xu  * @mis: the migration incoming state pointer
33903d0684b2SJuan Quintela  * @f: QEMUFile where to read the data from
33913d0684b2SJuan Quintela  * @flags: Page flags (mostly to see if it's a continuation of previous block)
3392c01b16edSPeter Xu  * @channel: the channel we're using
3393a7180877SDr. David Alan Gilbert  */
3394755e8d7cSPeter Xu static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
3395c01b16edSPeter Xu                                               QEMUFile *f, int flags,
3396c01b16edSPeter Xu                                               int channel)
339756e93d26SJuan Quintela {
3398c01b16edSPeter Xu     RAMBlock *block = mis->last_recv_block[channel];
339956e93d26SJuan Quintela     char id[256];
340056e93d26SJuan Quintela     uint8_t len;
340156e93d26SJuan Quintela 
340256e93d26SJuan Quintela     if (flags & RAM_SAVE_FLAG_CONTINUE) {
34034c4bad48Szhanghailiang         if (!block) {
340456e93d26SJuan Quintela             error_report("Ack, bad migration stream!");
340556e93d26SJuan Quintela             return NULL;
340656e93d26SJuan Quintela         }
34074c4bad48Szhanghailiang         return block;
340856e93d26SJuan Quintela     }
340956e93d26SJuan Quintela 
341056e93d26SJuan Quintela     len = qemu_get_byte(f);
341156e93d26SJuan Quintela     qemu_get_buffer(f, (uint8_t *)id, len);
341256e93d26SJuan Quintela     id[len] = 0;
341356e93d26SJuan Quintela 
3414e3dd7493SDr. David Alan Gilbert     block = qemu_ram_block_by_name(id);
34154c4bad48Szhanghailiang     if (!block) {
3416e3dd7493SDr. David Alan Gilbert         error_report("Can't find block %s", id);
341756e93d26SJuan Quintela         return NULL;
341856e93d26SJuan Quintela     }
341956e93d26SJuan Quintela 
3420f161c88aSDavid Hildenbrand     if (migrate_ram_is_ignored(block)) {
3421b895de50SCédric Le Goater         error_report("block %s should not be migrated !", id);
3422b895de50SCédric Le Goater         return NULL;
3423b895de50SCédric Le Goater     }
3424b895de50SCédric Le Goater 
3425c01b16edSPeter Xu     mis->last_recv_block[channel] = block;
3426755e8d7cSPeter Xu 
34274c4bad48Szhanghailiang     return block;
34284c4bad48Szhanghailiang }
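
/*
 * Sketch of the block-id encoding parsed above: with RAM_SAVE_FLAG_CONTINUE
 * the page belongs to the same RAMBlock as the previous page received on
 * this channel and no id is sent; otherwise a length byte and the
 * (not NUL-terminated) idstr precede the page payload:
 *
 *   flags with RAM_SAVE_FLAG_CONTINUE      -> reuse mis->last_recv_block[channel]
 *   flags without CONTINUE: u8 len, idstr  -> qemu_ram_block_by_name(id)
 */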
34294c4bad48Szhanghailiang 
34304c4bad48Szhanghailiang static inline void *host_from_ram_block_offset(RAMBlock *block,
34314c4bad48Szhanghailiang                                                ram_addr_t offset)
34324c4bad48Szhanghailiang {
34334c4bad48Szhanghailiang     if (!offset_in_ramblock(block, offset)) {
34344c4bad48Szhanghailiang         return NULL;
34354c4bad48Szhanghailiang     }
34364c4bad48Szhanghailiang 
34374c4bad48Szhanghailiang     return block->host + offset;
34384c4bad48Szhanghailiang }
34394c4bad48Szhanghailiang 
34406a23f639SDavid Hildenbrand static void *host_page_from_ram_block_offset(RAMBlock *block,
34416a23f639SDavid Hildenbrand                                              ram_addr_t offset)
34426a23f639SDavid Hildenbrand {
34436a23f639SDavid Hildenbrand     /* Note: Explicitly no check against offset_in_ramblock(). */
34446a23f639SDavid Hildenbrand     return (void *)QEMU_ALIGN_DOWN((uintptr_t)(block->host + offset),
34456a23f639SDavid Hildenbrand                                    block->page_size);
34466a23f639SDavid Hildenbrand }
34476a23f639SDavid Hildenbrand 
34486a23f639SDavid Hildenbrand static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block,
34496a23f639SDavid Hildenbrand                                                          ram_addr_t offset)
34506a23f639SDavid Hildenbrand {
34516a23f639SDavid Hildenbrand     return ((uintptr_t)block->host + offset) & (block->page_size - 1);
34526a23f639SDavid Hildenbrand }
34536a23f639SDavid Hildenbrand 
3454871cfc54SLukas Straub void colo_record_bitmap(RAMBlock *block, ram_addr_t *normal, uint32_t pages)
3455871cfc54SLukas Straub {
3456871cfc54SLukas Straub     qemu_mutex_lock(&ram_state->bitmap_mutex);
3457871cfc54SLukas Straub     for (int i = 0; i < pages; i++) {
3458871cfc54SLukas Straub         ram_addr_t offset = normal[i];
3459871cfc54SLukas Straub         ram_state->migration_dirty_pages += !test_and_set_bit(
3460871cfc54SLukas Straub                                                 offset >> TARGET_PAGE_BITS,
3461871cfc54SLukas Straub                                                 block->bmap);
3462871cfc54SLukas Straub     }
3463871cfc54SLukas Straub     qemu_mutex_unlock(&ram_state->bitmap_mutex);
3464871cfc54SLukas Straub }
3465871cfc54SLukas Straub 
346613af18f2SZhang Chen static inline void *colo_cache_from_block_offset(RAMBlock *block,
34678af66371Szhanghailiang                              ram_addr_t offset, bool record_bitmap)
346813af18f2SZhang Chen {
346913af18f2SZhang Chen     if (!offset_in_ramblock(block, offset)) {
347013af18f2SZhang Chen         return NULL;
347113af18f2SZhang Chen     }
347213af18f2SZhang Chen     if (!block->colo_cache) {
347313af18f2SZhang Chen         error_report("%s: colo_cache is NULL in block :%s",
347413af18f2SZhang Chen                      __func__, block->idstr);
347513af18f2SZhang Chen         return NULL;
347613af18f2SZhang Chen     }
34777d9acafaSZhang Chen 
34787d9acafaSZhang Chen     /*
34797d9acafaSZhang Chen     * During a COLO checkpoint, we need a bitmap of these migrated pages.
34807d9acafaSZhang Chen     * It helps us decide which pages in the ram cache should be flushed
34817d9acafaSZhang Chen     * into VM's RAM later.
34827d9acafaSZhang Chen     */
3483871cfc54SLukas Straub     if (record_bitmap) {
3484871cfc54SLukas Straub         colo_record_bitmap(block, &offset, 1);
34857d9acafaSZhang Chen     }
348613af18f2SZhang Chen     return block->colo_cache + offset;
348713af18f2SZhang Chen }
348813af18f2SZhang Chen 
34893d0684b2SJuan Quintela /**
34907091dabeSJuan Quintela  * ram_handle_zero: handle the zero page case
34913d0684b2SJuan Quintela  *
349256e93d26SJuan Quintela  * If a page (or a whole RDMA chunk) has been
349356e93d26SJuan Quintela  * determined to be zero, then zap it.
34943d0684b2SJuan Quintela  *
34953d0684b2SJuan Quintela  * @host: host address for the zero page
34973d0684b2SJuan Quintela  * @size: size of the zero page
349856e93d26SJuan Quintela  */
34997091dabeSJuan Quintela void ram_handle_zero(void *host, uint64_t size)
350056e93d26SJuan Quintela {
35017091dabeSJuan Quintela     if (!buffer_is_zero(host, size)) {
35027091dabeSJuan Quintela         memset(host, 0, size);
350356e93d26SJuan Quintela     }
350456e93d26SJuan Quintela }
350556e93d26SJuan Quintela 
3506b70cb3b4SRao, Lei static void colo_init_ram_state(void)
3507b70cb3b4SRao, Lei {
350816ecd25aSCédric Le Goater     Error *local_err = NULL;
350916ecd25aSCédric Le Goater 
351016ecd25aSCédric Le Goater     if (!ram_state_init(&ram_state, &local_err)) {
351116ecd25aSCédric Le Goater         error_report_err(local_err);
351216ecd25aSCédric Le Goater     }
3513b70cb3b4SRao, Lei }
3514b70cb3b4SRao, Lei 
3515b70cb3b4SRao, Lei /*
351613af18f2SZhang Chen  * colo cache: this is for the secondary VM, we cache the whole
351713af18f2SZhang Chen  * memory of the secondary VM.  The global lock needs to be held
351813af18f2SZhang Chen  * to call this helper.
351913af18f2SZhang Chen  */
352013af18f2SZhang Chen int colo_init_ram_cache(void)
352113af18f2SZhang Chen {
352213af18f2SZhang Chen     RAMBlock *block;
352313af18f2SZhang Chen 
352444901b5aSPaolo Bonzini     WITH_RCU_READ_LOCK_GUARD() {
3525fbd162e6SYury Kotov         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
352613af18f2SZhang Chen             block->colo_cache = qemu_anon_ram_alloc(block->used_length,
35278dbe22c6SDavid Hildenbrand                                                     NULL, false, false);
352813af18f2SZhang Chen             if (!block->colo_cache) {
352913af18f2SZhang Chen                 error_report("%s: Can't alloc memory for COLO cache of block %s,"
353013af18f2SZhang Chen                              "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
353113af18f2SZhang Chen                              block->used_length);
353289ac5a1dSDr. David Alan Gilbert                 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
353389ac5a1dSDr. David Alan Gilbert                     if (block->colo_cache) {
353489ac5a1dSDr. David Alan Gilbert                         qemu_anon_ram_free(block->colo_cache, block->used_length);
353589ac5a1dSDr. David Alan Gilbert                         block->colo_cache = NULL;
353689ac5a1dSDr. David Alan Gilbert                     }
353789ac5a1dSDr. David Alan Gilbert                 }
353889ac5a1dSDr. David Alan Gilbert                 return -errno;
353913af18f2SZhang Chen             }
3540e5fdf920SLukas Straub             if (!machine_dump_guest_core(current_machine)) {
3541e5fdf920SLukas Straub                 qemu_madvise(block->colo_cache, block->used_length,
3542e5fdf920SLukas Straub                              QEMU_MADV_DONTDUMP);
3543e5fdf920SLukas Straub             }
354413af18f2SZhang Chen         }
354544901b5aSPaolo Bonzini     }
354644901b5aSPaolo Bonzini 
35477d9acafaSZhang Chen     /*
35487d9acafaSZhang Chen     * Record the dirty pages that were sent by the PVM; we use this dirty bitmap
35497d9acafaSZhang Chen     * to decide which pages in the cache should be flushed into the SVM's RAM. Here
35507d9acafaSZhang Chen     * we use the same name 'ram_bitmap' as for migration.
35517d9acafaSZhang Chen     */
35527d9acafaSZhang Chen     if (ram_bytes_total()) {
3553fbd162e6SYury Kotov         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
35547d9acafaSZhang Chen             unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
35557d9acafaSZhang Chen             block->bmap = bitmap_new(pages);
35567d9acafaSZhang Chen         }
35577d9acafaSZhang Chen     }
35587d9acafaSZhang Chen 
3559b70cb3b4SRao, Lei     colo_init_ram_state();
356013af18f2SZhang Chen     return 0;
356113af18f2SZhang Chen }
356213af18f2SZhang Chen 
35630393031aSzhanghailiang /* TODO: duplicated with ram_init_bitmaps */
35640393031aSzhanghailiang void colo_incoming_start_dirty_log(void)
35650393031aSzhanghailiang {
35660393031aSzhanghailiang     RAMBlock *block = NULL;
3567639ec3fbSCédric Le Goater     Error *local_err = NULL;
3568639ec3fbSCédric Le Goater 
35690393031aSzhanghailiang     /* For memory_global_dirty_log_start below. */
3570195801d7SStefan Hajnoczi     bql_lock();
35710393031aSzhanghailiang     qemu_mutex_lock_ramlist();
35720393031aSzhanghailiang 
35731e493be5SGavin Shan     memory_global_dirty_log_sync(false);
35740393031aSzhanghailiang     WITH_RCU_READ_LOCK_GUARD() {
35750393031aSzhanghailiang         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
35760393031aSzhanghailiang             ramblock_sync_dirty_bitmap(ram_state, block);
35770393031aSzhanghailiang             /* Discard this dirty bitmap record */
35780393031aSzhanghailiang             bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
35790393031aSzhanghailiang         }
3580639ec3fbSCédric Le Goater         if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
3581639ec3fbSCédric Le Goater                                            &local_err)) {
3582639ec3fbSCédric Le Goater             error_report_err(local_err);
3583639ec3fbSCédric Le Goater         }
35840393031aSzhanghailiang     }
35850393031aSzhanghailiang     ram_state->migration_dirty_pages = 0;
35860393031aSzhanghailiang     qemu_mutex_unlock_ramlist();
3587195801d7SStefan Hajnoczi     bql_unlock();
35880393031aSzhanghailiang }
35890393031aSzhanghailiang 
359013af18f2SZhang Chen /* The global lock needs to be held to call this helper */
359113af18f2SZhang Chen void colo_release_ram_cache(void)
359213af18f2SZhang Chen {
359313af18f2SZhang Chen     RAMBlock *block;
359413af18f2SZhang Chen 
359563b41db4SHyman Huang(黄勇)     memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
3596fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
35977d9acafaSZhang Chen         g_free(block->bmap);
35987d9acafaSZhang Chen         block->bmap = NULL;
35997d9acafaSZhang Chen     }
36007d9acafaSZhang Chen 
360189ac5a1dSDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
3602fbd162e6SYury Kotov         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
360313af18f2SZhang Chen             if (block->colo_cache) {
360413af18f2SZhang Chen                 qemu_anon_ram_free(block->colo_cache, block->used_length);
360513af18f2SZhang Chen                 block->colo_cache = NULL;
360613af18f2SZhang Chen             }
360713af18f2SZhang Chen         }
360889ac5a1dSDr. David Alan Gilbert     }
36090393031aSzhanghailiang     ram_state_cleanup(&ram_state);
361013af18f2SZhang Chen }
361113af18f2SZhang Chen 
36123d0684b2SJuan Quintela /**
3613f265e0e4SJuan Quintela  * ram_load_setup: Setup RAM for migration incoming side
3614f265e0e4SJuan Quintela  *
3615f265e0e4SJuan Quintela  * Returns zero to indicate success and negative for error
3616f265e0e4SJuan Quintela  *
3617f265e0e4SJuan Quintela  * @f: QEMUFile where to receive the data
3618f265e0e4SJuan Quintela  * @opaque: RAMState pointer
3619e4fa064dSCédric Le Goater  * @errp: pointer to Error*, to store an error if it happens.
3620f265e0e4SJuan Quintela  */
3621e4fa064dSCédric Le Goater static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp)
3622f265e0e4SJuan Quintela {
3623f265e0e4SJuan Quintela     xbzrle_load_setup();
3624f9494614SAlexey Perevalov     ramblock_recv_map_init();
362513af18f2SZhang Chen 
3626f265e0e4SJuan Quintela     return 0;
3627f265e0e4SJuan Quintela }
3628f265e0e4SJuan Quintela 
3629f265e0e4SJuan Quintela static int ram_load_cleanup(void *opaque)
3630f265e0e4SJuan Quintela {
3631f9494614SAlexey Perevalov     RAMBlock *rb;
363256eb90afSJunyan He 
3633fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3634bd108a44SBeata Michalska         qemu_ram_block_writeback(rb);
363556eb90afSJunyan He     }
363656eb90afSJunyan He 
3637f265e0e4SJuan Quintela     xbzrle_load_cleanup();
3638f9494614SAlexey Perevalov 
3639fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3640f9494614SAlexey Perevalov         g_free(rb->receivedmap);
3641f9494614SAlexey Perevalov         rb->receivedmap = NULL;
3642f9494614SAlexey Perevalov     }
364313af18f2SZhang Chen 
3644f265e0e4SJuan Quintela     return 0;
3645f265e0e4SJuan Quintela }
3646f265e0e4SJuan Quintela 
3647f265e0e4SJuan Quintela /**
36483d0684b2SJuan Quintela  * ram_postcopy_incoming_init: allocate postcopy data structures
36493d0684b2SJuan Quintela  *
36503d0684b2SJuan Quintela  * Returns 0 for success and negative if there was one error
36513d0684b2SJuan Quintela  *
36523d0684b2SJuan Quintela  * @mis: current migration incoming state
36533d0684b2SJuan Quintela  *
36543d0684b2SJuan Quintela  * Allocate data structures etc needed by incoming migration with
36553d0684b2SJuan Quintela  * postcopy-ram. postcopy-ram's similarly names
36563d0684b2SJuan Quintela  * postcopy-ram. postcopy-ram's similarly named
36571caddf8aSDr. David Alan Gilbert  */
36581caddf8aSDr. David Alan Gilbert int ram_postcopy_incoming_init(MigrationIncomingState *mis)
36591caddf8aSDr. David Alan Gilbert {
3660c136180cSDavid Hildenbrand     return postcopy_ram_incoming_init(mis);
36611caddf8aSDr. David Alan Gilbert }
36621caddf8aSDr. David Alan Gilbert 
36633d0684b2SJuan Quintela /**
36643d0684b2SJuan Quintela  * ram_load_postcopy: load a page in postcopy case
36653d0684b2SJuan Quintela  *
36663d0684b2SJuan Quintela  * Returns 0 for success or -errno in case of error
36673d0684b2SJuan Quintela  *
3668a7180877SDr. David Alan Gilbert  * Called in postcopy mode by ram_load().
3669a7180877SDr. David Alan Gilbert  * rcu_read_lock is taken prior to this being called.
36703d0684b2SJuan Quintela  *
36713d0684b2SJuan Quintela  * @f: QEMUFile where to send the data
367236f62f11SPeter Xu  * @channel: the channel to use for loading
3673a7180877SDr. David Alan Gilbert  */
367436f62f11SPeter Xu int ram_load_postcopy(QEMUFile *f, int channel)
3675a7180877SDr. David Alan Gilbert {
3676a7180877SDr. David Alan Gilbert     int flags = 0, ret = 0;
3677a7180877SDr. David Alan Gilbert     bool place_needed = false;
36781aa83678SPeter Xu     bool matches_target_page_size = false;
3679a7180877SDr. David Alan Gilbert     MigrationIncomingState *mis = migration_incoming_get_current();
368036f62f11SPeter Xu     PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[channel];
3681a7180877SDr. David Alan Gilbert 
3682a7180877SDr. David Alan Gilbert     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3683a7180877SDr. David Alan Gilbert         ram_addr_t addr;
3684a7180877SDr. David Alan Gilbert         void *page_buffer = NULL;
3685a7180877SDr. David Alan Gilbert         void *place_source = NULL;
3686df9ff5e1SDr. David Alan Gilbert         RAMBlock *block = NULL;
3687a7180877SDr. David Alan Gilbert         uint8_t ch;
3688a7180877SDr. David Alan Gilbert 
3689a7180877SDr. David Alan Gilbert         addr = qemu_get_be64(f);
36907a9ddfbfSPeter Xu 
36917a9ddfbfSPeter Xu         /*
36927a9ddfbfSPeter Xu          * If qemu file error, we should stop here, and then "addr"
36937a9ddfbfSPeter Xu          * may be invalid
36947a9ddfbfSPeter Xu          */
36957a9ddfbfSPeter Xu         ret = qemu_file_get_error(f);
36967a9ddfbfSPeter Xu         if (ret) {
36977a9ddfbfSPeter Xu             break;
36987a9ddfbfSPeter Xu         }
36997a9ddfbfSPeter Xu 
3700a7180877SDr. David Alan Gilbert         flags = addr & ~TARGET_PAGE_MASK;
3701a7180877SDr. David Alan Gilbert         addr &= TARGET_PAGE_MASK;
3702a7180877SDr. David Alan Gilbert 
370336f62f11SPeter Xu         trace_ram_load_postcopy_loop(channel, (uint64_t)addr, flags);
37040222111aSFabiano Rosas         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
3705c01b16edSPeter Xu             block = ram_block_from_stream(mis, f, flags, channel);
37066a23f639SDavid Hildenbrand             if (!block) {
37076a23f639SDavid Hildenbrand                 ret = -EINVAL;
37086a23f639SDavid Hildenbrand                 break;
37096a23f639SDavid Hildenbrand             }
37104c4bad48Szhanghailiang 
3711898ba906SDavid Hildenbrand             /*
3712898ba906SDavid Hildenbrand              * Relying on used_length is racy and can result in false positives.
3713898ba906SDavid Hildenbrand              * We might place pages beyond used_length in case RAM was shrunk
3714898ba906SDavid Hildenbrand              * while in postcopy, which is fine - trying to place via
3715898ba906SDavid Hildenbrand              * UFFDIO_COPY/UFFDIO_ZEROPAGE will never segfault.
3716898ba906SDavid Hildenbrand              */
3717898ba906SDavid Hildenbrand             if (!block->host || addr >= block->postcopy_length) {
3718a7180877SDr. David Alan Gilbert                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3719a7180877SDr. David Alan Gilbert                 ret = -EINVAL;
3720a7180877SDr. David Alan Gilbert                 break;
3721a7180877SDr. David Alan Gilbert             }
372277dadc3fSPeter Xu             tmp_page->target_pages++;
37231aa83678SPeter Xu             matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
3724a7180877SDr. David Alan Gilbert             /*
372528abd200SDr. David Alan Gilbert              * Postcopy requires that we place whole host pages atomically;
372628abd200SDr. David Alan Gilbert              * these may be huge pages for RAMBlocks that are backed by
372728abd200SDr. David Alan Gilbert              * hugetlbfs.
3728a7180877SDr. David Alan Gilbert              * To make it atomic, the data is read into a temporary page
3729a7180877SDr. David Alan Gilbert              * that's moved into place later.
3730a7180877SDr. David Alan Gilbert              * The migration protocol uses possibly smaller target pages;
3731a7180877SDr. David Alan Gilbert              * however, the source ensures it always sends all the components
373291ba442fSWei Yang              * of a host page in one chunk.
3733a7180877SDr. David Alan Gilbert              */
373477dadc3fSPeter Xu             page_buffer = tmp_page->tmp_huge_page +
37356a23f639SDavid Hildenbrand                           host_page_offset_from_ram_block_offset(block, addr);
37366a23f639SDavid Hildenbrand             /* If all TP are zero then we can optimise the place */
373777dadc3fSPeter Xu             if (tmp_page->target_pages == 1) {
373877dadc3fSPeter Xu                 tmp_page->host_addr =
373977dadc3fSPeter Xu                     host_page_from_ram_block_offset(block, addr);
374077dadc3fSPeter Xu             } else if (tmp_page->host_addr !=
374177dadc3fSPeter Xu                        host_page_from_ram_block_offset(block, addr)) {
3742c53b7ddcSDr. David Alan Gilbert                 /* not the 1st TP within the HP */
374336f62f11SPeter Xu                 error_report("Non-same host page detected on channel %d: "
3744cfc7dc8aSPeter Xu                              "Target host page %p, received host page %p "
3745cfc7dc8aSPeter Xu                              "(rb %s offset 0x"RAM_ADDR_FMT" target_pages %d)",
374636f62f11SPeter Xu                              channel, tmp_page->host_addr,
3747cfc7dc8aSPeter Xu                              host_page_from_ram_block_offset(block, addr),
3748cfc7dc8aSPeter Xu                              block->idstr, addr, tmp_page->target_pages);
3749c53b7ddcSDr. David Alan Gilbert                 ret = -EINVAL;
3750c53b7ddcSDr. David Alan Gilbert                 break;
3751a7180877SDr. David Alan Gilbert             }
3752c53b7ddcSDr. David Alan Gilbert 
3753a7180877SDr. David Alan Gilbert             /*
3754a7180877SDr. David Alan Gilbert              * If it's the last part of a host page then we place the host
3755a7180877SDr. David Alan Gilbert              * page
3756a7180877SDr. David Alan Gilbert              */
375777dadc3fSPeter Xu             if (tmp_page->target_pages ==
375877dadc3fSPeter Xu                 (block->page_size / TARGET_PAGE_SIZE)) {
37594cbb3c63SWei Yang                 place_needed = true;
37604cbb3c63SWei Yang             }
376177dadc3fSPeter Xu             place_source = tmp_page->tmp_huge_page;
3762a7180877SDr. David Alan Gilbert         }
3763a7180877SDr. David Alan Gilbert 
3764a7180877SDr. David Alan Gilbert         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3765bb890ed5SJuan Quintela         case RAM_SAVE_FLAG_ZERO:
3766a7180877SDr. David Alan Gilbert             ch = qemu_get_byte(f);
3767413d64feSJuan Quintela             if (ch != 0) {
3768413d64feSJuan Quintela                 error_report("Found a zero page with value %d", ch);
3769413d64feSJuan Quintela                 ret = -EINVAL;
3770413d64feSJuan Quintela                 break;
3771413d64feSJuan Quintela             }
37722e36bc1bSWei Yang             /*
37732e36bc1bSWei Yang              * We can skip setting page_buffer when
37742e36bc1bSWei Yang              * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
37752e36bc1bSWei Yang              */
3776413d64feSJuan Quintela             if (!matches_target_page_size) {
3777a7180877SDr. David Alan Gilbert                 memset(page_buffer, ch, TARGET_PAGE_SIZE);
37782e36bc1bSWei Yang             }
3779a7180877SDr. David Alan Gilbert             break;
3780a7180877SDr. David Alan Gilbert 
3781a7180877SDr. David Alan Gilbert         case RAM_SAVE_FLAG_PAGE:
378277dadc3fSPeter Xu             tmp_page->all_zero = false;
37831aa83678SPeter Xu             if (!matches_target_page_size) {
37841aa83678SPeter Xu                 /* For huge pages, we always use temporary buffer */
3785a7180877SDr. David Alan Gilbert                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3786a7180877SDr. David Alan Gilbert             } else {
37871aa83678SPeter Xu                 /*
37881aa83678SPeter Xu                  * For small pages that matches target page size, we
37891aa83678SPeter Xu                  * avoid the qemu_file copy.  Instead we directly use
37901aa83678SPeter Xu                  * the buffer of QEMUFile to place the page.  Note: we
37911aa83678SPeter Xu                  * cannot do any QEMUFile operation before using that
37921aa83678SPeter Xu                  * buffer to make sure the buffer is valid when
37931aa83678SPeter Xu                  * placing the page.
3794a7180877SDr. David Alan Gilbert                  */
3795a7180877SDr. David Alan Gilbert                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3796a7180877SDr. David Alan Gilbert                                          TARGET_PAGE_SIZE);
3797a7180877SDr. David Alan Gilbert             }
3798a7180877SDr. David Alan Gilbert             break;
3799294e5a40SJuan Quintela         case RAM_SAVE_FLAG_MULTIFD_FLUSH:
3800294e5a40SJuan Quintela             multifd_recv_sync_main();
3801294e5a40SJuan Quintela             break;
3802a7180877SDr. David Alan Gilbert         case RAM_SAVE_FLAG_EOS:
3803a7180877SDr. David Alan Gilbert             /* normal exit */
3804d4f34485SJuan Quintela             if (migrate_multifd() &&
3805d4f34485SJuan Quintela                 migrate_multifd_flush_after_each_section()) {
38066df264acSJuan Quintela                 multifd_recv_sync_main();
3807b05292c2SJuan Quintela             }
3808a7180877SDr. David Alan Gilbert             break;
3809a7180877SDr. David Alan Gilbert         default:
381029fccadeSBihong Yu             error_report("Unknown combination of migration flags: 0x%x"
3811a7180877SDr. David Alan Gilbert                          " (postcopy mode)", flags);
3812a7180877SDr. David Alan Gilbert             ret = -EINVAL;
38137a9ddfbfSPeter Xu             break;
3814a7180877SDr. David Alan Gilbert         }
3815a7180877SDr. David Alan Gilbert 
38167a9ddfbfSPeter Xu         /* Detect for any possible file errors */
38177a9ddfbfSPeter Xu         if (!ret && qemu_file_get_error(f)) {
38187a9ddfbfSPeter Xu             ret = qemu_file_get_error(f);
38197a9ddfbfSPeter Xu         }
38207a9ddfbfSPeter Xu 
38217a9ddfbfSPeter Xu         if (!ret && place_needed) {
382277dadc3fSPeter Xu             if (tmp_page->all_zero) {
382377dadc3fSPeter Xu                 ret = postcopy_place_page_zero(mis, tmp_page->host_addr, block);
3824a7180877SDr. David Alan Gilbert             } else {
382577dadc3fSPeter Xu                 ret = postcopy_place_page(mis, tmp_page->host_addr,
382677dadc3fSPeter Xu                                           place_source, block);
3827a7180877SDr. David Alan Gilbert             }
3828ddf35bdfSDavid Hildenbrand             place_needed = false;
382977dadc3fSPeter Xu             postcopy_temp_page_reset(tmp_page);
3830a7180877SDr. David Alan Gilbert         }
3831a7180877SDr. David Alan Gilbert     }
3832a7180877SDr. David Alan Gilbert 
3833a7180877SDr. David Alan Gilbert     return ret;
3834a7180877SDr. David Alan Gilbert }
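
/*
 * Worked example of the host-page assembly in ram_load_postcopy(), assuming
 * a 4 KiB TARGET_PAGE_SIZE and a RAMBlock backed by 2 MiB hugetlbfs pages:
 * block->page_size / TARGET_PAGE_SIZE = 512, so 512 consecutive target pages
 * are accumulated in tmp_page->tmp_huge_page and only the 512th one sets
 * place_needed, triggering postcopy_place_page() (or
 * postcopy_place_page_zero() when all of them were zero) for the whole
 * host page at once.
 */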
3835a7180877SDr. David Alan Gilbert 
3836acab30b8SDaniel Henrique Barboza static bool postcopy_is_running(void)
3837acab30b8SDaniel Henrique Barboza {
3838acab30b8SDaniel Henrique Barboza     PostcopyState ps = postcopy_state_get();
3839acab30b8SDaniel Henrique Barboza     return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3840acab30b8SDaniel Henrique Barboza }
3841acab30b8SDaniel Henrique Barboza 
3842e6f4aa18SZhang Chen /*
3843e6f4aa18SZhang Chen  * Flush the content of the RAM cache into the SVM's memory.
3844e6f4aa18SZhang Chen  * Only flush pages that have been dirtied by the PVM, the SVM, or both.
3845e6f4aa18SZhang Chen  */
384624fa16f8SLukas Straub void colo_flush_ram_cache(void)
3847e6f4aa18SZhang Chen {
3848e6f4aa18SZhang Chen     RAMBlock *block = NULL;
3849e6f4aa18SZhang Chen     void *dst_host;
3850e6f4aa18SZhang Chen     void *src_host;
3851e6f4aa18SZhang Chen     unsigned long offset = 0;
3852e6f4aa18SZhang Chen 
38531e493be5SGavin Shan     memory_global_dirty_log_sync(false);
38549d638407SLukas Straub     qemu_mutex_lock(&ram_state->bitmap_mutex);
385589ac5a1dSDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
3856fbd162e6SYury Kotov         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
38577a3e9571SWei Yang             ramblock_sync_dirty_bitmap(ram_state, block);
3858d1955d22Szhanghailiang         }
385989ac5a1dSDr. David Alan Gilbert     }
3860d1955d22Szhanghailiang 
3861e6f4aa18SZhang Chen     trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
386289ac5a1dSDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
3863e6f4aa18SZhang Chen         block = QLIST_FIRST_RCU(&ram_list.blocks);
3864e6f4aa18SZhang Chen 
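        /*
         * Walk all RAM blocks, locating runs of contiguous dirty pages
         * with colo_bitmap_find_dirty().  Each run is cleared in the
         * dirty bitmap and copied from colo_cache back into the SVM's
         * memory with a single memcpy().
         */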
3865e6f4aa18SZhang Chen         while (block) {
3866a6a83cefSRao, Lei             unsigned long num = 0;
3867e6f4aa18SZhang Chen 
3868a6a83cefSRao, Lei             offset = colo_bitmap_find_dirty(ram_state, block, offset, &num);
3869542147f4SDavid Hildenbrand             if (!offset_in_ramblock(block,
3870542147f4SDavid Hildenbrand                                     ((ram_addr_t)offset) << TARGET_PAGE_BITS)) {
3871e6f4aa18SZhang Chen                 offset = 0;
3872a6a83cefSRao, Lei                 num = 0;
3873e6f4aa18SZhang Chen                 block = QLIST_NEXT_RCU(block, next);
3874e6f4aa18SZhang Chen             } else {
3875a6a83cefSRao, Lei                 unsigned long i = 0;
3876a6a83cefSRao, Lei 
3877a6a83cefSRao, Lei                 for (i = 0; i < num; i++) {
3878a6a83cefSRao, Lei                     migration_bitmap_clear_dirty(ram_state, block, offset + i);
3879a6a83cefSRao, Lei                 }
38808bba004cSAlexey Romko                 dst_host = block->host
38818bba004cSAlexey Romko                          + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
38828bba004cSAlexey Romko                 src_host = block->colo_cache
38838bba004cSAlexey Romko                          + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3884a6a83cefSRao, Lei                 memcpy(dst_host, src_host, TARGET_PAGE_SIZE * num);
3885a6a83cefSRao, Lei                 offset += num;
3886e6f4aa18SZhang Chen             }
3887e6f4aa18SZhang Chen         }
388889ac5a1dSDr. David Alan Gilbert     }
38899d638407SLukas Straub     qemu_mutex_unlock(&ram_state->bitmap_mutex);
3890e6f4aa18SZhang Chen     trace_colo_flush_ram_cache_end();
3891e6f4aa18SZhang Chen }
3892e6f4aa18SZhang Chen 
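/*
 * Hand one chunk of a mapped-ram file to the multifd receive code: fill
 * in the shared MultiFDRecvData descriptor and kick multifd_recv().
 * Returns the number of bytes queued for reading, or 0 on failure.
 */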
3893a49d15a3SFabiano Rosas static size_t ram_load_multifd_pages(void *host_addr, size_t size,
3894a49d15a3SFabiano Rosas                                      uint64_t offset)
3895a49d15a3SFabiano Rosas {
3896a49d15a3SFabiano Rosas     MultiFDRecvData *data = multifd_get_recv_data();
3897a49d15a3SFabiano Rosas 
3898a49d15a3SFabiano Rosas     data->opaque = host_addr;
3899a49d15a3SFabiano Rosas     data->file_offset = offset;
3900a49d15a3SFabiano Rosas     data->size = size;
3901a49d15a3SFabiano Rosas 
3902a49d15a3SFabiano Rosas     if (!multifd_recv()) {
3903a49d15a3SFabiano Rosas         return 0;
3904a49d15a3SFabiano Rosas     }
3905a49d15a3SFabiano Rosas 
3906a49d15a3SFabiano Rosas     return size;
3907a49d15a3SFabiano Rosas }
3908a49d15a3SFabiano Rosas 
39092f6b8826SFabiano Rosas static bool read_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block,
39102f6b8826SFabiano Rosas                                      long num_pages, unsigned long *bitmap,
39112f6b8826SFabiano Rosas                                      Error **errp)
39122f6b8826SFabiano Rosas {
39132f6b8826SFabiano Rosas     ERRP_GUARD();
39142f6b8826SFabiano Rosas     unsigned long set_bit_idx, clear_bit_idx;
39152f6b8826SFabiano Rosas     ram_addr_t offset;
39162f6b8826SFabiano Rosas     void *host;
39172f6b8826SFabiano Rosas     size_t read, unread, size;
39182f6b8826SFabiano Rosas 
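    /*
     * Walk the dirty bitmap as runs of consecutive set bits.  Each run
     * [set_bit_idx, clear_bit_idx) describes pages present in the file
     * and is read in chunks of at most MAPPED_RAM_LOAD_BUF_SIZE bytes,
     * either through the multifd threads or directly with
     * qemu_get_buffer_at().
     */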
39192f6b8826SFabiano Rosas     for (set_bit_idx = find_first_bit(bitmap, num_pages);
39202f6b8826SFabiano Rosas          set_bit_idx < num_pages;
39212f6b8826SFabiano Rosas          set_bit_idx = find_next_bit(bitmap, num_pages, clear_bit_idx + 1)) {
39222f6b8826SFabiano Rosas 
39232f6b8826SFabiano Rosas         clear_bit_idx = find_next_zero_bit(bitmap, num_pages, set_bit_idx + 1);
39242f6b8826SFabiano Rosas 
39252f6b8826SFabiano Rosas         unread = TARGET_PAGE_SIZE * (clear_bit_idx - set_bit_idx);
39262f6b8826SFabiano Rosas         offset = set_bit_idx << TARGET_PAGE_BITS;
39272f6b8826SFabiano Rosas 
39282f6b8826SFabiano Rosas         while (unread > 0) {
39292f6b8826SFabiano Rosas             host = host_from_ram_block_offset(block, offset);
39302f6b8826SFabiano Rosas             if (!host) {
39312f6b8826SFabiano Rosas                 error_setg(errp, "page outside of ramblock %s range",
39322f6b8826SFabiano Rosas                            block->idstr);
39332f6b8826SFabiano Rosas                 return false;
39342f6b8826SFabiano Rosas             }
39352f6b8826SFabiano Rosas 
39362f6b8826SFabiano Rosas             size = MIN(unread, MAPPED_RAM_LOAD_BUF_SIZE);
39372f6b8826SFabiano Rosas 
3938a49d15a3SFabiano Rosas             if (migrate_multifd()) {
3939a49d15a3SFabiano Rosas                 read = ram_load_multifd_pages(host, size,
3940a49d15a3SFabiano Rosas                                               block->pages_offset + offset);
3941a49d15a3SFabiano Rosas             } else {
39422f6b8826SFabiano Rosas                 read = qemu_get_buffer_at(f, host, size,
39432f6b8826SFabiano Rosas                                           block->pages_offset + offset);
3944a49d15a3SFabiano Rosas             }
3945a49d15a3SFabiano Rosas 
39462f6b8826SFabiano Rosas             if (!read) {
39472f6b8826SFabiano Rosas                 goto err;
39482f6b8826SFabiano Rosas             }
39492f6b8826SFabiano Rosas             offset += read;
39502f6b8826SFabiano Rosas             unread -= read;
39512f6b8826SFabiano Rosas         }
39522f6b8826SFabiano Rosas     }
39532f6b8826SFabiano Rosas 
39542f6b8826SFabiano Rosas     return true;
39552f6b8826SFabiano Rosas 
39562f6b8826SFabiano Rosas err:
39572f6b8826SFabiano Rosas     qemu_file_get_error_obj(f, errp);
39582f6b8826SFabiano Rosas     error_prepend(errp, "(%s) failed to read page " RAM_ADDR_FMT
39592f6b8826SFabiano Rosas                   " from file offset %" PRIx64 ": ", block->idstr, offset,
39602f6b8826SFabiano Rosas                   block->pages_offset + offset);
39612f6b8826SFabiano Rosas     return false;
39622f6b8826SFabiano Rosas }
39632f6b8826SFabiano Rosas 
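/*
 * Mapped-ram counterpart of the per-block parsing in parse_ramblock():
 * read the block's mapped-ram header to learn where its dirty bitmap and
 * pages region live in the file, sanity-check the alignment of the pages
 * region, load the bitmap and read every page marked in it.  Errors are
 * returned through @errp.
 */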
39642f6b8826SFabiano Rosas static void parse_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block,
39652f6b8826SFabiano Rosas                                       ram_addr_t length, Error **errp)
39662f6b8826SFabiano Rosas {
39672f6b8826SFabiano Rosas     g_autofree unsigned long *bitmap = NULL;
39682f6b8826SFabiano Rosas     MappedRamHeader header;
39692f6b8826SFabiano Rosas     size_t bitmap_size;
39702f6b8826SFabiano Rosas     long num_pages;
39712f6b8826SFabiano Rosas 
39722f6b8826SFabiano Rosas     if (!mapped_ram_read_header(f, &header, errp)) {
39732f6b8826SFabiano Rosas         return;
39742f6b8826SFabiano Rosas     }
39752f6b8826SFabiano Rosas 
39762f6b8826SFabiano Rosas     block->pages_offset = header.pages_offset;
39772f6b8826SFabiano Rosas 
39782f6b8826SFabiano Rosas     /*
39792f6b8826SFabiano Rosas      * Check the alignment of the file region that contains pages. We
39802f6b8826SFabiano Rosas      * don't enforce MAPPED_RAM_FILE_OFFSET_ALIGNMENT to allow that
39812f6b8826SFabiano Rosas      * value to change in the future. Do only a sanity check with page
39822f6b8826SFabiano Rosas      * size alignment.
39832f6b8826SFabiano Rosas      */
39842f6b8826SFabiano Rosas     if (!QEMU_IS_ALIGNED(block->pages_offset, TARGET_PAGE_SIZE)) {
39852f6b8826SFabiano Rosas         error_setg(errp,
39862f6b8826SFabiano Rosas                    "Error reading ramblock %s pages, region has bad alignment",
39872f6b8826SFabiano Rosas                    block->idstr);
39882f6b8826SFabiano Rosas         return;
39892f6b8826SFabiano Rosas     }
39902f6b8826SFabiano Rosas 
39912f6b8826SFabiano Rosas     num_pages = length / header.page_size;
39922f6b8826SFabiano Rosas     bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long);
39932f6b8826SFabiano Rosas 
39942f6b8826SFabiano Rosas     bitmap = g_malloc0(bitmap_size);
39952f6b8826SFabiano Rosas     if (qemu_get_buffer_at(f, (uint8_t *)bitmap, bitmap_size,
39962f6b8826SFabiano Rosas                            header.bitmap_offset) != bitmap_size) {
39972f6b8826SFabiano Rosas         error_setg(errp, "Error reading dirty bitmap");
39982f6b8826SFabiano Rosas         return;
39992f6b8826SFabiano Rosas     }
40002f6b8826SFabiano Rosas 
40012f6b8826SFabiano Rosas     if (!read_ramblock_mapped_ram(f, block, num_pages, bitmap, errp)) {
40022f6b8826SFabiano Rosas         return;
40032f6b8826SFabiano Rosas     }
40042f6b8826SFabiano Rosas 
40052f6b8826SFabiano Rosas     /* Skip pages array */
40062f6b8826SFabiano Rosas     qemu_set_offset(f, block->pages_offset + length, SEEK_SET);
40072f6b8826SFabiano Rosas 
40082f6b8826SFabiano Rosas     return;
40092f6b8826SFabiano Rosas }
40102f6b8826SFabiano Rosas 
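/*
 * Parse the per-RAMBlock fields of a RAM_SAVE_FLAG_MEM_SIZE record for
 * @block: resize the block if the advertised length differs, check the
 * huge page size when postcopy has been advised, check the GPA when
 * ignore-shared is in use, and let RDMA handle its block notification.
 * Returns 0 on success or a negative errno.
 */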
40112f5ced5bSNikolay Borisov static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)
40122f5ced5bSNikolay Borisov {
40132f5ced5bSNikolay Borisov     int ret = 0;
40142f5ced5bSNikolay Borisov     /* ADVISE is earlier, it shows the source has the postcopy capability on */
40152f5ced5bSNikolay Borisov     bool postcopy_advised = migration_incoming_postcopy_advised();
40165d220369SRichard Henderson     int max_hg_page_size;
40172f6b8826SFabiano Rosas     Error *local_err = NULL;
40182f5ced5bSNikolay Borisov 
40192f5ced5bSNikolay Borisov     assert(block);
40202f5ced5bSNikolay Borisov 
40212f6b8826SFabiano Rosas     if (migrate_mapped_ram()) {
40222f6b8826SFabiano Rosas         parse_ramblock_mapped_ram(f, block, length, &local_err);
40232f6b8826SFabiano Rosas         if (local_err) {
40242f6b8826SFabiano Rosas             error_report_err(local_err);
40252f6b8826SFabiano Rosas             return -EINVAL;
40262f6b8826SFabiano Rosas         }
40272f6b8826SFabiano Rosas         return 0;
40282f6b8826SFabiano Rosas     }
40292f6b8826SFabiano Rosas 
40302f5ced5bSNikolay Borisov     if (!qemu_ram_is_migratable(block)) {
40312f5ced5bSNikolay Borisov         error_report("block %s should not be migrated!", block->idstr);
40322f5ced5bSNikolay Borisov         return -EINVAL;
40332f5ced5bSNikolay Borisov     }
40342f5ced5bSNikolay Borisov 
40352f5ced5bSNikolay Borisov     if (length != block->used_length) {
40362f5ced5bSNikolay Borisov         ret = qemu_ram_resize(block, length, &local_err);
40372f5ced5bSNikolay Borisov         if (local_err) {
40382f5ced5bSNikolay Borisov             error_report_err(local_err);
40392c36076aSPeter Xu             return ret;
40402f5ced5bSNikolay Borisov         }
40412f5ced5bSNikolay Borisov     }
40425d220369SRichard Henderson 
40435d220369SRichard Henderson     /*
40445d220369SRichard Henderson      * ??? Mirrors the previous value of qemu_host_page_size,
40455d220369SRichard Henderson      * but is this really what was intended for the migration?
40465d220369SRichard Henderson      */
40475d220369SRichard Henderson     max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE);
40485d220369SRichard Henderson 
40492f5ced5bSNikolay Borisov     /* For postcopy we need to check hugepage sizes match */
40502f5ced5bSNikolay Borisov     if (postcopy_advised && migrate_postcopy_ram() &&
40515d220369SRichard Henderson         block->page_size != max_hg_page_size) {
40522f5ced5bSNikolay Borisov         uint64_t remote_page_size = qemu_get_be64(f);
40532f5ced5bSNikolay Borisov         if (remote_page_size != block->page_size) {
40542f5ced5bSNikolay Borisov             error_report("Mismatched RAM page size %s "
40552f5ced5bSNikolay Borisov                          "(local) %zd != %" PRId64, block->idstr,
40562f5ced5bSNikolay Borisov                          block->page_size, remote_page_size);
40572c36076aSPeter Xu             return -EINVAL;
40582f5ced5bSNikolay Borisov         }
40592f5ced5bSNikolay Borisov     }
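    /*
     * With ignore-shared, the source sends the block's GPA so that both
     * sides can verify an ignored block is mapped at the same address.
     */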
40602f5ced5bSNikolay Borisov     if (migrate_ignore_shared()) {
40612f5ced5bSNikolay Borisov         hwaddr addr = qemu_get_be64(f);
40622f5ced5bSNikolay Borisov         if (migrate_ram_is_ignored(block) &&
40632f5ced5bSNikolay Borisov             block->mr->addr != addr) {
40642f5ced5bSNikolay Borisov             error_report("Mismatched GPAs for block %s "
40652f5ced5bSNikolay Borisov                          "%" PRId64 " != %" PRId64, block->idstr,
40662f5ced5bSNikolay Borisov                          (uint64_t)addr, (uint64_t)block->mr->addr);
40672c36076aSPeter Xu             return -EINVAL;
40682f5ced5bSNikolay Borisov         }
40692f5ced5bSNikolay Borisov     }
40702f5ced5bSNikolay Borisov     ret = rdma_block_notification_handle(f, block->idstr);
40712f5ced5bSNikolay Borisov     if (ret < 0) {
40722f5ced5bSNikolay Borisov         qemu_file_set_error(f, ret);
40732f5ced5bSNikolay Borisov     }
40742f5ced5bSNikolay Borisov 
40752f5ced5bSNikolay Borisov     return ret;
40762f5ced5bSNikolay Borisov }
40772f5ced5bSNikolay Borisov 
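/*
 * Parse the RAM block list of a RAM_SAVE_FLAG_MEM_SIZE record.  For each
 * block the stream carries:
 *
 *   byte   length of the block id string
 *   bytes  block id (not NUL-terminated on the wire)
 *   be64   used length of the block
 *   ...    optional per-block fields, consumed by parse_ramblock()
 *
 * until @total_ram_bytes worth of block lengths has been accounted for.
 */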
40782f5ced5bSNikolay Borisov static int parse_ramblocks(QEMUFile *f, ram_addr_t total_ram_bytes)
40792f5ced5bSNikolay Borisov {
40802f5ced5bSNikolay Borisov     int ret = 0;
40812f5ced5bSNikolay Borisov 
40822f5ced5bSNikolay Borisov     /* Synchronize RAM block list */
40832f5ced5bSNikolay Borisov     while (!ret && total_ram_bytes) {
40842f5ced5bSNikolay Borisov         RAMBlock *block;
40852f5ced5bSNikolay Borisov         char id[256];
40862f5ced5bSNikolay Borisov         ram_addr_t length;
40872f5ced5bSNikolay Borisov         int len = qemu_get_byte(f);
40882f5ced5bSNikolay Borisov 
40892f5ced5bSNikolay Borisov         qemu_get_buffer(f, (uint8_t *)id, len);
40902f5ced5bSNikolay Borisov         id[len] = 0;
40912f5ced5bSNikolay Borisov         length = qemu_get_be64(f);
40922f5ced5bSNikolay Borisov 
40932f5ced5bSNikolay Borisov         block = qemu_ram_block_by_name(id);
40942f5ced5bSNikolay Borisov         if (block) {
40952f5ced5bSNikolay Borisov             ret = parse_ramblock(f, block, length);
40962f5ced5bSNikolay Borisov         } else {
40972f5ced5bSNikolay Borisov             error_report("Unknown ramblock \"%s\", cannot accept "
40982f5ced5bSNikolay Borisov                          "migration", id);
40992f5ced5bSNikolay Borisov             ret = -EINVAL;
41002f5ced5bSNikolay Borisov         }
41012f5ced5bSNikolay Borisov         total_ram_bytes -= length;
41022f5ced5bSNikolay Borisov     }
41032f5ced5bSNikolay Borisov 
41042f5ced5bSNikolay Borisov     return ret;
41052f5ced5bSNikolay Borisov }
41062f5ced5bSNikolay Borisov 
410710da4a36SWei Yang /**
410810da4a36SWei Yang  * ram_load_precopy: load pages in precopy case
410910da4a36SWei Yang  *
411010da4a36SWei Yang  * Returns 0 for success or -errno in case of error
411110da4a36SWei Yang  *
411210da4a36SWei Yang  * Called in precopy mode by ram_load().
411310da4a36SWei Yang  * rcu_read_lock is taken prior to this being called.
411410da4a36SWei Yang  *
411510da4a36SWei Yang  * @f: QEMUFile to load the data from
4116a7180877SDr. David Alan Gilbert  */
411710da4a36SWei Yang static int ram_load_precopy(QEMUFile *f)
411810da4a36SWei Yang {
4119755e8d7cSPeter Xu     MigrationIncomingState *mis = migration_incoming_get_current();
41200222111aSFabiano Rosas     int flags = 0, ret = 0, invalid_flags = 0, i = 0;
4121a7180877SDr. David Alan Gilbert 
41229d01778aSFabiano Rosas     if (migrate_mapped_ram()) {
41239d01778aSFabiano Rosas         invalid_flags |= (RAM_SAVE_FLAG_HOOK | RAM_SAVE_FLAG_MULTIFD_FLUSH |
41249d01778aSFabiano Rosas                           RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_XBZRLE |
41259d01778aSFabiano Rosas                           RAM_SAVE_FLAG_ZERO);
41269d01778aSFabiano Rosas     }
41279d01778aSFabiano Rosas 
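    /*
     * Each record starts with a be64 that encodes the page address in its
     * upper bits and the RAM_SAVE_FLAG_* bits in the low, sub-page bits;
     * the loop below splits the two and dispatches on the flags.
     */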
412810da4a36SWei Yang     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
41292f5ced5bSNikolay Borisov         ram_addr_t addr;
41300393031aSzhanghailiang         void *host = NULL, *host_bak = NULL;
413156e93d26SJuan Quintela         uint8_t ch;
413256e93d26SJuan Quintela 
4133e65cec5eSYury Kotov         /*
4134e65cec5eSYury Kotov          * Yield periodically to let the main loop run, but an iteration of
4135e65cec5eSYury Kotov          * the main loop is expensive, so only do it once every so many iterations.
4136e65cec5eSYury Kotov          */
4137e65cec5eSYury Kotov         if ((i & 32767) == 0 && qemu_in_coroutine()) {
4138e65cec5eSYury Kotov             aio_co_schedule(qemu_get_current_aio_context(),
4139e65cec5eSYury Kotov                             qemu_coroutine_self());
4140e65cec5eSYury Kotov             qemu_coroutine_yield();
4141e65cec5eSYury Kotov         }
4142e65cec5eSYury Kotov         i++;
4143e65cec5eSYury Kotov 
414456e93d26SJuan Quintela         addr = qemu_get_be64(f);
414512ab1e4fSMaksim Davydov         ret = qemu_file_get_error(f);
414612ab1e4fSMaksim Davydov         if (ret) {
414712ab1e4fSMaksim Davydov             error_report("Getting RAM address failed");
414812ab1e4fSMaksim Davydov             break;
414912ab1e4fSMaksim Davydov         }
415012ab1e4fSMaksim Davydov 
415156e93d26SJuan Quintela         flags = addr & ~TARGET_PAGE_MASK;
415256e93d26SJuan Quintela         addr &= TARGET_PAGE_MASK;
415356e93d26SJuan Quintela 
4154edc60127SJuan Quintela         if (flags & invalid_flags) {
41559d01778aSFabiano Rosas             error_report("Unexpected RAM flags: %d", flags & invalid_flags);
41569d01778aSFabiano Rosas 
4157edc60127SJuan Quintela             ret = -EINVAL;
4158edc60127SJuan Quintela             break;
4159edc60127SJuan Quintela         }
4160edc60127SJuan Quintela 
4161bb890ed5SJuan Quintela         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
41620222111aSFabiano Rosas                      RAM_SAVE_FLAG_XBZRLE)) {
4163c01b16edSPeter Xu             RAMBlock *block = ram_block_from_stream(mis, f, flags,
4164c01b16edSPeter Xu                                                     RAM_CHANNEL_PRECOPY);
41654c4bad48Szhanghailiang 
41660393031aSzhanghailiang             host = host_from_ram_block_offset(block, addr);
416713af18f2SZhang Chen             /*
41680393031aSzhanghailiang              * After entering the COLO stage, we must not load pages
41690393031aSzhanghailiang              * into the SVM's memory directly; we put them into colo_cache first.
41700393031aSzhanghailiang              * NOTE: We need to keep a copy of the SVM's RAM in colo_cache.
41710393031aSzhanghailiang              * Previously, all of this memory was copied in the COLO preparing
41720393031aSzhanghailiang              * stage, while the VM had to be stopped, which was time-consuming.
41730393031aSzhanghailiang              * Here we optimize it by backing up every page during the
41740393031aSzhanghailiang              * migration process while COLO is enabled.  Although this slows
41750393031aSzhanghailiang              * the migration down, it clearly reduces the downtime of backing
41760393031aSzhanghailiang              * up all of the SVM's memory in the COLO preparing stage.
417713af18f2SZhang Chen              */
41780393031aSzhanghailiang             if (migration_incoming_colo_enabled()) {
417913af18f2SZhang Chen                 if (migration_incoming_in_colo_state()) {
41800393031aSzhanghailiang                     /* In COLO stage, put all pages into cache temporarily */
41818af66371Szhanghailiang                     host = colo_cache_from_block_offset(block, addr, true);
418213af18f2SZhang Chen                 } else {
41830393031aSzhanghailiang                    /*
41840393031aSzhanghailiang                     * In migration stage but before COLO stage,
41850393031aSzhanghailiang                     * Put all pages into both cache and SVM's memory.
41860393031aSzhanghailiang                     */
41878af66371Szhanghailiang                     host_bak = colo_cache_from_block_offset(block, addr, false);
41880393031aSzhanghailiang                 }
418913af18f2SZhang Chen             }
4190a776aa15SDr. David Alan Gilbert             if (!host) {
4191a776aa15SDr. David Alan Gilbert                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
4192a776aa15SDr. David Alan Gilbert                 ret = -EINVAL;
4193a776aa15SDr. David Alan Gilbert                 break;
4194a776aa15SDr. David Alan Gilbert             }
419513af18f2SZhang Chen             if (!migration_incoming_in_colo_state()) {
4196f9494614SAlexey Perevalov                 ramblock_recv_bitmap_set(block, host);
419713af18f2SZhang Chen             }
419813af18f2SZhang Chen 
41991db9d8e5SDr. David Alan Gilbert             trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
4200a776aa15SDr. David Alan Gilbert         }
4201a776aa15SDr. David Alan Gilbert 
420256e93d26SJuan Quintela         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
420356e93d26SJuan Quintela         case RAM_SAVE_FLAG_MEM_SIZE:
42042f5ced5bSNikolay Borisov             ret = parse_ramblocks(f, addr);
42051a6e217cSPeter Xu             /*
42061a6e217cSPeter Xu              * For mapped-ram migration (to a file) using multifd, we sync
42071a6e217cSPeter Xu              * once and for all here to make sure all tasks we queued to
42081a6e217cSPeter Xu              * multifd threads are completed, so that all the ramblocks
42091a6e217cSPeter Xu              * (including all the guest memory pages within) are fully
42101a6e217cSPeter Xu              * loaded after this sync returns.
42111a6e217cSPeter Xu              */
42129d01778aSFabiano Rosas             if (migrate_mapped_ram()) {
42139d01778aSFabiano Rosas                 multifd_recv_sync_main();
42149d01778aSFabiano Rosas             }
421556e93d26SJuan Quintela             break;
4216a776aa15SDr. David Alan Gilbert 
4217bb890ed5SJuan Quintela         case RAM_SAVE_FLAG_ZERO:
421856e93d26SJuan Quintela             ch = qemu_get_byte(f);
4219413d64feSJuan Quintela             if (ch != 0) {
4220413d64feSJuan Quintela                 error_report("Found a zero page with value %d", ch);
4221413d64feSJuan Quintela                 ret = -EINVAL;
4222413d64feSJuan Quintela                 break;
4223413d64feSJuan Quintela             }
42247091dabeSJuan Quintela             ram_handle_zero(host, TARGET_PAGE_SIZE);
422556e93d26SJuan Quintela             break;
4226a776aa15SDr. David Alan Gilbert 
422756e93d26SJuan Quintela         case RAM_SAVE_FLAG_PAGE:
422856e93d26SJuan Quintela             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
422956e93d26SJuan Quintela             break;
423056e93d26SJuan Quintela 
423156e93d26SJuan Quintela         case RAM_SAVE_FLAG_XBZRLE:
423256e93d26SJuan Quintela             if (load_xbzrle(f, addr, host) < 0) {
423356e93d26SJuan Quintela                 error_report("Failed to decompress XBZRLE page at "
423456e93d26SJuan Quintela                              RAM_ADDR_FMT, addr);
423556e93d26SJuan Quintela                 ret = -EINVAL;
423656e93d26SJuan Quintela                 break;
423756e93d26SJuan Quintela             }
423856e93d26SJuan Quintela             break;
4239294e5a40SJuan Quintela         case RAM_SAVE_FLAG_MULTIFD_FLUSH:
4240294e5a40SJuan Quintela             multifd_recv_sync_main();
4241294e5a40SJuan Quintela             break;
424256e93d26SJuan Quintela         case RAM_SAVE_FLAG_EOS:
424356e93d26SJuan Quintela             /* normal exit */
4244d4f34485SJuan Quintela             if (migrate_multifd() &&
42459d01778aSFabiano Rosas                 migrate_multifd_flush_after_each_section() &&
42469d01778aSFabiano Rosas                 /*
42479d01778aSFabiano Rosas                  * Mapped-ram migration flushes once and for all after
42489d01778aSFabiano Rosas                  * parsing ramblocks. Always ignore EOS for it.
42499d01778aSFabiano Rosas                  */
42509d01778aSFabiano Rosas                 !migrate_mapped_ram()) {
42516df264acSJuan Quintela                 multifd_recv_sync_main();
4252b05292c2SJuan Quintela             }
425356e93d26SJuan Quintela             break;
42545f1e7540SJuan Quintela         case RAM_SAVE_FLAG_HOOK:
4255b1b38387SJuan Quintela             ret = rdma_registration_handle(f);
4256f6d6c089SJuan Quintela             if (ret < 0) {
4257f6d6c089SJuan Quintela                 qemu_file_set_error(f, ret);
4258f6d6c089SJuan Quintela             }
42595f1e7540SJuan Quintela             break;
42605f1e7540SJuan Quintela         default:
42615f1e7540SJuan Quintela             error_report("Unknown combination of migration flags: 0x%x", flags);
426256e93d26SJuan Quintela             ret = -EINVAL;
426356e93d26SJuan Quintela         }
426456e93d26SJuan Quintela         if (!ret) {
426556e93d26SJuan Quintela             ret = qemu_file_get_error(f);
426656e93d26SJuan Quintela         }
42670393031aSzhanghailiang         if (!ret && host_bak) {
42680393031aSzhanghailiang             memcpy(host_bak, host, TARGET_PAGE_SIZE);
42690393031aSzhanghailiang         }
427056e93d26SJuan Quintela     }
427156e93d26SJuan Quintela 
427210da4a36SWei Yang     return ret;
427310da4a36SWei Yang }
427410da4a36SWei Yang 
427510da4a36SWei Yang static int ram_load(QEMUFile *f, void *opaque, int version_id)
427610da4a36SWei Yang {
427710da4a36SWei Yang     int ret = 0;
427810da4a36SWei Yang     static uint64_t seq_iter;
427910da4a36SWei Yang     /*
428010da4a36SWei Yang      * If the system is running in postcopy mode, page inserts into host
428110da4a36SWei Yang      * memory must be atomic.
428210da4a36SWei Yang      */
428310da4a36SWei Yang     bool postcopy_running = postcopy_is_running();
428410da4a36SWei Yang 
428510da4a36SWei Yang     seq_iter++;
428610da4a36SWei Yang 
428710da4a36SWei Yang     if (version_id != 4) {
428810da4a36SWei Yang         return -EINVAL;
428910da4a36SWei Yang     }
429010da4a36SWei Yang 
429110da4a36SWei Yang     /*
429210da4a36SWei Yang      * This RCU critical section can be very long running.
429310da4a36SWei Yang      * When RCU reclaims in the code start to become numerous,
429410da4a36SWei Yang      * it will be necessary to reduce the granularity of this
429510da4a36SWei Yang      * critical section.
429610da4a36SWei Yang      */
4297*b0350c51SMaciej S. Szmigiero     trace_ram_load_start();
429889ac5a1dSDr. David Alan Gilbert     WITH_RCU_READ_LOCK_GUARD() {
429910da4a36SWei Yang         if (postcopy_running) {
430036f62f11SPeter Xu             /*
430136f62f11SPeter Xu              * Note!  Here RAM_CHANNEL_PRECOPY is the precopy channel of
430236f62f11SPeter Xu              * postcopy migration; we have another RAM_CHANNEL_POSTCOPY to
430336f62f11SPeter Xu              * service fast page faults.
430436f62f11SPeter Xu              */
430536f62f11SPeter Xu             ret = ram_load_postcopy(f, RAM_CHANNEL_PRECOPY);
430610da4a36SWei Yang         } else {
430710da4a36SWei Yang             ret = ram_load_precopy(f);
430810da4a36SWei Yang         }
430989ac5a1dSDr. David Alan Gilbert     }
431055c4446bSJuan Quintela     trace_ram_load_complete(ret, seq_iter);
4311e6f4aa18SZhang Chen 
431256e93d26SJuan Quintela     return ret;
431356e93d26SJuan Quintela }
431456e93d26SJuan Quintela 
4315c6467627SVladimir Sementsov-Ogievskiy static bool ram_has_postcopy(void *opaque)
4316c6467627SVladimir Sementsov-Ogievskiy {
4317469dd51bSJunyan He     RAMBlock *rb;
4318fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
4319469dd51bSJunyan He         if (ramblock_is_pmem(rb)) {
4320469dd51bSJunyan He             info_report("Block: %s, host: %p is nvdimm memory, postcopy "
4321469dd51bSJunyan He                         "is not supported now!", rb->idstr, rb->host);
4322469dd51bSJunyan He             return false;
4323469dd51bSJunyan He         }
4324469dd51bSJunyan He     }
4325469dd51bSJunyan He 
4326c6467627SVladimir Sementsov-Ogievskiy     return migrate_postcopy_ram();
4327c6467627SVladimir Sementsov-Ogievskiy }
4328c6467627SVladimir Sementsov-Ogievskiy 
4329edd090c7SPeter Xu /* Sync all the dirty bitmap with destination VM.  */
4330edd090c7SPeter Xu static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
4331edd090c7SPeter Xu {
4332edd090c7SPeter Xu     RAMBlock *block;
4333edd090c7SPeter Xu     QEMUFile *file = s->to_dst_file;
4334edd090c7SPeter Xu 
4335edd090c7SPeter Xu     trace_ram_dirty_bitmap_sync_start();
4336edd090c7SPeter Xu 
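    /*
     * Request the received bitmap of every ramblock from the destination
     * and count the outstanding requests; ram_dirty_bitmap_reload()
     * decrements the counter as each reply arrives on the return path.
     */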
43371015ff54SPeter Xu     qatomic_set(&rs->postcopy_bmap_sync_requested, 0);
4338fbd162e6SYury Kotov     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
4339edd090c7SPeter Xu         qemu_savevm_send_recv_bitmap(file, block->idstr);
4340edd090c7SPeter Xu         trace_ram_dirty_bitmap_request(block->idstr);
43411015ff54SPeter Xu         qatomic_inc(&rs->postcopy_bmap_sync_requested);
4342edd090c7SPeter Xu     }
4343edd090c7SPeter Xu 
4344edd090c7SPeter Xu     trace_ram_dirty_bitmap_sync_wait();
4345edd090c7SPeter Xu 
4346edd090c7SPeter Xu     /* Wait until all the ramblocks' dirty bitmap synced */
43471015ff54SPeter Xu     while (qatomic_read(&rs->postcopy_bmap_sync_requested)) {
4348f8c543e8SPeter Xu         if (migration_rp_wait(s)) {
4349f8c543e8SPeter Xu             return -1;
4350f8c543e8SPeter Xu         }
4351edd090c7SPeter Xu     }
4352edd090c7SPeter Xu 
4353edd090c7SPeter Xu     trace_ram_dirty_bitmap_sync_complete();
4354edd090c7SPeter Xu 
4355edd090c7SPeter Xu     return 0;
4356edd090c7SPeter Xu }
4357edd090c7SPeter Xu 
4358a335debbSPeter Xu /*
4359a335debbSPeter Xu  * Read the received bitmap, revert it as the initial dirty bitmap.
4360a335debbSPeter Xu  * This is only used when the postcopy migration is paused but wants
4361a335debbSPeter Xu  * to resume from a middle point.
436288577f32SPeter Xu  *
436388577f32SPeter Xu  * Returns true if succeeded, false for errors.
4364a335debbSPeter Xu  */
436588577f32SPeter Xu bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp)
4366a335debbSPeter Xu {
436743044ac0SPeter Xu     /* from_dst_file is always valid because we're within rp_thread */
4368a335debbSPeter Xu     QEMUFile *file = s->rp_state.from_dst_file;
43691a36e4c9SPhilippe Mathieu-Daudé     g_autofree unsigned long *le_bitmap = NULL;
43701a36e4c9SPhilippe Mathieu-Daudé     unsigned long nbits = block->used_length >> TARGET_PAGE_BITS;
4371a725ef9fSPeter Xu     uint64_t local_size = DIV_ROUND_UP(nbits, 8);
4372a335debbSPeter Xu     uint64_t size, end_mark;
43731015ff54SPeter Xu     RAMState *rs = ram_state;
4374a335debbSPeter Xu 
4375a335debbSPeter Xu     trace_ram_dirty_bitmap_reload_begin(block->idstr);
4376a335debbSPeter Xu 
4377a335debbSPeter Xu     if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
43787aa6070dSPeter Xu         error_setg(errp, "Reload bitmap in incorrect state %s",
4379a335debbSPeter Xu                    MigrationStatus_str(s->state));
438088577f32SPeter Xu         return false;
4381a335debbSPeter Xu     }
4382a335debbSPeter Xu 
4383a335debbSPeter Xu     /*
4384a335debbSPeter Xu      * Note: see comments in ramblock_recv_bitmap_send() on why we
43853a4452d8Szhaolichang      * need the endianness conversion, and the paddings.
4386a335debbSPeter Xu      */
4387a335debbSPeter Xu     local_size = ROUND_UP(local_size, 8);
4388a335debbSPeter Xu 
4389a335debbSPeter Xu     /* Add paddings */
4390a335debbSPeter Xu     le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
4391a335debbSPeter Xu 
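    /*
     * Wire format of the reply (see ramblock_recv_bitmap_send()):
     *
     *   be64   size of the bitmap payload in bytes
     *   bytes  receive bitmap, little-endian, padded to 8-byte multiples
     *   be64   end mark (RAMBLOCK_RECV_BITMAP_ENDING)
     */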
4392a335debbSPeter Xu     size = qemu_get_be64(file);
4393a335debbSPeter Xu 
4394a335debbSPeter Xu     /* The size of the bitmap should match with our ramblock */
4395a335debbSPeter Xu     if (size != local_size) {
43967aa6070dSPeter Xu         error_setg(errp, "ramblock '%s' bitmap size mismatch (0x%"PRIx64
43977aa6070dSPeter Xu                    " != 0x%"PRIx64")", block->idstr, size, local_size);
439888577f32SPeter Xu         return false;
4399a335debbSPeter Xu     }
4400a335debbSPeter Xu 
4401a335debbSPeter Xu     size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
4402a335debbSPeter Xu     end_mark = qemu_get_be64(file);
4403a335debbSPeter Xu 
440488577f32SPeter Xu     if (qemu_file_get_error(file) || size != local_size) {
440588577f32SPeter Xu         error_setg(errp, "read bitmap failed for ramblock '%s': "
4406a335debbSPeter Xu                    "(size 0x%"PRIx64", got: 0x%"PRIx64")",
440788577f32SPeter Xu                    block->idstr, local_size, size);
440888577f32SPeter Xu         return false;
4409a335debbSPeter Xu     }
4410a335debbSPeter Xu 
4411a335debbSPeter Xu     if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
44127aa6070dSPeter Xu         error_setg(errp, "ramblock '%s' end mark incorrect: 0x%"PRIx64,
44137aa6070dSPeter Xu                    block->idstr, end_mark);
441488577f32SPeter Xu         return false;
4415a335debbSPeter Xu     }
4416a335debbSPeter Xu 
4417a335debbSPeter Xu     /*
44183a4452d8Szhaolichang      * Endianness conversion. We are during postcopy (though paused).
4419a335debbSPeter Xu      * The dirty bitmap won't change. We can directly modify it.
4420a335debbSPeter Xu      */
4421a335debbSPeter Xu     bitmap_from_le(block->bmap, le_bitmap, nbits);
4422a335debbSPeter Xu 
4423a335debbSPeter Xu     /*
4424a335debbSPeter Xu      * What we received is "received bitmap". Revert it as the initial
4425a335debbSPeter Xu      * dirty bitmap for this ramblock.
4426a335debbSPeter Xu      */
4427a335debbSPeter Xu     bitmap_complement(block->bmap, block->bmap, nbits);
4428a335debbSPeter Xu 
4429be39b4cdSDavid Hildenbrand     /* Clear dirty bits of discarded ranges that we don't want to migrate. */
4430be39b4cdSDavid Hildenbrand     ramblock_dirty_bitmap_clear_discarded_pages(block);
4431be39b4cdSDavid Hildenbrand 
4432be39b4cdSDavid Hildenbrand     /* We'll recalculate migration_dirty_pages in ram_state_resume_prepare(). */
4433a335debbSPeter Xu     trace_ram_dirty_bitmap_reload_complete(block->idstr);
4434a335debbSPeter Xu 
44351015ff54SPeter Xu     qatomic_dec(&rs->postcopy_bmap_sync_requested);
44361015ff54SPeter Xu 
4437edd090c7SPeter Xu     /*
44385e79a4bfSPeter Xu      * Successfully synced the bitmap for the current ramblock.  Always kick the
44395e79a4bfSPeter Xu      * migration thread to check whether all requested bitmaps are
44405e79a4bfSPeter Xu      * reloaded.  NOTE: it's racy to only kick when requested==0, because
44415e79a4bfSPeter Xu      * we don't know whether the migration thread may still be increasing
44425e79a4bfSPeter Xu      * it.
4443edd090c7SPeter Xu      */
44445e79a4bfSPeter Xu     migration_rp_kick(s);
4445edd090c7SPeter Xu 
444688577f32SPeter Xu     return true;
4447a335debbSPeter Xu }
4448a335debbSPeter Xu 
4449edd090c7SPeter Xu static int ram_resume_prepare(MigrationState *s, void *opaque)
4450edd090c7SPeter Xu {
4451edd090c7SPeter Xu     RAMState *rs = *(RAMState **)opaque;
445208614f34SPeter Xu     int ret;
4453edd090c7SPeter Xu 
445408614f34SPeter Xu     ret = ram_dirty_bitmap_sync_all(s, rs);
445508614f34SPeter Xu     if (ret) {
445608614f34SPeter Xu         return ret;
445708614f34SPeter Xu     }
445808614f34SPeter Xu 
445908614f34SPeter Xu     ram_state_resume_prepare(rs, s->to_dst_file);
446008614f34SPeter Xu 
446108614f34SPeter Xu     return 0;
4462edd090c7SPeter Xu }
4463edd090c7SPeter Xu 
446436f62f11SPeter Xu void postcopy_preempt_shutdown_file(MigrationState *s)
446536f62f11SPeter Xu {
446636f62f11SPeter Xu     qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
446736f62f11SPeter Xu     qemu_fflush(s->postcopy_qemufile_src);
446836f62f11SPeter Xu }
446936f62f11SPeter Xu 
447056e93d26SJuan Quintela static SaveVMHandlers savevm_ram_handlers = {
44719907e842SJuan Quintela     .save_setup = ram_save_setup,
447256e93d26SJuan Quintela     .save_live_iterate = ram_save_iterate,
4473763c906bSDr. David Alan Gilbert     .save_live_complete_postcopy = ram_save_complete,
4474a3e06c3dSDr. David Alan Gilbert     .save_live_complete_precopy = ram_save_complete,
4475c6467627SVladimir Sementsov-Ogievskiy     .has_postcopy = ram_has_postcopy,
4476c8df4a7aSJuan Quintela     .state_pending_exact = ram_state_pending_exact,
4477c8df4a7aSJuan Quintela     .state_pending_estimate = ram_state_pending_estimate,
447856e93d26SJuan Quintela     .load_state = ram_load,
4479f265e0e4SJuan Quintela     .save_cleanup = ram_save_cleanup,
4480f265e0e4SJuan Quintela     .load_setup = ram_load_setup,
4481f265e0e4SJuan Quintela     .load_cleanup = ram_load_cleanup,
4482edd090c7SPeter Xu     .resume_prepare = ram_resume_prepare,
448356e93d26SJuan Quintela };
448456e93d26SJuan Quintela 
4485c7c0e724SDavid Hildenbrand static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
4486c7c0e724SDavid Hildenbrand                                       size_t old_size, size_t new_size)
4487c7c0e724SDavid Hildenbrand {
4488cc61c703SDavid Hildenbrand     PostcopyState ps = postcopy_state_get();
4489c7c0e724SDavid Hildenbrand     ram_addr_t offset;
4490c7c0e724SDavid Hildenbrand     RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset);
4491c7c0e724SDavid Hildenbrand     Error *err = NULL;
4492c7c0e724SDavid Hildenbrand 
4493f75ed59fSDmitry Frolov     if (!rb) {
4494f75ed59fSDmitry Frolov         error_report("RAM block not found");
4495f75ed59fSDmitry Frolov         return;
4496f75ed59fSDmitry Frolov     }
4497f75ed59fSDmitry Frolov 
4498f161c88aSDavid Hildenbrand     if (migrate_ram_is_ignored(rb)) {
4499c7c0e724SDavid Hildenbrand         return;
4500c7c0e724SDavid Hildenbrand     }
4501c7c0e724SDavid Hildenbrand 
450234a8892dSPeter Xu     if (migration_is_running()) {
4503c7c0e724SDavid Hildenbrand         /*
4504c7c0e724SDavid Hildenbrand          * Precopy code on the source cannot deal with the size of RAM blocks
4505c7c0e724SDavid Hildenbrand          * changing at random points in time - especially after sending the
4506c7c0e724SDavid Hildenbrand          * RAM block sizes in the migration stream, they must no longer change.
4507c7c0e724SDavid Hildenbrand          * Abort and indicate a proper reason.
4508c7c0e724SDavid Hildenbrand          */
4509c7c0e724SDavid Hildenbrand         error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr);
4510458feccaSLaurent Vivier         migration_cancel(err);
4511c7c0e724SDavid Hildenbrand         error_free(err);
4512c7c0e724SDavid Hildenbrand     }
4513cc61c703SDavid Hildenbrand 
4514cc61c703SDavid Hildenbrand     switch (ps) {
4515cc61c703SDavid Hildenbrand     case POSTCOPY_INCOMING_ADVISE:
4516cc61c703SDavid Hildenbrand         /*
4517cc61c703SDavid Hildenbrand          * Update what ram_postcopy_incoming_init()->init_range() does at the
4518cc61c703SDavid Hildenbrand          * time postcopy was advised. Syncing RAM blocks with the source will
4519cc61c703SDavid Hildenbrand          * result in RAM resizes.
4520cc61c703SDavid Hildenbrand          */
4521cc61c703SDavid Hildenbrand         if (old_size < new_size) {
4522cc61c703SDavid Hildenbrand             if (ram_discard_range(rb->idstr, old_size, new_size - old_size)) {
4523cc61c703SDavid Hildenbrand                 error_report("RAM block '%s' discard of resized RAM failed",
4524cc61c703SDavid Hildenbrand                              rb->idstr);
4525cc61c703SDavid Hildenbrand             }
4526cc61c703SDavid Hildenbrand         }
4527898ba906SDavid Hildenbrand         rb->postcopy_length = new_size;
4528cc61c703SDavid Hildenbrand         break;
4529cc61c703SDavid Hildenbrand     case POSTCOPY_INCOMING_NONE:
4530cc61c703SDavid Hildenbrand     case POSTCOPY_INCOMING_RUNNING:
4531cc61c703SDavid Hildenbrand     case POSTCOPY_INCOMING_END:
4532cc61c703SDavid Hildenbrand         /*
4533cc61c703SDavid Hildenbrand          * Once our guest is running, postcopy no longer cares about
4534cc61c703SDavid Hildenbrand          * resizes. When growing, the new memory was not available on the
4535cc61c703SDavid Hildenbrand          * source, no handler needed.
4536cc61c703SDavid Hildenbrand          */
4537cc61c703SDavid Hildenbrand         break;
4538cc61c703SDavid Hildenbrand     default:
4539cc61c703SDavid Hildenbrand         error_report("RAM block '%s' resized during postcopy state: %d",
4540cc61c703SDavid Hildenbrand                      rb->idstr, ps);
4541cc61c703SDavid Hildenbrand         exit(-1);
4542cc61c703SDavid Hildenbrand     }
4543c7c0e724SDavid Hildenbrand }
4544c7c0e724SDavid Hildenbrand 
4545c7c0e724SDavid Hildenbrand static RAMBlockNotifier ram_mig_ram_notifier = {
4546c7c0e724SDavid Hildenbrand     .ram_block_resized = ram_mig_ram_block_resized,
4547c7c0e724SDavid Hildenbrand };
4548c7c0e724SDavid Hildenbrand 
454956e93d26SJuan Quintela void ram_mig_init(void)
455056e93d26SJuan Quintela {
455156e93d26SJuan Quintela     qemu_mutex_init(&XBZRLE.lock);
4552ce62df53SDr. David Alan Gilbert     register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state);
4553c7c0e724SDavid Hildenbrand     ram_block_notifier_add(&ram_mig_ram_notifier);
455456e93d26SJuan Quintela }
4555