xref: /openbmc/qemu/migration/multifd-nocomp.c (revision 8bf6275f7e08ed8fea309ecda29c5da8837ed952)
1 /*
2  * Multifd RAM migration without compression
3  *
4  * Copyright (c) 2019-2020 Red Hat Inc
5  *
6  * Authors:
7  *  Juan Quintela <quintela@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "exec/ramblock.h"
15 #include "exec/target_page.h"
16 #include "file.h"
17 #include "multifd.h"
18 #include "options.h"
19 #include "qapi/error.h"
20 #include "qemu/cutils.h"
21 #include "qemu/error-report.h"
22 #include "trace.h"
23 #include "qemu-file.h"
24 
/*
 * Send data that multifd_queue_page() fills with RAM pages.  Allocated by
 * multifd_ram_save_setup() and released by multifd_ram_save_cleanup().
 */
static MultiFDSendData *multifd_ram_send;
26 
27 size_t multifd_ram_payload_size(void)
28 {
29     uint32_t n = multifd_ram_page_count();
30 
31     /*
32      * We keep an array of page offsets at the end of MultiFDPages_t,
33      * add space for it in the allocation.
34      */
35     return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
36 }
37 
38 void multifd_ram_save_setup(void)
39 {
40     multifd_ram_send = multifd_send_data_alloc();
41 }
42 
43 void multifd_ram_save_cleanup(void)
44 {
45     g_free(multifd_ram_send);
46     multifd_ram_send = NULL;
47 }
48 
49 static void multifd_set_file_bitmap(MultiFDSendParams *p)
50 {
51     MultiFDPages_t *pages = &p->data->u.ram;
52 
53     assert(pages->block);
54 
55     for (int i = 0; i < pages->normal_num; i++) {
56         ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
57     }
58 
59     for (int i = pages->normal_num; i < pages->num; i++) {
60         ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
61     }
62 }
63 
64 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
65 {
66     uint32_t page_count = multifd_ram_page_count();
67 
68     if (migrate_zero_copy_send()) {
69         p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
70     }
71 
72     if (!migrate_mapped_ram()) {
73         /* We need one extra place for the packet header */
74         p->iov = g_new0(struct iovec, page_count + 1);
75     } else {
76         p->iov = g_new0(struct iovec, page_count);
77     }
78 
79     return 0;
80 }
81 
82 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
83 {
84     g_free(p->iov);
85     p->iov = NULL;
86     return;
87 }
88 
89 static void multifd_send_prepare_iovs(MultiFDSendParams *p)
90 {
91     MultiFDPages_t *pages = &p->data->u.ram;
92     uint32_t page_size = multifd_ram_page_size();
93 
94     for (int i = 0; i < pages->normal_num; i++) {
95         p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
96         p->iov[p->iovs_num].iov_len = page_size;
97         p->iovs_num++;
98     }
99 
100     p->next_packet_size = pages->normal_num * page_size;
101 }
102 
103 static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
104 {
105     bool use_zero_copy_send = migrate_zero_copy_send();
106     int ret;
107 
108     multifd_send_zero_page_detect(p);
109 
110     if (migrate_mapped_ram()) {
111         multifd_send_prepare_iovs(p);
112         multifd_set_file_bitmap(p);
113 
114         return 0;
115     }
116 
117     if (!use_zero_copy_send) {
118         /*
119          * Only !zerocopy needs the header in IOV; zerocopy will
120          * send it separately.
121          */
122         multifd_send_prepare_header(p);
123     }
124 
125     multifd_send_prepare_iovs(p);
126     p->flags |= MULTIFD_FLAG_NOCOMP;
127 
128     multifd_send_fill_packet(p);
129 
130     if (use_zero_copy_send) {
131         /* Send header first, without zerocopy */
132         ret = qio_channel_write_all(p->c, (void *)p->packet,
133                                     p->packet_len, errp);
134         if (ret != 0) {
135             return -1;
136         }
137     }
138 
139     return 0;
140 }
141 
142 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
143 {
144     p->iov = g_new0(struct iovec, multifd_ram_page_count());
145     return 0;
146 }
147 
148 static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
149 {
150     g_free(p->iov);
151     p->iov = NULL;
152 }
153 
154 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
155 {
156     uint32_t flags;
157 
158     if (migrate_mapped_ram()) {
159         return multifd_file_recv_data(p, errp);
160     }
161 
162     flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
163 
164     if (flags != MULTIFD_FLAG_NOCOMP) {
165         error_setg(errp, "multifd %u: flags received %x flags expected %x",
166                    p->id, flags, MULTIFD_FLAG_NOCOMP);
167         return -1;
168     }
169 
170     multifd_recv_zero_page_process(p);
171 
172     if (!p->normal_num) {
173         return 0;
174     }
175 
176     for (int i = 0; i < p->normal_num; i++) {
177         p->iov[i].iov_base = p->host + p->normal[i];
178         p->iov[i].iov_len = multifd_ram_page_size();
179         ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
180     }
181     return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
182 }
183 
184 static void multifd_pages_reset(MultiFDPages_t *pages)
185 {
186     /*
187      * We don't need to touch offset[] array, because it will be
188      * overwritten later when reused.
189      */
190     pages->num = 0;
191     pages->normal_num = 0;
192     pages->block = NULL;
193 }
194 
195 void multifd_ram_fill_packet(MultiFDSendParams *p)
196 {
197     MultiFDPacket_t *packet = p->packet;
198     MultiFDPages_t *pages = &p->data->u.ram;
199     uint32_t zero_num = pages->num - pages->normal_num;
200 
201     packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
202     packet->normal_pages = cpu_to_be32(pages->normal_num);
203     packet->zero_pages = cpu_to_be32(zero_num);
204 
205     if (pages->block) {
206         pstrcpy(packet->ramblock, sizeof(packet->ramblock),
207                 pages->block->idstr);
208     }
209 
210     for (int i = 0; i < pages->num; i++) {
211         /* there are architectures where ram_addr_t is 32 bit */
212         uint64_t temp = pages->offset[i];
213 
214         packet->offset[i] = cpu_to_be64(temp);
215     }
216 
217     trace_multifd_send_ram_fill(p->id, pages->normal_num,
218                                 zero_num);
219 }
220 
221 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
222 {
223     MultiFDPacket_t *packet = p->packet;
224     uint32_t page_count = multifd_ram_page_count();
225     uint32_t page_size = multifd_ram_page_size();
226     uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
227     int i;
228 
229     if (pages_per_packet > page_count) {
230         error_setg(errp, "multifd: received packet with %u pages, expected %u",
231                    pages_per_packet, page_count);
232         return -1;
233     }
234 
235     p->normal_num = be32_to_cpu(packet->normal_pages);
236     if (p->normal_num > pages_per_packet) {
237         error_setg(errp, "multifd: received packet with %u non-zero pages, "
238                    "which exceeds maximum expected pages %u",
239                    p->normal_num, pages_per_packet);
240         return -1;
241     }
242 
243     p->zero_num = be32_to_cpu(packet->zero_pages);
244     if (p->zero_num > pages_per_packet - p->normal_num) {
245         error_setg(errp,
246                    "multifd: received packet with %u zero pages, expected maximum %u",
247                    p->zero_num, pages_per_packet - p->normal_num);
248         return -1;
249     }
250 
251     if (p->normal_num == 0 && p->zero_num == 0) {
252         return 0;
253     }
254 
255     /* make sure that ramblock is 0 terminated */
256     packet->ramblock[255] = 0;
257     p->block = qemu_ram_block_by_name(packet->ramblock);
258     if (!p->block) {
259         error_setg(errp, "multifd: unknown ram block %s",
260                    packet->ramblock);
261         return -1;
262     }
263 
264     p->host = p->block->host;
265     for (i = 0; i < p->normal_num; i++) {
266         uint64_t offset = be64_to_cpu(packet->offset[i]);
267 
268         if (offset > (p->block->used_length - page_size)) {
269             error_setg(errp, "multifd: offset too long %" PRIu64
270                        " (max " RAM_ADDR_FMT ")",
271                        offset, p->block->used_length);
272             return -1;
273         }
274         p->normal[i] = offset;
275     }
276 
277     for (i = 0; i < p->zero_num; i++) {
278         uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
279 
280         if (offset > (p->block->used_length - page_size)) {
281             error_setg(errp, "multifd: offset too long %" PRIu64
282                        " (max " RAM_ADDR_FMT ")",
283                        offset, p->block->used_length);
284             return -1;
285         }
286         p->zero[i] = offset;
287     }
288 
289     return 0;
290 }
291 
292 static inline bool multifd_queue_empty(MultiFDPages_t *pages)
293 {
294     return pages->num == 0;
295 }
296 
297 static inline bool multifd_queue_full(MultiFDPages_t *pages)
298 {
299     return pages->num == multifd_ram_page_count();
300 }
301 
302 static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
303 {
304     pages->offset[pages->num++] = offset;
305 }
306 
307 /* Returns true if enqueue successful, false otherwise */
308 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
309 {
310     MultiFDPages_t *pages;
311 
312 retry:
313     pages = &multifd_ram_send->u.ram;
314 
315     if (multifd_payload_empty(multifd_ram_send)) {
316         multifd_pages_reset(pages);
317         multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
318     }
319 
320     /* If the queue is empty, we can already enqueue now */
321     if (multifd_queue_empty(pages)) {
322         pages->block = block;
323         multifd_enqueue(pages, offset);
324         return true;
325     }
326 
327     /*
328      * Not empty, meanwhile we need a flush.  It can because of either:
329      *
330      * (1) The page is not on the same ramblock of previous ones, or,
331      * (2) The queue is full.
332      *
333      * After flush, always retry.
334      */
335     if (pages->block != block || multifd_queue_full(pages)) {
336         if (!multifd_send(&multifd_ram_send)) {
337             return false;
338         }
339         goto retry;
340     }
341 
342     /* Not empty, and we still have space, do it! */
343     multifd_enqueue(pages, offset);
344     return true;
345 }
346 
347 /*
348  * We have two modes for multifd flushes:
349  *
350  * - Per-section mode: this is the legacy way to flush, it requires one
351  *   MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
352  *
353  * - Per-round mode: this is the modern way to flush, it requires one
354  *   MULTIFD_FLAG_SYNC message only for each round of RAM scan.  Normally
355  *   it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network
356  *   based migrations.
357  *
 * Note that mapped-ram always uses the modern way to sync.
359  */
360 
/*
 * Do we need a per-section multifd flush (legacy way)?
 *
 * Only when multifd is enabled, mapped-ram is not in use, and flushing
 * after each section has been requested.
 */
bool multifd_ram_sync_per_section(void)
{
    return migrate_multifd() &&
           !migrate_mapped_ram() &&
           migrate_multifd_flush_after_each_section();
}
374 
/*
 * Do we need a per-round multifd flush (modern way)?
 *
 * Never without multifd.  With multifd, always for mapped-ram, and
 * otherwise whenever flushing after each section is not requested.
 */
bool multifd_ram_sync_per_round(void)
{
    if (!migrate_multifd()) {
        return false;
    }

    return migrate_mapped_ram() ||
           !migrate_multifd_flush_after_each_section();
}
388 
389 int multifd_ram_flush_and_sync(QEMUFile *f)
390 {
391     MultiFDSyncReq req;
392     int ret;
393 
394     if (!migrate_multifd()) {
395         return 0;
396     }
397 
398     if (!multifd_payload_empty(multifd_ram_send)) {
399         if (!multifd_send(&multifd_ram_send)) {
400             error_report("%s: multifd_send fail", __func__);
401             return -1;
402         }
403     }
404 
405     /* File migrations only need to sync with threads */
406     req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;
407 
408     ret = multifd_send_sync_main(req);
409     if (ret) {
410         return ret;
411     }
412 
413     /* If we don't need to sync with remote at all, nothing else to do */
414     if (req == MULTIFD_SYNC_LOCAL) {
415         return 0;
416     }
417 
418     /*
419      * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies
420      * on RAM_SAVE_FLAG_EOS instead.
421      */
422     if (migrate_multifd_flush_after_each_section()) {
423         return 0;
424     }
425 
426     qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
427     qemu_fflush(f);
428 
429     return 0;
430 }
431 
432 bool multifd_send_prepare_common(MultiFDSendParams *p)
433 {
434     MultiFDPages_t *pages = &p->data->u.ram;
435     multifd_send_prepare_header(p);
436     multifd_send_zero_page_detect(p);
437 
438     if (!pages->normal_num) {
439         p->next_packet_size = 0;
440         return false;
441     }
442 
443     return true;
444 }
445 
446 static const MultiFDMethods multifd_nocomp_ops = {
447     .send_setup = multifd_nocomp_send_setup,
448     .send_cleanup = multifd_nocomp_send_cleanup,
449     .send_prepare = multifd_nocomp_send_prepare,
450     .recv_setup = multifd_nocomp_recv_setup,
451     .recv_cleanup = multifd_nocomp_recv_cleanup,
452     .recv = multifd_nocomp_recv
453 };
454 
455 static void multifd_nocomp_register(void)
456 {
457     multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
458 }
459 
460 migration_init(multifd_nocomp_register);
461