xref: /openbmc/qemu/migration/multifd-nocomp.c (revision fca2817fdcb00e65020c2dcfcb0b23b2a20ea3c4)
1 /*
2  * Multifd RAM migration without compression
3  *
4  * Copyright (c) 2019-2020 Red Hat Inc
5  *
6  * Authors:
7  *  Juan Quintela <quintela@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "exec/ramblock.h"
15 #include "exec/target_page.h"
16 #include "file.h"
17 #include "migration-stats.h"
18 #include "multifd.h"
19 #include "options.h"
20 #include "qapi/error.h"
21 #include "qemu/cutils.h"
22 #include "qemu/error-report.h"
23 #include "trace.h"
24 #include "qemu-file.h"
25 
/*
 * Send-side staging data for RAM pages.  It is allocated in
 * multifd_ram_save_setup() and released in multifd_ram_save_cleanup();
 * multifd_queue_page() fills its u.ram member, and its address is passed
 * to multifd_send() whenever the queued pages have to be flushed.
 */
static MultiFDSendData *multifd_ram_send;
27 
28 void multifd_ram_payload_alloc(MultiFDPages_t *pages)
29 {
30     pages->offset = g_new0(ram_addr_t, multifd_ram_page_count());
31 }
32 
33 void multifd_ram_payload_free(MultiFDPages_t *pages)
34 {
35     g_clear_pointer(&pages->offset, g_free);
36 }
37 
38 void multifd_ram_save_setup(void)
39 {
40     multifd_ram_send = multifd_send_data_alloc();
41 }
42 
43 void multifd_ram_save_cleanup(void)
44 {
45     g_clear_pointer(&multifd_ram_send, multifd_send_data_free);
46 }
47 
48 static void multifd_set_file_bitmap(MultiFDSendParams *p)
49 {
50     MultiFDPages_t *pages = &p->data->u.ram;
51 
52     assert(pages->block);
53 
54     for (int i = 0; i < pages->normal_num; i++) {
55         ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
56     }
57 
58     for (int i = pages->normal_num; i < pages->num; i++) {
59         ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
60     }
61 }
62 
63 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
64 {
65     uint32_t page_count = multifd_ram_page_count();
66 
67     if (migrate_zero_copy_send()) {
68         p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
69     }
70 
71     if (!migrate_mapped_ram()) {
72         /* We need one extra place for the packet header */
73         p->iov = g_new0(struct iovec, page_count + 1);
74     } else {
75         p->iov = g_new0(struct iovec, page_count);
76     }
77 
78     return 0;
79 }
80 
81 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
82 {
83     g_free(p->iov);
84     p->iov = NULL;
85     return;
86 }
87 
88 static void multifd_ram_prepare_header(MultiFDSendParams *p)
89 {
90     p->iov[0].iov_len = p->packet_len;
91     p->iov[0].iov_base = p->packet;
92     p->iovs_num++;
93 }
94 
95 static void multifd_send_prepare_iovs(MultiFDSendParams *p)
96 {
97     MultiFDPages_t *pages = &p->data->u.ram;
98     uint32_t page_size = multifd_ram_page_size();
99 
100     for (int i = 0; i < pages->normal_num; i++) {
101         p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
102         p->iov[p->iovs_num].iov_len = page_size;
103         p->iovs_num++;
104     }
105 
106     p->next_packet_size = pages->normal_num * page_size;
107 }
108 
109 static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
110 {
111     bool use_zero_copy_send = migrate_zero_copy_send();
112     int ret;
113 
114     multifd_send_zero_page_detect(p);
115 
116     if (migrate_mapped_ram()) {
117         multifd_send_prepare_iovs(p);
118         multifd_set_file_bitmap(p);
119 
120         return 0;
121     }
122 
123     if (!use_zero_copy_send) {
124         /*
125          * Only !zerocopy needs the header in IOV; zerocopy will
126          * send it separately.
127          */
128         multifd_ram_prepare_header(p);
129     }
130 
131     multifd_send_prepare_iovs(p);
132     p->flags |= MULTIFD_FLAG_NOCOMP;
133 
134     multifd_send_fill_packet(p);
135 
136     if (use_zero_copy_send) {
137         /* Send header first, without zerocopy */
138         ret = qio_channel_write_all(p->c, (void *)p->packet,
139                                     p->packet_len, errp);
140         if (ret != 0) {
141             return -1;
142         }
143 
144         stat64_add(&mig_stats.multifd_bytes, p->packet_len);
145     }
146 
147     return 0;
148 }
149 
150 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
151 {
152     p->iov = g_new0(struct iovec, multifd_ram_page_count());
153     return 0;
154 }
155 
156 static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
157 {
158     g_free(p->iov);
159     p->iov = NULL;
160 }
161 
162 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
163 {
164     uint32_t flags;
165 
166     if (migrate_mapped_ram()) {
167         return multifd_file_recv_data(p, errp);
168     }
169 
170     flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
171 
172     if (flags != MULTIFD_FLAG_NOCOMP) {
173         error_setg(errp, "multifd %u: flags received %x flags expected %x",
174                    p->id, flags, MULTIFD_FLAG_NOCOMP);
175         return -1;
176     }
177 
178     multifd_recv_zero_page_process(p);
179 
180     if (!p->normal_num) {
181         return 0;
182     }
183 
184     for (int i = 0; i < p->normal_num; i++) {
185         p->iov[i].iov_base = p->host + p->normal[i];
186         p->iov[i].iov_len = multifd_ram_page_size();
187         ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
188     }
189     return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
190 }
191 
192 static void multifd_pages_reset(MultiFDPages_t *pages)
193 {
194     /*
195      * We don't need to touch offset[] array, because it will be
196      * overwritten later when reused.
197      */
198     pages->num = 0;
199     pages->normal_num = 0;
200     pages->block = NULL;
201 }
202 
203 void multifd_ram_fill_packet(MultiFDSendParams *p)
204 {
205     MultiFDPacket_t *packet = p->packet;
206     MultiFDPages_t *pages = &p->data->u.ram;
207     uint32_t zero_num = pages->num - pages->normal_num;
208 
209     packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
210     packet->normal_pages = cpu_to_be32(pages->normal_num);
211     packet->zero_pages = cpu_to_be32(zero_num);
212 
213     if (pages->block) {
214         pstrcpy(packet->ramblock, sizeof(packet->ramblock),
215                 pages->block->idstr);
216     }
217 
218     for (int i = 0; i < pages->num; i++) {
219         /* there are architectures where ram_addr_t is 32 bit */
220         uint64_t temp = pages->offset[i];
221 
222         packet->offset[i] = cpu_to_be64(temp);
223     }
224 
225     trace_multifd_send_ram_fill(p->id, pages->normal_num,
226                                 zero_num);
227 }
228 
229 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
230 {
231     MultiFDPacket_t *packet = p->packet;
232     uint32_t page_count = multifd_ram_page_count();
233     uint32_t page_size = multifd_ram_page_size();
234     uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
235     int i;
236 
237     if (pages_per_packet > page_count) {
238         error_setg(errp, "multifd: received packet with %u pages, expected %u",
239                    pages_per_packet, page_count);
240         return -1;
241     }
242 
243     p->normal_num = be32_to_cpu(packet->normal_pages);
244     if (p->normal_num > pages_per_packet) {
245         error_setg(errp, "multifd: received packet with %u non-zero pages, "
246                    "which exceeds maximum expected pages %u",
247                    p->normal_num, pages_per_packet);
248         return -1;
249     }
250 
251     p->zero_num = be32_to_cpu(packet->zero_pages);
252     if (p->zero_num > pages_per_packet - p->normal_num) {
253         error_setg(errp,
254                    "multifd: received packet with %u zero pages, expected maximum %u",
255                    p->zero_num, pages_per_packet - p->normal_num);
256         return -1;
257     }
258 
259     if (p->normal_num == 0 && p->zero_num == 0) {
260         return 0;
261     }
262 
263     /* make sure that ramblock is 0 terminated */
264     packet->ramblock[255] = 0;
265     p->block = qemu_ram_block_by_name(packet->ramblock);
266     if (!p->block) {
267         error_setg(errp, "multifd: unknown ram block %s",
268                    packet->ramblock);
269         return -1;
270     }
271 
272     p->host = p->block->host;
273     for (i = 0; i < p->normal_num; i++) {
274         uint64_t offset = be64_to_cpu(packet->offset[i]);
275 
276         if (offset > (p->block->used_length - page_size)) {
277             error_setg(errp, "multifd: offset too long %" PRIu64
278                        " (max " RAM_ADDR_FMT ")",
279                        offset, p->block->used_length);
280             return -1;
281         }
282         p->normal[i] = offset;
283     }
284 
285     for (i = 0; i < p->zero_num; i++) {
286         uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
287 
288         if (offset > (p->block->used_length - page_size)) {
289             error_setg(errp, "multifd: offset too long %" PRIu64
290                        " (max " RAM_ADDR_FMT ")",
291                        offset, p->block->used_length);
292             return -1;
293         }
294         p->zero[i] = offset;
295     }
296 
297     return 0;
298 }
299 
300 static inline bool multifd_queue_empty(MultiFDPages_t *pages)
301 {
302     return pages->num == 0;
303 }
304 
305 static inline bool multifd_queue_full(MultiFDPages_t *pages)
306 {
307     return pages->num == multifd_ram_page_count();
308 }
309 
310 static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
311 {
312     pages->offset[pages->num++] = offset;
313 }
314 
315 /* Returns true if enqueue successful, false otherwise */
316 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
317 {
318     MultiFDPages_t *pages;
319 
320 retry:
321     pages = &multifd_ram_send->u.ram;
322 
323     if (multifd_payload_empty(multifd_ram_send)) {
324         multifd_pages_reset(pages);
325         multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
326     }
327 
328     /* If the queue is empty, we can already enqueue now */
329     if (multifd_queue_empty(pages)) {
330         pages->block = block;
331         multifd_enqueue(pages, offset);
332         return true;
333     }
334 
335     /*
336      * Not empty, meanwhile we need a flush.  It can because of either:
337      *
338      * (1) The page is not on the same ramblock of previous ones, or,
339      * (2) The queue is full.
340      *
341      * After flush, always retry.
342      */
343     if (pages->block != block || multifd_queue_full(pages)) {
344         if (!multifd_send(&multifd_ram_send)) {
345             return false;
346         }
347         goto retry;
348     }
349 
350     /* Not empty, and we still have space, do it! */
351     multifd_enqueue(pages, offset);
352     return true;
353 }
354 
355 /*
356  * We have two modes for multifd flushes:
357  *
358  * - Per-section mode: this is the legacy way to flush, it requires one
359  *   MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
360  *
361  * - Per-round mode: this is the modern way to flush, it requires one
362  *   MULTIFD_FLAG_SYNC message only for each round of RAM scan.  Normally
363  *   it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network
364  *   based migrations.
365  *
 * Note that mapped-ram always uses the modern way to sync.
367  */
368 
/*
 * Do we need a per-section multifd flush (legacy way)?
 *
 * Only when multifd is enabled, mapped-ram is not in use and the
 * flush-after-each-section option is set.
 */
bool multifd_ram_sync_per_section(void)
{
    return migrate_multifd() &&
           !migrate_mapped_ram() &&
           migrate_multifd_flush_after_each_section();
}
382 
/*
 * Do we need a per-round multifd flush (modern way)?
 *
 * With multifd enabled this is always the case for mapped-ram, and
 * otherwise whenever the flush-after-each-section option is not set.
 */
bool multifd_ram_sync_per_round(void)
{
    return migrate_multifd() &&
           (migrate_mapped_ram() ||
            !migrate_multifd_flush_after_each_section());
}
396 
397 int multifd_ram_flush_and_sync(QEMUFile *f)
398 {
399     MultiFDSyncReq req;
400     int ret;
401 
402     if (!migrate_multifd()) {
403         return 0;
404     }
405 
406     if (!multifd_payload_empty(multifd_ram_send)) {
407         if (!multifd_send(&multifd_ram_send)) {
408             error_report("%s: multifd_send fail", __func__);
409             return -1;
410         }
411     }
412 
413     /* File migrations only need to sync with threads */
414     req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;
415 
416     ret = multifd_send_sync_main(req);
417     if (ret) {
418         return ret;
419     }
420 
421     /* If we don't need to sync with remote at all, nothing else to do */
422     if (req == MULTIFD_SYNC_LOCAL) {
423         return 0;
424     }
425 
426     /*
427      * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies
428      * on RAM_SAVE_FLAG_EOS instead.
429      */
430     if (migrate_multifd_flush_after_each_section()) {
431         return 0;
432     }
433 
434     qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
435     qemu_fflush(f);
436 
437     return 0;
438 }
439 
440 bool multifd_send_prepare_common(MultiFDSendParams *p)
441 {
442     MultiFDPages_t *pages = &p->data->u.ram;
443     multifd_ram_prepare_header(p);
444     multifd_send_zero_page_detect(p);
445 
446     if (!pages->normal_num) {
447         p->next_packet_size = 0;
448         return false;
449     }
450 
451     return true;
452 }
453 
454 static const MultiFDMethods multifd_nocomp_ops = {
455     .send_setup = multifd_nocomp_send_setup,
456     .send_cleanup = multifd_nocomp_send_cleanup,
457     .send_prepare = multifd_nocomp_send_prepare,
458     .recv_setup = multifd_nocomp_recv_setup,
459     .recv_cleanup = multifd_nocomp_recv_cleanup,
460     .recv = multifd_nocomp_recv
461 };
462 
/*
 * Register the no-compression method table under
 * MULTIFD_COMPRESSION_NONE.
 */
static void multifd_nocomp_register(void)
{
    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
}

/* Hook the registration function up through migration_init(). */
migration_init(multifd_nocomp_register);
469